/*
 * (C) Copyright 2002-2007 Nuxeo SAS (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 */
package org.nuxeo.ecm.core.convert.plugins.text.extractors;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;

/**
 * Converter that extracts plain text from a spreadsheet read through POI's {@link XSSFWorkbook}.
 * <p>
 * Every sheet is walked row by row; the trimmed text of each numeric or string cell is appended to the output,
 * followed by {@link #CELL_SEP}, and each row is terminated by {@link #ROW_SEP}. Cells of any other type are
 * ignored.
 */
public class XLX2TextConverter extends BaseOfficeXMLTextConverter implements Converter {

    private static final Log log = LogFactory.getLog(XLX2TextConverter.class);

    /**
     * Separator appended after each non-empty cell value. It must not be empty, otherwise the text of adjacent
     * cells is glued together into a single token.
     */
    private static final String CELL_SEP = " ";

    /** Separator appended after each row. */
    private static final String ROW_SEP = "\n";

    /**
     * Extracts the text of all sheets of the blob held by {@code blobHolder}.
     * <p>
     * When the blob is longer than {@code maxSize4POI}, the work is delegated to {@code runFallBackConverter}
     * instead of being parsed with POI.
     *
     * @param blobHolder holder of the spreadsheet blob to convert
     * @param parameters conversion parameters (not read by this implementation)
     * @return a blob holder wrapping the extracted text
     * @throws ConversionException if the blob cannot be read or parsed
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {

        InputStream stream = null;
        OPCPackage pkg = null;
        // Kept as StringBuffer because appendTextFromCell's protected signature takes one.
        StringBuffer sb = new StringBuffer();

        try {
            Blob blob = blobHolder.getBlob();

            if (blob.getLength() > maxSize4POI) {
                return runFallBackConverter(blobHolder, "xl/");
            }

            stream = blob.getStream();

            pkg = OPCPackage.open(stream);
            XSSFWorkbook workbook = new XSSFWorkbook(pkg);
            for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
                XSSFSheet sheet = workbook.getSheetAt(i);
                Iterator<Row> rows = sheet.rowIterator();
                while (rows.hasNext()) {
                    XSSFRow row = (XSSFRow) rows.next();
                    Iterator<Cell> cells = row.cellIterator();
                    while (cells.hasNext()) {
                        XSSFCell cell = (XSSFCell) cells.next();
                        appendTextFromCell(cell, sb);
                    }
                    sb.append(ROW_SEP);
                }
            }
            return new SimpleCachableBlobHolder(Blobs.createBlob(sb.toString()));
        } catch (IOException | OpenXML4JException e) {
            throw new ConversionException("Error during XLX2Text conversion", e);
        } finally {
            if (pkg != null) {
                // The package is only read here: release it without attempting to save anything.
                pkg.revert();
            }
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException e) {
                    // Best effort: a failure to close must not fail an otherwise successful conversion.
                    log.error("Error while closing Blob stream", e);
                }
            }
        }
    }

    /**
     * Appends the trimmed text of {@code cell} to {@code sb}, followed by {@link #CELL_SEP}.
     * <p>
     * Only numeric cells (rendered with {@link Double#toString(double)}) and string cells contribute; nothing is
     * appended for other cell types or when the resulting text is empty.
     *
     * @param cell the cell to read
     * @param sb the buffer receiving the extracted text
     */
    protected void appendTextFromCell(XSSFCell cell, StringBuffer sb) {
        String cellValue = null;
        switch (cell.getCellType()) {
        case XSSFCell.CELL_TYPE_NUMERIC:
            cellValue = Double.toString(cell.getNumericCellValue()).trim();
            break;
        case XSSFCell.CELL_TYPE_STRING:
            cellValue = cell.getStringCellValue().trim();
            break;
        }

        if (cellValue != null && cellValue.length() > 0) {
            sb.append(cellValue).append(CELL_SEP);
        }
    }

}