/*
 * (C) Copyright 2006-2007 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 */
package org.nuxeo.ecm.core.convert.plugins.text.extractors;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;

/**
 * Converter that extracts the text content of an OOXML spreadsheet using Apache POI.
 * <p>
 * Every sheet of the workbook is visited in index order; the text of each numeric or string cell is appended to the
 * output, cells being separated by {@link #CELL_SEP} and rows terminated by {@link #ROW_SEP}.
 */
public class XLX2TextConverter extends BaseOfficeXMLTextConverter implements Converter {

    private static final Log log = LogFactory.getLog(XLX2TextConverter.class);

    /** Appended after every non-empty cell value so that adjacent cells do not run together in the output. */
    private static final String CELL_SEP = " ";

    /** Appended after every row. */
    private static final String ROW_SEP = "\n";

    /**
     * Extracts the text of the spreadsheet held by {@code blobHolder}.
     * <p>
     * When the blob is longer than the inherited {@code maxSize4POI} limit, the work is delegated to the inherited
     * {@code runFallBackConverter} instead of being parsed with POI.
     *
     * @param blobHolder holder of the spreadsheet blob to convert
     * @param parameters conversion parameters; not read by this implementation
     * @return a blob holder wrapping the extracted text
     * @throws ConversionException if the blob cannot be read or parsed as an OOXML package
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {

        InputStream stream = null;
        OPCPackage pkg = null;
        StringBuffer sb = new StringBuffer();

        try {
            Blob blob = blobHolder.getBlob();

            if (blob.getLength() > maxSize4POI) {
                return runFallBackConverter(blobHolder, "xl/");
            }

            stream = blob.getStream();

            pkg = OPCPackage.open(stream);
            XSSFWorkbook workbook = new XSSFWorkbook(pkg);
            for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
                XSSFSheet sheet = workbook.getSheetAt(i);
                Iterator<Row> rows = sheet.rowIterator();
                while (rows.hasNext()) {
                    XSSFRow row = (XSSFRow) rows.next();
                    Iterator<Cell> cells = row.cellIterator();
                    while (cells.hasNext()) {
                        XSSFCell cell = (XSSFCell) cells.next();
                        appendTextFromCell(cell, sb);
                    }
                    sb.append(ROW_SEP);
                }
            }
            return new SimpleCachableBlobHolder(Blobs.createBlob(sb.toString()));
        } catch (IOException | OpenXML4JException e) {
            throw new ConversionException("Error during XLX2Text conversion", e);
        } finally {
            try {
                if (pkg != null) {
                    // The package is only read here: release it without saving anything back.
                    pkg.revert();
                }
            } finally {
                // Runs even if releasing the package fails, so the blob stream is always closed.
                closeStream(stream);
            }
        }
    }

    /**
     * Closes the blob stream, logging (not propagating) any failure so that a close error never masks the
     * conversion result.
     */
    private static void closeStream(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            log.error("Error while closing Blob stream", e);
        }
    }

    /**
     * Appends the text of {@code cell}, followed by the cell separator, to {@code sb}.
     * <p>
     * Only numeric and string cells produce text; a cell of any other type, or one whose text is empty, appends
     * nothing.
     *
     * @param cell the cell to read
     * @param sb the buffer receiving the extracted text
     */
    protected void appendTextFromCell(XSSFCell cell, StringBuffer sb) {
        String cellValue = null;
        switch (cell.getCellType()) {
        case XSSFCell.CELL_TYPE_NUMERIC:
            cellValue = Double.toString(cell.getNumericCellValue());
            break;
        case XSSFCell.CELL_TYPE_STRING:
            cellValue = cell.getStringCellValue().trim();
            break;
        default:
            // Other cell types contribute no text.
            break;
        }

        if (cellValue != null && cellValue.length() > 0) {
            sb.append(cellValue).append(CELL_SEP);
        }
    }

}