001/* 002 * (C) Copyright 2002-2007 Nuxeo SAS (http://nuxeo.com/) and contributors. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the GNU Lesser General Public License 006 * (LGPL) version 2.1 which accompanies this distribution, and is available at 007 * http://www.gnu.org/licenses/lgpl.html 008 * 009 * This library is distributed in the hope that it will be useful, 010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 012 * Lesser General Public License for more details. 013 * 014 * Contributors: 015 * Nuxeo - initial API and implementation 016 * 017 */ 018package org.nuxeo.ecm.core.convert.plugins.text.extractors; 019 020import java.io.IOException; 021import java.io.InputStream; 022import java.io.Serializable; 023import java.util.Iterator; 024import java.util.Map; 025 026import org.apache.commons.logging.Log; 027import org.apache.commons.logging.LogFactory; 028import org.apache.poi.hssf.usermodel.HSSFCell; 029import org.apache.poi.hssf.usermodel.HSSFRow; 030import org.apache.poi.hssf.usermodel.HSSFSheet; 031import org.apache.poi.hssf.usermodel.HSSFWorkbook; 032import org.apache.poi.poifs.filesystem.POIFSFileSystem; 033import org.apache.poi.ss.usermodel.Row; 034import org.nuxeo.ecm.core.api.Blobs; 035import org.nuxeo.ecm.core.api.blobholder.BlobHolder; 036import org.nuxeo.ecm.core.convert.api.ConversionException; 037import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder; 038import org.nuxeo.ecm.core.convert.extension.Converter; 039import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor; 040 041public class XL2TextConverter implements Converter { 042 043 private static final Log log = LogFactory.getLog(XL2TextConverter.class); 044 045 private static final String CELL_SEP = " "; 046 047 private static final String ROW_SEP = "\n\n"; 048 049 @Override 050 public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException { 051 052 InputStream stream = null; 053 StringBuffer sb = new StringBuffer(); 054 try { 055 stream = blobHolder.getBlob().getStream(); 056 POIFSFileSystem fs = new POIFSFileSystem(stream); 057 HSSFWorkbook workbook = new HSSFWorkbook(fs); 058 for (int i = 0; i < workbook.getNumberOfSheets(); i++) { 059 HSSFSheet sheet = workbook.getSheetAt(i); 060 Iterator<Row> rows = sheet.rowIterator(); 061 while (rows.hasNext()) { 062 HSSFRow row = (HSSFRow) rows.next(); 063 Iterator<?> cells = row.cellIterator(); 064 while (cells.hasNext()) { 065 HSSFCell cell = (HSSFCell) cells.next(); 066 appendTextFromCell(cell, sb); 067 sb.append(CELL_SEP); 068 } 069 sb.append(ROW_SEP); 070 } 071 } 072 return new SimpleCachableBlobHolder(Blobs.createBlob(sb.toString())); 073 } catch (IOException e) { 074 throw new ConversionException("Error during XL2Text conversion", e); 075 } finally { 076 if (stream != null) { 077 try { 078 stream.close(); 079 } catch (IOException e) { 080 log.error("Error while closing Blob stream", e); 081 } 082 } 083 } 084 } 085 086 protected void appendTextFromCell(HSSFCell cell, StringBuffer sb) { 087 String cellValue = null; 088 switch (cell.getCellType()) { 089 case HSSFCell.CELL_TYPE_NUMERIC: 090 cellValue = Double.toString(cell.getNumericCellValue()).trim(); 091 break; 092 case HSSFCell.CELL_TYPE_STRING: 093 cellValue = cell.getStringCellValue().trim().replaceAll("\n", " "); 094 break; 095 } 096 097 if (cellValue != null && cellValue.length() > 0) { 098 sb.append(cellValue); 099 } 100 } 101 102 @Override 103 public void init(ConverterDescriptor descriptor) { 104 } 105 106}