/*
 * (C) Copyright 2006-2007 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 */
package org.nuxeo.ecm.core.convert.plugins.text.extractors;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Row;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;

/**
 * {@link Converter} that reads a workbook through the POI HSSF API and flattens the content of its numeric and
 * string cells into a single plain-text blob.
 * <p>
 * Every visited cell is followed by {@link #CELL_SEP} and every visited row by {@link #ROW_SEP}, whether or not the
 * cell contributed any text.
 */
public class XL2TextConverter implements Converter {

    private static final Log log = LogFactory.getLog(XL2TextConverter.class);

    /** Appended after each cell visited in a row. */
    private static final String CELL_SEP = " ";

    /** Appended after each row visited in a sheet. */
    private static final String ROW_SEP = "\n\n";

    /**
     * Extracts the text of every sheet of the workbook held by {@code blobHolder}.
     *
     * @param blobHolder holder whose blob stream is parsed as an HSSF workbook
     * @param parameters conversion parameters; not read by this implementation
     * @return a {@link SimpleCachableBlobHolder} wrapping a blob created from the extracted text
     * @throws ConversionException if an {@link IOException} occurs while reading the workbook
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
        StringBuffer text = new StringBuffer();
        InputStream in = null;
        try {
            in = blobHolder.getBlob().getStream();
            HSSFWorkbook workbook = new HSSFWorkbook(new POIFSFileSystem(in));
            for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
                appendSheetText(workbook.getSheetAt(sheetIndex), text);
            }
            return new SimpleCachableBlobHolder(Blobs.createBlob(text.toString()));
        } catch (IOException e) {
            throw new ConversionException("Error during XL2Text conversion", e);
        } finally {
            // A failure to close is only logged so that it never replaces the conversion outcome.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    log.error("Error while closing Blob stream", e);
                }
            }
        }
    }

    /**
     * Appends the text of all rows of {@code sheet} to {@code sb}, in iterator order.
     */
    private void appendSheetText(HSSFSheet sheet, StringBuffer sb) {
        for (Iterator<Row> rowIt = sheet.rowIterator(); rowIt.hasNext();) {
            HSSFRow row = (HSSFRow) rowIt.next();
            for (Iterator<?> cellIt = row.cellIterator(); cellIt.hasNext();) {
                appendTextFromCell((HSSFCell) cellIt.next(), sb);
                sb.append(CELL_SEP);
            }
            sb.append(ROW_SEP);
        }
    }

    /**
     * Appends the textual value of {@code cell} to {@code sb}.
     * <p>
     * Only numeric and string cells produce text; cells of any other type are skipped, as are string cells that are
     * empty once trimmed.
     *
     * @param cell the cell to read
     * @param sb the buffer receiving the text
     */
    protected void appendTextFromCell(HSSFCell cell, StringBuffer sb) {
        final int type = cell.getCellType();
        final String value;
        if (type == HSSFCell.CELL_TYPE_NUMERIC) {
            // The decimal rendering of a double never carries surrounding whitespace, so no trimming is needed.
            value = String.valueOf(cell.getNumericCellValue());
        } else if (type == HSSFCell.CELL_TYPE_STRING) {
            // Line feeds inside a cell are flattened to spaces.
            value = cell.getStringCellValue().trim().replace('\n', ' ');
        } else {
            return;
        }

        if (!value.isEmpty()) {
            sb.append(value);
        }
    }

    /**
     * No-op: this converter does not read anything from its descriptor.
     */
    @Override
    public void init(ConverterDescriptor descriptor) {
        // nothing to initialise
    }

}