001/*
002 * (C) Copyright 2002-2007 Nuxeo SAS (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 * Contributors:
015 *     Nuxeo - initial API and implementation
016 *
017 */
018package org.nuxeo.ecm.core.convert.plugins.text.extractors;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.Serializable;
023import java.util.Iterator;
024import java.util.Map;
025
026import org.apache.commons.logging.Log;
027import org.apache.commons.logging.LogFactory;
028import org.apache.poi.hssf.usermodel.HSSFCell;
029import org.apache.poi.hssf.usermodel.HSSFRow;
030import org.apache.poi.hssf.usermodel.HSSFSheet;
031import org.apache.poi.hssf.usermodel.HSSFWorkbook;
032import org.apache.poi.poifs.filesystem.POIFSFileSystem;
033import org.apache.poi.ss.usermodel.Row;
034import org.nuxeo.ecm.core.api.Blobs;
035import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
036import org.nuxeo.ecm.core.convert.api.ConversionException;
037import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
038import org.nuxeo.ecm.core.convert.extension.Converter;
039import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
040
041public class XL2TextConverter implements Converter {
042
043    private static final Log log = LogFactory.getLog(XL2TextConverter.class);
044
045    private static final String CELL_SEP = " ";
046
047    private static final String ROW_SEP = "\n\n";
048
049    @Override
050    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
051
052        InputStream stream = null;
053        StringBuffer sb = new StringBuffer();
054        try {
055            stream = blobHolder.getBlob().getStream();
056            POIFSFileSystem fs = new POIFSFileSystem(stream);
057            HSSFWorkbook workbook = new HSSFWorkbook(fs);
058            for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
059                HSSFSheet sheet = workbook.getSheetAt(i);
060                Iterator<Row> rows = sheet.rowIterator();
061                while (rows.hasNext()) {
062                    HSSFRow row = (HSSFRow) rows.next();
063                    Iterator<?> cells = row.cellIterator();
064                    while (cells.hasNext()) {
065                        HSSFCell cell = (HSSFCell) cells.next();
066                        appendTextFromCell(cell, sb);
067                        sb.append(CELL_SEP);
068                    }
069                    sb.append(ROW_SEP);
070                }
071            }
072            return new SimpleCachableBlobHolder(Blobs.createBlob(sb.toString()));
073        } catch (IOException e) {
074            throw new ConversionException("Error during XL2Text conversion", e);
075        } finally {
076            if (stream != null) {
077                try {
078                    stream.close();
079                } catch (IOException e) {
080                    log.error("Error while closing Blob stream", e);
081                }
082            }
083        }
084    }
085
086    protected void appendTextFromCell(HSSFCell cell, StringBuffer sb) {
087        String cellValue = null;
088        switch (cell.getCellType()) {
089        case HSSFCell.CELL_TYPE_NUMERIC:
090            cellValue = Double.toString(cell.getNumericCellValue()).trim();
091            break;
092        case HSSFCell.CELL_TYPE_STRING:
093            cellValue = cell.getStringCellValue().trim().replaceAll("\n", " ");
094            break;
095        }
096
097        if (cellValue != null && cellValue.length() > 0) {
098            sb.append(cellValue);
099        }
100    }
101
102    @Override
103    public void init(ConverterDescriptor descriptor) {
104    }
105
106}