001/*
002 * (C) Copyright 2006-2012 Nuxeo SAS (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 * Contributors:
015 *     Nuxeo
016 *     Antoine Taillefer
017 */
018
019package org.nuxeo.ecm.core.convert.plugins.text.extractors;
020
021import java.io.IOException;
022import java.util.zip.ZipEntry;
023import java.util.zip.ZipInputStream;
024
025import org.xml.sax.InputSource;
026import org.xml.sax.SAXException;
027import org.xml.sax.XMLReader;
028
029/**
030 * Based on Apache JackRabbit OOo converter.
031 */
032public class OOo2TextConverter extends XmlZip2TextConverter {
033
034    private static final String CONTENT_ZIP_ENTRY_NAME = "content.xml";
035
036    protected void readXmlZipContent(ZipInputStream zis, XMLReader reader, StringBuilder sb) throws IOException,
037            SAXException {
038
039        ZipEntry zipEntry = zis.getNextEntry();
040        while (zipEntry != null) {
041            if (CONTENT_ZIP_ENTRY_NAME.equals(zipEntry.getName())) {
042                OOoXmlContentHandler contentHandler = new OOoXmlContentHandler();
043                reader.setContentHandler(contentHandler);
044                reader.parse(new InputSource(zis));
045                sb.append(contentHandler.getContent());
046                break;
047            }
048            zipEntry = zis.getNextEntry();
049        }
050    }
051}