001/* 002 * (C) Copyright 2006-2012 Nuxeo SAS (http://nuxeo.com/) and contributors. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the GNU Lesser General Public License 006 * (LGPL) version 2.1 which accompanies this distribution, and is available at 007 * http://www.gnu.org/licenses/lgpl.html 008 * 009 * This library is distributed in the hope that it will be useful, 010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 012 * Lesser General Public License for more details. 013 * 014 * Contributors: 015 * Nuxeo 016 * Antoine Taillefer 017 */ 018 019package org.nuxeo.ecm.core.convert.plugins.text.extractors; 020 021import java.io.IOException; 022import java.util.zip.ZipEntry; 023import java.util.zip.ZipInputStream; 024 025import org.xml.sax.InputSource; 026import org.xml.sax.SAXException; 027import org.xml.sax.XMLReader; 028 029/** 030 * Based on Apache JackRabbit OOo converter. 031 */ 032public class OOo2TextConverter extends XmlZip2TextConverter { 033 034 private static final String CONTENT_ZIP_ENTRY_NAME = "content.xml"; 035 036 protected void readXmlZipContent(ZipInputStream zis, XMLReader reader, StringBuilder sb) throws IOException, 037 SAXException { 038 039 ZipEntry zipEntry = zis.getNextEntry(); 040 while (zipEntry != null) { 041 if (CONTENT_ZIP_ENTRY_NAME.equals(zipEntry.getName())) { 042 OOoXmlContentHandler contentHandler = new OOoXmlContentHandler(); 043 reader.setContentHandler(contentHandler); 044 reader.parse(new InputSource(zis)); 045 sb.append(contentHandler.getContent()); 046 break; 047 } 048 zipEntry = zis.getNextEntry(); 049 } 050 } 051}