/*
 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 */
package org.nuxeo.ecm.core.convert.plugins.text.extractors;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.hwpf.extractor.WordExtractor;

import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.runtime.api.Framework;

/**
 * Converter that extracts the text of a Word document (through POI's {@link WordExtractor}) and returns it as a
 * {@code text/plain} blob.
 *
 * @deprecated subsumed by MSOffice2TextConverter
 */
@Deprecated
public class Word2TextConverter implements Converter {

    private static final Log log = LogFactory.getLog(Word2TextConverter.class);

    /**
     * Extracts the text of the Word document held by {@code blobHolder}.
     * <p>
     * The text is staged in a temporary file, turned into a blob, and the temporary file is removed before
     * returning. The text is encoded as UTF-8 and the resulting blob is flagged accordingly.
     *
     * @param blobHolder holder whose main blob is the Word document to read
     * @param parameters conversion parameters; not used by this converter
     * @return a holder wrapping a {@code text/plain} blob containing the extracted text
     * @throws ConversionException if reading the document or writing the extracted text fails with an
     *             {@link IOException}
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters)
            throws ConversionException {

        File tempFile = null;
        WordExtractor extractor = null;
        try {
            extractor = new WordExtractor(blobHolder.getBlob().getStream());
            // Encode explicitly: the no-arg getBytes() depends on the JVM's default charset, which makes the
            // output vary from one host to another for non-ASCII text.
            byte[] bytes = extractor.getText().getBytes(StandardCharsets.UTF_8);

            tempFile = Framework.createTempFile("po-word2text", ".txt");
            // Scope the writer so the file is fully written and closed before it is read back below.
            try (OutputStream out = new FileOutputStream(tempFile)) {
                out.write(bytes);
            }

            Blob blob;
            try (InputStream in = new FileInputStream(tempFile)) {
                blob = Blobs.createBlob(in);
            }
            blob.setMimeType("text/plain");
            // Record the charset used above so consumers of the blob do not have to guess it.
            blob.setEncoding(StandardCharsets.UTF_8.name());

            return new SimpleCachableBlobHolder(blob);
        } catch (IOException e) {
            throw new ConversionException("Error during Word2Text conversion", e);
        } finally {
            if (extractor != null) {
                try {
                    extractor.close();
                } catch (IOException e) {
                    // Best effort: a failure to release the extractor must not mask the conversion outcome.
                    log.error(e, e);
                }
            }
            if (tempFile != null && !tempFile.delete()) {
                log.warn("Could not delete temporary file " + tempFile);
            }
        }
    }

    /**
     * No-op: this converter takes no configuration from its descriptor.
     *
     * @param descriptor the converter descriptor; ignored
     */
    @Override
    public void init(ConverterDescriptor descriptor) {
    }

}