/*
 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 */
package org.nuxeo.ecm.core.convert.plugins.text.extractors;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Map;

import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import javax.swing.text.rtf.RTFEditorKit;

import org.apache.commons.io.FileUtils;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.runtime.api.Framework;

/**
 * Converter that extracts the plain text of an RTF blob using the Swing {@link RTFEditorKit} parser.
 */
public class RTF2TextConverter implements Converter {

    /**
     * Parses the main blob of {@code blobHolder} as RTF and returns its text content as a {@code text/plain} blob.
     * <p>
     * The extracted text is written as UTF-8 to a temporary file, from which the resulting blob is created; the
     * temporary file is removed before this method returns.
     *
     * @param blobHolder the holder whose main blob contains the RTF content to convert
     * @param parameters conversion parameters; not used by this converter
     * @return a {@link SimpleCachableBlobHolder} wrapping the extracted text blob
     * @throws ConversionException if the RTF content cannot be read or parsed, or if the temporary file cannot be
     *             written or read back
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
        File f = null;
        try {
            RTFEditorKit rtfParser = new RTFEditorKit();
            Document document = rtfParser.createDefaultDocument();
            // The parser does not own the stream it is given, so close it here once parsing is done.
            try (InputStream rtfIn = blobHolder.getBlob().getStream()) {
                rtfParser.read(rtfIn, document, 0);
            }
            String text = document.getText(0, document.getLength());
            f = Framework.createTempFile("swing-rtf2text", ".txt");
            FileUtils.writeStringToFile(f, text, UTF_8);
            Blob blob;
            // NOTE(review): the temp file is deleted in the finally block below, so this relies on
            // Blobs.createBlob having consumed the stream by the time it returns - confirm.
            try (InputStream in = new FileInputStream(f)) {
                blob = Blobs.createBlob(in, "text/plain");
            }
            return new SimpleCachableBlobHolder(blob);
        } catch (IOException | BadLocationException e) {
            throw new ConversionException("Error during RTF2Text conversion", blobHolder, e);
        } finally {
            // Best-effort cleanup: null-safe and never throws, so it cannot mask a conversion failure.
            FileUtils.deleteQuietly(f);
        }
    }

    /**
     * No initialization is required for this converter; the descriptor is ignored.
     *
     * @param descriptor the converter descriptor, unused
     */
    @Override
    public void init(ConverterDescriptor descriptor) {
    }

}