001/* 002 * (C) Copyright 2006-2007 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Nuxeo - initial API and implementation 018 * 019 */ 020package org.nuxeo.ecm.core.convert.plugins.text.extractors; 021 022import java.io.Serializable; 023import java.util.Map; 024 025import org.apache.commons.logging.Log; 026import org.apache.commons.logging.LogFactory; 027import org.nuxeo.ecm.core.api.Blobs; 028import org.nuxeo.ecm.core.api.blobholder.BlobHolder; 029import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder; 030import org.nuxeo.ecm.core.convert.api.ConversionException; 031import org.nuxeo.ecm.core.convert.api.ConversionService; 032import org.nuxeo.ecm.core.convert.extension.Converter; 033import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor; 034import org.nuxeo.runtime.api.Framework; 035 036/** 037 * Converter that tries to find a way to extract full text content according to input mime-type. 
* <p>
* Plain-text input is handed back unchanged. For any other source mime type the conversion is delegated to the
* converter whose name the {@link ConversionService} reports for a conversion to {@code text/plain}. When no blob is
* available, no converter is reported, or the reported converter is this one, an empty text blob is returned.
*
* @author tiry
*/
public class FullTextConverter implements Converter {

    private static final String TEXT_PLAIN_MT = "text/plain";

    private static final Log log = LogFactory.getLog(FullTextConverter.class);

    /** Descriptor received in {@link #init(ConverterDescriptor)}; its converter name is used to detect self-delegation. */
    protected ConverterDescriptor descriptor;

    /**
     * Extracts the text content of the blob held by {@code blobHolder}.
     *
     * @param blobHolder holder of the blob to extract text from
     * @param parameters conversion parameters, passed through untouched to the delegate converter
     * @return {@code blobHolder} itself when its blob is already {@code text/plain}; the delegate converter's result
     *         when a converter is found for the source mime type; otherwise a holder wrapping an empty text blob
     * @throws ConversionException if the delegate conversion fails
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
        // A holder without a blob has nothing to extract: answer like the other "cannot extract" cases
        // instead of failing with a NullPointerException on getMimeType().
        if (blobHolder.getBlob() == null) {
            log.debug("No blob available for full text extraction");
            return emptyTextHolder();
        }

        String srcMT = blobHolder.getBlob().getMimeType();
        if (TEXT_PLAIN_MT.equals(srcMT)) {
            // no need to convert !
            return blobHolder;
        }

        ConversionService cs = Framework.getService(ConversionService.class);

        // do not find converter with * as source mime type, we want a specific one
        String converterName = cs.getConverterName(srcMT, TEXT_PLAIN_MT, false);
        if (converterName == null) {
            log.debug("Unable to find full text extractor for source mime type " + srcMT);
            return emptyTextHolder();
        }

        if (converterName.equals(descriptor.getConverterName())) {
            // Should never happen ! Delegating to ourselves would recurse forever.
            log.debug("Exiting from converter to avoid a loop");
            return emptyTextHolder();
        }

        return cs.convert(converterName, blobHolder, parameters);
    }

    /**
     * Stores the descriptor so that {@link #convert(BlobHolder, Map)} can compare converter names.
     *
     * @param descriptor the descriptor this converter was registered with
     */
    @Override
    public void init(ConverterDescriptor descriptor) {
        this.descriptor = descriptor;
    }

    /** Builds the fallback result: a holder wrapping a blob created from the empty string. */
    private static BlobHolder emptyTextHolder() {
        return new SimpleBlobHolder(Blobs.createBlob(""));
    }

}