001package org.nuxeo.ecm.platform.importer.random;
002
003import java.io.IOException;
004import java.io.Serializable;
005import java.util.Map;
006
007import org.nuxeo.ecm.core.api.Blob;
008import org.nuxeo.ecm.core.api.Blobs;
009import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
010import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
011import org.nuxeo.ecm.core.convert.api.ConversionException;
012import org.nuxeo.ecm.core.convert.extension.Converter;
013import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
014
015public class PartialTextExtractor implements Converter {
016
017    public static final double TEXT_RATIO = 0.01;
018
019    @Override
020    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
021        try {
022            Blob blob = blobHolder.getBlob();
023
024            String data = blob.getString();
025            int endIdx = new Double(data.length() * TEXT_RATIO).intValue();
026            String txtData = data.substring(0, endIdx);
027            return new SimpleBlobHolder(Blobs.createBlob(txtData));
028
029        } catch (IOException e) {
030            throw new ConversionException("error extracting partial text content", e);
031        }
032    }
033
034    @Override
035    public void init(ConverterDescriptor descriptor) {
036    }
037
038}