001/*
002 * (C) Copyright 2002-2007 Nuxeo SAS (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 * Contributors:
015 *     Nuxeo - initial API and implementation
016 *
017 */
018
019package org.nuxeo.ecm.platform.convert.plugins;
020
021import java.io.File;
022import java.io.IOException;
023import java.io.Serializable;
024import java.util.ArrayList;
025import java.util.HashMap;
026import java.util.List;
027import java.util.Map;
028
029import org.nuxeo.common.utils.Path;
030import org.nuxeo.ecm.core.api.Blob;
031import org.nuxeo.ecm.core.api.Blobs;
032import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
033import org.nuxeo.ecm.core.convert.api.ConversionException;
034import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
035import org.nuxeo.ecm.platform.commandline.executor.api.CmdParameters;
036
037/**
038 * Pdf2Html converter based on the pdftohtml command-line executable.
039 *
040 * @author tiry
041 * @author Vincent Dutat
042 */
043public class PDF2HtmlConverter extends CommandLineBasedConverter {
044
045    @Override
046    protected BlobHolder buildResult(List<String> cmdOutput, CmdParameters cmdParams) {
047        String outputPath = cmdParams.getParameter("outDirPath");
048        File outputDir = new File(outputPath);
049        File[] files = outputDir.listFiles();
050        List<Blob> blobs = new ArrayList<Blob>();
051
052        for (File file : files) {
053            Blob blob;
054            try {
055                blob = Blobs.createBlob(file);
056            } catch (IOException e) {
057                throw new ConversionException("Cannot create Blob", e);
058            }
059            blob.setFilename(file.getName());
060
061            if (file.getName().equalsIgnoreCase("index.html")) {
062                blobs.add(0, blob);
063            } else {
064                blobs.add(blob);
065            }
066        }
067        return new SimpleCachableBlobHolder(blobs);
068    }
069
070    @Override
071    protected Map<String, Blob> getCmdBlobParameters(BlobHolder blobHolder, Map<String, Serializable> parameters)
072            throws ConversionException {
073
074        Map<String, Blob> cmdBlobParams = new HashMap<String, Blob>();
075        cmdBlobParams.put("inFilePath", blobHolder.getBlob());
076        return cmdBlobParams;
077    }
078
079    @Override
080    protected Map<String, String> getCmdStringParameters(BlobHolder blobHolder, Map<String, Serializable> parameters)
081            throws ConversionException {
082
083        Map<String, String> cmdStringParams = new HashMap<String, String>();
084
085        String baseDir = getTmpDirectory(parameters);
086        Path tmpPath = new Path(baseDir).append("pdf2html_" + System.currentTimeMillis());
087
088        File outDir = new File(tmpPath.toString());
089        boolean dirCreated = outDir.mkdir();
090        if (!dirCreated) {
091            throw new ConversionException("Unable to create tmp dir for transformer output");
092        }
093        cmdStringParams.put("outDirPath", outDir.getAbsolutePath());
094        return cmdStringParams;
095    }
096
097}