001/*
002 * (C) Copyright 2006-2007 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 *
019 */
020
021package org.nuxeo.ecm.platform.convert.plugins;
022
023import java.io.File;
024import java.io.IOException;
025import java.io.Serializable;
026import java.util.ArrayList;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030
031import org.nuxeo.common.utils.Path;
032import org.nuxeo.ecm.core.api.Blob;
033import org.nuxeo.ecm.core.api.Blobs;
034import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
035import org.nuxeo.ecm.core.convert.api.ConversionException;
036import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
037import org.nuxeo.ecm.platform.commandline.executor.api.CmdParameters;
038
039/**
040 * Pdf2Html converter based on the pdftohtml command-line executable.
041 *
042 * @author tiry
043 * @author Vincent Dutat
044 */
045public class PDF2HtmlConverter extends CommandLineBasedConverter {
046
047    @Override
048    protected BlobHolder buildResult(List<String> cmdOutput, CmdParameters cmdParams) {
049        String outputPath = cmdParams.getParameter("outDirPath");
050        File outputDir = new File(outputPath);
051        File[] files = outputDir.listFiles();
052        List<Blob> blobs = new ArrayList<Blob>();
053
054        for (File file : files) {
055            Blob blob;
056            try {
057                blob = Blobs.createBlob(file);
058            } catch (IOException e) {
059                throw new ConversionException("Cannot create Blob", e);
060            }
061            blob.setFilename(file.getName());
062
063            if (file.getName().equalsIgnoreCase("index.html")) {
064                blobs.add(0, blob);
065            } else {
066                blobs.add(blob);
067            }
068        }
069        return new SimpleCachableBlobHolder(blobs);
070    }
071
072    @Override
073    protected Map<String, Blob> getCmdBlobParameters(BlobHolder blobHolder, Map<String, Serializable> parameters)
074            throws ConversionException {
075
076        Map<String, Blob> cmdBlobParams = new HashMap<String, Blob>();
077        cmdBlobParams.put("inFilePath", blobHolder.getBlob());
078        return cmdBlobParams;
079    }
080
081    @Override
082    protected Map<String, String> getCmdStringParameters(BlobHolder blobHolder, Map<String, Serializable> parameters)
083            throws ConversionException {
084
085        Map<String, String> cmdStringParams = new HashMap<String, String>();
086
087        String baseDir = getTmpDirectory(parameters);
088        Path tmpPath = new Path(baseDir).append("pdf2html_" + System.currentTimeMillis());
089
090        File outDir = new File(tmpPath.toString());
091        boolean dirCreated = outDir.mkdir();
092        if (!dirCreated) {
093            throw new ConversionException("Unable to create tmp dir for transformer output");
094        }
095        cmdStringParams.put("outDirPath", outDir.getAbsolutePath());
096        return cmdStringParams;
097    }
098
099}