001/*
002 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo
018 *     Florent Guillaume
019 *     Thierry Delprat
020 */
021package org.nuxeo.ecm.platform.convert.plugins;
022
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.Serializable;
028import java.util.ArrayList;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032import java.util.regex.Matcher;
033import java.util.regex.Pattern;
034
035import org.apache.commons.io.FileUtils;
036import org.apache.commons.lang.StringUtils;
037import org.apache.commons.logging.Log;
038import org.apache.commons.logging.LogFactory;
039import org.artofsolving.jodconverter.OfficeDocumentConverter;
040import org.artofsolving.jodconverter.StandardConversionTask;
041import org.artofsolving.jodconverter.document.DocumentFamily;
042import org.artofsolving.jodconverter.document.DocumentFormat;
043import org.nuxeo.common.Environment;
044import org.nuxeo.ecm.core.api.Blob;
045import org.nuxeo.ecm.core.api.Blobs;
046import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
047import org.nuxeo.ecm.core.convert.api.ConversionException;
048import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
049import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
050import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
051import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
052import org.nuxeo.ecm.platform.convert.ooomanager.OOoManagerService;
053import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
054import org.nuxeo.runtime.api.Framework;
055
056/**
057 * Converter based on JOD which uses an external OpenOffice process to do actual conversions.
058 *
059 * @deprecated Since 8.4. Use 'soffice' with {@link org.nuxeo.ecm.platform.convert.plugins.CommandLineConverter} instead
060 */
061@Deprecated
062public class JODBasedConverter implements ExternalConverter {
063
064    protected static final String TMP_PATH_PARAMETER = "TmpDirectory";
065
066    private static final Log log = LogFactory.getLog(JODBasedConverter.class);
067
068    /**
069     * Boolean conversion parameter for PDF/A-1.
070     *
071     * @since 5.6
072     */
073    public static final String PDFA1_PARAM = "PDF/A-1";
074
075    /**
076     * Boolean parameter to force update of the document TOC
077     *
078     * @since 5.6
079     */
080    public static final String UPDATE_INDEX_PARAM = StandardConversionTask.UPDATE_DOCUMENT_INDEX;
081
082    protected static final Map<DocumentFamily, String> PDF_FILTER_NAMES = new HashMap<>();
083    {
084        PDF_FILTER_NAMES.put(DocumentFamily.TEXT, "writer_pdf_Export");
085        PDF_FILTER_NAMES.put(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
086        PDF_FILTER_NAMES.put(DocumentFamily.PRESENTATION, "impress_pdf_Export");
087        PDF_FILTER_NAMES.put(DocumentFamily.DRAWING, "draw_pdf_Export");
088    }
089
090    protected ConverterDescriptor descriptor;
091
092    protected String getDestinationMimeType() {
093        return descriptor.getDestinationMimeType();
094    }
095
096    /**
097     * Returns the destination format for the given plugin.
098     * <p>
099     * It takes the actual destination mimetype from the plugin configuration.
100     *
101     * @param sourceFormat the source format
102     * @param pdfa1 true if PDF/A-1 is required
103     */
104    protected DocumentFormat getDestinationFormat(OfficeDocumentConverter documentConverter,
105            DocumentFormat sourceFormat, boolean pdfa1) {
106        String mimeType = getDestinationMimeType();
107        DocumentFormat destinationFormat = documentConverter.getFormatRegistry().getFormatByMediaType(mimeType);
108        if ("application/pdf".equals(mimeType)) {
109            destinationFormat = extendPDFFormat(sourceFormat, destinationFormat, pdfa1);
110        }
111        return destinationFormat;
112    }
113
114    protected DocumentFormat extendPDFFormat(DocumentFormat sourceFormat, DocumentFormat defaultFormat, boolean pdfa1) {
115        DocumentFamily sourceFamily = sourceFormat.getInputFamily();
116        String sourceMediaType = sourceFormat.getMediaType();
117        DocumentFormat pdfFormat = new DocumentFormat(pdfa1 ? "PDF/A-1" : "PDF", "pdf", "application/pdf");
118        Map<DocumentFamily, Map<String, ?>> storePropertiesByFamily = new HashMap<>();
119        Map<DocumentFamily, Map<String, ?>> defaultStorePropertiesByFamily = defaultFormat.getStorePropertiesByFamily();
120        for (DocumentFamily family : defaultStorePropertiesByFamily.keySet()) {
121            if (family.equals(sourceFamily)) {
122                continue;
123            }
124            storePropertiesByFamily.put(family, defaultStorePropertiesByFamily.get(family));
125        }
126        storePropertiesByFamily.put(sourceFamily,
127                extendPDFStoreProperties(sourceMediaType, pdfa1, defaultStorePropertiesByFamily.get(sourceFamily)));
128        pdfFormat.setStorePropertiesByFamily(storePropertiesByFamily);
129        return pdfFormat;
130    }
131
132    protected Map<String, Object> extendPDFStoreProperties(String mediatype, boolean pdfa1,
133            Map<String, ?> originalProperties) {
134        Map<String, Object> extendedProperties = new HashMap<>();
135        for (Map.Entry<String, ?> entry : originalProperties.entrySet()) {
136            extendedProperties.put(entry.getKey(), entry.getValue());
137        }
138        if ("text/html".equals(mediatype)) {
139            extendedProperties.put("FilterName", "writer_web_pdf_Export");
140        }
141        if (pdfa1) {
142            Map<String, Object> filterData = new HashMap<>();
143            filterData.put("SelectPdfVersion", Integer.valueOf(1)); // PDF/A-1
144            filterData.put("UseTaggedPDF", Boolean.TRUE); // per spec
145            extendedProperties.put("FilterData", filterData);
146        }
147        return extendedProperties;
148    }
149
150    /**
151     * Returns the format for the file passed as a parameter.
152     * <p>
153     * We will ask the mimetype registry service to sniff its mimetype.
154     *
155     * @return DocumentFormat for the given file
156     */
157    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, File file) {
158        MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
159        String mimetypeStr = mimetypeRegistry.getMimetypeFromFile(file);
160        DocumentFormat format = documentConverter.getFormatRegistry().getFormatByMediaType(mimetypeStr);
161        return format;
162    }
163
164    /**
165     * Returns the DocumentFormat for the given mimetype.
166     *
167     * @return DocumentFormat for the given mimetype
168     */
169    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, String mimetype) {
170        return documentConverter.getFormatRegistry().getFormatByMediaType(mimetype);
171    }
172
173    @Override
174    protected void finalize() throws Throwable {
175        super.finalize();
176    }
177
178    @Override
179    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
180        blobHolder = new UTF8CharsetConverter().convert(blobHolder, parameters);
181        Blob inputBlob = blobHolder.getBlob();
182        String blobPath = blobHolder.getFilePath();
183        if (inputBlob == null) {
184            return null;
185        }
186
187        OfficeDocumentConverter documentConverter = newDocumentConverter();
188        // This plugin do deal only with one input source.
189        String sourceMimetype = inputBlob.getMimeType();
190
191        boolean pdfa1 = false;
192        if (parameters != null) {
193            Serializable pdfa1Val = parameters.get(PDFA1_PARAM);
194            if (pdfa1Val instanceof Boolean) {
195                pdfa1 = ((Boolean) pdfa1Val).booleanValue();
196            } else if (pdfa1Val instanceof String) {
197                pdfa1 = Boolean.parseBoolean((String) pdfa1Val);
198            }
199        }
200
201        File sourceFile = null;
202        File outFile = null;
203        File[] files = null;
204        try {
205
206            // If the input blob has the HTML mime type, make sure the
207            // charset meta is present, add it if not
208            if ("text/html".equals(sourceMimetype)) {
209                inputBlob = checkCharsetMeta(inputBlob);
210            }
211
212            // Get original file extension
213            String ext = inputBlob.getFilename();
214            int dotPosition = ext.lastIndexOf('.');
215            if (dotPosition == -1) {
216                ext = ".bin";
217            } else {
218                ext = ext.substring(dotPosition);
219            }
220            // Copy in a file to be able to read it several time
221            sourceFile = Framework.createTempFile("NXJOOoConverterDocumentIn", ext);
222            InputStream stream = inputBlob.getStream();
223            FileUtils.copyInputStreamToFile(stream, sourceFile);
224            stream.close();
225
226            DocumentFormat sourceFormat = null;
227            if (sourceMimetype != null) {
228                // Try to fetch it from the registry.
229                sourceFormat = getSourceFormat(documentConverter, sourceMimetype);
230            }
231            // If not found in the registry or not given as a parameter.
232            // Try to sniff ! What does that smell ? :)
233            if (sourceFormat == null) {
234                sourceFormat = getSourceFormat(documentConverter, sourceFile);
235            }
236
237            // From plugin settings because we know the destination
238            // mimetype.
239            DocumentFormat destinationFormat = getDestinationFormat(documentConverter, sourceFormat, pdfa1);
240
241            // allow HTML2PDF filtering
242
243            List<Blob> blobs = new ArrayList<>();
244
245            if (descriptor.getDestinationMimeType().equals("text/html")) {
246                String tmpDirPath = getTmpDirectory();
247                File myTmpDir = new File(tmpDirPath + "/JODConv_" + System.currentTimeMillis());
248                boolean created = myTmpDir.mkdir();
249                if (!created) {
250                    throw new IOException("Unable to create temp dir");
251                }
252
253                outFile = new File(myTmpDir.getAbsolutePath() + "/" + "NXJOOoConverterDocumentOut."
254                        + destinationFormat.getExtension());
255
256                created = outFile.createNewFile();
257                if (!created) {
258                    throw new IOException("Unable to create temp file");
259                }
260
261                log.debug("Input File = " + outFile.getAbsolutePath());
262                // Perform the actual conversion.
263                documentConverter.convert(sourceFile, outFile, destinationFormat);
264
265                files = myTmpDir.listFiles();
266                for (File file : files) {
267                    // copy the files to a new tmp location, as we'll delete them
268                    Blob blob;
269                    try (FileInputStream in = new FileInputStream(file)) {
270                        blob = Blobs.createBlob(in);
271                    }
272                    blob.setFilename(file.getName());
273                    blobs.add(blob);
274                    // add a blob for the index
275                    if (file.getName().equals(outFile.getName())) {
276                        Blob indexBlob;
277                        try (FileInputStream in = new FileInputStream(file)) {
278                            indexBlob = Blobs.createBlob(in);
279                        }
280                        indexBlob.setFilename("index.html");
281                        blobs.add(0, indexBlob);
282                    }
283                }
284
285            } else {
286                outFile = Framework.createTempFile("NXJOOoConverterDocumentOut", '.' + destinationFormat.getExtension());
287
288                // Perform the actual conversion.
289                documentConverter.convert(sourceFile, outFile, destinationFormat, parameters);
290
291                Blob blob;
292                try (FileInputStream in = new FileInputStream(outFile)) {
293                    blob = Blobs.createBlob(in, getDestinationMimeType());
294                }
295                blobs.add(blob);
296            }
297            return new SimpleCachableBlobHolder(blobs);
298        } catch (IOException e) {
299            String msg = String.format("An error occurred trying to convert file %s to from %s to %s", blobPath,
300                    sourceMimetype, getDestinationMimeType());
301            throw new ConversionException(msg, e);
302        } finally {
303            if (sourceFile != null) {
304                sourceFile.delete();
305            }
306            if (outFile != null) {
307                outFile.delete();
308            }
309
310            if (files != null) {
311                for (File file : files) {
312                    if (file.exists()) {
313                        file.delete();
314                    }
315                }
316            }
317        }
318
319    }
320
321    protected OfficeDocumentConverter newDocumentConverter() throws ConversionException {
322        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
323        OfficeDocumentConverter documentConverter = oooManagerService.getDocumentConverter();
324        if (documentConverter == null) {
325            throw new ConversionException("Could not connect to the remote OpenOffice server");
326        }
327        return documentConverter;
328    }
329
330    @SuppressWarnings("hiding")
331    @Override
332    public void init(ConverterDescriptor descriptor) {
333        this.descriptor = descriptor;
334    }
335
336    @Override
337    public ConverterCheckResult isConverterAvailable() {
338        ConverterCheckResult result = new ConverterCheckResult();
339        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
340        if (!oooManagerService.isOOoManagerStarted()) {
341            result.setAvailable(false);
342        }
343        return result;
344    }
345
346    protected String getTmpDirectory() {
347        String tmp = null;
348        Map<String, String> parameters = descriptor.getParameters();
349        if (parameters != null && parameters.containsKey(TMP_PATH_PARAMETER)) {
350            tmp = parameters.get(TMP_PATH_PARAMETER);
351        }
352        if (tmp == null) {
353            tmp = Environment.getDefault().getTemp().getPath();
354        }
355        return tmp;
356    }
357
358    /**
359     * Checks if the {@code inputBlob} string contains a {@code charset} meta tag. If not, add it.
360     *
361     * @param inputBlob the input blob
362     * @throws IOException Signals that an I/O exception has occurred.
363     */
364    protected Blob checkCharsetMeta(Blob inputBlob) throws IOException {
365
366        String charset = inputBlob.getEncoding();
367        if (!StringUtils.isEmpty(charset)) {
368            Pattern charsetMetaPattern = Pattern.compile(String.format("content=\"text/html;\\s*charset=%s\"", charset));
369            Matcher charsetMetaMatcher = charsetMetaPattern.matcher(inputBlob.getString());
370            if (!charsetMetaMatcher.find()) {
371                String charsetMetaTag = String.format(
372                        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">", charset);
373                StringBuilder sb = new StringBuilder(charsetMetaTag);
374                sb.append(new String(inputBlob.getByteArray(), charset));
375                Blob blobWithCharsetMetaTag = Blobs.createBlob(sb.toString(), "text/html", charset,
376                        inputBlob.getFilename());
377                return blobWithCharsetMetaTag;
378            }
379        }
380        return inputBlob;
381    }
382}