001/*
002 * (C) Copyright 2006-2012 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo
018 *     Florent Guillaume
019 *     Thierry Delprat
020 */
021package org.nuxeo.ecm.platform.convert.plugins;
022
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.Serializable;
028import java.util.ArrayList;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032import java.util.regex.Matcher;
033import java.util.regex.Pattern;
034
035import org.apache.commons.lang.StringUtils;
036import org.apache.commons.logging.Log;
037import org.apache.commons.logging.LogFactory;
038import org.artofsolving.jodconverter.OfficeDocumentConverter;
039import org.artofsolving.jodconverter.StandardConversionTask;
040import org.artofsolving.jodconverter.document.DocumentFamily;
041import org.artofsolving.jodconverter.document.DocumentFormat;
042
043import org.nuxeo.common.Environment;
044import org.nuxeo.common.utils.FileUtils;
045import org.nuxeo.ecm.core.api.Blob;
046import org.nuxeo.ecm.core.api.Blobs;
047import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
048import org.nuxeo.ecm.core.convert.api.ConversionException;
049import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
050import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
051import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
052import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
053import org.nuxeo.ecm.platform.convert.ooomanager.OOoManagerService;
054import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
055import org.nuxeo.runtime.api.Framework;
056
057/**
058 * Converter based on JOD which uses an external OpenOffice process to do actual conversions.
059 *
060 * @deprecated Since 8.4. Use 'soffice' with {@link org.nuxeo.ecm.platform.convert.plugins.CommandLineConverter} instead
061 */
062@Deprecated
063public class JODBasedConverter implements ExternalConverter {
064
065    protected static final String TMP_PATH_PARAMETER = "TmpDirectory";
066
067    private static final Log log = LogFactory.getLog(JODBasedConverter.class);
068
069    /**
070     * Boolean conversion parameter for PDF/A-1.
071     *
072     * @since 5.6
073     */
074    public static final String PDFA1_PARAM = "PDF/A-1";
075
076    /**
077     * Boolean parameter to force update of the document TOC
078     *
079     * @since 5.6
080     */
081    public static final String UPDATE_INDEX_PARAM = StandardConversionTask.UPDATE_DOCUMENT_INDEX;
082
083    protected static final Map<DocumentFamily, String> PDF_FILTER_NAMES = new HashMap<>();
084    {
085        PDF_FILTER_NAMES.put(DocumentFamily.TEXT, "writer_pdf_Export");
086        PDF_FILTER_NAMES.put(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
087        PDF_FILTER_NAMES.put(DocumentFamily.PRESENTATION, "impress_pdf_Export");
088        PDF_FILTER_NAMES.put(DocumentFamily.DRAWING, "draw_pdf_Export");
089    }
090
091    protected ConverterDescriptor descriptor;
092
093    protected String getDestinationMimeType() {
094        return descriptor.getDestinationMimeType();
095    }
096
097    /**
098     * Returns the destination format for the given plugin.
099     * <p>
100     * It takes the actual destination mimetype from the plugin configuration.
101     *
102     * @param sourceFormat the source format
103     * @param pdfa1 true if PDF/A-1 is required
104     */
105    protected DocumentFormat getDestinationFormat(OfficeDocumentConverter documentConverter,
106            DocumentFormat sourceFormat, boolean pdfa1) {
107        String mimeType = getDestinationMimeType();
108        DocumentFormat destinationFormat = documentConverter.getFormatRegistry().getFormatByMediaType(mimeType);
109        if ("application/pdf".equals(mimeType)) {
110            destinationFormat = extendPDFFormat(sourceFormat, destinationFormat, pdfa1);
111        }
112        return destinationFormat;
113    }
114
115    protected DocumentFormat extendPDFFormat(DocumentFormat sourceFormat, DocumentFormat defaultFormat, boolean pdfa1) {
116        DocumentFamily sourceFamily = sourceFormat.getInputFamily();
117        String sourceMediaType = sourceFormat.getMediaType();
118        DocumentFormat pdfFormat = new DocumentFormat(pdfa1 ? "PDF/A-1" : "PDF", "pdf", "application/pdf");
119        Map<DocumentFamily, Map<String, ?>> storePropertiesByFamily = new HashMap<>();
120        Map<DocumentFamily, Map<String, ?>> defaultStorePropertiesByFamily = defaultFormat.getStorePropertiesByFamily();
121        for (DocumentFamily family : defaultStorePropertiesByFamily.keySet()) {
122            if (family.equals(sourceFamily)) {
123                continue;
124            }
125            storePropertiesByFamily.put(family, defaultStorePropertiesByFamily.get(family));
126        }
127        storePropertiesByFamily.put(sourceFamily,
128                extendPDFStoreProperties(sourceMediaType, pdfa1, defaultStorePropertiesByFamily.get(sourceFamily)));
129        pdfFormat.setStorePropertiesByFamily(storePropertiesByFamily);
130        return pdfFormat;
131    }
132
133    protected Map<String, Object> extendPDFStoreProperties(String mediatype, boolean pdfa1,
134            Map<String, ?> originalProperties) {
135        Map<String, Object> extendedProperties = new HashMap<>();
136        for (Map.Entry<String, ?> entry : originalProperties.entrySet()) {
137            extendedProperties.put(entry.getKey(), entry.getValue());
138        }
139        if ("text/html".equals(mediatype)) {
140            extendedProperties.put("FilterName", "writer_web_pdf_Export");
141        }
142        if (pdfa1) {
143            Map<String, Object> filterData = new HashMap<>();
144            filterData.put("SelectPdfVersion", Integer.valueOf(1)); // PDF/A-1
145            filterData.put("UseTaggedPDF", Boolean.TRUE); // per spec
146            extendedProperties.put("FilterData", filterData);
147        }
148        return extendedProperties;
149    }
150
151    /**
152     * Returns the format for the file passed as a parameter.
153     * <p>
154     * We will ask the mimetype registry service to sniff its mimetype.
155     *
156     * @return DocumentFormat for the given file
157     */
158    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, File file) {
159        MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
160        String mimetypeStr = mimetypeRegistry.getMimetypeFromFile(file);
161        DocumentFormat format = documentConverter.getFormatRegistry().getFormatByMediaType(mimetypeStr);
162        return format;
163    }
164
165    /**
166     * Returns the DocumentFormat for the given mimetype.
167     *
168     * @return DocumentFormat for the given mimetype
169     */
170    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, String mimetype) {
171        return documentConverter.getFormatRegistry().getFormatByMediaType(mimetype);
172    }
173
174    @Override
175    protected void finalize() throws Throwable {
176        super.finalize();
177    }
178
179    @Override
180    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
181        blobHolder = new UTF8CharsetConverter().convert(blobHolder, parameters);
182        Blob inputBlob = blobHolder.getBlob();
183        String blobPath = blobHolder.getFilePath();
184        if (inputBlob == null) {
185            return null;
186        }
187
188        OfficeDocumentConverter documentConverter = newDocumentConverter();
189        // This plugin do deal only with one input source.
190        String sourceMimetype = inputBlob.getMimeType();
191
192        boolean pdfa1 = parameters != null && Boolean.TRUE.equals(parameters.get(PDFA1_PARAM));
193
194        File sourceFile = null;
195        File outFile = null;
196        File[] files = null;
197        try {
198
199            // If the input blob has the HTML mime type, make sure the
200            // charset meta is present, add it if not
201            if ("text/html".equals(sourceMimetype)) {
202                inputBlob = checkCharsetMeta(inputBlob);
203            }
204
205            // Get original file extension
206            String ext = inputBlob.getFilename();
207            int dotPosition = ext.lastIndexOf('.');
208            if (dotPosition == -1) {
209                ext = ".bin";
210            } else {
211                ext = ext.substring(dotPosition);
212            }
213            // Copy in a file to be able to read it several time
214            sourceFile = Framework.createTempFile("NXJOOoConverterDocumentIn", ext);
215            InputStream stream = inputBlob.getStream();
216            FileUtils.copyToFile(stream, sourceFile);
217            stream.close();
218
219            DocumentFormat sourceFormat = null;
220            if (sourceMimetype != null) {
221                // Try to fetch it from the registry.
222                sourceFormat = getSourceFormat(documentConverter, sourceMimetype);
223            }
224            // If not found in the registry or not given as a parameter.
225            // Try to sniff ! What does that smell ? :)
226            if (sourceFormat == null) {
227                sourceFormat = getSourceFormat(documentConverter, sourceFile);
228            }
229
230            // From plugin settings because we know the destination
231            // mimetype.
232            DocumentFormat destinationFormat = getDestinationFormat(documentConverter, sourceFormat, pdfa1);
233
234            // allow HTML2PDF filtering
235
236            List<Blob> blobs = new ArrayList<>();
237
238            if (descriptor.getDestinationMimeType().equals("text/html")) {
239                String tmpDirPath = getTmpDirectory();
240                File myTmpDir = new File(tmpDirPath + "/JODConv_" + System.currentTimeMillis());
241                boolean created = myTmpDir.mkdir();
242                if (!created) {
243                    throw new IOException("Unable to create temp dir");
244                }
245
246                outFile = new File(myTmpDir.getAbsolutePath() + "/" + "NXJOOoConverterDocumentOut."
247                        + destinationFormat.getExtension());
248
249                created = outFile.createNewFile();
250                if (!created) {
251                    throw new IOException("Unable to create temp file");
252                }
253
254                log.debug("Input File = " + outFile.getAbsolutePath());
255                // Perform the actual conversion.
256                documentConverter.convert(sourceFile, outFile, destinationFormat);
257
258                files = myTmpDir.listFiles();
259                for (File file : files) {
260                    // copy the files to a new tmp location, as we'll delete them
261                    Blob blob;
262                    try (FileInputStream in = new FileInputStream(file)) {
263                        blob = Blobs.createBlob(in);
264                    }
265                    blob.setFilename(file.getName());
266                    blobs.add(blob);
267                    // add a blob for the index
268                    if (file.getName().equals(outFile.getName())) {
269                        Blob indexBlob;
270                        try (FileInputStream in = new FileInputStream(file)) {
271                            indexBlob = Blobs.createBlob(in);
272                        }
273                        indexBlob.setFilename("index.html");
274                        blobs.add(0, indexBlob);
275                    }
276                }
277
278            } else {
279                outFile = Framework.createTempFile("NXJOOoConverterDocumentOut", '.' + destinationFormat.getExtension());
280
281                // Perform the actual conversion.
282                documentConverter.convert(sourceFile, outFile, destinationFormat, parameters);
283
284                Blob blob;
285                try (FileInputStream in = new FileInputStream(outFile)) {
286                    blob = Blobs.createBlob(in, getDestinationMimeType());
287                }
288                blobs.add(blob);
289            }
290            return new SimpleCachableBlobHolder(blobs);
291        } catch (IOException e) {
292            String msg = String.format("An error occurred trying to convert file %s to from %s to %s", blobPath,
293                    sourceMimetype, getDestinationMimeType());
294            throw new ConversionException(msg, e);
295        } finally {
296            if (sourceFile != null) {
297                sourceFile.delete();
298            }
299            if (outFile != null) {
300                outFile.delete();
301            }
302
303            if (files != null) {
304                for (File file : files) {
305                    if (file.exists()) {
306                        file.delete();
307                    }
308                }
309            }
310        }
311
312    }
313
314    protected OfficeDocumentConverter newDocumentConverter() throws ConversionException {
315        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
316        OfficeDocumentConverter documentConverter = oooManagerService.getDocumentConverter();
317        if (documentConverter == null) {
318            throw new ConversionException("Could not connect to the remote OpenOffice server");
319        }
320        return documentConverter;
321    }
322
323    @SuppressWarnings("hiding")
324    @Override
325    public void init(ConverterDescriptor descriptor) {
326        this.descriptor = descriptor;
327    }
328
329    @Override
330    public ConverterCheckResult isConverterAvailable() {
331        ConverterCheckResult result = new ConverterCheckResult();
332        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
333        if (!oooManagerService.isOOoManagerStarted()) {
334            result.setAvailable(false);
335        }
336        return result;
337    }
338
339    protected String getTmpDirectory() {
340        String tmp = null;
341        Map<String, String> parameters = descriptor.getParameters();
342        if (parameters != null && parameters.containsKey(TMP_PATH_PARAMETER)) {
343            tmp = parameters.get(TMP_PATH_PARAMETER);
344        }
345        if (tmp == null) {
346            tmp = Environment.getDefault().getTemp().getPath();
347        }
348        return tmp;
349    }
350
351    /**
352     * Checks if the {@code inputBlob} string contains a {@code charset} meta tag. If not, add it.
353     *
354     * @param inputBlob the input blob
355     * @throws IOException Signals that an I/O exception has occurred.
356     */
357    protected Blob checkCharsetMeta(Blob inputBlob) throws IOException {
358
359        String charset = inputBlob.getEncoding();
360        if (!StringUtils.isEmpty(charset)) {
361            Pattern charsetMetaPattern = Pattern.compile(String.format("content=\"text/html;\\s*charset=%s\"", charset));
362            Matcher charsetMetaMatcher = charsetMetaPattern.matcher(inputBlob.getString());
363            if (!charsetMetaMatcher.find()) {
364                String charsetMetaTag = String.format(
365                        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">", charset);
366                StringBuilder sb = new StringBuilder(charsetMetaTag);
367                sb.append(new String(inputBlob.getByteArray(), charset));
368                Blob blobWithCharsetMetaTag = Blobs.createBlob(sb.toString(), "text/html", charset,
369                        inputBlob.getFilename());
370                return blobWithCharsetMetaTag;
371            }
372        }
373        return inputBlob;
374    }
375}