001/*
002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Tiry
018 *     Florent Guillaume
019 *     Estelle Giuly <egiuly@nuxeo.com>
020 */
021package org.nuxeo.ecm.core.convert.service;
022
023import java.io.File;
024import java.io.IOException;
025import java.io.Serializable;
026import java.nio.file.Path;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032import java.util.function.Function;
033import java.util.regex.Matcher;
034import java.util.regex.Pattern;
035
036import javax.ws.rs.core.MediaType;
037
038import org.apache.commons.io.FilenameUtils;
039import org.apache.commons.lang3.StringUtils;
040import org.apache.logging.log4j.LogManager;
041import org.apache.logging.log4j.Logger;
042import org.nuxeo.common.utils.FileUtils;
043import org.nuxeo.ecm.core.api.Blob;
044import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
045import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
046import org.nuxeo.ecm.core.api.impl.blob.StringBlob;
047import org.nuxeo.ecm.core.convert.api.ConversionException;
048import org.nuxeo.ecm.core.convert.api.ConversionService;
049import org.nuxeo.ecm.core.convert.api.ConversionStatus;
050import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
051import org.nuxeo.ecm.core.convert.api.ConverterNotAvailable;
052import org.nuxeo.ecm.core.convert.api.ConverterNotRegistered;
053import org.nuxeo.ecm.core.convert.cache.CacheKeyGenerator;
054import org.nuxeo.ecm.core.convert.cache.ConversionCacheHolder;
055import org.nuxeo.ecm.core.convert.cache.GCTask;
056import org.nuxeo.ecm.core.convert.extension.ChainedConverter;
057import org.nuxeo.ecm.core.convert.extension.Converter;
058import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
059import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
060import org.nuxeo.ecm.core.convert.extension.GlobalConfigDescriptor;
061import org.nuxeo.ecm.core.io.download.DownloadService;
062import org.nuxeo.ecm.core.transientstore.work.TransientStoreWork;
063import org.nuxeo.ecm.core.work.api.Work;
064import org.nuxeo.ecm.core.work.api.WorkManager;
065import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
066import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
067import org.nuxeo.runtime.api.Framework;
068import org.nuxeo.runtime.model.ComponentContext;
069import org.nuxeo.runtime.model.ComponentInstance;
070import org.nuxeo.runtime.model.DefaultComponent;
071import org.nuxeo.runtime.services.config.ConfigurationService;
072
073/**
074 * Runtime Component that also provides the POJO implementation of the {@link ConversionService}.
075 */
076public class ConversionServiceImpl extends DefaultComponent implements ConversionService {
077
078    private static final Logger log = LogManager.getLogger(ConversionServiceImpl.class);
079
080    public static final String CONVERTER_EP = "converter";
081
082    public static final String CONFIG_EP = "configuration";
083
084    /**
085     * @since 10.3
086     */
087    public static final String ENFORCE_SOURCE_MIME_TYPE_CHECK = "nuxeo.convert.enforceSourceMimeTypeCheck";
088
089    protected final Map<String, ConverterDescriptor> converterDescriptors = new HashMap<>();
090
091    protected final MimeTypeTranslationHelper translationHelper = new MimeTypeTranslationHelper();
092
093    protected final GlobalConfigDescriptor config = new GlobalConfigDescriptor();
094
095    protected static ConversionServiceImpl self;
096
097    protected Thread gcThread;
098
099    protected GCTask gcTask;
100
101    @Override
102    public void activate(ComponentContext context) {
103        converterDescriptors.clear();
104        translationHelper.clear();
105        self = this;
106        config.clearCachingDirectory();
107    }
108
109    @Override
110    public void deactivate(ComponentContext context) {
111        if (config.isCacheEnabled()) {
112            ConversionCacheHolder.deleteCache();
113        }
114        self = null;
115        converterDescriptors.clear();
116        translationHelper.clear();
117    }
118
119    /**
120     * Component implementation.
121     */
122    @Override
123    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
124
125        if (CONVERTER_EP.equals(extensionPoint)) {
126            ConverterDescriptor desc = (ConverterDescriptor) contribution;
127            registerConverter(desc);
128        } else if (CONFIG_EP.equals(extensionPoint)) {
129            GlobalConfigDescriptor desc = (GlobalConfigDescriptor) contribution;
130            config.update(desc);
131            config.clearCachingDirectory();
132        } else {
133            log.error("Unable to handle unknown extensionPoint {}", extensionPoint);
134        }
135    }
136
137    @Override
138    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
139    }
140
141    /* Component API */
142
143    public static Converter getConverter(String converterName) {
144        ConverterDescriptor desc = self.converterDescriptors.get(converterName);
145        if (desc == null) {
146            return null;
147        }
148        return desc.getConverterInstance();
149    }
150
151    public static ConverterDescriptor getConverterDescriptor(String converterName) {
152        return self.converterDescriptors.get(converterName);
153    }
154
155    public static long getGCIntervalInMinutes() {
156        return self.config.getGCInterval();
157    }
158
159    public static void setGCIntervalInMinutes(long interval) {
160        self.config.setGCInterval(interval);
161    }
162
163    public static void registerConverter(ConverterDescriptor desc) {
164
165        if (self.converterDescriptors.containsKey(desc.getConverterName())) {
166
167            ConverterDescriptor existing = self.converterDescriptors.get(desc.getConverterName());
168            desc = existing.merge(desc);
169        }
170        desc.initConverter();
171        self.translationHelper.addConverter(desc);
172        self.converterDescriptors.put(desc.getConverterName(), desc);
173    }
174
175    public static int getMaxCacheSizeInKB() {
176        return self.config.getDiskCacheSize();
177    }
178
179    public static void setMaxCacheSizeInKB(int size) {
180        self.config.setDiskCacheSize(size);
181    }
182
183    public static boolean isCacheEnabled() {
184        return self.config.isCacheEnabled();
185    }
186
187    public static String getCacheBasePath() {
188        return self.config.getCachingDirectory();
189    }
190
191    /* Service API */
192
193    @Override
194    public List<String> getRegistredConverters() {
195        return new ArrayList<>(converterDescriptors.keySet());
196    }
197
198    @Override
199    @Deprecated
200    public Blob convertBlobToPDF(Blob blob) {
201        return convertThroughHTML(new SimpleBlobHolder(blob), MimetypeRegistry.PDF_MIMETYPE).getBlob();
202    }
203
204    protected BlobHolder convertThroughHTML(BlobHolder blobHolder, String destMimeType) {
205        Blob blob = blobHolder.getBlob();
206        String mimetype = blob.getMimeType();
207        String filename = blob.getFilename();
208        if (destMimeType.equals(mimetype)) {
209            return blobHolder;
210        }
211
212        Path tempDirectory = null;
213        // Convert the blob to HTML
214        if (!MediaType.TEXT_HTML.equals(mimetype)) {
215            blobHolder = convertBlobToMimeType(blobHolder, MediaType.TEXT_HTML);
216        }
217        try {
218            tempDirectory = Framework.createTempDirectory("blobs");
219            // Replace the image URLs by absolute paths
220            DownloadService downloadService = Framework.getService(DownloadService.class);
221            blobHolder.setBlob(
222                    replaceURLsByAbsolutePaths(blob, tempDirectory, downloadService::resolveBlobFromDownloadUrl));
223            // Convert the blob to the destination mimetype
224            blobHolder = convertBlobToMimeType(blobHolder, destMimeType);
225            adjustBlobName(filename, blobHolder, destMimeType);
226        } catch (IOException e) {
227            throw new ConversionException(e);
228        } finally {
229            if (tempDirectory != null) {
230                org.apache.commons.io.FileUtils.deleteQuietly(tempDirectory.toFile());
231            }
232        }
233        return blobHolder;
234    }
235
236    protected BlobHolder convertBlobToMimeType(BlobHolder bh, String destinationMimeType) {
237        return convertToMimeType(destinationMimeType, bh, Collections.emptyMap());
238    }
239
240    protected void adjustBlobName(String filename, BlobHolder blobHolder, String mimeType) {
241        Blob blob = blobHolder.getBlob();
242        adjustBlobName(filename, blob, mimeType);
243        blobHolder.setBlob(blob);
244    }
245
246    protected void adjustBlobName(String filename, Blob blob, String mimeType) {
247        if (StringUtils.isBlank(filename)) {
248            filename = "file_" + System.currentTimeMillis();
249        } else {
250            filename = FilenameUtils.removeExtension(FilenameUtils.getName(filename));
251        }
252        String extension = Framework.getService(MimetypeRegistry.class)
253                                    .getExtensionsFromMimetypeName(mimeType)
254                                    .stream()
255                                    .findFirst()
256                                    .orElse("bin");
257        blob.setFilename(filename + "." + extension);
258        blob.setMimeType(mimeType);
259    }
260
261    /**
262     * Replace the image URLs of an HTML blob by absolute local paths.
263     *
264     * @since 9.1
265     */
266    protected static Blob replaceURLsByAbsolutePaths(Blob blob, Path tempDirectory, Function<String, Blob> blobResolver)
267            throws IOException {
268        String initialBlobContent = blob.getString();
269        // Find images links in the blob
270        Pattern pattern = Pattern.compile("(src=([\"']))(.*?)(\\2)");
271        Matcher matcher = pattern.matcher(initialBlobContent);
272        StringBuffer sb = new StringBuffer();
273        while (matcher.find()) {
274            // Retrieve the image from the URL
275            String url = matcher.group(3);
276            Blob imageBlob = blobResolver.apply(url);
277            if (imageBlob == null) {
278                break;
279            }
280            // Export the image to a temporary directory in File System
281            String safeFilename = FileUtils.getSafeFilename(imageBlob.getFilename());
282            File imageFile = tempDirectory.resolve(safeFilename).toFile();
283            imageBlob.transferTo(imageFile);
284            // Replace the image URL by its absolute local path
285            matcher.appendReplacement(sb, "$1" + Matcher.quoteReplacement(imageFile.toPath().toString()) + "$4");
286        }
287        matcher.appendTail(sb);
288        String blobContentWithAbsolutePaths = sb.toString();
289        if (blobContentWithAbsolutePaths.equals(initialBlobContent)) {
290            return blob;
291        }
292        // Create a new blob with the new content
293        Blob newBlob = new StringBlob(blobContentWithAbsolutePaths, blob.getMimeType(), blob.getEncoding());
294        newBlob.setFilename(blob.getFilename());
295        return newBlob;
296    }
297
298    @Override
299    public BlobHolder convert(String converterName, BlobHolder blobHolder, Map<String, Serializable> parameters)
300            throws ConversionException {
301
302        // set parameters if null to avoid NPE in converters
303        if (parameters == null) {
304            parameters = new HashMap<>();
305        }
306
307        // exit if not registered
308        ConverterCheckResult check = isConverterAvailable(converterName);
309        if (!check.isAvailable()) {
310            // exit is not installed / configured
311            throw new ConverterNotAvailable(converterName);
312        }
313
314        ConverterDescriptor desc = converterDescriptors.get(converterName);
315        if (desc == null) {
316            throw new ConversionException("Converter " + converterName + " can not be found");
317        }
318
319        // make sure the converter can handle the blob mime type
320        String mimeType = blobHolder.getBlob().getMimeType();
321        if (!hasSourceMimeType(desc, mimeType)) {
322            throw new ConversionException(
323                    String.format("%s mime type not supported by %s converter", mimeType, desc.getConverterName()));
324        }
325
326        String cacheKey = CacheKeyGenerator.computeKey(converterName, blobHolder, parameters);
327
328        BlobHolder result = ConversionCacheHolder.getFromCache(cacheKey);
329
330        if (result == null) {
331            Converter converter = desc.getConverterInstance();
332            result = converter.convert(blobHolder, parameters);
333
334            if (config.isCacheEnabled()) {
335                ConversionCacheHolder.addToCache(cacheKey, result);
336            }
337        } else if (result.getBlobs() != null && result.getBlobs().size() == 1) {
338            // we need to reset the filename if result is a single file from the cache because the name is just a hash
339            result.getBlob().setFilename(null);
340        }
341
342        if (result != null) {
343            updateResultBlobMimeType(result, desc);
344            updateResultBlobFileName(blobHolder, result);
345        }
346
347        return result;
348    }
349
350    /**
351     * Returns true if the converter has the given {@code mimeType} as source mime type, false otherwise.
352     *
353     * @since 10.3
354     */
355    protected boolean hasSourceMimeType(ConverterDescriptor converterDescriptor, String mimeType) {
356        if (!Framework.getService(ConfigurationService.class).isBooleanPropertyTrue(ENFORCE_SOURCE_MIME_TYPE_CHECK)) {
357            return true;
358        }
359
360        return translationHelper.hasCompatibleMimeType(converterDescriptor.getSourceMimeTypes(), mimeType);
361    }
362
363    protected void updateResultBlobMimeType(BlobHolder resultBh, ConverterDescriptor desc) {
364        Blob mainBlob = resultBh.getBlob();
365        if (mainBlob == null) {
366            return;
367        }
368        String mimeType = mainBlob.getMimeType();
369        if (StringUtils.isBlank(mimeType) || mimeType.equals("application/octet-stream")) {
370            mainBlob.setMimeType(desc.getDestinationMimeType());
371        }
372    }
373
374    protected void updateResultBlobFileName(BlobHolder srcBh, BlobHolder resultBh) {
375        Blob mainBlob = resultBh.getBlob();
376        if (mainBlob == null) {
377            return;
378        }
379        String filename = mainBlob.getFilename();
380        if (StringUtils.isBlank(filename) || filename.startsWith("nxblob-")) {
381            Blob srcBlob = srcBh.getBlob();
382            if (srcBlob != null && StringUtils.isNotBlank(srcBlob.getFilename())) {
383                String baseName = FilenameUtils.getBaseName(srcBlob.getFilename());
384
385                MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
386                MimetypeEntry mimeTypeEntry = mimetypeRegistry.getMimetypeEntryByMimeType(mainBlob.getMimeType());
387                List<String> extensions = mimeTypeEntry.getExtensions();
388                String extension;
389                if (!extensions.isEmpty()) {
390                    extension = extensions.get(0);
391                } else {
392                    extension = FilenameUtils.getExtension(filename);
393                    if (extension == null) {
394                        extension = "bin";
395                    }
396                }
397                mainBlob.setFilename(baseName + "." + extension);
398            }
399
400        }
401    }
402
403    @Override
404    public BlobHolder convertToMimeType(String destinationMimeType, BlobHolder blobHolder,
405            Map<String, Serializable> parameters) throws ConversionException {
406        String srcMimeType = blobHolder.getBlob().getMimeType();
407        String converterName = translationHelper.getConverterName(srcMimeType, destinationMimeType);
408        if (converterName == null) {
409            // check if a conversion is available through HTML
410            converterName = translationHelper.getConverterName(srcMimeType, MediaType.TEXT_HTML);
411            if (converterName == null) {
412                throw new ConversionException(String.format("No converters available to convert from %s to %s.",
413                        srcMimeType, destinationMimeType));
414            }
415            // Use a chain of 2 converters which will first try to go through HTML,
416            // then HTML to the destination mimetype
417            return convertThroughHTML(blobHolder, destinationMimeType);
418        } else {
419            return convert(converterName, blobHolder, parameters);
420        }
421    }
422
423    @Override
424    public List<String> getConverterNames(String sourceMimeType, String destinationMimeType) {
425        return translationHelper.getConverterNames(sourceMimeType, destinationMimeType);
426    }
427
428    @Override
429    public String getConverterName(String sourceMimeType, String destinationMimeType) {
430        return translationHelper.getConverterName(sourceMimeType, destinationMimeType);
431    }
432
433    @Override
434    public ConverterCheckResult isConverterAvailable(String converterName) throws ConversionException {
435        return isConverterAvailable(converterName, false);
436    }
437
438    protected final Map<String, ConverterCheckResult> checkResultCache = new HashMap<>();
439
440    @Override
441    public ConverterCheckResult isConverterAvailable(String converterName, boolean refresh)
442            throws ConverterNotRegistered {
443
444        if (!refresh) {
445            if (checkResultCache.containsKey(converterName)) {
446                return checkResultCache.get(converterName);
447            }
448        }
449
450        ConverterDescriptor descriptor = converterDescriptors.get(converterName);
451        if (descriptor == null) {
452            throw new ConverterNotRegistered(converterName);
453        }
454
455        Converter converter = descriptor.getConverterInstance();
456
457        ConverterCheckResult result;
458        if (converter instanceof ExternalConverter) {
459            ExternalConverter exConverter = (ExternalConverter) converter;
460            result = exConverter.isConverterAvailable();
461        } else if (converter instanceof ChainedConverter) {
462            ChainedConverter chainedConverter = (ChainedConverter) converter;
463            result = new ConverterCheckResult();
464            if (chainedConverter.isSubConvertersBased()) {
465                for (String subConverterName : chainedConverter.getSubConverters()) {
466                    result = isConverterAvailable(subConverterName, refresh);
467                    if (!result.isAvailable()) {
468                        break;
469                    }
470                }
471            }
472        } else {
473            // return success since there is nothing to test
474            result = new ConverterCheckResult();
475        }
476
477        result.setSupportedInputMimeTypes(descriptor.getSourceMimeTypes());
478        checkResultCache.put(converterName, result);
479
480        return result;
481    }
482
483    @Override
484    public boolean isSourceMimeTypeSupported(String converterName, String sourceMimeType) {
485        return getConverterDescriptor(converterName).getSourceMimeTypes().contains(sourceMimeType);
486    }
487
488    @Override
489    public String scheduleConversion(String converterName, BlobHolder blobHolder,
490            Map<String, Serializable> parameters) {
491        WorkManager workManager = Framework.getService(WorkManager.class);
492        ConversionWork work = new ConversionWork(converterName, null, blobHolder, parameters);
493        workManager.schedule(work);
494        return work.getId();
495    }
496
497    @Override
498    public String scheduleConversionToMimeType(String destinationMimeType, BlobHolder blobHolder,
499            Map<String, Serializable> parameters) {
500        WorkManager workManager = Framework.getService(WorkManager.class);
501        ConversionWork work = new ConversionWork(null, destinationMimeType, blobHolder, parameters);
502        workManager.schedule(work);
503        return work.getId();
504    }
505
506    @Override
507    public ConversionStatus getConversionStatus(String id) {
508        WorkManager workManager = Framework.getService(WorkManager.class);
509        Work.State workState = workManager.getWorkState(id);
510        if (workState == null) {
511            String entryKey = TransientStoreWork.computeEntryKey(id);
512            if (TransientStoreWork.containsBlobHolder(entryKey)) {
513                return new ConversionStatus(id, ConversionStatus.Status.COMPLETED);
514            }
515            return null;
516        }
517
518        return new ConversionStatus(id, ConversionStatus.Status.valueOf(workState.name()));
519    }
520
521    @Override
522    public BlobHolder getConversionResult(String id, boolean cleanTransientStoreEntry) {
523        String entryKey = TransientStoreWork.computeEntryKey(id);
524        BlobHolder bh = TransientStoreWork.getBlobHolder(entryKey);
525        if (cleanTransientStoreEntry) {
526            TransientStoreWork.removeBlobHolder(entryKey);
527        }
528        return bh;
529    }
530
531    @Override
532    public <T> T getAdapter(Class<T> adapter) {
533        if (adapter.isAssignableFrom(MimeTypeTranslationHelper.class)) {
534            return adapter.cast(translationHelper);
535        }
536        return super.getAdapter(adapter);
537    }
538
539    @Override
540    public void start(ComponentContext context) {
541        startGC();
542    }
543
544    @Override
545    public void stop(ComponentContext context) {
546        endGC();
547    }
548
549    protected void startGC() {
550        log.debug("CasheCGTaskActivator activated starting GC thread");
551        gcTask = new GCTask();
552        gcThread = new Thread(gcTask, "Nuxeo-Convert-GC");
553        gcThread.setDaemon(true);
554        gcThread.start();
555        log.debug("GC Thread started");
556
557    }
558
559    public void endGC() {
560        if (gcTask == null) {
561            return;
562        }
563        log.debug("Stopping GC Thread");
564        gcTask.GCEnabled = false;
565        gcTask = null;
566        gcThread.interrupt();
567        gcThread = null;
568    }
569
570}