001/*
002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Tiry
018 *     Florent Guillaume
019 *     Estelle Giuly <egiuly@nuxeo.com>
020 */
021package org.nuxeo.ecm.core.convert.service;
022
023import java.io.File;
024import java.io.IOException;
025import java.io.Serializable;
026import java.nio.file.Path;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032import java.util.function.Function;
033import java.util.regex.Matcher;
034import java.util.regex.Pattern;
035
036import javax.ws.rs.core.MediaType;
037
038import org.apache.commons.io.FilenameUtils;
039import org.apache.commons.lang3.StringUtils;
040import org.apache.logging.log4j.LogManager;
041import org.apache.logging.log4j.Logger;
042import org.nuxeo.common.utils.FileUtils;
043import org.nuxeo.ecm.core.api.Blob;
044import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
045import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
046import org.nuxeo.ecm.core.api.impl.blob.StringBlob;
047import org.nuxeo.ecm.core.convert.api.ConversionException;
048import org.nuxeo.ecm.core.convert.api.ConversionService;
049import org.nuxeo.ecm.core.convert.api.ConversionStatus;
050import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
051import org.nuxeo.ecm.core.convert.api.ConverterNotAvailable;
052import org.nuxeo.ecm.core.convert.api.ConverterNotRegistered;
053import org.nuxeo.ecm.core.convert.cache.CacheKeyGenerator;
054import org.nuxeo.ecm.core.convert.cache.ConversionCacheHolder;
055import org.nuxeo.ecm.core.convert.cache.GCTask;
056import org.nuxeo.ecm.core.convert.extension.ChainedConverter;
057import org.nuxeo.ecm.core.convert.extension.Converter;
058import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
059import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
060import org.nuxeo.ecm.core.convert.extension.GlobalConfigDescriptor;
061import org.nuxeo.ecm.core.io.download.DownloadService;
062import org.nuxeo.ecm.core.transientstore.work.TransientStoreWork;
063import org.nuxeo.ecm.core.work.api.Work;
064import org.nuxeo.ecm.core.work.api.WorkManager;
065import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
066import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
067import org.nuxeo.runtime.api.Framework;
068import org.nuxeo.runtime.model.ComponentContext;
069import org.nuxeo.runtime.model.ComponentInstance;
070import org.nuxeo.runtime.model.DefaultComponent;
071import org.nuxeo.runtime.services.config.ConfigurationService;
072
073/**
074 * Runtime Component that also provides the POJO implementation of the {@link ConversionService}.
075 */
076public class ConversionServiceImpl extends DefaultComponent implements ConversionService {
077
078    private static final Logger log = LogManager.getLogger(ConversionServiceImpl.class);
079
080    public static final String CONVERTER_EP = "converter";
081
082    public static final String CONFIG_EP = "configuration";
083
084    /**
085     * @since 10.3
086     */
087    public static final String ENFORCE_SOURCE_MIME_TYPE_CHECK = "nuxeo.convert.enforceSourceMimeTypeCheck";
088
089    protected final Map<String, ConverterDescriptor> converterDescriptors = new HashMap<>();
090
091    protected final MimeTypeTranslationHelper translationHelper = new MimeTypeTranslationHelper();
092
093    protected final GlobalConfigDescriptor config = new GlobalConfigDescriptor();
094
095    protected Thread gcThread;
096
097    protected GCTask gcTask;
098
099    @Override
100    public void activate(ComponentContext context) {
101        converterDescriptors.clear();
102        translationHelper.clear();
103        config.clearCachingDirectory();
104    }
105
106    @Override
107    public void deactivate(ComponentContext context) {
108        if (config.isCacheEnabled()) {
109            ConversionCacheHolder.deleteCache();
110        }
111        converterDescriptors.clear();
112        translationHelper.clear();
113    }
114
115    /**
116     * Component implementation.
117     */
118    @Override
119    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
120
121        if (CONVERTER_EP.equals(extensionPoint)) {
122            ConverterDescriptor desc = (ConverterDescriptor) contribution;
123            registerConverter(desc);
124        } else if (CONFIG_EP.equals(extensionPoint)) {
125            GlobalConfigDescriptor desc = (GlobalConfigDescriptor) contribution;
126            config.update(desc);
127            config.clearCachingDirectory();
128        } else {
129            log.error("Unable to handle unknown extensionPoint {}", extensionPoint);
130        }
131    }
132
133    @Override
134    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
135    }
136
137    /* Component API */
138
139    private static ConversionServiceImpl getConversionService() {
140        return (ConversionServiceImpl) Framework.getService(ConversionService.class);
141    }
142
143    public static Converter getConverter(String converterName) {
144        ConverterDescriptor desc = getConversionService().converterDescriptors.get(converterName);
145        if (desc == null) {
146            return null;
147        }
148        return desc.getConverterInstance();
149    }
150
151    public static ConverterDescriptor getConverterDescriptor(String converterName) {
152        return getConversionService().converterDescriptors.get(converterName);
153    }
154
155    public static long getGCIntervalInMinutes() {
156        return getConversionService().config.getGCInterval();
157    }
158
159    public static void setGCIntervalInMinutes(long interval) {
160        getConversionService().config.setGCInterval(interval);
161    }
162
163    public static void registerConverter(ConverterDescriptor desc) {
164
165        ConversionServiceImpl self = getConversionService();
166        if (self.converterDescriptors.containsKey(desc.getConverterName())) {
167
168            ConverterDescriptor existing = self.converterDescriptors.get(desc.getConverterName());
169            desc = existing.merge(desc);
170        }
171        desc.initConverter();
172        self.translationHelper.addConverter(desc);
173        self.converterDescriptors.put(desc.getConverterName(), desc);
174    }
175
176    public static int getMaxCacheSizeInKB() {
177        return getConversionService().config.getDiskCacheSize();
178    }
179
180    public static void setMaxCacheSizeInKB(int size) {
181        getConversionService().config.setDiskCacheSize(size);
182    }
183
184    public static boolean isCacheEnabled() {
185        return getConversionService().config.isCacheEnabled();
186    }
187
188    public static String getCacheBasePath() {
189        return getConversionService().config.getCachingDirectory();
190    }
191
192    /* Service API */
193
194    @Override
195    public List<String> getRegistredConverters() {
196        return new ArrayList<>(converterDescriptors.keySet());
197    }
198
199    @Override
200    @Deprecated
201    public Blob convertBlobToPDF(Blob blob) {
202        return convertThroughHTML(new SimpleBlobHolder(blob), MimetypeRegistry.PDF_MIMETYPE).getBlob();
203    }
204
205    protected BlobHolder convertThroughHTML(BlobHolder blobHolder, String destMimeType) {
206        Blob blob = blobHolder.getBlob();
207        String mimetype = blob.getMimeType();
208        String filename = blob.getFilename();
209        if (destMimeType.equals(mimetype)) {
210            return blobHolder;
211        }
212
213        Path tempDirectory = null;
214        // Convert the blob to HTML
215        if (!MediaType.TEXT_HTML.equals(mimetype)) {
216            blobHolder = convertBlobToMimeType(blobHolder, MediaType.TEXT_HTML);
217            blob = blobHolder.getBlob();
218        }
219        try {
220            tempDirectory = Framework.createTempDirectory("blobs");
221            // Replace the image URLs by absolute paths
222            DownloadService downloadService = Framework.getService(DownloadService.class);
223            blobHolder.setBlob(
224                    replaceURLsByAbsolutePaths(blob, tempDirectory, downloadService::resolveBlobFromDownloadUrl));
225            // Convert the blob to the destination mimetype
226            blobHolder = convertBlobToMimeType(blobHolder, destMimeType);
227            adjustBlobName(filename, blobHolder, destMimeType);
228        } catch (IOException e) {
229            throw new ConversionException(blobHolder, e);
230        } finally {
231            if (tempDirectory != null) {
232                org.apache.commons.io.FileUtils.deleteQuietly(tempDirectory.toFile());
233            }
234        }
235        return blobHolder;
236    }
237
238    protected BlobHolder convertBlobToMimeType(BlobHolder bh, String destinationMimeType) {
239        return convertToMimeType(destinationMimeType, bh, Collections.emptyMap());
240    }
241
242    protected void adjustBlobName(String filename, BlobHolder blobHolder, String mimeType) {
243        Blob blob = blobHolder.getBlob();
244        adjustBlobName(filename, blob, mimeType);
245        blobHolder.setBlob(blob);
246    }
247
248    protected void adjustBlobName(String filename, Blob blob, String mimeType) {
249        if (StringUtils.isBlank(filename)) {
250            filename = "file_" + System.currentTimeMillis();
251        } else {
252            filename = FilenameUtils.removeExtension(FilenameUtils.getName(filename));
253        }
254        String extension = Framework.getService(MimetypeRegistry.class)
255                                    .getExtensionsFromMimetypeName(mimeType)
256                                    .stream()
257                                    .findFirst()
258                                    .orElse("bin");
259        blob.setFilename(filename + "." + extension);
260        blob.setMimeType(mimeType);
261    }
262
263    /**
264     * Replace the image URLs of an HTML blob by absolute local paths.
265     *
266     * @since 9.1
267     */
268    protected static Blob replaceURLsByAbsolutePaths(Blob blob, Path tempDirectory, Function<String, Blob> blobResolver)
269            throws IOException {
270        String initialBlobContent = blob.getString();
271        // Find images links in the blob
272        Pattern pattern = Pattern.compile("(src=([\"']))(.*?)(\\2)");
273        Matcher matcher = pattern.matcher(initialBlobContent);
274        StringBuffer sb = new StringBuffer();
275        while (matcher.find()) {
276            // Retrieve the image from the URL
277            String url = matcher.group(3);
278            Blob imageBlob = blobResolver.apply(url);
279            if (imageBlob == null) {
280                break;
281            }
282            // Export the image to a temporary directory in File System
283            String safeFilename = FileUtils.getSafeFilename(imageBlob.getFilename());
284            File imageFile = tempDirectory.resolve(safeFilename).toFile();
285            imageBlob.transferTo(imageFile);
286            // Replace the image URL by its absolute local path
287            matcher.appendReplacement(sb, "$1" + Matcher.quoteReplacement(imageFile.toPath().toString()) + "$4");
288        }
289        matcher.appendTail(sb);
290        String blobContentWithAbsolutePaths = sb.toString();
291        if (blobContentWithAbsolutePaths.equals(initialBlobContent)) {
292            return blob;
293        }
294        // Create a new blob with the new content
295        Blob newBlob = new StringBlob(blobContentWithAbsolutePaths, blob.getMimeType(), blob.getEncoding());
296        newBlob.setFilename(blob.getFilename());
297        return newBlob;
298    }
299
300    @Override
301    public BlobHolder convert(String converterName, BlobHolder blobHolder, Map<String, Serializable> parameters)
302            throws ConversionException {
303
304        // set parameters if null to avoid NPE in converters
305        if (parameters == null) {
306            parameters = new HashMap<>();
307        }
308
309        // exit if not registered
310        ConverterCheckResult check = isConverterAvailable(converterName);
311        if (!check.isAvailable()) {
312            // exit is not installed / configured
313            throw new ConverterNotAvailable(converterName, blobHolder);
314        }
315
316        ConverterDescriptor desc = converterDescriptors.get(converterName);
317        if (desc == null) {
318            throw new ConversionException("Converter " + converterName + " can not be found", blobHolder);
319        }
320
321        // make sure the converter can handle the blob mime type
322        String mimeType = blobHolder.getBlob().getMimeType();
323        if (!hasSourceMimeType(desc, mimeType)) {
324            throw new ConversionException(
325                    String.format("%s mime type not supported by %s converter", mimeType, desc.getConverterName()),
326                    blobHolder);
327        }
328
329        // Check if conversion is unwanted
330        if (desc.isBypassIfSameMimeType() && desc.getDestinationMimeType().equals(mimeType)) {
331            return blobHolder;
332        }
333
334        String cacheKey = CacheKeyGenerator.computeKey(converterName, blobHolder, parameters);
335
336        BlobHolder result = ConversionCacheHolder.getFromCache(cacheKey);
337
338        if (result == null) {
339            Converter converter = desc.getConverterInstance();
340            result = converter.convert(blobHolder, parameters);
341
342            if (config.isCacheEnabled()) {
343                ConversionCacheHolder.addToCache(cacheKey, result);
344            }
345        } else if (result.getBlobs() != null && result.getBlobs().size() == 1) {
346            // we need to reset the filename if result is a single file from the cache because the name is just a hash
347            result.getBlob().setFilename(null);
348        }
349
350        if (result != null) {
351            updateResultBlobMimeType(result, desc);
352            updateResultBlobFileName(blobHolder, result);
353        }
354
355        return result;
356    }
357
358    /**
359     * Returns true if the converter has the given {@code mimeType} as source mime type, false otherwise.
360     *
361     * @since 10.3
362     */
363    protected boolean hasSourceMimeType(ConverterDescriptor converterDescriptor, String mimeType) {
364        if (!Framework.getService(ConfigurationService.class).isBooleanTrue(ENFORCE_SOURCE_MIME_TYPE_CHECK)) {
365            return true;
366        }
367
368        return translationHelper.hasCompatibleMimeType(converterDescriptor.getSourceMimeTypes(), mimeType);
369    }
370
371    protected void updateResultBlobMimeType(BlobHolder resultBh, ConverterDescriptor desc) {
372        Blob mainBlob = resultBh.getBlob();
373        if (mainBlob == null) {
374            return;
375        }
376        String mimeType = mainBlob.getMimeType();
377        if (StringUtils.isBlank(mimeType) || mimeType.equals("application/octet-stream")) {
378            mainBlob.setMimeType(desc.getDestinationMimeType());
379        }
380    }
381
382    protected void updateResultBlobFileName(BlobHolder srcBh, BlobHolder resultBh) {
383        Blob mainBlob = resultBh.getBlob();
384        if (mainBlob == null) {
385            return;
386        }
387        String filename = mainBlob.getFilename();
388        if (StringUtils.isBlank(filename) || filename.startsWith("nxblob-")) {
389            Blob srcBlob = srcBh.getBlob();
390            if (srcBlob != null && StringUtils.isNotBlank(srcBlob.getFilename())) {
391                String baseName = FilenameUtils.getBaseName(srcBlob.getFilename());
392
393                MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
394                MimetypeEntry mimeTypeEntry = mimetypeRegistry.getMimetypeEntryByMimeType(mainBlob.getMimeType());
395                List<String> extensions = mimeTypeEntry.getExtensions();
396                String extension;
397                if (!extensions.isEmpty()) {
398                    extension = extensions.get(0);
399                } else {
400                    extension = FilenameUtils.getExtension(filename);
401                    if (extension == null) {
402                        extension = "bin";
403                    }
404                }
405                mainBlob.setFilename(baseName + "." + extension);
406            }
407
408        }
409    }
410
411    @Override
412    public BlobHolder convertToMimeType(String destinationMimeType, BlobHolder blobHolder,
413            Map<String, Serializable> parameters) throws ConversionException {
414        String srcMimeType = blobHolder.getBlob().getMimeType();
415        String converterName = translationHelper.getConverterName(srcMimeType, destinationMimeType);
416        if (converterName == null) {
417            // check if a conversion is available through HTML
418            converterName = translationHelper.getConverterName(srcMimeType, MediaType.TEXT_HTML);
419            if (converterName == null) {
420                throw new ConversionException(String.format("No converters available to convert from %s to %s.",
421                        srcMimeType, destinationMimeType), blobHolder);
422            }
423            // Use a chain of 2 converters which will first try to go through HTML,
424            // then HTML to the destination mimetype
425            return convertThroughHTML(blobHolder, destinationMimeType);
426        } else {
427            return convert(converterName, blobHolder, parameters);
428        }
429    }
430
431    @Override
432    public List<String> getConverterNames(String sourceMimeType, String destinationMimeType, boolean allowWildcard) {
433        return translationHelper.getConverterNames(sourceMimeType, destinationMimeType, allowWildcard);
434    }
435
436    @Override
437    public String getConverterName(String sourceMimeType, String destinationMimeType, boolean allowWildcard) {
438        return translationHelper.getConverterName(sourceMimeType, destinationMimeType, allowWildcard);
439    }
440
441    @Override
442    public ConverterCheckResult isConverterAvailable(String converterName) throws ConversionException {
443        return isConverterAvailable(converterName, false);
444    }
445
446    protected final Map<String, ConverterCheckResult> checkResultCache = new HashMap<>();
447
448    @Override
449    public ConverterCheckResult isConverterAvailable(String converterName, boolean refresh)
450            throws ConverterNotRegistered {
451
452        if (!refresh) {
453            if (checkResultCache.containsKey(converterName)) {
454                return checkResultCache.get(converterName);
455            }
456        }
457
458        ConverterDescriptor descriptor = converterDescriptors.get(converterName);
459        if (descriptor == null) {
460            throw new ConverterNotRegistered(converterName);
461        }
462
463        Converter converter = descriptor.getConverterInstance();
464
465        ConverterCheckResult result;
466        if (converter instanceof ExternalConverter) {
467            ExternalConverter exConverter = (ExternalConverter) converter;
468            result = exConverter.isConverterAvailable();
469        } else if (converter instanceof ChainedConverter) {
470            ChainedConverter chainedConverter = (ChainedConverter) converter;
471            result = new ConverterCheckResult();
472            if (chainedConverter.isSubConvertersBased()) {
473                for (String subConverterName : chainedConverter.getSubConverters()) {
474                    result = isConverterAvailable(subConverterName, refresh);
475                    if (!result.isAvailable()) {
476                        break;
477                    }
478                }
479            }
480        } else {
481            // return success since there is nothing to test
482            result = new ConverterCheckResult();
483        }
484
485        result.setSupportedInputMimeTypes(descriptor.getSourceMimeTypes());
486        checkResultCache.put(converterName, result);
487
488        return result;
489    }
490
491    @Override
492    public boolean isSourceMimeTypeSupported(String converterName, String sourceMimeType) {
493        return getConverterDescriptor(converterName).getSourceMimeTypes().contains(sourceMimeType);
494    }
495
496    @Override
497    public String scheduleConversion(String converterName, BlobHolder blobHolder,
498            Map<String, Serializable> parameters) {
499        WorkManager workManager = Framework.getService(WorkManager.class);
500        ConversionWork work = new ConversionWork(converterName, null, blobHolder, parameters);
501        workManager.schedule(work);
502        return work.getId();
503    }
504
505    @Override
506    public String scheduleConversionToMimeType(String destinationMimeType, BlobHolder blobHolder,
507            Map<String, Serializable> parameters) {
508        WorkManager workManager = Framework.getService(WorkManager.class);
509        ConversionWork work = new ConversionWork(null, destinationMimeType, blobHolder, parameters);
510        workManager.schedule(work);
511        return work.getId();
512    }
513
514    @Override
515    public ConversionStatus getConversionStatus(String id) {
516        WorkManager workManager = Framework.getService(WorkManager.class);
517        Work.State workState = workManager.getWorkState(id);
518        if (workState == null) {
519            String entryKey = TransientStoreWork.computeEntryKey(id);
520            if (TransientStoreWork.containsBlobHolder(entryKey)) {
521                return new ConversionStatus(id, ConversionStatus.Status.COMPLETED);
522            }
523            return null;
524        }
525
526        return new ConversionStatus(id, ConversionStatus.Status.valueOf(workState.name()));
527    }
528
529    @Override
530    public BlobHolder getConversionResult(String id, boolean cleanTransientStoreEntry) {
531        String entryKey = TransientStoreWork.computeEntryKey(id);
532        BlobHolder bh = TransientStoreWork.getBlobHolder(entryKey);
533        if (cleanTransientStoreEntry) {
534            TransientStoreWork.removeBlobHolder(entryKey);
535        }
536        return bh;
537    }
538
539    @Override
540    public <T> T getAdapter(Class<T> adapter) {
541        if (adapter.isAssignableFrom(MimeTypeTranslationHelper.class)) {
542            return adapter.cast(translationHelper);
543        }
544        return super.getAdapter(adapter);
545    }
546
547    @Override
548    public void start(ComponentContext context) {
549        startGC();
550    }
551
552    @Override
553    public void stop(ComponentContext context) {
554        endGC();
555    }
556
557    protected void startGC() {
558        log.debug("CasheCGTaskActivator activated starting GC thread");
559        gcTask = new GCTask();
560        gcThread = new Thread(gcTask, "Nuxeo-Convert-GC");
561        gcThread.setDaemon(true);
562        gcThread.start();
563        log.debug("GC Thread started");
564
565    }
566
567    public void endGC() {
568        if (gcTask == null) {
569            return;
570        }
571        log.debug("Stopping GC Thread");
572        gcTask.GCEnabled = false;
573        gcTask = null;
574        gcThread.interrupt();
575        gcThread = null;
576    }
577
578}