001/*
002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Tiry
018 *     Florent Guillaume
019 *     Estelle Giuly <egiuly@nuxeo.com>
020 */
021package org.nuxeo.ecm.core.convert.service;
022
023import java.io.File;
024import java.io.IOException;
025import java.io.Serializable;
026import java.nio.file.Path;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032import java.util.function.Function;
033import java.util.regex.Matcher;
034import java.util.regex.Pattern;
035
036import javax.ws.rs.core.MediaType;
037
038import org.apache.commons.io.FilenameUtils;
039import org.apache.commons.lang3.StringUtils;
040import org.apache.commons.logging.Log;
041import org.apache.commons.logging.LogFactory;
042import org.nuxeo.common.utils.FileUtils;
043import org.nuxeo.ecm.core.api.Blob;
044import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
045import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
046import org.nuxeo.ecm.core.api.impl.blob.StringBlob;
047import org.nuxeo.ecm.core.convert.api.ConversionException;
048import org.nuxeo.ecm.core.convert.api.ConversionService;
049import org.nuxeo.ecm.core.convert.api.ConversionStatus;
050import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
051import org.nuxeo.ecm.core.convert.api.ConverterNotAvailable;
052import org.nuxeo.ecm.core.convert.api.ConverterNotRegistered;
053import org.nuxeo.ecm.core.convert.cache.CacheKeyGenerator;
054import org.nuxeo.ecm.core.convert.cache.ConversionCacheHolder;
055import org.nuxeo.ecm.core.convert.cache.GCTask;
056import org.nuxeo.ecm.core.convert.extension.ChainedConverter;
057import org.nuxeo.ecm.core.convert.extension.Converter;
058import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
059import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
060import org.nuxeo.ecm.core.convert.extension.GlobalConfigDescriptor;
061import org.nuxeo.ecm.core.io.download.DownloadService;
062import org.nuxeo.ecm.core.transientstore.work.TransientStoreWork;
063import org.nuxeo.ecm.core.work.api.Work;
064import org.nuxeo.ecm.core.work.api.WorkManager;
065import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
066import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
067import org.nuxeo.runtime.api.Framework;
068import org.nuxeo.runtime.model.ComponentContext;
069import org.nuxeo.runtime.model.ComponentInstance;
070import org.nuxeo.runtime.model.DefaultComponent;
071
072/**
073 * Runtime Component that also provides the POJO implementation of the {@link ConversionService}.
074 */
075public class ConversionServiceImpl extends DefaultComponent implements ConversionService {
076
    /** Logger used by this component. */
    protected static final Log log = LogFactory.getLog(ConversionServiceImpl.class);

    /** Extension point name whose contributions are handled as {@link ConverterDescriptor}s. */
    public static final String CONVERTER_EP = "converter";

    /** Extension point name whose contributions are handled as {@link GlobalConfigDescriptor}s. */
    public static final String CONFIG_EP = "configuration";

    /** Registered converter descriptors, keyed by converter name. */
    protected final Map<String, ConverterDescriptor> converterDescriptors = new HashMap<>();

    /** Helper used to look up converter names from a source/destination mime type pair. */
    protected final MimeTypeTranslationHelper translationHelper = new MimeTypeTranslationHelper();

    /** Global configuration, updated by contributions to the {@link #CONFIG_EP} extension point. */
    protected final GlobalConfigDescriptor config = new GlobalConfigDescriptor();

    /** Instance backing the static accessors; set on activation and reset to {@code null} on deactivation. */
    protected static ConversionServiceImpl self;

    /** Thread running {@link #gcTask}; created when the GC is started. */
    protected Thread gcThread;

    /** Cache GC task; {@code null} while the GC is not running. */
    protected GCTask gcTask;
094
095    @Override
096    public void activate(ComponentContext context) {
097        converterDescriptors.clear();
098        translationHelper.clear();
099        self = this;
100        config.clearCachingDirectory();
101    }
102
103    @Override
104    public void deactivate(ComponentContext context) {
105        if (config.isCacheEnabled()) {
106            ConversionCacheHolder.deleteCache();
107        }
108        self = null;
109        converterDescriptors.clear();
110        translationHelper.clear();
111    }
112
113    /**
114     * Component implementation.
115     */
116    @Override
117    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
118
119        if (CONVERTER_EP.equals(extensionPoint)) {
120            ConverterDescriptor desc = (ConverterDescriptor) contribution;
121            registerConverter(desc);
122        } else if (CONFIG_EP.equals(extensionPoint)) {
123            GlobalConfigDescriptor desc = (GlobalConfigDescriptor) contribution;
124            config.update(desc);
125            config.clearCachingDirectory();
126        } else {
127            log.error("Unable to handle unknown extensionPoint " + extensionPoint);
128        }
129    }
130
131    @Override
132    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
133    }
134
135    /* Component API */
136
137    public static Converter getConverter(String converterName) {
138        ConverterDescriptor desc = self.converterDescriptors.get(converterName);
139        if (desc == null) {
140            return null;
141        }
142        return desc.getConverterInstance();
143    }
144
145    public static ConverterDescriptor getConverterDescriptor(String converterName) {
146        return self.converterDescriptors.get(converterName);
147    }
148
149    public static long getGCIntervalInMinutes() {
150        return self.config.getGCInterval();
151    }
152
153    public static void setGCIntervalInMinutes(long interval) {
154        self.config.setGCInterval(interval);
155    }
156
157    public static void registerConverter(ConverterDescriptor desc) {
158
159        if (self.converterDescriptors.containsKey(desc.getConverterName())) {
160
161            ConverterDescriptor existing = self.converterDescriptors.get(desc.getConverterName());
162            desc = existing.merge(desc);
163        }
164        desc.initConverter();
165        self.translationHelper.addConverter(desc);
166        self.converterDescriptors.put(desc.getConverterName(), desc);
167    }
168
169    public static int getMaxCacheSizeInKB() {
170        return self.config.getDiskCacheSize();
171    }
172
173    public static void setMaxCacheSizeInKB(int size) {
174        self.config.setDiskCacheSize(size);
175    }
176
177    public static boolean isCacheEnabled() {
178        return self.config.isCacheEnabled();
179    }
180
181    public static String getCacheBasePath() {
182        return self.config.getCachingDirectory();
183    }
184
185    /* Service API */
186
187    @Override
188    public List<String> getRegistredConverters() {
189        List<String> converterNames = new ArrayList<>();
190        converterNames.addAll(converterDescriptors.keySet());
191        return converterNames;
192    }
193
194    @Override
195    @Deprecated
196    public Blob convertBlobToPDF(Blob blob) throws IOException {
197        return convertThroughHTML(new SimpleBlobHolder(blob), MimetypeRegistry.PDF_MIMETYPE).getBlob();
198    }
199
200    protected BlobHolder convertThroughHTML(BlobHolder blobHolder, String destMimeType) {
201        Blob blob = blobHolder.getBlob();
202        String mimetype = blob.getMimeType();
203        String filename = blob.getFilename();
204        if (destMimeType.equals(mimetype)) {
205            return blobHolder;
206        }
207
208        Path tempDirectory = null;
209        // Convert the blob to HTML
210        if (!MediaType.TEXT_HTML.equals(mimetype)) {
211            blobHolder = convertBlobToMimeType(blobHolder, MediaType.TEXT_HTML);
212        }
213        try {
214            tempDirectory = Framework.createTempDirectory("blobs");
215            // Replace the image URLs by absolute paths
216            DownloadService downloadService = Framework.getService(DownloadService.class);
217            blobHolder.setBlob(
218                    replaceURLsByAbsolutePaths(blob, tempDirectory, downloadService::resolveBlobFromDownloadUrl));
219            // Convert the blob to the destination mimetype
220            blobHolder = convertBlobToMimeType(blobHolder, destMimeType);
221            adjustBlobName(filename, blobHolder, destMimeType);
222        } catch (IOException e) {
223            throw new ConversionException(e);
224        } finally {
225            if (tempDirectory != null) {
226                org.apache.commons.io.FileUtils.deleteQuietly(tempDirectory.toFile());
227            }
228        }
229        return blobHolder;
230    }
231
232    protected BlobHolder convertBlobToMimeType(BlobHolder bh, String destinationMimeType) {
233        return convertToMimeType(destinationMimeType, bh, Collections.emptyMap());
234    }
235
236    protected void adjustBlobName(String filename, BlobHolder blobHolder, String mimeType) {
237        Blob blob = blobHolder.getBlob();
238        adjustBlobName(filename, blob, mimeType);
239        blobHolder.setBlob(blob);
240    }
241
242    protected void adjustBlobName(String filename, Blob blob, String mimeType) {
243        if (StringUtils.isBlank(filename)) {
244            filename = "file_" + System.currentTimeMillis();
245        } else {
246            filename = FilenameUtils.removeExtension(FilenameUtils.getName(filename));
247        }
248        String extension = Framework.getService(MimetypeRegistry.class)
249                                    .getExtensionsFromMimetypeName(mimeType)
250                                    .stream()
251                                    .findFirst()
252                                    .orElse("bin");
253        blob.setFilename(filename + "." + extension);
254        blob.setMimeType(mimeType);
255    }
256
257    /**
258     * Replace the image URLs of an HTML blob by absolute local paths.
259     *
260     * @since 9.1
261     */
262    protected static Blob replaceURLsByAbsolutePaths(Blob blob, Path tempDirectory, Function<String, Blob> blobResolver)
263            throws IOException {
264        String initialBlobContent = blob.getString();
265        // Find images links in the blob
266        Pattern pattern = Pattern.compile("(src=([\"']))(.*?)(\\2)");
267        Matcher matcher = pattern.matcher(initialBlobContent);
268        StringBuffer sb = new StringBuffer();
269        while (matcher.find()) {
270            // Retrieve the image from the URL
271            String url = matcher.group(3);
272            Blob imageBlob = blobResolver.apply(url);
273            if (imageBlob == null) {
274                break;
275            }
276            // Export the image to a temporary directory in File System
277            String safeFilename = FileUtils.getSafeFilename(imageBlob.getFilename());
278            File imageFile = tempDirectory.resolve(safeFilename).toFile();
279            imageBlob.transferTo(imageFile);
280            // Replace the image URL by its absolute local path
281            matcher.appendReplacement(sb, "$1" + Matcher.quoteReplacement(imageFile.toPath().toString()) + "$4");
282        }
283        matcher.appendTail(sb);
284        String blobContentWithAbsolutePaths = sb.toString();
285        if (blobContentWithAbsolutePaths.equals(initialBlobContent)) {
286            return blob;
287        }
288        // Create a new blob with the new content
289        Blob newBlob = new StringBlob(blobContentWithAbsolutePaths, blob.getMimeType(), blob.getEncoding());
290        newBlob.setFilename(blob.getFilename());
291        return newBlob;
292    }
293
294    @Override
295    public BlobHolder convert(String converterName, BlobHolder blobHolder, Map<String, Serializable> parameters)
296            throws ConversionException {
297
298        // set parameters if null to avoid NPE in converters
299        if (parameters == null) {
300            parameters = new HashMap<>();
301        }
302
303        // exist if not registered
304        ConverterCheckResult check = isConverterAvailable(converterName);
305        if (!check.isAvailable()) {
306            // exist is not installed / configured
307            throw new ConverterNotAvailable(converterName);
308        }
309
310        ConverterDescriptor desc = converterDescriptors.get(converterName);
311        if (desc == null) {
312            throw new ConversionException("Converter " + converterName + " can not be found");
313        }
314
315        String cacheKey = CacheKeyGenerator.computeKey(converterName, blobHolder, parameters);
316
317        BlobHolder result = ConversionCacheHolder.getFromCache(cacheKey);
318
319        if (result == null) {
320            Converter converter = desc.getConverterInstance();
321            result = converter.convert(blobHolder, parameters);
322
323            if (config.isCacheEnabled()) {
324                ConversionCacheHolder.addToCache(cacheKey, result);
325            }
326        } else {
327            // we need to reset the filename if result came from cache because it's just a hash
328            result.getBlob().setFilename(null);
329        }
330
331        if (result != null) {
332            updateResultBlobMimeType(result, desc);
333            updateResultBlobFileName(blobHolder, result);
334        }
335
336        return result;
337    }
338
339    protected void updateResultBlobMimeType(BlobHolder resultBh, ConverterDescriptor desc) {
340        Blob mainBlob = resultBh.getBlob();
341        if (mainBlob == null) {
342            return;
343        }
344        String mimeType = mainBlob.getMimeType();
345        if (StringUtils.isBlank(mimeType) || mimeType.equals("application/octet-stream")) {
346            mainBlob.setMimeType(desc.getDestinationMimeType());
347        }
348    }
349
350    protected void updateResultBlobFileName(BlobHolder srcBh, BlobHolder resultBh) {
351        Blob mainBlob = resultBh.getBlob();
352        if (mainBlob == null) {
353            return;
354        }
355        String filename = mainBlob.getFilename();
356        if (StringUtils.isBlank(filename) || filename.startsWith("nxblob-")) {
357            Blob srcBlob = srcBh.getBlob();
358            if (srcBlob != null && StringUtils.isNotBlank(srcBlob.getFilename())) {
359                String baseName = FilenameUtils.getBaseName(srcBlob.getFilename());
360
361                MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
362                MimetypeEntry mimeTypeEntry = mimetypeRegistry.getMimetypeEntryByMimeType(mainBlob.getMimeType());
363                List<String> extensions = mimeTypeEntry.getExtensions();
364                String extension;
365                if (!extensions.isEmpty()) {
366                    extension = extensions.get(0);
367                } else {
368                    extension = FilenameUtils.getExtension(filename);
369                    if (extension == null) {
370                        extension = "bin";
371                    }
372                }
373                mainBlob.setFilename(baseName + "." + extension);
374            }
375
376        }
377    }
378
379    @Override
380    public BlobHolder convertToMimeType(String destinationMimeType, BlobHolder blobHolder,
381            Map<String, Serializable> parameters) throws ConversionException {
382        String srcMimeType = blobHolder.getBlob().getMimeType();
383        String converterName = translationHelper.getConverterName(srcMimeType, destinationMimeType);
384        if (converterName == null) {
385            // check if a conversion is available through HTML
386            converterName = translationHelper.getConverterName(srcMimeType, MediaType.TEXT_HTML);
387            if (converterName == null) {
388                throw new ConversionException(String.format("No converters available to convert from %s to %s.",
389                        srcMimeType, destinationMimeType));
390            }
391            // Use a chain of 2 converters which will first try to go through HTML,
392            // then HTML to the destination mimetype
393            return convertThroughHTML(blobHolder, destinationMimeType);
394        } else {
395            return convert(converterName, blobHolder, parameters);
396        }
397    }
398
399    @Override
400    public List<String> getConverterNames(String sourceMimeType, String destinationMimeType) {
401        return translationHelper.getConverterNames(sourceMimeType, destinationMimeType);
402    }
403
404    @Override
405    public String getConverterName(String sourceMimeType, String destinationMimeType) {
406        List<String> converterNames = getConverterNames(sourceMimeType, destinationMimeType);
407        if (!converterNames.isEmpty()) {
408            return converterNames.get(converterNames.size() - 1);
409        }
410        return null;
411    }
412
413    @Override
414    public ConverterCheckResult isConverterAvailable(String converterName) throws ConversionException {
415        return isConverterAvailable(converterName, false);
416    }
417
418    protected final Map<String, ConverterCheckResult> checkResultCache = new HashMap<>();
419
420    @Override
421    public ConverterCheckResult isConverterAvailable(String converterName, boolean refresh)
422            throws ConverterNotRegistered {
423
424        if (!refresh) {
425            if (checkResultCache.containsKey(converterName)) {
426                return checkResultCache.get(converterName);
427            }
428        }
429
430        ConverterDescriptor descriptor = converterDescriptors.get(converterName);
431        if (descriptor == null) {
432            throw new ConverterNotRegistered(converterName);
433        }
434
435        Converter converter = descriptor.getConverterInstance();
436
437        ConverterCheckResult result;
438        if (converter instanceof ExternalConverter) {
439            ExternalConverter exConverter = (ExternalConverter) converter;
440            result = exConverter.isConverterAvailable();
441        } else if (converter instanceof ChainedConverter) {
442            ChainedConverter chainedConverter = (ChainedConverter) converter;
443            result = new ConverterCheckResult();
444            if (chainedConverter.isSubConvertersBased()) {
445                for (String subConverterName : chainedConverter.getSubConverters()) {
446                    result = isConverterAvailable(subConverterName, refresh);
447                    if (!result.isAvailable()) {
448                        break;
449                    }
450                }
451            }
452        } else {
453            // return success since there is nothing to test
454            result = new ConverterCheckResult();
455        }
456
457        result.setSupportedInputMimeTypes(descriptor.getSourceMimeTypes());
458        checkResultCache.put(converterName, result);
459
460        return result;
461    }
462
463    @Override
464    public boolean isSourceMimeTypeSupported(String converterName, String sourceMimeType) {
465        return getConverterDescriptor(converterName).getSourceMimeTypes().contains(sourceMimeType);
466    }
467
468    @Override
469    public String scheduleConversion(String converterName, BlobHolder blobHolder,
470            Map<String, Serializable> parameters) {
471        WorkManager workManager = Framework.getService(WorkManager.class);
472        ConversionWork work = new ConversionWork(converterName, null, blobHolder, parameters);
473        workManager.schedule(work);
474        return work.getId();
475    }
476
477    @Override
478    public String scheduleConversionToMimeType(String destinationMimeType, BlobHolder blobHolder,
479            Map<String, Serializable> parameters) {
480        WorkManager workManager = Framework.getService(WorkManager.class);
481        ConversionWork work = new ConversionWork(null, destinationMimeType, blobHolder, parameters);
482        workManager.schedule(work);
483        return work.getId();
484    }
485
486    @Override
487    public ConversionStatus getConversionStatus(String id) {
488        WorkManager workManager = Framework.getService(WorkManager.class);
489        Work.State workState = workManager.getWorkState(id);
490        if (workState == null) {
491            String entryKey = TransientStoreWork.computeEntryKey(id);
492            if (TransientStoreWork.containsBlobHolder(entryKey)) {
493                return new ConversionStatus(id, ConversionStatus.Status.COMPLETED);
494            }
495            return null;
496        }
497
498        return new ConversionStatus(id, ConversionStatus.Status.valueOf(workState.name()));
499    }
500
501    @Override
502    public BlobHolder getConversionResult(String id, boolean cleanTransientStoreEntry) {
503        String entryKey = TransientStoreWork.computeEntryKey(id);
504        BlobHolder bh = TransientStoreWork.getBlobHolder(entryKey);
505        if (cleanTransientStoreEntry) {
506            TransientStoreWork.removeBlobHolder(entryKey);
507        }
508        return bh;
509    }
510
511    @Override
512    public <T> T getAdapter(Class<T> adapter) {
513        if (adapter.isAssignableFrom(MimeTypeTranslationHelper.class)) {
514            return adapter.cast(translationHelper);
515        }
516        return super.getAdapter(adapter);
517    }
518
519    @Override
520    public void start(ComponentContext context) {
521        startGC();
522    }
523
524    @Override
525    public void stop(ComponentContext context) {
526        endGC();
527    }
528
529    protected void startGC() {
530        log.debug("CasheCGTaskActivator activated starting GC thread");
531        gcTask = new GCTask();
532        gcThread = new Thread(gcTask, "Nuxeo-Convert-GC");
533        gcThread.setDaemon(true);
534        gcThread.start();
535        log.debug("GC Thread started");
536
537    }
538
539    public void endGC() {
540        if (gcTask == null) {
541            return;
542        }
543        log.debug("Stopping GC Thread");
544        gcTask.GCEnabled = false;
545        gcTask = null;
546        gcThread.interrupt();
547        gcThread = null;
548    }
549
550}