001/*
002 * (C) Copyright 2006-2017 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Tiry
018 *     Florent Guillaume
019 *     Estelle Giuly <egiuly@nuxeo.com>
020 */
021package org.nuxeo.ecm.core.convert.service;
022
023import java.io.File;
024import java.io.IOException;
025import java.io.Serializable;
026import java.nio.file.Path;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032import java.util.function.Function;
033import java.util.regex.Matcher;
034import java.util.regex.Pattern;
035
036import javax.ws.rs.core.MediaType;
037
038import org.apache.commons.io.FilenameUtils;
039import org.apache.commons.lang.StringUtils;
040import org.apache.commons.logging.Log;
041import org.apache.commons.logging.LogFactory;
042import org.nuxeo.common.utils.FileUtils;
043import org.nuxeo.ecm.core.api.Blob;
044import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
045import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
046import org.nuxeo.ecm.core.api.impl.blob.StringBlob;
047import org.nuxeo.ecm.core.convert.api.ConversionException;
048import org.nuxeo.ecm.core.convert.api.ConversionService;
049import org.nuxeo.ecm.core.convert.api.ConversionStatus;
050import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
051import org.nuxeo.ecm.core.convert.api.ConverterNotAvailable;
052import org.nuxeo.ecm.core.convert.api.ConverterNotRegistered;
053import org.nuxeo.ecm.core.convert.cache.CacheKeyGenerator;
054import org.nuxeo.ecm.core.convert.cache.ConversionCacheHolder;
055import org.nuxeo.ecm.core.convert.cache.GCTask;
056import org.nuxeo.ecm.core.convert.extension.ChainedConverter;
057import org.nuxeo.ecm.core.convert.extension.Converter;
058import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
059import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
060import org.nuxeo.ecm.core.convert.extension.GlobalConfigDescriptor;
061import org.nuxeo.ecm.core.io.download.DownloadService;
062import org.nuxeo.ecm.core.transientstore.work.TransientStoreWork;
063import org.nuxeo.ecm.core.work.api.Work;
064import org.nuxeo.ecm.core.work.api.WorkManager;
065import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
066import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
067import org.nuxeo.runtime.api.Framework;
068import org.nuxeo.runtime.model.ComponentContext;
069import org.nuxeo.runtime.model.ComponentInstance;
070import org.nuxeo.runtime.model.DefaultComponent;
071
/**
 * Runtime Component that also provides the POJO implementation of the {@link ConversionService}.
 * <p>
 * The instance is published in a static field on activation; the static accessors of this class read their state
 * through that field.
 */
public class ConversionServiceImpl extends DefaultComponent implements ConversionService {

    protected static final Log log = LogFactory.getLog(ConversionServiceImpl.class);

    /** Name of the extension point whose contributions are handled as {@link ConverterDescriptor}s. */
    public static final String CONVERTER_EP = "converter";

    /** Name of the extension point whose contributions are handled as {@link GlobalConfigDescriptor}s. */
    public static final String CONFIG_EP = "configuration";

    /** Registered converter descriptors, keyed by converter name. */
    protected final Map<String, ConverterDescriptor> converterDescriptors = new HashMap<>();

    /** Helper used to look up converter names from a source and a destination mime type. */
    protected final MimeTypeTranslationHelper translationHelper = new MimeTypeTranslationHelper();

    /** Global configuration, updated by the contributions made to {@link #CONFIG_EP}. */
    protected final GlobalConfigDescriptor config = new GlobalConfigDescriptor();

    /** Instance set by {@code activate} and reset to {@code null} by {@code deactivate}. */
    protected static ConversionServiceImpl self;

    /** Thread running {@link #gcTask}; created by {@code startGC} and released by {@code endGC}. */
    protected Thread gcThread;

    /** Task executed by {@link #gcThread}. */
    protected GCTask gcTask;
094
095    @Override
096    public void activate(ComponentContext context) {
097        converterDescriptors.clear();
098        translationHelper.clear();
099        self = this;
100        config.clearCachingDirectory();
101    }
102
103    @Override
104    public void deactivate(ComponentContext context) {
105        if (config.isCacheEnabled()) {
106            ConversionCacheHolder.deleteCache();
107        }
108        self = null;
109        converterDescriptors.clear();
110        translationHelper.clear();
111    }
112
113    /**
114     * Component implementation.
115     */
116    @Override
117    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
118
119        if (CONVERTER_EP.equals(extensionPoint)) {
120            ConverterDescriptor desc = (ConverterDescriptor) contribution;
121            registerConverter(desc);
122        } else if (CONFIG_EP.equals(extensionPoint)) {
123            GlobalConfigDescriptor desc = (GlobalConfigDescriptor) contribution;
124            config.update(desc);
125            config.clearCachingDirectory();
126        } else {
127            log.error("Unable to handle unknown extensionPoint " + extensionPoint);
128        }
129    }
130
    /**
     * Does nothing: contributions registered through {@code registerContribution} are not removed here.
     */
    @Override
    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
    }
134
135    /* Component API */
136
137    public static Converter getConverter(String converterName) {
138        ConverterDescriptor desc = self.converterDescriptors.get(converterName);
139        if (desc == null) {
140            return null;
141        }
142        return desc.getConverterInstance();
143    }
144
145    public static ConverterDescriptor getConverterDescriptor(String converterName) {
146        return self.converterDescriptors.get(converterName);
147    }
148
149    public static long getGCIntervalInMinutes() {
150        return self.config.getGCInterval();
151    }
152
153    public static void setGCIntervalInMinutes(long interval) {
154        self.config.setGCInterval(interval);
155    }
156
157    public static void registerConverter(ConverterDescriptor desc) {
158
159        if (self.converterDescriptors.containsKey(desc.getConverterName())) {
160
161            ConverterDescriptor existing = self.converterDescriptors.get(desc.getConverterName());
162            desc = existing.merge(desc);
163        }
164        desc.initConverter();
165        self.translationHelper.addConverter(desc);
166        self.converterDescriptors.put(desc.getConverterName(), desc);
167    }
168
169    public static int getMaxCacheSizeInKB() {
170        return self.config.getDiskCacheSize();
171    }
172
173    public static void setMaxCacheSizeInKB(int size) {
174        self.config.setDiskCacheSize(size);
175    }
176
177    public static boolean isCacheEnabled() {
178        return self.config.isCacheEnabled();
179    }
180
181    public static String getCacheBasePath() {
182        return self.config.getCachingDirectory();
183    }
184
185    /* Service API */
186
187    @Override
188    public List<String> getRegistredConverters() {
189        List<String> converterNames = new ArrayList<>();
190        converterNames.addAll(converterDescriptors.keySet());
191        return converterNames;
192    }
193
194    @Override
195    @Deprecated
196    public Blob convertBlobToPDF(Blob blob) throws IOException {
197        return convertThroughHTML(new SimpleBlobHolder(blob), MimetypeRegistry.PDF_MIMETYPE).getBlob();
198    }
199
200    protected BlobHolder convertThroughHTML(BlobHolder blobHolder, String destMimeType) {
201        Blob blob = blobHolder.getBlob();
202        String mimetype = blob.getMimeType();
203        String filename = blob.getFilename();
204        if (destMimeType.equals(mimetype)) {
205            return blobHolder;
206        }
207
208        Path tempDirectory = null;
209        // Convert the blob to HTML
210        if (!MediaType.TEXT_HTML.equals(mimetype)) {
211            blobHolder = convertBlobToMimeType(blobHolder, MediaType.TEXT_HTML);
212        }
213        try {
214            tempDirectory = Framework.createTempDirectory("blobs");
215            // Replace the image URLs by absolute paths
216            DownloadService downloadService = Framework.getService(DownloadService.class);
217            blobHolder.setBlob(
218                    replaceURLsByAbsolutePaths(blob, tempDirectory, downloadService::resolveBlobFromDownloadUrl));
219            // Convert the blob to the destination mimetype
220            blobHolder = convertBlobToMimeType(blobHolder, destMimeType);
221            adjustBlobName(filename, blobHolder, destMimeType);
222        } catch (IOException e) {
223            throw new ConversionException(e);
224        } finally {
225            if (tempDirectory != null) {
226                org.apache.commons.io.FileUtils.deleteQuietly(tempDirectory.toFile());
227            }
228        }
229        return blobHolder;
230    }
231
232    protected BlobHolder convertBlobToMimeType(BlobHolder bh, String destinationMimeType) {
233        return convertToMimeType(destinationMimeType, bh, Collections.emptyMap());
234    }
235
236    protected void adjustBlobName(String filename, BlobHolder blobHolder, String mimeType) {
237        Blob blob = blobHolder.getBlob();
238        adjustBlobName(filename, blob, mimeType);
239        blobHolder.setBlob(blob);
240    }
241
242    protected void adjustBlobName(String filename, Blob blob, String mimeType) {
243        if (StringUtils.isBlank(filename)) {
244            filename = "file_" + System.currentTimeMillis();
245        } else {
246            filename = FilenameUtils.removeExtension(FilenameUtils.getName(filename));
247        }
248        String extension = Framework.getService(MimetypeRegistry.class)
249                                    .getExtensionsFromMimetypeName(mimeType)
250                                    .stream()
251                                    .findFirst()
252                                    .orElse("bin");
253        blob.setFilename(filename + "." + extension);
254        blob.setMimeType(mimeType);
255    }
256
257    /**
258     * Replace the image URLs of an HTML blob by absolute local paths.
259     *
260     * @throws IOException
261     * @since 9.1
262     */
263    protected static Blob replaceURLsByAbsolutePaths(Blob blob, Path tempDirectory, Function<String, Blob> blobResolver)
264            throws IOException {
265        String initialBlobContent = blob.getString();
266        // Find images links in the blob
267        Pattern pattern = Pattern.compile("(src=([\"']))(.*?)(\\2)");
268        Matcher matcher = pattern.matcher(initialBlobContent);
269        StringBuffer sb = new StringBuffer();
270        while (matcher.find()) {
271            // Retrieve the image from the URL
272            String url = matcher.group(3);
273            Blob imageBlob = blobResolver.apply(url);
274            if (imageBlob == null) {
275                break;
276            }
277            // Export the image to a temporary directory in File System
278            String safeFilename = FileUtils.getSafeFilename(imageBlob.getFilename());
279            File imageFile = tempDirectory.resolve(safeFilename).toFile();
280            imageBlob.transferTo(imageFile);
281            // Replace the image URL by its absolute local path
282            matcher.appendReplacement(sb, "$1" + Matcher.quoteReplacement(imageFile.toPath().toString()) + "$4");
283        }
284        matcher.appendTail(sb);
285        String blobContentWithAbsolutePaths = sb.toString();
286        if (blobContentWithAbsolutePaths.equals(initialBlobContent)) {
287            return blob;
288        }
289        // Create a new blob with the new content
290        Blob newBlob = new StringBlob(blobContentWithAbsolutePaths, blob.getMimeType(), blob.getEncoding());
291        newBlob.setFilename(blob.getFilename());
292        return newBlob;
293    }
294
295    @Override
296    public BlobHolder convert(String converterName, BlobHolder blobHolder, Map<String, Serializable> parameters)
297            throws ConversionException {
298
299        // set parameters if null to avoid NPE in converters
300        if (parameters == null) {
301            parameters = new HashMap<>();
302        }
303
304        // exist if not registered
305        ConverterCheckResult check = isConverterAvailable(converterName);
306        if (!check.isAvailable()) {
307            // exist is not installed / configured
308            throw new ConverterNotAvailable(converterName);
309        }
310
311        ConverterDescriptor desc = converterDescriptors.get(converterName);
312        if (desc == null) {
313            throw new ConversionException("Converter " + converterName + " can not be found");
314        }
315
316        String cacheKey = CacheKeyGenerator.computeKey(converterName, blobHolder, parameters);
317
318        BlobHolder result = ConversionCacheHolder.getFromCache(cacheKey);
319
320        if (result == null) {
321            Converter converter = desc.getConverterInstance();
322            result = converter.convert(blobHolder, parameters);
323
324            if (config.isCacheEnabled()) {
325                ConversionCacheHolder.addToCache(cacheKey, result);
326            }
327        } else {
328            // we need to reset the filename if result came from cache because it's just a hash
329            result.getBlob().setFilename(null);
330        }
331
332        if (result != null) {
333            updateResultBlobMimeType(result, desc);
334            updateResultBlobFileName(blobHolder, result);
335        }
336
337        return result;
338    }
339
340    protected void updateResultBlobMimeType(BlobHolder resultBh, ConverterDescriptor desc) {
341        Blob mainBlob = resultBh.getBlob();
342        if (mainBlob == null) {
343            return;
344        }
345        String mimeType = mainBlob.getMimeType();
346        if (StringUtils.isBlank(mimeType) || mimeType.equals("application/octet-stream")) {
347            mainBlob.setMimeType(desc.getDestinationMimeType());
348        }
349    }
350
351    protected void updateResultBlobFileName(BlobHolder srcBh, BlobHolder resultBh) {
352        Blob mainBlob = resultBh.getBlob();
353        if (mainBlob == null) {
354            return;
355        }
356        String filename = mainBlob.getFilename();
357        if (StringUtils.isBlank(filename) || filename.startsWith("nxblob-")) {
358            Blob srcBlob = srcBh.getBlob();
359            if (srcBlob != null && StringUtils.isNotBlank(srcBlob.getFilename())) {
360                String baseName = FilenameUtils.getBaseName(srcBlob.getFilename());
361
362                MimetypeRegistry mimetypeRegistry = Framework.getLocalService(MimetypeRegistry.class);
363                MimetypeEntry mimeTypeEntry = mimetypeRegistry.getMimetypeEntryByMimeType(mainBlob.getMimeType());
364                List<String> extensions = mimeTypeEntry.getExtensions();
365                String extension;
366                if (!extensions.isEmpty()) {
367                    extension = extensions.get(0);
368                } else {
369                    extension = FilenameUtils.getExtension(filename);
370                    if (extension == null) {
371                        extension = "bin";
372                    }
373                }
374                mainBlob.setFilename(baseName + "." + extension);
375            }
376
377        }
378    }
379
380    @Override
381    public BlobHolder convertToMimeType(String destinationMimeType, BlobHolder blobHolder,
382            Map<String, Serializable> parameters) throws ConversionException {
383        String srcMimeType = blobHolder.getBlob().getMimeType();
384        String converterName = translationHelper.getConverterName(srcMimeType, destinationMimeType);
385        if (converterName == null) {
386            // Use a chain of 2 converters which will first try to go through HTML,
387            // then HTML to the destination mimetype
388            return convertThroughHTML(blobHolder, destinationMimeType);
389        } else {
390            return convert(converterName, blobHolder, parameters);
391        }
392    }
393
394    @Override
395    public List<String> getConverterNames(String sourceMimeType, String destinationMimeType) {
396        return translationHelper.getConverterNames(sourceMimeType, destinationMimeType);
397    }
398
399    @Override
400    public String getConverterName(String sourceMimeType, String destinationMimeType) {
401        List<String> converterNames = getConverterNames(sourceMimeType, destinationMimeType);
402        if (!converterNames.isEmpty()) {
403            return converterNames.get(converterNames.size() - 1);
404        }
405        return null;
406    }
407
408    @Override
409    public ConverterCheckResult isConverterAvailable(String converterName) throws ConversionException {
410        return isConverterAvailable(converterName, false);
411    }
412
    /**
     * Results of the converter availability checks, keyed by converter name. Read and filled by
     * {@link #isConverterAvailable(String, boolean)}.
     */
    protected final Map<String, ConverterCheckResult> checkResultCache = new HashMap<>();
414
415    @Override
416    public ConverterCheckResult isConverterAvailable(String converterName, boolean refresh)
417            throws ConverterNotRegistered {
418
419        if (!refresh) {
420            if (checkResultCache.containsKey(converterName)) {
421                return checkResultCache.get(converterName);
422            }
423        }
424
425        ConverterDescriptor descriptor = converterDescriptors.get(converterName);
426        if (descriptor == null) {
427            throw new ConverterNotRegistered(converterName);
428        }
429
430        Converter converter = descriptor.getConverterInstance();
431
432        ConverterCheckResult result;
433        if (converter instanceof ExternalConverter) {
434            ExternalConverter exConverter = (ExternalConverter) converter;
435            result = exConverter.isConverterAvailable();
436        } else if (converter instanceof ChainedConverter) {
437            ChainedConverter chainedConverter = (ChainedConverter) converter;
438            result = new ConverterCheckResult();
439            if (chainedConverter.isSubConvertersBased()) {
440                for (String subConverterName : chainedConverter.getSubConverters()) {
441                    result = isConverterAvailable(subConverterName, refresh);
442                    if (!result.isAvailable()) {
443                        break;
444                    }
445                }
446            }
447        } else {
448            // return success since there is nothing to test
449            result = new ConverterCheckResult();
450        }
451
452        result.setSupportedInputMimeTypes(descriptor.getSourceMimeTypes());
453        checkResultCache.put(converterName, result);
454
455        return result;
456    }
457
458    @Override
459    public boolean isSourceMimeTypeSupported(String converterName, String sourceMimeType) {
460        return getConverterDescriptor(converterName).getSourceMimeTypes().contains(sourceMimeType);
461    }
462
463    @Override
464    public String scheduleConversion(String converterName, BlobHolder blobHolder,
465            Map<String, Serializable> parameters) {
466        WorkManager workManager = Framework.getService(WorkManager.class);
467        ConversionWork work = new ConversionWork(converterName, null, blobHolder, parameters);
468        workManager.schedule(work);
469        return work.getId();
470    }
471
472    @Override
473    public String scheduleConversionToMimeType(String destinationMimeType, BlobHolder blobHolder,
474            Map<String, Serializable> parameters) {
475        WorkManager workManager = Framework.getService(WorkManager.class);
476        ConversionWork work = new ConversionWork(null, destinationMimeType, blobHolder, parameters);
477        workManager.schedule(work);
478        return work.getId();
479    }
480
481    @Override
482    public ConversionStatus getConversionStatus(String id) {
483        WorkManager workManager = Framework.getService(WorkManager.class);
484        Work.State workState = workManager.getWorkState(id);
485        if (workState == null) {
486            String entryKey = TransientStoreWork.computeEntryKey(id);
487            if (TransientStoreWork.containsBlobHolder(entryKey)) {
488                return new ConversionStatus(id, ConversionStatus.Status.COMPLETED);
489            }
490            return null;
491        }
492
493        return new ConversionStatus(id, ConversionStatus.Status.valueOf(workState.name()));
494    }
495
496    @Override
497    public BlobHolder getConversionResult(String id, boolean cleanTransientStoreEntry) {
498        String entryKey = TransientStoreWork.computeEntryKey(id);
499        BlobHolder bh = TransientStoreWork.getBlobHolder(entryKey);
500        if (cleanTransientStoreEntry) {
501            TransientStoreWork.removeBlobHolder(entryKey);
502        }
503        return bh;
504    }
505
506    @Override
507    public <T> T getAdapter(Class<T> adapter) {
508        if (adapter.isAssignableFrom(MimeTypeTranslationHelper.class)) {
509            return adapter.cast(translationHelper);
510        }
511        return super.getAdapter(adapter);
512    }
513
    /**
     * Starts the GC thread of the conversion service.
     */
    @Override
    public void start(ComponentContext context) {
        startGC();
    }
518
    /**
     * Stops the GC thread of the conversion service.
     */
    @Override
    public void stop(ComponentContext context) {
        endGC();
    }
523
524    protected void startGC() {
525        log.debug("CasheCGTaskActivator activated starting GC thread");
526        gcTask = new GCTask();
527        gcThread = new Thread(gcTask, "Nuxeo-Convert-GC");
528        gcThread.setDaemon(true);
529        gcThread.start();
530        log.debug("GC Thread started");
531
532    }
533
534    public void endGC() {
535        if (gcTask == null) {
536            return;
537        }
538        log.debug("Stopping GC Thread");
539        gcTask.GCEnabled = false;
540        gcTask = null;
541        gcThread.interrupt();
542        gcThread = null;
543    }
544
545}