/*
 * (C) Copyright 2006-2017 Nuxeo (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Tiry
 *     Florent Guillaume
 *     Estelle Giuly <egiuly@nuxeo.com>
 */
package org.nuxeo.ecm.core.convert.service;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.ws.rs.core.MediaType;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.common.utils.FileUtils;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
import org.nuxeo.ecm.core.api.impl.blob.StringBlob;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.convert.api.ConversionStatus;
import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
import org.nuxeo.ecm.core.convert.api.ConverterNotAvailable;
import org.nuxeo.ecm.core.convert.api.ConverterNotRegistered;
import org.nuxeo.ecm.core.convert.cache.CacheKeyGenerator;
import org.nuxeo.ecm.core.convert.cache.ConversionCacheHolder;
import org.nuxeo.ecm.core.convert.cache.GCTask;
import org.nuxeo.ecm.core.convert.extension.ChainedConverter;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
import org.nuxeo.ecm.core.convert.extension.GlobalConfigDescriptor;
import org.nuxeo.ecm.core.io.download.DownloadService;
import org.nuxeo.ecm.core.transientstore.work.TransientStoreWork;
import org.nuxeo.ecm.core.work.api.Work;
import org.nuxeo.ecm.core.work.api.WorkManager;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.model.ComponentContext;
import org.nuxeo.runtime.model.ComponentInstance;
import org.nuxeo.runtime.model.DefaultComponent;

/**
 * Runtime Component that also provides the POJO implementation of the {@link ConversionService}.
 * <p>
 * Converters are contributed through the {@value #CONVERTER_EP} extension point and the global configuration through
 * the {@value #CONFIG_EP} extension point. The static accessors of this class go through the {@link #self} reference,
 * which is only set between {@link #activate(ComponentContext)} and {@link #deactivate(ComponentContext)}.
 */
public class ConversionServiceImpl extends DefaultComponent implements ConversionService {

    protected static final Log log = LogFactory.getLog(ConversionServiceImpl.class);

    /** Name of the extension point receiving {@link ConverterDescriptor} contributions. */
    public static final String CONVERTER_EP = "converter";

    /** Name of the extension point receiving {@link GlobalConfigDescriptor} contributions. */
    public static final String CONFIG_EP = "configuration";

    /**
     * Matches a quoted {@code src} attribute: group 1 is {@code src=} plus the opening quote, group 2 the quote
     * character, group 3 the URL and group 4 the closing quote. Compiled once instead of on every call.
     */
    private static final Pattern SRC_ATTRIBUTE_PATTERN = Pattern.compile("(src=([\"']))(.*?)(\\2)");

    /** Registered converter descriptors, keyed by converter name. */
    protected final Map<String, ConverterDescriptor> converterDescriptors = new HashMap<>();

    protected final MimeTypeTranslationHelper translationHelper = new MimeTypeTranslationHelper();

    protected final GlobalConfigDescriptor config = new GlobalConfigDescriptor();

    /** Currently activated instance, backing the static accessors; {@code null} when the component is inactive. */
    protected static ConversionServiceImpl self;

    protected Thread gcThread;

    protected GCTask gcTask;

    /**
     * Resets the registries and publishes this instance as the one used by the static accessors.
     */
    @Override
    public void activate(ComponentContext context) {
        converterDescriptors.clear();
        translationHelper.clear();
        self = this;
        config.clearCachingDirectory();
    }

    /**
     * Deletes the conversion cache when caching is enabled, then clears the static instance and the registries.
     */
    @Override
    public void deactivate(ComponentContext context) {
        if (config.isCacheEnabled()) {
            ConversionCacheHolder.deleteCache();
        }
        self = null;
        converterDescriptors.clear();
        translationHelper.clear();
    }

    /**
     * Component implementation: dispatches a contribution to the converter registry or to the global configuration
     * depending on the extension point. Unknown extension points are logged as errors and otherwise ignored.
     */
    @Override
    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
        if (CONVERTER_EP.equals(extensionPoint)) {
            ConverterDescriptor desc = (ConverterDescriptor) contribution;
            registerConverter(desc);
        } else if (CONFIG_EP.equals(extensionPoint)) {
            GlobalConfigDescriptor desc = (GlobalConfigDescriptor) contribution;
            config.update(desc);
            config.clearCachingDirectory();
        } else {
            log.error("Unable to handle unknown extensionPoint " + extensionPoint);
        }
    }

    /**
     * Does nothing: contributions are not removed individually.
     */
    @Override
    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
    }

    /* Component API */

    /**
     * Returns the converter instance registered under the given name.
     *
     * @param converterName the name of the converter
     * @return the converter instance, or {@code null} if no converter is registered under that name
     */
    public static Converter getConverter(String converterName) {
        ConverterDescriptor desc = self.converterDescriptors.get(converterName);
        if (desc == null) {
            return null;
        }
        return desc.getConverterInstance();
    }

    /**
     * Returns the descriptor registered under the given name.
     *
     * @param converterName the name of the converter
     * @return the descriptor, or {@code null} if no converter is registered under that name
     */
    public static ConverterDescriptor getConverterDescriptor(String converterName) {
        return self.converterDescriptors.get(converterName);
    }

    /**
     * Returns the GC interval held by the global configuration.
     */
    public static long getGCIntervalInMinutes() {
        return self.config.getGCInterval();
    }

    /**
     * Sets the GC interval on the global configuration.
     */
    public static void setGCIntervalInMinutes(long interval) {
        self.config.setGCInterval(interval);
    }

    /**
     * Registers a converter. If a descriptor with the same name already exists, the new descriptor is merged into it
     * and the merged descriptor is the one that gets initialized and registered.
     *
     * @param desc the descriptor to register
     */
    public static void registerConverter(ConverterDescriptor desc) {
        // single lookup: registered values are never null, so a null result means "not registered yet"
        ConverterDescriptor existing = self.converterDescriptors.get(desc.getConverterName());
        if (existing != null) {
            desc = existing.merge(desc);
        }
        desc.initConverter();
        self.translationHelper.addConverter(desc);
        self.converterDescriptors.put(desc.getConverterName(), desc);
    }

    /**
     * Returns the disk cache size held by the global configuration.
     */
    public static int getMaxCacheSizeInKB() {
        return self.config.getDiskCacheSize();
    }

    /**
     * Sets the disk cache size on the global configuration.
     */
    public static void setMaxCacheSizeInKB(int size) {
        self.config.setDiskCacheSize(size);
    }

    /**
     * Tells whether the conversion cache is enabled in the global configuration.
     */
    public static boolean isCacheEnabled() {
        return self.config.isCacheEnabled();
    }

    /**
     * Returns the caching directory held by the global configuration.
     */
    public static String getCacheBasePath() {
        return self.config.getCachingDirectory();
    }

    /* Service API */

    /**
     * Returns a new list holding the names of all registered converters.
     */
    @Override
    public List<String> getRegistredConverters() {
        return new ArrayList<>(converterDescriptors.keySet());
    }

    /**
     * Converts the given blob to PDF, going through an intermediate HTML rendition.
     */
    @Override
    @Deprecated
    public Blob convertBlobToPDF(Blob blob) throws IOException {
        return convertThroughHTML(new SimpleBlobHolder(blob), MimetypeRegistry.PDF_MIMETYPE).getBlob();
    }

    /**
     * Converts a blob to the destination mime type using HTML as a pivot format: the blob is first converted to HTML
     * (unless it already is HTML), the image URLs of the HTML are replaced by local absolute paths, and the result is
     * converted to the destination mime type.
     *
     * @param blobHolder the holder of the blob to convert
     * @param destMimeType the destination mime type
     * @return the given holder untouched if its blob already has the destination mime type, otherwise the holder of
     *         the converted blob
     * @throws ConversionException if an {@link IOException} occurs while processing the HTML content
     */
    protected BlobHolder convertThroughHTML(BlobHolder blobHolder, String destMimeType) {
        Blob blob = blobHolder.getBlob();
        String mimetype = blob.getMimeType();
        String filename = blob.getFilename();
        if (destMimeType.equals(mimetype)) {
            return blobHolder;
        }

        Path tempDirectory = null;
        // Convert the blob to HTML
        if (!MediaType.TEXT_HTML.equals(mimetype)) {
            blobHolder = convertBlobToMimeType(blobHolder, MediaType.TEXT_HTML);
            // Continue with the HTML rendition: keeping the original blob here would put the non-HTML content back
            // on the holder and throw the intermediate conversion away.
            blob = blobHolder.getBlob();
        }
        try {
            tempDirectory = Framework.createTempDirectory("blobs");
            // Replace the image URLs by absolute paths
            DownloadService downloadService = Framework.getService(DownloadService.class);
            blobHolder.setBlob(
                    replaceURLsByAbsolutePaths(blob, tempDirectory, downloadService::resolveBlobFromDownloadUrl));
            // Convert the blob to the destination mimetype
            blobHolder = convertBlobToMimeType(blobHolder, destMimeType);
            // The result is named after the original file, not after the intermediate HTML one
            adjustBlobName(filename, blobHolder, destMimeType);
        } catch (IOException e) {
            throw new ConversionException(e);
        } finally {
            if (tempDirectory != null) {
                org.apache.commons.io.FileUtils.deleteQuietly(tempDirectory.toFile());
            }
        }
        return blobHolder;
    }

    /**
     * Converts the held blob to the destination mime type, without any conversion parameter.
     */
    protected BlobHolder convertBlobToMimeType(BlobHolder bh, String destinationMimeType) {
        return convertToMimeType(destinationMimeType, bh, Collections.emptyMap());
    }

    /**
     * Adjusts the filename and mime type of the main blob of the holder, then sets that blob back on the holder.
     *
     * @see #adjustBlobName(String, Blob, String)
     */
    protected void adjustBlobName(String filename, BlobHolder blobHolder, String mimeType) {
        Blob blob = blobHolder.getBlob();
        adjustBlobName(filename, blob, mimeType);
        blobHolder.setBlob(blob);
    }

    /**
     * Sets the mime type of the blob and a filename made of the base name of {@code filename} (or a generated
     * {@code file_<timestamp>} name when it is blank) and the first extension registered for the mime type
     * ({@code bin} when there is none).
     *
     * @param filename the filename to derive the base name from, may be blank
     * @param blob the blob to update
     * @param mimeType the mime type to set on the blob
     */
    protected void adjustBlobName(String filename, Blob blob, String mimeType) {
        if (StringUtils.isBlank(filename)) {
            filename = "file_" + System.currentTimeMillis();
        } else {
            filename = FilenameUtils.removeExtension(FilenameUtils.getName(filename));
        }
        String extension = Framework.getService(MimetypeRegistry.class)
                                    .getExtensionsFromMimetypeName(mimeType)
                                    .stream()
                                    .findFirst()
                                    .orElse("bin");
        blob.setFilename(filename + "." + extension);
        blob.setMimeType(mimeType);
    }

    /**
     * Replace the image URLs of an HTML blob by absolute local paths.
     * <p>
     * Every quoted {@code src} attribute whose URL is resolved to a blob by {@code blobResolver} has that blob exported
     * to {@code tempDirectory} and its URL replaced by the path of the exported file. URLs that the resolver cannot
     * resolve (it returns {@code null}) are left untouched and do not prevent the following ones from being processed.
     *
     * @param blob the blob holding the HTML content
     * @param tempDirectory the directory the images are exported to
     * @param blobResolver the function resolving an URL to a blob, returning {@code null} for an unknown URL
     * @return the given blob if no URL was replaced, otherwise a new blob with the rewritten content and the same
     *         mime type, encoding and filename
     * @throws IOException if reading the blob content or exporting an image fails
     * @since 9.1
     */
    protected static Blob replaceURLsByAbsolutePaths(Blob blob, Path tempDirectory, Function<String, Blob> blobResolver)
            throws IOException {
        String initialBlobContent = blob.getString();
        // Find images links in the blob
        Matcher matcher = SRC_ATTRIBUTE_PATTERN.matcher(initialBlobContent);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            // Retrieve the image from the URL
            String url = matcher.group(3);
            Blob imageBlob = blobResolver.apply(url);
            if (imageBlob == null) {
                // Not resolvable: keep this URL as is (it is copied verbatim by the next append) and go on with the
                // remaining ones instead of giving up on the whole document.
                continue;
            }
            // Export the image to a temporary directory in File System
            String safeFilename = FileUtils.getSafeFilename(imageBlob.getFilename());
            File imageFile = tempDirectory.resolve(safeFilename).toFile();
            imageBlob.transferTo(imageFile);
            // Replace the image URL by its absolute local path
            matcher.appendReplacement(sb, "$1" + Matcher.quoteReplacement(imageFile.toPath().toString()) + "$4");
        }
        matcher.appendTail(sb);
        String blobContentWithAbsolutePaths = sb.toString();
        if (blobContentWithAbsolutePaths.equals(initialBlobContent)) {
            return blob;
        }
        // Create a new blob with the new content
        Blob newBlob = new StringBlob(blobContentWithAbsolutePaths, blob.getMimeType(), blob.getEncoding());
        newBlob.setFilename(blob.getFilename());
        return newBlob;
    }

    /**
     * Converts the held blob with the named converter, looking the result up in the conversion cache first and
     * storing a freshly computed result in the cache when caching is enabled.
     *
     * @param converterName the name of the converter to use
     * @param blobHolder the holder of the blob to convert
     * @param parameters the conversion parameters, {@code null} being treated as no parameter
     * @return the holder of the converted blob, as returned by the cache or by the converter
     * @throws ConverterNotAvailable if the converter is registered but reported as not available
     * @throws ConversionException if the converter cannot be found
     */
    @Override
    public BlobHolder convert(String converterName, BlobHolder blobHolder, Map<String, Serializable> parameters)
            throws ConversionException {

        // set parameters if null to avoid NPE in converters
        if (parameters == null) {
            parameters = new HashMap<>();
        }

        // exit if not registered
        ConverterCheckResult check = isConverterAvailable(converterName);
        if (!check.isAvailable()) {
            // exit if not installed / configured
            throw new ConverterNotAvailable(converterName);
        }

        ConverterDescriptor desc = converterDescriptors.get(converterName);
        if (desc == null) {
            throw new ConversionException("Converter " + converterName + " can not be found");
        }

        String cacheKey = CacheKeyGenerator.computeKey(converterName, blobHolder, parameters);

        BlobHolder result = ConversionCacheHolder.getFromCache(cacheKey);

        if (result == null) {
            Converter converter = desc.getConverterInstance();
            result = converter.convert(blobHolder, parameters);

            if (config.isCacheEnabled()) {
                ConversionCacheHolder.addToCache(cacheKey, result);
            }
        } else {
            // we need to reset the filename if result came from cache because it's just a hash
            Blob cachedBlob = result.getBlob();
            // a holder without main blob is tolerated here, as it is by the updateResultBlob* methods below
            if (cachedBlob != null) {
                cachedBlob.setFilename(null);
            }
        }

        if (result != null) {
            updateResultBlobMimeType(result, desc);
            updateResultBlobFileName(blobHolder, result);
        }

        return result;
    }

    /**
     * Sets the destination mime type of the converter on the main blob of the result when that blob has a blank mime
     * type or the generic {@code application/octet-stream} one. Does nothing if the result has no main blob.
     */
    protected void updateResultBlobMimeType(BlobHolder resultBh, ConverterDescriptor desc) {
        Blob mainBlob = resultBh.getBlob();
        if (mainBlob == null) {
            return;
        }
        String mimeType = mainBlob.getMimeType();
        if (StringUtils.isBlank(mimeType) || mimeType.equals("application/octet-stream")) {
            mainBlob.setMimeType(desc.getDestinationMimeType());
        }
    }

    /**
     * Gives the main blob of the result a filename derived from the source blob when its own filename is blank or
     * starts with {@code nxblob-}: the base name of the source filename followed by the first extension registered
     * for the mime type of the result, falling back on the extension of the current result filename and finally on
     * {@code bin}. Does nothing if the result has no main blob or if the source blob has no usable filename.
     */
    protected void updateResultBlobFileName(BlobHolder srcBh, BlobHolder resultBh) {
        Blob mainBlob = resultBh.getBlob();
        if (mainBlob == null) {
            return;
        }
        String filename = mainBlob.getFilename();
        if (StringUtils.isBlank(filename) || filename.startsWith("nxblob-")) {
            Blob srcBlob = srcBh.getBlob();
            if (srcBlob != null && StringUtils.isNotBlank(srcBlob.getFilename())) {
                String baseName = FilenameUtils.getBaseName(srcBlob.getFilename());

                MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
                MimetypeEntry mimeTypeEntry = mimetypeRegistry.getMimetypeEntryByMimeType(mainBlob.getMimeType());
                // defensive: tolerate a mime type the registry has no entry for
                List<String> extensions = null;
                if (mimeTypeEntry != null) {
                    extensions = mimeTypeEntry.getExtensions();
                }
                String extension;
                if (extensions != null && !extensions.isEmpty()) {
                    extension = extensions.get(0);
                } else {
                    extension = FilenameUtils.getExtension(filename);
                    // getExtension yields an empty string (not null) for a name without extension, which would
                    // produce a filename ending with a dot
                    if (StringUtils.isEmpty(extension)) {
                        extension = "bin";
                    }
                }
                mainBlob.setFilename(baseName + "." + extension);
            }
        }
    }

    /**
     * Converts the held blob to the destination mime type. When no converter handles the conversion directly but one
     * can produce HTML from the source mime type, the conversion goes through HTML.
     *
     * @param destinationMimeType the mime type to convert to
     * @param blobHolder the holder of the blob to convert
     * @param parameters the conversion parameters, only used for a direct conversion
     * @return the holder of the converted blob
     * @throws ConversionException if no converter is available, directly or through HTML
     */
    @Override
    public BlobHolder convertToMimeType(String destinationMimeType, BlobHolder blobHolder,
            Map<String, Serializable> parameters) throws ConversionException {
        String srcMimeType = blobHolder.getBlob().getMimeType();
        String converterName = translationHelper.getConverterName(srcMimeType, destinationMimeType);
        if (converterName == null) {
            // check if a conversion is available through HTML
            converterName = translationHelper.getConverterName(srcMimeType, MediaType.TEXT_HTML);
            if (converterName == null) {
                throw new ConversionException(String.format("No converters available to convert from %s to %s.",
                        srcMimeType, destinationMimeType));
            }
            // Use a chain of 2 converters which will first try to go through HTML,
            // then HTML to the destination mimetype
            return convertThroughHTML(blobHolder, destinationMimeType);
        } else {
            return convert(converterName, blobHolder, parameters);
        }
    }

    /**
     * Returns the converter names reported by the translation helper for the given mime types.
     */
    @Override
    public List<String> getConverterNames(String sourceMimeType, String destinationMimeType) {
        return translationHelper.getConverterNames(sourceMimeType, destinationMimeType);
    }

    /**
     * Returns the last of the converter names available for the given mime types.
     *
     * @return the converter name, or {@code null} if there is none
     */
    @Override
    public String getConverterName(String sourceMimeType, String destinationMimeType) {
        List<String> converterNames = getConverterNames(sourceMimeType, destinationMimeType);
        if (!converterNames.isEmpty()) {
            return converterNames.get(converterNames.size() - 1);
        }
        return null;
    }

    /**
     * Checks the availability of a converter, using the cached check result if there is one.
     */
    @Override
    public ConverterCheckResult isConverterAvailable(String converterName) throws ConversionException {
        return isConverterAvailable(converterName, false);
    }

    /** Availability check results, keyed by converter name. */
    protected final Map<String, ConverterCheckResult> checkResultCache = new HashMap<>();

    /**
     * Checks the availability of a converter and caches the result. External converters are asked directly, chained
     * converters based on sub-converters are available if all their sub-converters are (the check stops at the first
     * unavailable one), and any other converter is considered available.
     *
     * @param converterName the name of the converter to check
     * @param refresh {@code true} to bypass the cached result and run the check again
     * @return the check result
     * @throws ConverterNotRegistered if no converter is registered under the given name
     */
    @Override
    public ConverterCheckResult isConverterAvailable(String converterName, boolean refresh)
            throws ConverterNotRegistered {

        if (!refresh) {
            if (checkResultCache.containsKey(converterName)) {
                return checkResultCache.get(converterName);
            }
        }

        ConverterDescriptor descriptor = converterDescriptors.get(converterName);
        if (descriptor == null) {
            throw new ConverterNotRegistered(converterName);
        }

        Converter converter = descriptor.getConverterInstance();

        ConverterCheckResult result;
        if (converter instanceof ExternalConverter) {
            ExternalConverter exConverter = (ExternalConverter) converter;
            result = exConverter.isConverterAvailable();
        } else if (converter instanceof ChainedConverter) {
            ChainedConverter chainedConverter = (ChainedConverter) converter;
            result = new ConverterCheckResult();
            if (chainedConverter.isSubConvertersBased()) {
                for (String subConverterName : chainedConverter.getSubConverters()) {
                    result = isConverterAvailable(subConverterName, refresh);
                    if (!result.isAvailable()) {
                        break;
                    }
                }
            }
        } else {
            // return success since there is nothing to test
            result = new ConverterCheckResult();
        }

        // NOTE(review): for a sub-converters based chain, `result` is the very object returned (and cached) for the
        // last checked sub-converter, so the call below also overwrites that sub-converter's supported input mime
        // types - confirm whether a copy should be made here.
        result.setSupportedInputMimeTypes(descriptor.getSourceMimeTypes());
        checkResultCache.put(converterName, result);

        return result;
    }

    /**
     * Tells whether the source mime types declared by the named converter contain the given mime type.
     */
    @Override
    public boolean isSourceMimeTypeSupported(String converterName, String sourceMimeType) {
        return getConverterDescriptor(converterName).getSourceMimeTypes().contains(sourceMimeType);
    }

    /**
     * Schedules a conversion with the named converter through the {@link WorkManager}.
     *
     * @return the id of the scheduled work
     */
    @Override
    public String scheduleConversion(String converterName, BlobHolder blobHolder,
            Map<String, Serializable> parameters) {
        WorkManager workManager = Framework.getService(WorkManager.class);
        ConversionWork work = new ConversionWork(converterName, null, blobHolder, parameters);
        workManager.schedule(work);
        return work.getId();
    }

    /**
     * Schedules a conversion to the destination mime type through the {@link WorkManager}.
     *
     * @return the id of the scheduled work
     */
    @Override
    public String scheduleConversionToMimeType(String destinationMimeType, BlobHolder blobHolder,
            Map<String, Serializable> parameters) {
        WorkManager workManager = Framework.getService(WorkManager.class);
        ConversionWork work = new ConversionWork(null, destinationMimeType, blobHolder, parameters);
        workManager.schedule(work);
        return work.getId();
    }

    /**
     * Returns the status of a scheduled conversion.
     *
     * @param id the id returned when the conversion was scheduled
     * @return the status mapped from the work state; if the work manager has no state for the id, a completed status
     *         when a result is stored for it and {@code null} otherwise
     */
    @Override
    public ConversionStatus getConversionStatus(String id) {
        WorkManager workManager = Framework.getService(WorkManager.class);
        Work.State workState = workManager.getWorkState(id);
        if (workState == null) {
            String entryKey = TransientStoreWork.computeEntryKey(id);
            if (TransientStoreWork.containsBlobHolder(entryKey)) {
                return new ConversionStatus(id, ConversionStatus.Status.COMPLETED);
            }
            return null;
        }

        return new ConversionStatus(id, ConversionStatus.Status.valueOf(workState.name()));
    }

    /**
     * Returns the stored result of a scheduled conversion.
     *
     * @param id the id returned when the conversion was scheduled
     * @param cleanTransientStoreEntry {@code true} to remove the stored result after having read it
     * @return the blob holder stored for the conversion
     */
    @Override
    public BlobHolder getConversionResult(String id, boolean cleanTransientStoreEntry) {
        String entryKey = TransientStoreWork.computeEntryKey(id);
        BlobHolder bh = TransientStoreWork.getBlobHolder(entryKey);
        if (cleanTransientStoreEntry) {
            TransientStoreWork.removeBlobHolder(entryKey);
        }
        return bh;
    }

    /**
     * Exposes the {@link MimeTypeTranslationHelper} of this component as an adapter, and defers to the parent
     * implementation for any other type.
     */
    @Override
    public <T> T getAdapter(Class<T> adapter) {
        if (adapter.isAssignableFrom(MimeTypeTranslationHelper.class)) {
            return adapter.cast(translationHelper);
        }
        return super.getAdapter(adapter);
    }

    @Override
    public void start(ComponentContext context) {
        startGC();
    }

    @Override
    public void stop(ComponentContext context) {
        endGC();
    }

    /**
     * Starts the {@link GCTask} in a dedicated daemon thread.
     */
    protected void startGC() {
        log.debug("CasheCGTaskActivator activated starting GC thread");
        gcTask = new GCTask();
        gcThread = new Thread(gcTask, "Nuxeo-Convert-GC");
        gcThread.setDaemon(true);
        gcThread.start();
        log.debug("GC Thread started");
    }

    /**
     * Disables the GC task and interrupts its thread. Does nothing if no GC task is running.
     */
    public void endGC() {
        if (gcTask == null) {
            return;
        }
        log.debug("Stopping GC Thread");
        gcTask.GCEnabled = false;
        gcTask = null;
        gcThread.interrupt();
        gcThread = null;
    }

}