/*
 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo
 *     Florent Guillaume
 *     Thierry Delprat
 */
package org.nuxeo.ecm.platform.convert.plugins;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.artofsolving.jodconverter.OfficeDocumentConverter;
import org.artofsolving.jodconverter.StandardConversionTask;
import org.artofsolving.jodconverter.document.DocumentFamily;
import org.artofsolving.jodconverter.document.DocumentFormat;
import org.nuxeo.common.Environment;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
import org.nuxeo.ecm.platform.convert.ooomanager.OOoManagerService;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;

/**
 * Converter based on JOD which uses an external OpenOffice process to do actual conversions.
 *
 * @deprecated Since 8.4. Use 'soffice' with {@link org.nuxeo.ecm.platform.convert.plugins.CommandLineConverter} instead
 */
@Deprecated
public class JODBasedConverter implements ExternalConverter {

    /** Name of the descriptor parameter that overrides the directory used for temporary HTML output. */
    protected static final String TMP_PATH_PARAMETER = "TmpDirectory";

    private static final Log log = LogFactory.getLog(JODBasedConverter.class);

    /**
     * Boolean conversion parameter for PDF/A-1.
     *
     * @since 5.6
     */
    public static final String PDFA1_PARAM = "PDF/A-1";

    /**
     * Boolean parameter to force update of the document TOC
     *
     * @since 5.6
     */
    public static final String UPDATE_INDEX_PARAM = StandardConversionTask.UPDATE_DOCUMENT_INDEX;

    /** PDF export filter name for each document family. */
    protected static final Map<DocumentFamily, String> PDF_FILTER_NAMES = new HashMap<>();

    // Static initializer: the map is static, so it must be filled once at class-load time rather than
    // on every instance creation.
    static {
        PDF_FILTER_NAMES.put(DocumentFamily.TEXT, "writer_pdf_Export");
        PDF_FILTER_NAMES.put(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
        PDF_FILTER_NAMES.put(DocumentFamily.PRESENTATION, "impress_pdf_Export");
        PDF_FILTER_NAMES.put(DocumentFamily.DRAWING, "draw_pdf_Export");
    }

    /** Descriptor this converter was initialized with, see {@link #init(ConverterDescriptor)}. */
    protected ConverterDescriptor descriptor;

    /**
     * Returns the destination mime type declared by the converter descriptor.
     */
    protected String getDestinationMimeType() {
        return descriptor.getDestinationMimeType();
    }

    /**
     * Returns the destination format for the given plugin.
     * <p>
     * It takes the actual destination mimetype from the plugin configuration.
     *
     * @param documentConverter the converter whose format registry is queried
     * @param sourceFormat the source format
     * @param pdfa1 true if PDF/A-1 is required
     */
    protected DocumentFormat getDestinationFormat(OfficeDocumentConverter documentConverter,
            DocumentFormat sourceFormat, boolean pdfa1) {
        String mimeType = getDestinationMimeType();
        DocumentFormat destinationFormat = documentConverter.getFormatRegistry().getFormatByMediaType(mimeType);
        if ("application/pdf".equals(mimeType)) {
            destinationFormat = extendPDFFormat(sourceFormat, destinationFormat, pdfa1);
        }
        return destinationFormat;
    }

    /**
     * Builds a new PDF format: the store properties of every family are copied from {@code defaultFormat}, except
     * those of the source family which are replaced by an extended copy.
     *
     * @param sourceFormat the source format
     * @param defaultFormat the PDF format to copy the store properties from
     * @param pdfa1 true if PDF/A-1 is required
     * @return a new PDF (or PDF/A-1) document format
     */
    protected DocumentFormat extendPDFFormat(DocumentFormat sourceFormat, DocumentFormat defaultFormat, boolean pdfa1) {
        DocumentFamily sourceFamily = sourceFormat.getInputFamily();
        String sourceMediaType = sourceFormat.getMediaType();
        DocumentFormat pdfFormat = new DocumentFormat(pdfa1 ? "PDF/A-1" : "PDF", "pdf", "application/pdf");
        Map<DocumentFamily, Map<String, ?>> storePropertiesByFamily = new HashMap<>();
        Map<DocumentFamily, Map<String, ?>> defaultStorePropertiesByFamily = defaultFormat.getStorePropertiesByFamily();
        for (Map.Entry<DocumentFamily, Map<String, ?>> entry : defaultStorePropertiesByFamily.entrySet()) {
            DocumentFamily family = entry.getKey();
            if (!family.equals(sourceFamily)) {
                storePropertiesByFamily.put(family, entry.getValue());
            }
        }
        storePropertiesByFamily.put(sourceFamily,
                extendPDFStoreProperties(sourceMediaType, pdfa1, defaultStorePropertiesByFamily.get(sourceFamily)));
        pdfFormat.setStorePropertiesByFamily(storePropertiesByFamily);
        return pdfFormat;
    }

    /**
     * Returns a copy of {@code originalProperties} with the "FilterName" entry overridden for HTML sources and a
     * "FilterData" entry added when PDF/A-1 is requested.
     *
     * @param mediatype the media type of the source document
     * @param pdfa1 true if PDF/A-1 is required
     * @param originalProperties the store properties to extend, may be {@code null}
     * @return a new mutable map, never {@code null}
     */
    protected Map<String, Object> extendPDFStoreProperties(String mediatype, boolean pdfa1,
            Map<String, ?> originalProperties) {
        Map<String, Object> extendedProperties = new HashMap<>();
        // The default format may have no store properties for the source family
        if (originalProperties != null) {
            extendedProperties.putAll(originalProperties);
        }
        if ("text/html".equals(mediatype)) {
            extendedProperties.put("FilterName", "writer_web_pdf_Export");
        }
        if (pdfa1) {
            Map<String, Object> filterData = new HashMap<>();
            filterData.put("SelectPdfVersion", Integer.valueOf(1)); // PDF/A-1
            filterData.put("UseTaggedPDF", Boolean.TRUE); // per spec
            extendedProperties.put("FilterData", filterData);
        }
        return extendedProperties;
    }

    /**
     * Returns the format for the file passed as a parameter.
     * <p>
     * We will ask the mimetype registry service to sniff its mimetype.
     *
     * @return DocumentFormat for the given file
     */
    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, File file) {
        MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
        String mimetypeStr = mimetypeRegistry.getMimetypeFromFile(file);
        return documentConverter.getFormatRegistry().getFormatByMediaType(mimetypeStr);
    }

    /**
     * Returns the DocumentFormat for the given mimetype.
     *
     * @return DocumentFormat for the given mimetype
     */
    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, String mimetype) {
        return documentConverter.getFormatRegistry().getFormatByMediaType(mimetype);
    }

    /**
     * Returns the extension (leading dot included) of the given file name, or {@code ".bin"} when the name is
     * {@code null} or contains no dot.
     */
    private static String extractExtension(String filename) {
        if (filename == null) {
            return ".bin";
        }
        int dotPosition = filename.lastIndexOf('.');
        return dotPosition == -1 ? ".bin" : filename.substring(dotPosition);
    }

    // No-op override, only delegates to the parent implementation.
    @Override
    protected void finalize() throws Throwable {
        super.finalize();
    }

    /**
     * Converts the blob held by {@code blobHolder} to the destination mime type of the descriptor.
     * <p>
     * The input is copied to a temporary file and handed to the JOD document converter. When the destination is
     * {@code text/html}, every file produced in a dedicated temporary directory is returned, preceded by a copy of
     * the main output named {@code index.html}. All temporary files are removed before returning.
     *
     * @param blobHolder the holder of the blob to convert
     * @param parameters optional conversion parameters (see {@link #PDFA1_PARAM}), may be {@code null}
     * @return the converted blob(s), or {@code null} if the holder has no blob
     * @throws ConversionException if the source format cannot be determined or an I/O error occurs
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
        blobHolder = new UTF8CharsetConverter().convert(blobHolder, parameters);
        Blob inputBlob = blobHolder.getBlob();
        String blobPath = blobHolder.getFilePath();
        if (inputBlob == null) {
            return null;
        }

        OfficeDocumentConverter documentConverter = newDocumentConverter();
        // This plugin do deal only with one input source.
        String sourceMimetype = inputBlob.getMimeType();

        boolean pdfa1 = false;
        if (parameters != null) {
            Serializable pdfa1Val = parameters.get(PDFA1_PARAM);
            if (pdfa1Val instanceof Boolean) {
                pdfa1 = ((Boolean) pdfa1Val).booleanValue();
            } else if (pdfa1Val instanceof String) {
                pdfa1 = Boolean.parseBoolean((String) pdfa1Val);
            }
        }

        File sourceFile = null;
        File outFile = null;
        File tmpDir = null;
        try {

            // If the input blob has the HTML mime type, make sure the
            // charset meta is present, add it if not
            if ("text/html".equals(sourceMimetype)) {
                inputBlob = checkCharsetMeta(inputBlob);
            }

            // Keep the original file extension, blobs without a file name fall back to ".bin"
            String ext = extractExtension(inputBlob.getFilename());

            // Copy in a file to be able to read it several time
            sourceFile = Framework.createTempFile("NXJOOoConverterDocumentIn", ext);
            try (InputStream stream = inputBlob.getStream()) {
                FileUtils.copyInputStreamToFile(stream, sourceFile);
            }

            DocumentFormat sourceFormat = null;
            if (sourceMimetype != null) {
                // Try to fetch it from the registry.
                sourceFormat = getSourceFormat(documentConverter, sourceMimetype);
            }
            // If not found in the registry or not given as a parameter,
            // try to sniff it from the file content.
            if (sourceFormat == null) {
                sourceFormat = getSourceFormat(documentConverter, sourceFile);
            }
            if (sourceFormat == null) {
                // Fail explicitly instead of hitting a NullPointerException further down
                throw new ConversionException(String.format(
                        "Unable to determine the source format of file %s (mime type: %s)", blobPath,
                        sourceMimetype));
            }

            // From plugin settings because we know the destination
            // mimetype.
            DocumentFormat destinationFormat = getDestinationFormat(documentConverter, sourceFormat, pdfa1);

            List<Blob> blobs = new ArrayList<>();

            if ("text/html".equals(descriptor.getDestinationMimeType())) {
                // Convert into a dedicated directory so that every produced file can be collected
                String tmpDirPath = getTmpDirectory();
                File myTmpDir = new File(tmpDirPath + "/JODConv_" + System.currentTimeMillis());
                boolean created = myTmpDir.mkdir();
                if (!created) {
                    throw new IOException("Unable to create temp dir");
                }
                // Only remember the directory once we know we created it, so cleanup never removes a
                // pre-existing one
                tmpDir = myTmpDir;

                outFile = new File(myTmpDir.getAbsolutePath() + "/" + "NXJOOoConverterDocumentOut."
                        + destinationFormat.getExtension());

                created = outFile.createNewFile();
                if (!created) {
                    throw new IOException("Unable to create temp file");
                }

                log.debug("Output File = " + outFile.getAbsolutePath());
                // Perform the actual conversion.
                documentConverter.convert(sourceFile, outFile, destinationFormat);

                File[] files = myTmpDir.listFiles();
                if (files == null) {
                    throw new IOException("Unable to list files of temp dir " + myTmpDir.getAbsolutePath());
                }
                for (File file : files) {
                    // copy the files to a new tmp location, as we'll delete them
                    Blob blob;
                    try (FileInputStream in = new FileInputStream(file)) {
                        blob = Blobs.createBlob(in);
                    }
                    blob.setFilename(file.getName());
                    blobs.add(blob);
                    // add a blob for the index
                    if (file.getName().equals(outFile.getName())) {
                        Blob indexBlob;
                        try (FileInputStream in = new FileInputStream(file)) {
                            indexBlob = Blobs.createBlob(in);
                        }
                        indexBlob.setFilename("index.html");
                        blobs.add(0, indexBlob);
                    }
                }

            } else {
                outFile = Framework.createTempFile("NXJOOoConverterDocumentOut", '.' + destinationFormat.getExtension());

                // Perform the actual conversion.
                documentConverter.convert(sourceFile, outFile, destinationFormat, parameters);

                Blob blob;
                try (FileInputStream in = new FileInputStream(outFile)) {
                    blob = Blobs.createBlob(in, getDestinationMimeType());
                }
                blobs.add(blob);
            }
            return new SimpleCachableBlobHolder(blobs);
        } catch (IOException e) {
            String msg = String.format("An error occurred trying to convert file %s from %s to %s", blobPath,
                    sourceMimetype, getDestinationMimeType());
            throw new ConversionException(msg, e);
        } finally {
            // deleteQuietly is null-safe and recursive: it also removes the temp directory and anything
            // left inside it
            FileUtils.deleteQuietly(sourceFile);
            FileUtils.deleteQuietly(outFile);
            FileUtils.deleteQuietly(tmpDir);
        }

    }

    /**
     * Fetches the document converter from the {@link OOoManagerService}.
     *
     * @return the document converter, never {@code null}
     * @throws ConversionException if the service returns no converter
     */
    protected OfficeDocumentConverter newDocumentConverter() throws ConversionException {
        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
        OfficeDocumentConverter documentConverter = oooManagerService.getDocumentConverter();
        if (documentConverter == null) {
            throw new ConversionException("Could not connect to the remote OpenOffice server");
        }
        return documentConverter;
    }

    /**
     * Stores the descriptor, which provides the destination mime type and the converter parameters.
     */
    @SuppressWarnings("hiding")
    @Override
    public void init(ConverterDescriptor descriptor) {
        this.descriptor = descriptor;
    }

    /**
     * Returns a check result flagged as unavailable when the OOo manager is not started.
     */
    @Override
    public ConverterCheckResult isConverterAvailable() {
        ConverterCheckResult result = new ConverterCheckResult();
        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
        if (!oooManagerService.isOOoManagerStarted()) {
            result.setAvailable(false);
        }
        return result;
    }

    /**
     * Returns the directory configured through the {@link #TMP_PATH_PARAMETER} descriptor parameter, or the temp
     * directory of the default environment when it is not set.
     */
    protected String getTmpDirectory() {
        String tmp = null;
        Map<String, String> parameters = descriptor.getParameters();
        if (parameters != null && parameters.containsKey(TMP_PATH_PARAMETER)) {
            tmp = parameters.get(TMP_PATH_PARAMETER);
        }
        if (tmp == null) {
            tmp = Environment.getDefault().getTemp().getPath();
        }
        return tmp;
    }

    /**
     * Checks if the {@code inputBlob} string contains a {@code charset} meta tag. If not, add it.
     * <p>
     * Nothing is done when the blob has no encoding.
     *
     * @param inputBlob the input blob
     * @return the input blob, or a new blob starting with the meta tag
     * @throws IOException Signals that an I/O exception has occurred.
     */
    protected Blob checkCharsetMeta(Blob inputBlob) throws IOException {

        String charset = inputBlob.getEncoding();
        if (!StringUtils.isEmpty(charset)) {
            // Quote the charset so that it is always matched literally
            Pattern charsetMetaPattern = Pattern.compile(
                    String.format("content=\"text/html;\\s*charset=%s\"", Pattern.quote(charset)));
            Matcher charsetMetaMatcher = charsetMetaPattern.matcher(inputBlob.getString());
            if (!charsetMetaMatcher.find()) {
                String charsetMetaTag = String.format(
                        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">", charset);
                StringBuilder sb = new StringBuilder(charsetMetaTag);
                sb.append(new String(inputBlob.getByteArray(), charset));
                return Blobs.createBlob(sb.toString(), "text/html", charset, inputBlob.getFilename());
            }
        }
        return inputBlob;
    }
}