/*
 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo
 *     Florent Guillaume
 *     Thierry Delprat
 */
package org.nuxeo.ecm.platform.convert.plugins;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.artofsolving.jodconverter.OfficeDocumentConverter;
import org.artofsolving.jodconverter.StandardConversionTask;
import org.artofsolving.jodconverter.document.DocumentFamily;
import org.artofsolving.jodconverter.document.DocumentFormat;
import org.nuxeo.common.Environment;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
import org.nuxeo.ecm.platform.convert.ooomanager.OOoManagerService;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;

/**
 * Converter based on JOD which uses an external OpenOffice process to do actual conversions.
 *
 * @deprecated Since 8.4. Use 'soffice' with {@link org.nuxeo.ecm.platform.convert.plugins.CommandLineConverter} instead
 */
@Deprecated
public class JODBasedConverter implements ExternalConverter {

    /** Name of the converter descriptor parameter holding the temporary directory path. */
    protected static final String TMP_PATH_PARAMETER = "TmpDirectory";

    private static final Log log = LogFactory.getLog(JODBasedConverter.class);

    /**
     * Boolean conversion parameter for PDF/A-1.
     *
     * @since 5.6
     */
    public static final String PDFA1_PARAM = "PDF/A-1";

    /**
     * Boolean parameter to force update of the document TOC
     *
     * @since 5.6
     */
    public static final String UPDATE_INDEX_PARAM = StandardConversionTask.UPDATE_DOCUMENT_INDEX;

    /** PDF export filter name for each document family. */
    protected static final Map<DocumentFamily, String> PDF_FILTER_NAMES = new HashMap<>();

    // Static initializer: the map is shared by all instances, so it must be filled once at class
    // initialization and not each time a converter is instantiated.
    static {
        PDF_FILTER_NAMES.put(DocumentFamily.TEXT, "writer_pdf_Export");
        PDF_FILTER_NAMES.put(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
        PDF_FILTER_NAMES.put(DocumentFamily.PRESENTATION, "impress_pdf_Export");
        PDF_FILTER_NAMES.put(DocumentFamily.DRAWING, "draw_pdf_Export");
    }

    /** Descriptor this converter was initialized with, see {@link #init(ConverterDescriptor)}. */
    protected ConverterDescriptor descriptor;

    /**
     * Returns the destination mime type declared by the converter descriptor.
     */
    protected String getDestinationMimeType() {
        return descriptor.getDestinationMimeType();
    }

    /**
     * Returns the destination format for the given plugin.
     * <p>
     * It takes the actual destination mimetype from the plugin configuration. When that mimetype is
     * {@code application/pdf}, the registry format is replaced by the one built by
     * {@link #extendPDFFormat(DocumentFormat, DocumentFormat, boolean)}.
     *
     * @param documentConverter the converter whose format registry is queried
     * @param sourceFormat the source format
     * @param pdfa1 true if PDF/A-1 is required
     * @return the format to convert to
     */
    protected DocumentFormat getDestinationFormat(OfficeDocumentConverter documentConverter,
            DocumentFormat sourceFormat, boolean pdfa1) {
        String mimeType = getDestinationMimeType();
        DocumentFormat destinationFormat = documentConverter.getFormatRegistry().getFormatByMediaType(mimeType);
        if ("application/pdf".equals(mimeType)) {
            destinationFormat = extendPDFFormat(sourceFormat, destinationFormat, pdfa1);
        }
        return destinationFormat;
    }

    /**
     * Builds a PDF format whose store properties are those of {@code defaultFormat}, except for the family of
     * {@code sourceFormat} whose properties are extended by
     * {@link #extendPDFStoreProperties(String, boolean, Map)}.
     *
     * @param sourceFormat the source format, dereferenced unconditionally so it must not be {@code null}
     * @param defaultFormat the PDF format to take the default store properties from
     * @param pdfa1 true if PDF/A-1 is required
     * @return a new PDF (or PDF/A-1) format
     */
    protected DocumentFormat extendPDFFormat(DocumentFormat sourceFormat, DocumentFormat defaultFormat, boolean pdfa1) {
        DocumentFamily sourceFamily = sourceFormat.getInputFamily();
        String sourceMediaType = sourceFormat.getMediaType();
        DocumentFormat pdfFormat = new DocumentFormat(pdfa1 ? "PDF/A-1" : "PDF", "pdf", "application/pdf");
        Map<DocumentFamily, Map<String, ?>> defaultStorePropertiesByFamily = defaultFormat.getStorePropertiesByFamily();
        Map<DocumentFamily, Map<String, ?>> storePropertiesByFamily = new HashMap<>();
        for (Map.Entry<DocumentFamily, Map<String, ?>> entry : defaultStorePropertiesByFamily.entrySet()) {
            // the source family gets its own extended properties below
            if (!entry.getKey().equals(sourceFamily)) {
                storePropertiesByFamily.put(entry.getKey(), entry.getValue());
            }
        }
        storePropertiesByFamily.put(sourceFamily,
                extendPDFStoreProperties(sourceMediaType, pdfa1, defaultStorePropertiesByFamily.get(sourceFamily)));
        pdfFormat.setStorePropertiesByFamily(storePropertiesByFamily);
        return pdfFormat;
    }

    /**
     * Returns a copy of {@code originalProperties} with the PDF specific store properties added.
     * <p>
     * HTML sources get the {@code writer_web_pdf_Export} filter, and PDF/A-1 requests get a {@code FilterData}
     * entry selecting PDF/A-1 and tagged PDF.
     *
     * @param mediatype the media type of the source document
     * @param pdfa1 true if PDF/A-1 is required
     * @param originalProperties the default store properties, {@code null} is treated as empty
     * @return a new mutable map, the original one is left untouched
     */
    protected Map<String, Object> extendPDFStoreProperties(String mediatype, boolean pdfa1,
            Map<String, ?> originalProperties) {
        Map<String, Object> extendedProperties = new HashMap<>();
        if (originalProperties != null) {
            extendedProperties.putAll(originalProperties);
        }
        if ("text/html".equals(mediatype)) {
            extendedProperties.put("FilterName", "writer_web_pdf_Export");
        }
        if (pdfa1) {
            Map<String, Object> filterData = new HashMap<>();
            filterData.put("SelectPdfVersion", Integer.valueOf(1)); // PDF/A-1
            filterData.put("UseTaggedPDF", Boolean.TRUE); // per spec
            extendedProperties.put("FilterData", filterData);
        }
        return extendedProperties;
    }

    /**
     * Returns the format for the file passed as a parameter.
     * <p>
     * We will ask the mimetype registry service to sniff its mimetype.
     *
     * @return DocumentFormat for the given file
     */
    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, File file) {
        MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
        String mimetypeStr = mimetypeRegistry.getMimetypeFromFile(file);
        return documentConverter.getFormatRegistry().getFormatByMediaType(mimetypeStr);
    }

    /**
     * Returns the DocumentFormat for the given mimetype.
     *
     * @return DocumentFormat for the given mimetype
     */
    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, String mimetype) {
        return documentConverter.getFormatRegistry().getFormatByMediaType(mimetype);
    }

    /**
     * Reads the PDF/A-1 flag from the conversion parameters.
     *
     * @return the value of {@link #PDFA1_PARAM} when given as a {@link Boolean} or a {@link String}, false otherwise
     */
    private static boolean isPdfA1Requested(Map<String, Serializable> parameters) {
        if (parameters == null) {
            return false;
        }
        Serializable value = parameters.get(PDFA1_PARAM);
        if (value instanceof Boolean) {
            return ((Boolean) value).booleanValue();
        }
        if (value instanceof String) {
            return Boolean.parseBoolean((String) value);
        }
        return false;
    }

    /**
     * Returns the extension of the given file name, dot included, or {@code .bin} when the name is {@code null} or
     * has no dot.
     */
    private static String getFileExtension(String filename) {
        int dotPosition = filename == null ? -1 : filename.lastIndexOf('.');
        return dotPosition == -1 ? ".bin" : filename.substring(dotPosition);
    }

    /**
     * Converts the blob of the given holder to the destination mime type of this converter.
     * <p>
     * The input is first passed through {@link UTF8CharsetConverter}, then copied to a temporary file which is
     * handed to the office document converter. When the destination is {@code text/html}, every file produced in the
     * conversion directory is returned, preceded by a copy of the main output named {@code index.html}. Otherwise a
     * single blob is returned. All temporary files are removed before returning.
     *
     * @param blobHolder the holder of the blob to convert
     * @param parameters the conversion parameters, may be {@code null}
     * @return the converted blob(s), or {@code null} if there is no blob to convert
     * @throws ConversionException if no document converter is available or if an I/O error occurs
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
        blobHolder = new UTF8CharsetConverter().convert(blobHolder, parameters);
        Blob inputBlob = blobHolder.getBlob();
        String blobPath = blobHolder.getFilePath();
        if (inputBlob == null) {
            return null;
        }

        OfficeDocumentConverter documentConverter = newDocumentConverter();
        // This plugin deals with only one input source.
        String sourceMimetype = inputBlob.getMimeType();
        boolean pdfa1 = isPdfA1Requested(parameters);

        File sourceFile = null;
        File outFile = null;
        // only set once the directory has been created by this call, so that cleanup never removes a directory
        // owned by someone else
        File htmlTmpDir = null;
        try {

            // If the input blob has the HTML mime type, make sure the
            // charset meta is present, add it if not
            if ("text/html".equals(sourceMimetype)) {
                inputBlob = checkCharsetMeta(inputBlob);
            }

            // Copy in a file, keeping the original extension, to be able to read it several times
            sourceFile = Framework.createTempFile("NXJOOoConverterDocumentIn",
                    getFileExtension(inputBlob.getFilename()));
            try (InputStream stream = inputBlob.getStream()) {
                FileUtils.copyInputStreamToFile(stream, sourceFile);
            }

            DocumentFormat sourceFormat = null;
            if (sourceMimetype != null) {
                // Try to fetch it from the registry.
                sourceFormat = getSourceFormat(documentConverter, sourceMimetype);
            }
            // If not found in the registry or not given as a parameter, sniff it from the file.
            if (sourceFormat == null) {
                sourceFormat = getSourceFormat(documentConverter, sourceFile);
            }

            // From plugin settings because we know the destination
            // mimetype.
            DocumentFormat destinationFormat = getDestinationFormat(documentConverter, sourceFormat, pdfa1);

            List<Blob> blobs = new ArrayList<>();

            if ("text/html".equals(getDestinationMimeType())) {
                // The HTML export may produce several files: isolate them in a dedicated directory.
                File tmpDir = new File(getTmpDirectory() + "/JODConv_" + System.currentTimeMillis());
                if (!tmpDir.mkdir()) {
                    throw new IOException("Unable to create temp dir");
                }
                htmlTmpDir = tmpDir;

                outFile = new File(htmlTmpDir.getAbsolutePath() + "/" + "NXJOOoConverterDocumentOut."
                        + destinationFormat.getExtension());
                if (!outFile.createNewFile()) {
                    throw new IOException("Unable to create temp file");
                }

                log.debug("Output File = " + outFile.getAbsolutePath());
                // Perform the actual conversion.
                documentConverter.convert(sourceFile, outFile, destinationFormat);

                File[] files = htmlTmpDir.listFiles();
                if (files == null) {
                    throw new IOException("Unable to list files of temp dir " + htmlTmpDir.getAbsolutePath());
                }
                for (File file : files) {
                    // copy the files to a new tmp location, as we'll delete them
                    Blob blob;
                    try (FileInputStream in = new FileInputStream(file)) {
                        blob = Blobs.createBlob(in);
                    }
                    blob.setFilename(file.getName());
                    blobs.add(blob);
                    // add a blob for the index
                    if (file.getName().equals(outFile.getName())) {
                        Blob indexBlob;
                        try (FileInputStream in = new FileInputStream(file)) {
                            indexBlob = Blobs.createBlob(in);
                        }
                        indexBlob.setFilename("index.html");
                        blobs.add(0, indexBlob);
                    }
                }

            } else {
                outFile = Framework.createTempFile("NXJOOoConverterDocumentOut",
                        '.' + destinationFormat.getExtension());

                // Perform the actual conversion.
                documentConverter.convert(sourceFile, outFile, destinationFormat, parameters);

                Blob blob;
                try (FileInputStream in = new FileInputStream(outFile)) {
                    blob = Blobs.createBlob(in, getDestinationMimeType());
                }
                blobs.add(blob);
            }
            return new SimpleCachableBlobHolder(blobs);
        } catch (IOException e) {
            String msg = String.format("An error occurred trying to convert file %s from %s to %s", blobPath,
                    sourceMimetype, getDestinationMimeType());
            throw new ConversionException(msg, e);
        } finally {
            // deleteQuietly is null-safe and removes directories recursively
            FileUtils.deleteQuietly(sourceFile);
            FileUtils.deleteQuietly(outFile);
            FileUtils.deleteQuietly(htmlTmpDir);
        }

    }

    /**
     * Fetches the office document converter from the {@link OOoManagerService}.
     *
     * @return the document converter, never {@code null}
     * @throws ConversionException if the service or its document converter is not available
     */
    protected OfficeDocumentConverter newDocumentConverter() throws ConversionException {
        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
        OfficeDocumentConverter documentConverter = oooManagerService == null ? null
                : oooManagerService.getDocumentConverter();
        if (documentConverter == null) {
            throw new ConversionException("Could not connect to the remote OpenOffice server");
        }
        return documentConverter;
    }

    /**
     * Stores the descriptor this converter reads its destination mime type and parameters from.
     */
    @SuppressWarnings("hiding")
    @Override
    public void init(ConverterDescriptor descriptor) {
        this.descriptor = descriptor;
    }

    /**
     * Reports this converter as unavailable when the {@link OOoManagerService} is missing or not started.
     */
    @Override
    public ConverterCheckResult isConverterAvailable() {
        ConverterCheckResult result = new ConverterCheckResult();
        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
        if (oooManagerService == null || !oooManagerService.isOOoManagerStarted()) {
            result.setAvailable(false);
        }
        return result;
    }

    /**
     * Returns the path of the directory to create temporary files in.
     *
     * @return the value of the {@link #TMP_PATH_PARAMETER} descriptor parameter if set, the temporary directory of
     *         the default {@link Environment} otherwise
     */
    protected String getTmpDirectory() {
        String tmp = null;
        Map<String, String> parameters = descriptor.getParameters();
        if (parameters != null) {
            tmp = parameters.get(TMP_PATH_PARAMETER);
        }
        if (tmp == null) {
            tmp = Environment.getDefault().getTemp().getPath();
        }
        return tmp;
    }

    /**
     * Checks if the {@code inputBlob} string contains a {@code charset} meta tag. If not, add it.
     * <p>
     * Nothing is done when the blob has no encoding. The charset name is matched literally and case-insensitively.
     *
     * @param inputBlob the input blob
     * @return the input blob if the meta tag is present or the encoding unknown, otherwise a new HTML blob with the
     *         meta tag prepended
     * @throws IOException Signals that an I/O exception has occurred.
     */
    protected Blob checkCharsetMeta(Blob inputBlob) throws IOException {

        String charset = inputBlob.getEncoding();
        if (StringUtils.isEmpty(charset)) {
            return inputBlob;
        }
        // quote the charset so that it is never interpreted as a regular expression
        Pattern charsetMetaPattern = Pattern.compile(
                "content=\"text/html;\\s*charset=" + Pattern.quote(charset) + "\"", Pattern.CASE_INSENSITIVE);
        Matcher charsetMetaMatcher = charsetMetaPattern.matcher(inputBlob.getString());
        if (charsetMetaMatcher.find()) {
            return inputBlob;
        }
        String charsetMetaTag = String.format(
                "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">", charset);
        StringBuilder sb = new StringBuilder(charsetMetaTag);
        sb.append(new String(inputBlob.getByteArray(), charset));
        return Blobs.createBlob(sb.toString(), "text/html", charset, inputBlob.getFilename());
    }
}