/*
 * (C) Copyright 2006-2012 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo
 *     Florent Guillaume
 *     Thierry Delprat
 */
package org.nuxeo.ecm.platform.convert.plugins;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.artofsolving.jodconverter.OfficeDocumentConverter;
import org.artofsolving.jodconverter.StandardConversionTask;
import org.artofsolving.jodconverter.document.DocumentFamily;
import org.artofsolving.jodconverter.document.DocumentFormat;

import org.nuxeo.common.Environment;
import org.nuxeo.common.utils.FileUtils;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
import org.nuxeo.ecm.platform.convert.ooomanager.OOoManagerService;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;

/**
 * Converter based on JOD which uses an external OpenOffice process to do actual conversions.
 *
 * @deprecated Since 8.4. Use 'soffice' with {@link org.nuxeo.ecm.platform.convert.plugins.CommandLineConverter} instead
 */
@Deprecated
public class JODBasedConverter implements ExternalConverter {

    protected static final String TMP_PATH_PARAMETER = "TmpDirectory";

    private static final Log log = LogFactory.getLog(JODBasedConverter.class);

    private static final String HTML_MIME_TYPE = "text/html";

    private static final String PDF_MIME_TYPE = "application/pdf";

    /** Extension given to the temporary source file when the blob filename provides no usable one. */
    private static final String DEFAULT_EXTENSION = ".bin";

    /**
     * Boolean conversion parameter for PDF/A-1.
     *
     * @since 5.6
     */
    public static final String PDFA1_PARAM = "PDF/A-1";

    /**
     * Boolean parameter to force update of the document TOC
     *
     * @since 5.6
     */
    public static final String UPDATE_INDEX_PARAM = StandardConversionTask.UPDATE_DOCUMENT_INDEX;

    protected static final Map<DocumentFamily, String> PDF_FILTER_NAMES = new HashMap<>();

    // Static initializer: the map is shared by all instances and must be filled exactly once, at class load.
    static {
        PDF_FILTER_NAMES.put(DocumentFamily.TEXT, "writer_pdf_Export");
        PDF_FILTER_NAMES.put(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
        PDF_FILTER_NAMES.put(DocumentFamily.PRESENTATION, "impress_pdf_Export");
        PDF_FILTER_NAMES.put(DocumentFamily.DRAWING, "draw_pdf_Export");
    }

    protected ConverterDescriptor descriptor;

    /**
     * Returns the destination mime type declared by the converter descriptor.
     */
    protected String getDestinationMimeType() {
        return descriptor.getDestinationMimeType();
    }

    /**
     * Returns the destination format for the given plugin.
     * <p>
     * It takes the actual destination mimetype from the plugin configuration. When the destination is PDF, the format
     * found in the registry is replaced by one built by {@link #extendPDFFormat}.
     *
     * @param documentConverter the converter whose format registry is queried
     * @param sourceFormat the source format
     * @param pdfa1 true if PDF/A-1 is required
     * @return the destination format, or whatever the registry returns for an unknown mime type
     */
    protected DocumentFormat getDestinationFormat(OfficeDocumentConverter documentConverter,
            DocumentFormat sourceFormat, boolean pdfa1) {
        String mimeType = getDestinationMimeType();
        DocumentFormat destinationFormat = documentConverter.getFormatRegistry().getFormatByMediaType(mimeType);
        if (PDF_MIME_TYPE.equals(mimeType)) {
            destinationFormat = extendPDFFormat(sourceFormat, destinationFormat, pdfa1);
        }
        return destinationFormat;
    }

    /**
     * Builds a PDF format whose store properties are those of {@code defaultFormat}, except for the family of
     * {@code sourceFormat}, whose properties are replaced by the result of {@link #extendPDFStoreProperties}.
     *
     * @param sourceFormat the format of the document being converted
     * @param defaultFormat the PDF format supplying the default store properties
     * @param pdfa1 true if PDF/A-1 is required
     * @return a new PDF (or PDF/A-1) document format
     */
    protected DocumentFormat extendPDFFormat(DocumentFormat sourceFormat, DocumentFormat defaultFormat, boolean pdfa1) {
        DocumentFamily sourceFamily = sourceFormat.getInputFamily();
        Map<DocumentFamily, Map<String, ?>> defaultStoreProperties = defaultFormat.getStorePropertiesByFamily();

        // Start from a copy of the defaults, then override the entry of the source family only.
        Map<DocumentFamily, Map<String, ?>> storeProperties = new HashMap<>();
        storeProperties.putAll(defaultStoreProperties);
        storeProperties.put(sourceFamily, extendPDFStoreProperties(sourceFormat.getMediaType(), pdfa1,
                defaultStoreProperties.get(sourceFamily)));

        DocumentFormat pdfFormat = new DocumentFormat(pdfa1 ? "PDF/A-1" : "PDF", "pdf", PDF_MIME_TYPE);
        pdfFormat.setStorePropertiesByFamily(storeProperties);
        return pdfFormat;
    }

    /**
     * Returns a copy of {@code originalProperties} with the HTML export filter name set when the source is HTML and
     * the PDF/A-1 filter data added when {@code pdfa1} is true.
     *
     * @param mediatype the media type of the source document
     * @param pdfa1 true if PDF/A-1 is required
     * @param originalProperties the store properties to extend; they are not modified
     * @return a new map holding the extended store properties
     */
    protected Map<String, Object> extendPDFStoreProperties(String mediatype, boolean pdfa1,
            Map<String, ?> originalProperties) {
        Map<String, Object> extendedProperties = new HashMap<>();
        extendedProperties.putAll(originalProperties);
        if (HTML_MIME_TYPE.equals(mediatype)) {
            extendedProperties.put("FilterName", "writer_web_pdf_Export");
        }
        if (pdfa1) {
            Map<String, Object> filterData = new HashMap<>();
            filterData.put("SelectPdfVersion", Integer.valueOf(1)); // PDF/A-1
            filterData.put("UseTaggedPDF", Boolean.TRUE); // per spec
            extendedProperties.put("FilterData", filterData);
        }
        return extendedProperties;
    }

    /**
     * Returns the format for the file passed as a parameter.
     * <p>
     * We will ask the mimetype registry service to sniff its mimetype.
     *
     * @return DocumentFormat for the given file
     */
    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, File file) {
        MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
        String sniffedMimetype = mimetypeRegistry.getMimetypeFromFile(file);
        return documentConverter.getFormatRegistry().getFormatByMediaType(sniffedMimetype);
    }

    /**
     * Returns the DocumentFormat for the given mimetype.
     *
     * @return DocumentFormat for the given mimetype
     */
    private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, String mimetype) {
        return documentConverter.getFormatRegistry().getFormatByMediaType(mimetype);
    }

    /**
     * Returns the suffix to use for the temporary copy of the source document.
     *
     * @param filename the blob filename, may be null
     * @return the part of {@code filename} starting at its last dot, or {@code .bin} when the filename is null, has
     *         no dot, or when that part contains a path separator
     */
    private static String getFileExtension(String filename) {
        if (filename == null) {
            return DEFAULT_EXTENSION;
        }
        int dotPosition = filename.lastIndexOf('.');
        if (dotPosition == -1) {
            return DEFAULT_EXTENSION;
        }
        String extension = filename.substring(dotPosition);
        // The filename comes from the blob; never let it inject path segments into the temp file name.
        if (extension.indexOf('/') != -1 || extension.indexOf('\\') != -1) {
            return DEFAULT_EXTENSION;
        }
        return extension;
    }

    /**
     * Deletes a file, or a directory and everything below it, on a best-effort basis. Symbolic links are deleted but
     * not followed. Failures are logged, never thrown, so that this can be used from a {@code finally} block.
     *
     * @param file the file or directory to delete, may be null
     */
    private static void deleteRecursively(File file) {
        if (file == null) {
            return;
        }
        if (!Files.isSymbolicLink(file.toPath())) {
            File[] children = file.listFiles(); // null when file is not a directory
            if (children != null) {
                for (File child : children) {
                    deleteRecursively(child);
                }
            }
        }
        if (!file.delete() && file.exists()) {
            log.warn("Unable to delete temporary file " + file.getAbsolutePath());
        }
    }

    /**
     * Converts the blob of the given holder to the destination mime type configured in the descriptor.
     * <p>
     * The input is first passed through {@link UTF8CharsetConverter}, then copied to a temporary file which is handed
     * to the office converter. When the destination is HTML, the conversion happens in a dedicated temporary
     * directory and every file produced there is returned as a blob, preceded by a copy of the main output named
     * {@code index.html}. Otherwise a single blob with the destination mime type is returned. All temporary files are
     * removed before returning.
     *
     * @param blobHolder the holder of the blob to convert
     * @param parameters the conversion parameters, may be null; {@link #PDFA1_PARAM} is read here
     * @return the converted blobs, or null if the holder has no blob
     * @throws ConversionException if the source or destination format is unknown, or on I/O error
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
        blobHolder = new UTF8CharsetConverter().convert(blobHolder, parameters);
        Blob inputBlob = blobHolder.getBlob();
        String blobPath = blobHolder.getFilePath();
        if (inputBlob == null) {
            return null;
        }

        OfficeDocumentConverter documentConverter = newDocumentConverter();
        // This plugin deals with only one input source.
        String sourceMimetype = inputBlob.getMimeType();

        boolean pdfa1 = parameters != null && Boolean.TRUE.equals(parameters.get(PDFA1_PARAM));

        File sourceFile = null;
        File outFile = null;
        File htmlTmpDir = null;
        try {
            // If the input blob has the HTML mime type, make sure the
            // charset meta is present, add it if not
            if (HTML_MIME_TYPE.equals(sourceMimetype)) {
                inputBlob = checkCharsetMeta(inputBlob);
            }

            // Copy in a file to be able to read it several times, keeping the original extension
            sourceFile = Framework.createTempFile("NXJOOoConverterDocumentIn",
                    getFileExtension(inputBlob.getFilename()));
            try (InputStream stream = inputBlob.getStream()) {
                FileUtils.copyToFile(stream, sourceFile);
            }

            DocumentFormat sourceFormat = null;
            if (sourceMimetype != null) {
                // Try to fetch it from the registry.
                sourceFormat = getSourceFormat(documentConverter, sourceMimetype);
            }
            // If not found in the registry or not given as a parameter, sniff the file.
            if (sourceFormat == null) {
                sourceFormat = getSourceFormat(documentConverter, sourceFile);
            }
            // Building the PDF destination format dereferences the source format: fail with a clear error instead.
            if (sourceFormat == null && PDF_MIME_TYPE.equals(getDestinationMimeType())) {
                throw new ConversionException(String.format(
                        "Unable to determine the source format of file %s (mime type %s)", blobPath, sourceMimetype));
            }

            // From plugin settings because we know the destination
            // mimetype.
            DocumentFormat destinationFormat = getDestinationFormat(documentConverter, sourceFormat, pdfa1);
            if (destinationFormat == null) {
                throw new ConversionException(
                        String.format("Unsupported destination mime type %s", getDestinationMimeType()));
            }

            List<Blob> blobs = new ArrayList<>();

            if (HTML_MIME_TYPE.equals(descriptor.getDestinationMimeType())) {
                // A uniquely named directory, so that concurrent conversions cannot collide.
                htmlTmpDir = Files.createTempDirectory(new File(getTmpDirectory()).toPath(), "JODConv_").toFile();

                outFile = new File(htmlTmpDir, "NXJOOoConverterDocumentOut." + destinationFormat.getExtension());
                if (!outFile.createNewFile()) {
                    throw new IOException("Unable to create temp file");
                }

                if (log.isDebugEnabled()) {
                    log.debug("Output File = " + outFile.getAbsolutePath());
                }
                // Perform the actual conversion.
                documentConverter.convert(sourceFile, outFile, destinationFormat);

                File[] files = htmlTmpDir.listFiles();
                if (files == null) {
                    throw new IOException("Unable to list files in " + htmlTmpDir.getAbsolutePath());
                }
                for (File file : files) {
                    // copy the files to a new tmp location, as we'll delete them
                    Blob blob;
                    try (FileInputStream in = new FileInputStream(file)) {
                        blob = Blobs.createBlob(in);
                    }
                    blob.setFilename(file.getName());
                    blobs.add(blob);
                    // add a blob for the index
                    if (file.getName().equals(outFile.getName())) {
                        Blob indexBlob;
                        try (FileInputStream in = new FileInputStream(file)) {
                            indexBlob = Blobs.createBlob(in);
                        }
                        indexBlob.setFilename("index.html");
                        blobs.add(0, indexBlob);
                    }
                }
            } else {
                outFile = Framework.createTempFile("NXJOOoConverterDocumentOut",
                        '.' + destinationFormat.getExtension());

                // Perform the actual conversion.
                documentConverter.convert(sourceFile, outFile, destinationFormat, parameters);

                Blob blob;
                try (FileInputStream in = new FileInputStream(outFile)) {
                    blob = Blobs.createBlob(in, getDestinationMimeType());
                }
                blobs.add(blob);
            }
            return new SimpleCachableBlobHolder(blobs);
        } catch (IOException e) {
            String msg = String.format("An error occurred trying to convert file %s from %s to %s", blobPath,
                    sourceMimetype, getDestinationMimeType());
            throw new ConversionException(msg, e);
        } finally {
            deleteRecursively(sourceFile);
            deleteRecursively(outFile);
            // Also removes the directory itself and anything left behind by a failed conversion.
            deleteRecursively(htmlTmpDir);
        }
    }

    /**
     * Fetches the document converter from the {@link OOoManagerService}.
     *
     * @return the document converter, never null
     * @throws ConversionException if the service or its converter is not available
     */
    protected OfficeDocumentConverter newDocumentConverter() throws ConversionException {
        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
        OfficeDocumentConverter documentConverter = oooManagerService == null ? null
                : oooManagerService.getDocumentConverter();
        if (documentConverter == null) {
            throw new ConversionException("Could not connect to the remote OpenOffice server");
        }
        return documentConverter;
    }

    /**
     * Stores the descriptor this converter reads its destination mime type and parameters from.
     */
    @SuppressWarnings("hiding")
    @Override
    public void init(ConverterDescriptor descriptor) {
        this.descriptor = descriptor;
    }

    /**
     * Reports the converter as unavailable when the {@link OOoManagerService} is missing or not started.
     */
    @Override
    public ConverterCheckResult isConverterAvailable() {
        ConverterCheckResult result = new ConverterCheckResult();
        OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
        if (oooManagerService == null || !oooManagerService.isOOoManagerStarted()) {
            result.setAvailable(false);
        }
        return result;
    }

    /**
     * Returns the directory in which temporary conversion directories are created: the value of the
     * {@link #TMP_PATH_PARAMETER} descriptor parameter if set, the environment temp directory otherwise.
     */
    protected String getTmpDirectory() {
        Map<String, String> parameters = descriptor.getParameters();
        String tmp = parameters == null ? null : parameters.get(TMP_PATH_PARAMETER);
        if (tmp == null) {
            tmp = Environment.getDefault().getTemp().getPath();
        }
        return tmp;
    }

    /**
     * Checks if the {@code inputBlob} string contains a {@code charset} meta tag. If not, add it.
     * <p>
     * Nothing is done when the blob declares no encoding. The charset name is matched literally and without regard to
     * case.
     *
     * @param inputBlob the input blob
     * @return the input blob itself, or a new HTML blob whose content is prefixed with the meta tag
     * @throws IOException Signals that an I/O exception has occurred.
     */
    protected Blob checkCharsetMeta(Blob inputBlob) throws IOException {
        String charset = inputBlob.getEncoding();
        if (StringUtils.isEmpty(charset)) {
            return inputBlob;
        }
        // Decode once; the same text is used for the lookup and for the new blob.
        String html = new String(inputBlob.getByteArray(), charset);
        Pattern charsetMetaPattern = Pattern.compile(
                "content=\"text/html;\\s*charset=" + Pattern.quote(charset) + "\"", Pattern.CASE_INSENSITIVE);
        Matcher charsetMetaMatcher = charsetMetaPattern.matcher(html);
        if (charsetMetaMatcher.find()) {
            return inputBlob;
        }
        String charsetMetaTag = String.format(
                "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">", charset);
        return Blobs.createBlob(charsetMetaTag + html, HTML_MIME_TYPE, charset, inputBlob.getFilename());
    }
}