/*
 * (C) Copyright 2006-2007 Nuxeo SAS (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 */
package org.nuxeo.ecm.diff.content.adapter.base;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.NuxeoException;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.diff.content.ContentDiffException;
import org.nuxeo.ecm.diff.content.ContentDiffHelper;
import org.nuxeo.ecm.diff.content.adapter.HtmlContentDiffer;
import org.nuxeo.ecm.diff.content.adapter.MimeTypeContentDiffer;
import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;

/**
 * Base class for content diff based on "on the fly" html or text transformers.
 *
 * @author Antoine Taillefer
 * @since 5.6
 */
public class ConverterBasedContentDiffAdapter extends AbstractContentDiffAdapter {

    private static final Log log = LogFactory.getLog(ConverterBasedContentDiffAdapter.class);

    /** Converter used by {@link #getConvertedBlobHolder} when no converter name is given. */
    protected static final String DEFAULT_CONVERTER_NAME = "any2text";

    /** XPath used by the overload of {@code getContentDiffBlobs} that takes no explicit xpath. */
    protected String defaultFieldXPath;

    // Not read or written by this class; kept because it is protected and may be used by subclasses.
    protected MimetypeRegistry mimeTypeService;

    /**
     * Computes the content diff blobs using the default content diff field xpath.
     *
     * @param otherDoc the document to compare the adapted document with
     * @param conversionType the conversion type used for the fallback conversion, may be {@code null}
     * @param locale the locale passed to the content differ
     * @return the content diff blobs
     * @throws ContentDiffException if one of the documents has no blob or the diff cannot be computed
     * @throws ConversionException if the fallback conversion fails
     */
    @Override
    public List<Blob> getContentDiffBlobs(DocumentModel otherDoc, ContentDiffConversionType conversionType,
            Locale locale) throws ContentDiffException, ConversionException {
        return getContentDiffBlobs(otherDoc, getDefaultContentDiffFieldXPath(), conversionType, locale);
    }

    /**
     * Computes the content diff blobs between the adapted document and {@code otherDoc} for the given xpath.
     * <p>
     * If both blobs share the same mime type and a content differ is registered for it, that differ is used
     * directly. Otherwise both blobs are converted using {@code conversionType} (HTML when {@code null}) and
     * diffed with the HTML content differ; the secondary blobs of both conversions are appended to the result.
     *
     * @param otherDoc the document to compare the adapted document with
     * @param xpath the xpath of the blob property; {@code null} or {@link ContentDiffHelper#DEFAULT_XPATH}
     *            means the documents' {@link BlobHolder} adapter is used
     * @param conversionType the conversion type used for the fallback conversion, may be {@code null}
     * @param locale the locale passed to the content differ
     * @return the content diff blobs
     * @throws ContentDiffException if a source or converted blob is missing, or the diff cannot be computed
     * @throws ConversionException if the fallback conversion fails
     */
    @Override
    public List<Blob> getContentDiffBlobs(DocumentModel otherDoc, String xpath,
            ContentDiffConversionType conversionType, Locale locale) throws ContentDiffException, ConversionException {

        BlobHolder adaptedDocBlobHolder;
        BlobHolder otherDocBlobHolder;
        if (xpath == null || ContentDiffHelper.DEFAULT_XPATH.equals(xpath)) {
            adaptedDocBlobHolder = adaptedDoc.getAdapter(BlobHolder.class);
            otherDocBlobHolder = otherDoc.getAdapter(BlobHolder.class);
        } else {
            adaptedDocBlobHolder = ContentDiffHelper.getBlobHolder(adaptedDoc, xpath);
            otherDocBlobHolder = ContentDiffHelper.getBlobHolder(otherDoc, xpath);
        }
        if (adaptedDocBlobHolder == null || otherDocBlobHolder == null) {
            throw new ContentDiffException("Can not make a content diff of documents without a blob");
        }

        Blob adaptedDocBlob = adaptedDocBlobHolder.getBlob();
        Blob otherDocBlob = otherDocBlobHolder.getBlob();
        if (adaptedDocBlob == null || otherDocBlob == null) {
            throw new ContentDiffException("Can not make a content diff of documents without a blob");
        }

        String adaptedDocMimeType = getMimeType(adaptedDocBlob);
        String otherDocMimeType = getMimeType(otherDocBlob);
        if (log.isDebugEnabled()) {
            log.debug("Mime type of adapted doc for HTML content diff = " + adaptedDocMimeType);
            log.debug("Mime type of other doc for HTML content diff = " + otherDocMimeType);
        }

        // Check doc mime types, if a common mime type is found, look for the
        // associated content differ.
        if (adaptedDocMimeType != null && adaptedDocMimeType.equals(otherDocMimeType)) {
            MimeTypeContentDiffer mtContentDiffer = getContentDiffAdapterManager().getContentDiffer(adaptedDocMimeType);
            if (mtContentDiffer != null) {
                // If using the HtmlContentDiffer for non HTML blobs
                // (text/plain, text/xml), we need to transform the blob strings
                // to encode XML entities and replace all occurrences of "\n"
                // with "<br />", since they will then be displayed in HTML.
                if (mtContentDiffer instanceof HtmlContentDiffer && !"text/html".equals(adaptedDocMimeType)) {
                    adaptedDocBlob = getHtmlStringBlob(adaptedDocBlob);
                    otherDocBlob = getHtmlStringBlob(otherDocBlob);
                }
                return mtContentDiffer.getContentDiff(adaptedDocBlob, otherDocBlob, locale);
            }
        }

        // Docs have a different mime type or no content differ found for the
        // common mime type.
        // Fall back on a conversion (conversionType) + HtmlContentDiffer.
        // Default conversion type is HTML
        if (conversionType == null) {
            conversionType = ContentDiffConversionType.html;
        }
        String converterName = conversionType.getValue();
        BlobHolder adaptedDocConvertedBlobHolder = getConvertedBlobHolder(adaptedDocBlobHolder, converterName);
        BlobHolder otherDocConvertedBlobHolder = getConvertedBlobHolder(otherDocBlobHolder, converterName);
        // Fail with a meaningful error rather than a NullPointerException when
        // the conversion did not produce anything to compare.
        if (adaptedDocConvertedBlobHolder == null || otherDocConvertedBlobHolder == null) {
            throw new ContentDiffException(
                    String.format("Conversion of type '%s' returned no blob holder", conversionType));
        }
        Blob adaptedDocConvertedBlob = adaptedDocConvertedBlobHolder.getBlob();
        Blob otherDocConvertedBlob = otherDocConvertedBlobHolder.getBlob();
        if (adaptedDocConvertedBlob == null || otherDocConvertedBlob == null) {
            throw new ContentDiffException(String.format("Conversion of type '%s' returned no blob", conversionType));
        }

        // In the case of a text conversion, we need to transform the blob
        // strings to encode XML entities and replace all occurrences of
        // "\n" with "<br />", since they will then be displayed in HTML by
        // the HtmlContentDiffer.
        if (ContentDiffConversionType.text.equals(conversionType)) {
            adaptedDocConvertedBlob = getHtmlStringBlob(adaptedDocConvertedBlob);
            otherDocConvertedBlob = getHtmlStringBlob(otherDocConvertedBlob);
        }

        // Add html content diff blob
        List<Blob> blobResults = new ArrayList<Blob>();
        MimeTypeContentDiffer contentDiffer = getContentDiffAdapterManager().getHtmlContentDiffer();
        blobResults.addAll(contentDiffer.getContentDiff(adaptedDocConvertedBlob, otherDocConvertedBlob, locale));

        // Add secondary blobs (mostly images)
        addSecondaryBlobs(blobResults, adaptedDocConvertedBlobHolder, adaptedDocConvertedBlob.getFilename());
        addSecondaryBlobs(blobResults, otherDocConvertedBlobHolder, otherDocConvertedBlob.getFilename());
        return blobResults;
    }

    @Override
    public void cleanup() {
        // Nothing to do here
    }

    @Override
    public boolean cachable() {
        return true;
    }

    /**
     * Sets the xpath used when {@code getContentDiffBlobs} is called without an explicit xpath.
     *
     * @param xPath the default content diff field xpath
     */
    public void setDefaultContentDiffFieldXPath(String xPath) {
        defaultFieldXPath = xPath;
    }

    /**
     * Returns the mime type of the given blob.
     * <p>
     * When the blob declares no mime type, or one starting with {@code application/octet-stream}, the
     * {@link MimetypeRegistry} is asked to detect it from the blob filename and content. If the registry is
     * unavailable or detection fails, a warning is logged and the declared mime type is returned unchanged.
     *
     * @param blob the blob, may be {@code null}
     * @return the mime type, or {@code null} if the blob is {@code null} or no mime type is known
     */
    protected String getMimeType(Blob blob) {
        if (blob == null) {
            return null;
        }

        String srcMT = blob.getMimeType();
        if (srcMT == null || srcMT.startsWith("application/octet-stream")) {
            // call MT Service
            MimetypeRegistry mtr = Framework.getService(MimetypeRegistry.class);
            if (mtr == null) {
                // Same best-effort policy as a detection failure: keep the declared mime type.
                log.warn("MimetypeRegistry service is null, cannot detect mime type");
                return srcMT;
            }
            try {
                srcMT = mtr.getMimetypeFromFilenameAndBlobWithDefault(blob.getFilename(), blob,
                        "application/octet-stream");
                if (log.isDebugEnabled()) {
                    log.debug("mime type service returned " + srcMT);
                }
            } catch (MimetypeDetectionException e) {
                log.warn("error while calling Mimetype service", e);
            }
        }
        return srcMT;
    }

    /**
     * Sets the mime type of every blob of the given holder that has none and whose filename ends with
     * {@code "html"}, using {@link #getMimeType(Blob)}.
     *
     * @param result the blob holder whose blobs are updated in place; a {@code null} holder is ignored
     */
    protected void setMimeType(BlobHolder result) {
        if (result == null) {
            return;
        }
        for (Blob blob : result.getBlobs()) {
            String filename = blob.getFilename();
            // A blob without a filename cannot be matched on its extension: skip it.
            if (blob.getMimeType() == null && filename != null && filename.endsWith("html")) {
                blob.setMimeType(getMimeType(blob));
            }
        }
    }

    /**
     * Gets the xpath used when {@code getContentDiffBlobs} is called without an explicit xpath.
     *
     * @return the default content diff field xpath, may be {@code null}
     */
    protected String getDefaultContentDiffFieldXPath() {
        return defaultFieldXPath;
    }

    /**
     * Returns a blob holder converted using the specified converter name.
     *
     * @param blobHolder the blob holder
     * @param converterName the converter name, {@link #DEFAULT_CONVERTER_NAME} is used if {@code null}
     * @return the converted blob holder
     * @throws ConversionException if an error occurs while converting the blob holder
     */
    protected BlobHolder getConvertedBlobHolder(BlobHolder blobHolder, String converterName) throws ConversionException {

        if (converterName == null) {
            log.debug(String.format("No converter parameter, using generic one: '%s'.", DEFAULT_CONVERTER_NAME));
            converterName = DEFAULT_CONVERTER_NAME;
        }

        BlobHolder convertedBlobHolder = getConversionService().convert(converterName, blobHolder, null);
        setMimeType(convertedBlobHolder);
        return convertedBlobHolder;
    }

    /**
     * Builds a blob suitable for HTML display from the content of the given blob: the content is decoded as
     * UTF-8, HTML-escaped, and its line breaks ({@code "\r\n"} and {@code "\n"}) are replaced by
     * {@code "<br />"}. The filename of the original blob is preserved.
     *
     * @param blob the blob to transform
     * @return the HTML string blob
     * @throws ContentDiffException if the blob content cannot be read
     */
    protected Blob getHtmlStringBlob(Blob blob) throws ContentDiffException {
        try {
            String content = new String(blob.getByteArray(), "UTF-8");
            String html = StringEscapeUtils.escapeHtml(content).replace("\r\n", "\n").replace("\n", "<br />");
            Blob htmlStringBlob = Blobs.createBlob(html);
            htmlStringBlob.setFilename(blob.getFilename());
            return htmlStringBlob;
        } catch (IOException ioe) {
            throw new ContentDiffException(String.format("Could not get string from blob %s", blob.getFilename()), ioe);
        }
    }

    /**
     * Appends to {@code blobResults} every blob of the holder that has a filename different from
     * {@code mainBlobFilename}. Blobs without a filename are skipped.
     *
     * @param blobResults the list to append to
     * @param blobHolder the blob holder providing the candidate blobs
     * @param mainBlobFilename the filename of the main blob, which must not be added again
     */
    protected void addSecondaryBlobs(List<Blob> blobResults, BlobHolder blobHolder, String mainBlobFilename) {

        for (Blob blob : blobHolder.getBlobs()) {
            String blobFilename = blob.getFilename();
            if (blobFilename != null && !blobFilename.equals(mainBlobFilename)) {
                blobResults.add(blob);
            }
        }
    }

    /**
     * Gets the conversion service.
     *
     * @return the conversion service
     * @throws NuxeoException if the conversion service is not available
     */
    protected final ConversionService getConversionService() {
        ConversionService conversionService = Framework.getService(ConversionService.class);
        if (conversionService == null) {
            throw new NuxeoException("ConversionService service is null.");
        }
        return conversionService;
    }

}