001/* 002 * (C) Copyright 2006-2007 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 */ 017package org.nuxeo.ecm.diff.content.adapter.base; 018 019import java.io.IOException; 020import java.util.ArrayList; 021import java.util.List; 022import java.util.Locale; 023 024import org.apache.commons.lang.StringEscapeUtils; 025import org.apache.commons.logging.Log; 026import org.apache.commons.logging.LogFactory; 027import org.nuxeo.ecm.core.api.Blob; 028import org.nuxeo.ecm.core.api.Blobs; 029import org.nuxeo.ecm.core.api.DocumentModel; 030import org.nuxeo.ecm.core.api.NuxeoException; 031import org.nuxeo.ecm.core.api.blobholder.BlobHolder; 032import org.nuxeo.ecm.core.convert.api.ConversionException; 033import org.nuxeo.ecm.core.convert.api.ConversionService; 034import org.nuxeo.ecm.diff.content.ContentDiffException; 035import org.nuxeo.ecm.diff.content.ContentDiffHelper; 036import org.nuxeo.ecm.diff.content.adapter.HtmlContentDiffer; 037import org.nuxeo.ecm.diff.content.adapter.MimeTypeContentDiffer; 038import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException; 039import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry; 040import org.nuxeo.runtime.api.Framework; 041 042/** 043 * Base class for content diff based on "on the fly html or text transformers. 044 * 045 * @author Antoine Taillefer 046 * @since 5.6 047 */ 048public class ConverterBasedContentDiffAdapter extends AbstractContentDiffAdapter { 049 050 private static final Log log = LogFactory.getLog(ConverterBasedContentDiffAdapter.class); 051 052 protected static final String DEFAULT_CONVERTER_NAME = "any2text"; 053 054 protected String defaultFieldXPath; 055 056 protected MimetypeRegistry mimeTypeService; 057 058 @Override 059 public List<Blob> getContentDiffBlobs(DocumentModel otherDoc, ContentDiffConversionType conversionType, 060 Locale locale) throws ContentDiffException, ConversionException { 061 return getContentDiffBlobs(otherDoc, getDefaultContentDiffFieldXPath(), conversionType, locale); 062 } 063 064 @Override 065 public List<Blob> getContentDiffBlobs(DocumentModel otherDoc, String xpath, 066 ContentDiffConversionType conversionType, Locale locale) throws ContentDiffException, ConversionException { 067 068 Blob adaptedDocBlob = null; 069 Blob otherDocBlob = null; 070 BlobHolder adaptedDocBlobHolder = null; 071 BlobHolder otherDocBlobHolder = null; 072 073 if ((xpath == null) || (ContentDiffHelper.DEFAULT_XPATH.equals(xpath))) { 074 adaptedDocBlobHolder = adaptedDoc.getAdapter(BlobHolder.class); 075 otherDocBlobHolder = otherDoc.getAdapter(BlobHolder.class); 076 } else { 077 adaptedDocBlobHolder = ContentDiffHelper.getBlobHolder(adaptedDoc, xpath); 078 otherDocBlobHolder = ContentDiffHelper.getBlobHolder(otherDoc, xpath); 079 } 080 if (adaptedDocBlobHolder == null || otherDocBlobHolder == null) { 081 throw new ContentDiffException("Can not make a content diff of documents without a blob"); 082 } 083 084 adaptedDocBlob = adaptedDocBlobHolder.getBlob(); 085 otherDocBlob = otherDocBlobHolder.getBlob(); 086 if (adaptedDocBlob == null || otherDocBlob == null) { 087 throw new ContentDiffException("Can not make a content diff of documents without a blob"); 088 } 089 090 List<Blob> blobResults = new ArrayList<Blob>(); 091 092 String adaptedDocMimeType = getMimeType(adaptedDocBlob); 093 String otherDocMimeType = getMimeType(otherDocBlob); 094 log.debug("Mime type of adapted doc for HTML content diff = " + adaptedDocMimeType); 095 log.debug("Mime type of other doc for HTML content diff = " + otherDocMimeType); 096 097 // Check doc mime types, if a common mime type is found, look for the 098 // associated content differ. 099 if (adaptedDocMimeType != null && otherDocMimeType != null && adaptedDocMimeType.equals(otherDocMimeType)) { 100 MimeTypeContentDiffer mtContentDiffer = getContentDiffAdapterManager().getContentDiffer(adaptedDocMimeType); 101 if (mtContentDiffer != null) { 102 // If using the HtmlContentDiffer for non HTML blobs 103 // (text/plain, text/xml), we need to transform the blob strings 104 // to encode XML entities and replace all occurrences of "\n" 105 // with "<br />", since they will then be displayed in HTML. 106 if (mtContentDiffer instanceof HtmlContentDiffer && !"text/html".equals(adaptedDocMimeType)) { 107 adaptedDocBlob = getHtmlStringBlob(adaptedDocBlob); 108 otherDocBlob = getHtmlStringBlob(otherDocBlob); 109 } 110 blobResults = mtContentDiffer.getContentDiff(adaptedDocBlob, otherDocBlob, locale); 111 return blobResults; 112 } 113 } 114 115 // Docs have a different mime type or no content differ found for the 116 // common mime type. 117 // Fall back on a conversion (conversionType) + HtmlContentDiffer. 118 // Default conversion type is HTML 119 if (conversionType == null) { 120 conversionType = ContentDiffConversionType.html; 121 } 122 String converterName = conversionType.getValue(); 123 BlobHolder adaptedDocConvertedBlobHolder = getConvertedBlobHolder(adaptedDocBlobHolder, converterName); 124 BlobHolder otherDocConvertedBlobHolder = getConvertedBlobHolder(otherDocBlobHolder, converterName); 125 Blob adaptedDocConvertedBlob = adaptedDocConvertedBlobHolder.getBlob(); 126 Blob otherDocConvertedBlob = otherDocConvertedBlobHolder.getBlob(); 127 128 // In the case of a text conversion, we need to transform the blob 129 // strings to encode XML entities and replace all occurrences of 130 // "\n" with "<br />", since they will then be displayed in HTML by 131 // the HtmlContentDiffer. 132 if (ContentDiffConversionType.text.equals(conversionType)) { 133 adaptedDocConvertedBlob = getHtmlStringBlob(adaptedDocConvertedBlob); 134 otherDocConvertedBlob = getHtmlStringBlob(otherDocConvertedBlob); 135 } 136 137 // Add html content diff blob 138 MimeTypeContentDiffer contentDiffer = getContentDiffAdapterManager().getHtmlContentDiffer(); 139 blobResults.addAll(contentDiffer.getContentDiff(adaptedDocConvertedBlob, otherDocConvertedBlob, locale)); 140 141 // Add secondary blobs (mostly images) 142 addSecondaryBlobs(blobResults, adaptedDocConvertedBlobHolder, adaptedDocConvertedBlob.getFilename()); 143 addSecondaryBlobs(blobResults, otherDocConvertedBlobHolder, otherDocConvertedBlob.getFilename()); 144 return blobResults; 145 } 146 147 @Override 148 public void cleanup() { 149 // Nothing to do here 150 } 151 152 @Override 153 public boolean cachable() { 154 return true; 155 } 156 157 public void setDefaultContentDiffFieldXPath(String xPath) { 158 defaultFieldXPath = xPath; 159 } 160 161 protected String getMimeType(Blob blob) { 162 if (blob == null) { 163 return null; 164 } 165 166 String srcMT = blob.getMimeType(); 167 if (srcMT == null || srcMT.startsWith("application/octet-stream")) { 168 // call MT Service 169 try { 170 MimetypeRegistry mtr = Framework.getService(MimetypeRegistry.class); 171 srcMT = mtr.getMimetypeFromFilenameAndBlobWithDefault(blob.getFilename(), blob, 172 "application/octet-stream"); 173 log.debug("mime type service returned " + srcMT); 174 } catch (MimetypeDetectionException e) { 175 log.warn("error while calling Mimetype service", e); 176 } 177 } 178 return srcMT; 179 } 180 181 protected void setMimeType(BlobHolder result) { 182 for (Blob blob : result.getBlobs()) { 183 if (blob.getMimeType() == null && blob.getFilename().endsWith("html")) { 184 String mimeTpye = getMimeType(blob); 185 blob.setMimeType(mimeTpye); 186 } 187 } 188 } 189 190 protected String getDefaultContentDiffFieldXPath() { 191 return defaultFieldXPath; 192 } 193 194 /** 195 * Returns a blob holder converted using the specified converter name. 196 * 197 * @param blobHolder the blob holder 198 * @param converterName the converter name 199 * @return the converted blob holder 200 * @throws ConversionException if an error occurs while converting the blob holder 201 */ 202 protected BlobHolder getConvertedBlobHolder(BlobHolder blobHolder, String converterName) throws ConversionException { 203 204 if (converterName == null) { 205 log.debug(String.format("No converter parameter, using generic one: '%s'.", DEFAULT_CONVERTER_NAME)); 206 converterName = DEFAULT_CONVERTER_NAME; 207 } 208 209 BlobHolder convertedBlobHolder = getConversionService().convert(converterName, blobHolder, null); 210 setMimeType(convertedBlobHolder); 211 return convertedBlobHolder; 212 } 213 214 protected Blob getHtmlStringBlob(Blob blob) throws ContentDiffException { 215 try { 216 Blob htmlStringBlob = Blobs.createBlob(StringEscapeUtils.escapeHtml( 217 new String(blob.getByteArray(), "UTF-8")).replace("\r\n", "\n").replace("\n", "<br />")); 218 htmlStringBlob.setFilename(blob.getFilename()); 219 return htmlStringBlob; 220 } catch (IOException ioe) { 221 throw new ContentDiffException(String.format("Could not get string from blob %s", blob.getFilename()), ioe); 222 } 223 } 224 225 protected void addSecondaryBlobs(List<Blob> blobResults, BlobHolder blobHolder, String mainBlobFilename) { 226 227 for (Blob blob : blobHolder.getBlobs()) { 228 String blobFilename = blob.getFilename(); 229 if (blobFilename != null && !blobFilename.equals(mainBlobFilename)) { 230 blobResults.add(blob); 231 } 232 } 233 } 234 235 /** 236 * Gets the conversion service. 237 * 238 * @return the conversion service 239 */ 240 protected final ConversionService getConversionService() { 241 ConversionService conversionService = Framework.getService(ConversionService.class); 242 if (conversionService == null) { 243 throw new NuxeoException("ConversionService service is null."); 244 } 245 return conversionService; 246 } 247 248}