001/*
002 * (C) Copyright 2006-2007 Nuxeo SAS (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 */
015package org.nuxeo.ecm.diff.content.adapter.base;
016
017import java.io.IOException;
018import java.util.ArrayList;
019import java.util.List;
020import java.util.Locale;
021
022import org.apache.commons.lang.StringEscapeUtils;
023import org.apache.commons.logging.Log;
024import org.apache.commons.logging.LogFactory;
025import org.nuxeo.ecm.core.api.Blob;
026import org.nuxeo.ecm.core.api.Blobs;
027import org.nuxeo.ecm.core.api.DocumentModel;
028import org.nuxeo.ecm.core.api.NuxeoException;
029import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
030import org.nuxeo.ecm.core.convert.api.ConversionException;
031import org.nuxeo.ecm.core.convert.api.ConversionService;
032import org.nuxeo.ecm.diff.content.ContentDiffException;
033import org.nuxeo.ecm.diff.content.ContentDiffHelper;
034import org.nuxeo.ecm.diff.content.adapter.HtmlContentDiffer;
035import org.nuxeo.ecm.diff.content.adapter.MimeTypeContentDiffer;
036import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
037import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
038import org.nuxeo.runtime.api.Framework;
039
040/**
041 * Base class for content diff based on "on the fly html or text transformers.
042 *
043 * @author Antoine Taillefer
044 * @since 5.6
045 */
046public class ConverterBasedContentDiffAdapter extends AbstractContentDiffAdapter {
047
048    private static final Log log = LogFactory.getLog(ConverterBasedContentDiffAdapter.class);
049
050    protected static final String DEFAULT_CONVERTER_NAME = "any2text";
051
052    protected String defaultFieldXPath;
053
054    protected MimetypeRegistry mimeTypeService;
055
056    @Override
057    public List<Blob> getContentDiffBlobs(DocumentModel otherDoc, ContentDiffConversionType conversionType,
058            Locale locale) throws ContentDiffException, ConversionException {
059        return getContentDiffBlobs(otherDoc, getDefaultContentDiffFieldXPath(), conversionType, locale);
060    }
061
062    @Override
063    public List<Blob> getContentDiffBlobs(DocumentModel otherDoc, String xpath,
064            ContentDiffConversionType conversionType, Locale locale) throws ContentDiffException, ConversionException {
065
066        Blob adaptedDocBlob = null;
067        Blob otherDocBlob = null;
068        BlobHolder adaptedDocBlobHolder = null;
069        BlobHolder otherDocBlobHolder = null;
070
071        if ((xpath == null) || (ContentDiffHelper.DEFAULT_XPATH.equals(xpath))) {
072            adaptedDocBlobHolder = adaptedDoc.getAdapter(BlobHolder.class);
073            otherDocBlobHolder = otherDoc.getAdapter(BlobHolder.class);
074        } else {
075            adaptedDocBlobHolder = ContentDiffHelper.getBlobHolder(adaptedDoc, xpath);
076            otherDocBlobHolder = ContentDiffHelper.getBlobHolder(otherDoc, xpath);
077        }
078        if (adaptedDocBlobHolder == null || otherDocBlobHolder == null) {
079            throw new ContentDiffException("Can not make a content diff of documents without a blob");
080        }
081
082        adaptedDocBlob = adaptedDocBlobHolder.getBlob();
083        otherDocBlob = otherDocBlobHolder.getBlob();
084        if (adaptedDocBlob == null || otherDocBlob == null) {
085            throw new ContentDiffException("Can not make a content diff of documents without a blob");
086        }
087
088        List<Blob> blobResults = new ArrayList<Blob>();
089
090        String adaptedDocMimeType = getMimeType(adaptedDocBlob);
091        String otherDocMimeType = getMimeType(otherDocBlob);
092        log.debug("Mime type of adapted doc for HTML content diff = " + adaptedDocMimeType);
093        log.debug("Mime type of other doc for HTML content diff = " + otherDocMimeType);
094
095        // Check doc mime types, if a common mime type is found, look for the
096        // associated content differ.
097        if (adaptedDocMimeType != null && otherDocMimeType != null && adaptedDocMimeType.equals(otherDocMimeType)) {
098            MimeTypeContentDiffer mtContentDiffer = getContentDiffAdapterManager().getContentDiffer(adaptedDocMimeType);
099            if (mtContentDiffer != null) {
100                // If using the HtmlContentDiffer for non HTML blobs
101                // (text/plain, text/xml), we need to transform the blob strings
102                // to encode XML entities and replace all occurrences of "\n"
103                // with "<br />", since they will then be displayed in HTML.
104                if (mtContentDiffer instanceof HtmlContentDiffer && !"text/html".equals(adaptedDocMimeType)) {
105                    adaptedDocBlob = getHtmlStringBlob(adaptedDocBlob);
106                    otherDocBlob = getHtmlStringBlob(otherDocBlob);
107                }
108                blobResults = mtContentDiffer.getContentDiff(adaptedDocBlob, otherDocBlob, locale);
109                return blobResults;
110            }
111        }
112
113        // Docs have a different mime type or no content differ found for the
114        // common mime type.
115        // Fall back on a conversion (conversionType) + HtmlContentDiffer.
116        // Default conversion type is HTML
117        if (conversionType == null) {
118            conversionType = ContentDiffConversionType.html;
119        }
120        String converterName = conversionType.getValue();
121        BlobHolder adaptedDocConvertedBlobHolder = getConvertedBlobHolder(adaptedDocBlobHolder, converterName);
122        BlobHolder otherDocConvertedBlobHolder = getConvertedBlobHolder(otherDocBlobHolder, converterName);
123        Blob adaptedDocConvertedBlob = adaptedDocConvertedBlobHolder.getBlob();
124        Blob otherDocConvertedBlob = otherDocConvertedBlobHolder.getBlob();
125
126        // In the case of a text conversion, we need to transform the blob
127        // strings to encode XML entities and replace all occurrences of
128        // "\n" with "<br />", since they will then be displayed in HTML by
129        // the HtmlContentDiffer.
130        if (ContentDiffConversionType.text.equals(conversionType)) {
131            adaptedDocConvertedBlob = getHtmlStringBlob(adaptedDocConvertedBlob);
132            otherDocConvertedBlob = getHtmlStringBlob(otherDocConvertedBlob);
133        }
134
135        // Add html content diff blob
136        MimeTypeContentDiffer contentDiffer = getContentDiffAdapterManager().getHtmlContentDiffer();
137        blobResults.addAll(contentDiffer.getContentDiff(adaptedDocConvertedBlob, otherDocConvertedBlob, locale));
138
139        // Add secondary blobs (mostly images)
140        addSecondaryBlobs(blobResults, adaptedDocConvertedBlobHolder, adaptedDocConvertedBlob.getFilename());
141        addSecondaryBlobs(blobResults, otherDocConvertedBlobHolder, otherDocConvertedBlob.getFilename());
142        return blobResults;
143    }
144
145    @Override
146    public void cleanup() {
147        // Nothing to do here
148    }
149
150    @Override
151    public boolean cachable() {
152        return true;
153    }
154
155    public void setDefaultContentDiffFieldXPath(String xPath) {
156        defaultFieldXPath = xPath;
157    }
158
159    protected String getMimeType(Blob blob) {
160        if (blob == null) {
161            return null;
162        }
163
164        String srcMT = blob.getMimeType();
165        if (srcMT == null || srcMT.startsWith("application/octet-stream")) {
166            // call MT Service
167            try {
168                MimetypeRegistry mtr = Framework.getService(MimetypeRegistry.class);
169                srcMT = mtr.getMimetypeFromFilenameAndBlobWithDefault(blob.getFilename(), blob,
170                        "application/octet-stream");
171                log.debug("mime type service returned " + srcMT);
172            } catch (MimetypeDetectionException e) {
173                log.warn("error while calling Mimetype service", e);
174            }
175        }
176        return srcMT;
177    }
178
179    protected void setMimeType(BlobHolder result) {
180        for (Blob blob : result.getBlobs()) {
181            if (blob.getMimeType() == null && blob.getFilename().endsWith("html")) {
182                String mimeTpye = getMimeType(blob);
183                blob.setMimeType(mimeTpye);
184            }
185        }
186    }
187
188    protected String getDefaultContentDiffFieldXPath() {
189        return defaultFieldXPath;
190    }
191
192    /**
193     * Returns a blob holder converted using the specified converter name.
194     *
195     * @param blobHolder the blob holder
196     * @param converterName the converter name
197     * @return the converted blob holder
198     * @throws ConversionException if an error occurs while converting the blob holder
199     */
200    protected BlobHolder getConvertedBlobHolder(BlobHolder blobHolder, String converterName) throws ConversionException {
201
202        if (converterName == null) {
203            log.debug(String.format("No converter parameter, using generic one: '%s'.", DEFAULT_CONVERTER_NAME));
204            converterName = DEFAULT_CONVERTER_NAME;
205        }
206
207        BlobHolder convertedBlobHolder = getConversionService().convert(converterName, blobHolder, null);
208        setMimeType(convertedBlobHolder);
209        return convertedBlobHolder;
210    }
211
212    protected Blob getHtmlStringBlob(Blob blob) throws ContentDiffException {
213        try {
214            Blob htmlStringBlob = Blobs.createBlob(StringEscapeUtils.escapeHtml(
215                    new String(blob.getByteArray(), "UTF-8")).replace("\r\n", "\n").replace("\n", "<br />"));
216            htmlStringBlob.setFilename(blob.getFilename());
217            return htmlStringBlob;
218        } catch (IOException ioe) {
219            throw new ContentDiffException(String.format("Could not get string from blob %s", blob.getFilename()), ioe);
220        }
221    }
222
223    protected void addSecondaryBlobs(List<Blob> blobResults, BlobHolder blobHolder, String mainBlobFilename) {
224
225        for (Blob blob : blobHolder.getBlobs()) {
226            String blobFilename = blob.getFilename();
227            if (blobFilename != null && !blobFilename.equals(mainBlobFilename)) {
228                blobResults.add(blob);
229            }
230        }
231    }
232
233    /**
234     * Gets the conversion service.
235     *
236     * @return the conversion service
237     */
238    protected final ConversionService getConversionService() {
239        ConversionService conversionService = Framework.getService(ConversionService.class);
240        if (conversionService == null) {
241            throw new NuxeoException("ConversionService service is null.");
242        }
243        return conversionService;
244    }
245
246}