001/*
002 * (C) Copyright 2006-2007 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 */
017package org.nuxeo.ecm.diff.content.adapter.base;
018
019import java.io.IOException;
020import java.util.ArrayList;
021import java.util.List;
022import java.util.Locale;
023
024import org.apache.commons.lang.StringEscapeUtils;
025import org.apache.commons.logging.Log;
026import org.apache.commons.logging.LogFactory;
027import org.nuxeo.ecm.core.api.Blob;
028import org.nuxeo.ecm.core.api.Blobs;
029import org.nuxeo.ecm.core.api.DocumentModel;
030import org.nuxeo.ecm.core.api.NuxeoException;
031import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
032import org.nuxeo.ecm.core.convert.api.ConversionException;
033import org.nuxeo.ecm.core.convert.api.ConversionService;
034import org.nuxeo.ecm.diff.content.ContentDiffException;
035import org.nuxeo.ecm.diff.content.ContentDiffHelper;
036import org.nuxeo.ecm.diff.content.adapter.HtmlContentDiffer;
037import org.nuxeo.ecm.diff.content.adapter.MimeTypeContentDiffer;
038import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
039import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
040import org.nuxeo.runtime.api.Framework;
041
042/**
043 * Base class for content diff based on "on the fly html or text transformers.
044 *
045 * @author Antoine Taillefer
046 * @since 5.6
047 */
048public class ConverterBasedContentDiffAdapter extends AbstractContentDiffAdapter {
049
050    private static final Log log = LogFactory.getLog(ConverterBasedContentDiffAdapter.class);
051
052    protected static final String DEFAULT_CONVERTER_NAME = "any2text";
053
054    protected String defaultFieldXPath;
055
056    protected MimetypeRegistry mimeTypeService;
057
058    @Override
059    public List<Blob> getContentDiffBlobs(DocumentModel otherDoc, ContentDiffConversionType conversionType,
060            Locale locale) throws ContentDiffException, ConversionException {
061        return getContentDiffBlobs(otherDoc, getDefaultContentDiffFieldXPath(), conversionType, locale);
062    }
063
064    @Override
065    public List<Blob> getContentDiffBlobs(DocumentModel otherDoc, String xpath,
066            ContentDiffConversionType conversionType, Locale locale) throws ContentDiffException, ConversionException {
067
068        Blob adaptedDocBlob = null;
069        Blob otherDocBlob = null;
070        BlobHolder adaptedDocBlobHolder = null;
071        BlobHolder otherDocBlobHolder = null;
072
073        if ((xpath == null) || (ContentDiffHelper.DEFAULT_XPATH.equals(xpath))) {
074            adaptedDocBlobHolder = adaptedDoc.getAdapter(BlobHolder.class);
075            otherDocBlobHolder = otherDoc.getAdapter(BlobHolder.class);
076        } else {
077            adaptedDocBlobHolder = ContentDiffHelper.getBlobHolder(adaptedDoc, xpath);
078            otherDocBlobHolder = ContentDiffHelper.getBlobHolder(otherDoc, xpath);
079        }
080        if (adaptedDocBlobHolder == null || otherDocBlobHolder == null) {
081            throw new ContentDiffException("Can not make a content diff of documents without a blob");
082        }
083
084        adaptedDocBlob = adaptedDocBlobHolder.getBlob();
085        otherDocBlob = otherDocBlobHolder.getBlob();
086        if (adaptedDocBlob == null || otherDocBlob == null) {
087            throw new ContentDiffException("Can not make a content diff of documents without a blob");
088        }
089
090        List<Blob> blobResults = new ArrayList<Blob>();
091
092        String adaptedDocMimeType = getMimeType(adaptedDocBlob);
093        String otherDocMimeType = getMimeType(otherDocBlob);
094        log.debug("Mime type of adapted doc for HTML content diff = " + adaptedDocMimeType);
095        log.debug("Mime type of other doc for HTML content diff = " + otherDocMimeType);
096
097        // Check doc mime types, if a common mime type is found, look for the
098        // associated content differ.
099        if (adaptedDocMimeType != null && otherDocMimeType != null && adaptedDocMimeType.equals(otherDocMimeType)) {
100            MimeTypeContentDiffer mtContentDiffer = getContentDiffAdapterManager().getContentDiffer(adaptedDocMimeType);
101            if (mtContentDiffer != null) {
102                // If using the HtmlContentDiffer for non HTML blobs
103                // (text/plain, text/xml), we need to transform the blob strings
104                // to encode XML entities and replace all occurrences of "\n"
105                // with "<br />", since they will then be displayed in HTML.
106                if (mtContentDiffer instanceof HtmlContentDiffer && !"text/html".equals(adaptedDocMimeType)) {
107                    adaptedDocBlob = getHtmlStringBlob(adaptedDocBlob);
108                    otherDocBlob = getHtmlStringBlob(otherDocBlob);
109                }
110                blobResults = mtContentDiffer.getContentDiff(adaptedDocBlob, otherDocBlob, locale);
111                return blobResults;
112            }
113        }
114
115        // Docs have a different mime type or no content differ found for the
116        // common mime type.
117        // Fall back on a conversion (conversionType) + HtmlContentDiffer.
118        // Default conversion type is HTML
119        if (conversionType == null) {
120            conversionType = ContentDiffConversionType.html;
121        }
122        String converterName = conversionType.getValue();
123        BlobHolder adaptedDocConvertedBlobHolder = getConvertedBlobHolder(adaptedDocBlobHolder, converterName);
124        BlobHolder otherDocConvertedBlobHolder = getConvertedBlobHolder(otherDocBlobHolder, converterName);
125        Blob adaptedDocConvertedBlob = adaptedDocConvertedBlobHolder.getBlob();
126        Blob otherDocConvertedBlob = otherDocConvertedBlobHolder.getBlob();
127
128        // In the case of a text conversion, we need to transform the blob
129        // strings to encode XML entities and replace all occurrences of
130        // "\n" with "<br />", since they will then be displayed in HTML by
131        // the HtmlContentDiffer.
132        if (ContentDiffConversionType.text.equals(conversionType)) {
133            adaptedDocConvertedBlob = getHtmlStringBlob(adaptedDocConvertedBlob);
134            otherDocConvertedBlob = getHtmlStringBlob(otherDocConvertedBlob);
135        }
136
137        // Add html content diff blob
138        MimeTypeContentDiffer contentDiffer = getContentDiffAdapterManager().getHtmlContentDiffer();
139        blobResults.addAll(contentDiffer.getContentDiff(adaptedDocConvertedBlob, otherDocConvertedBlob, locale));
140
141        // Add secondary blobs (mostly images)
142        addSecondaryBlobs(blobResults, adaptedDocConvertedBlobHolder, adaptedDocConvertedBlob.getFilename());
143        addSecondaryBlobs(blobResults, otherDocConvertedBlobHolder, otherDocConvertedBlob.getFilename());
144        return blobResults;
145    }
146
147    @Override
148    public void cleanup() {
149        // Nothing to do here
150    }
151
152    @Override
153    public boolean cachable() {
154        return true;
155    }
156
157    public void setDefaultContentDiffFieldXPath(String xPath) {
158        defaultFieldXPath = xPath;
159    }
160
161    protected String getMimeType(Blob blob) {
162        if (blob == null) {
163            return null;
164        }
165
166        String srcMT = blob.getMimeType();
167        if (srcMT == null || srcMT.startsWith("application/octet-stream")) {
168            // call MT Service
169            try {
170                MimetypeRegistry mtr = Framework.getService(MimetypeRegistry.class);
171                srcMT = mtr.getMimetypeFromFilenameAndBlobWithDefault(blob.getFilename(), blob,
172                        "application/octet-stream");
173                log.debug("mime type service returned " + srcMT);
174            } catch (MimetypeDetectionException e) {
175                log.warn("error while calling Mimetype service", e);
176            }
177        }
178        return srcMT;
179    }
180
181    protected void setMimeType(BlobHolder result) {
182        for (Blob blob : result.getBlobs()) {
183            if (blob.getMimeType() == null && blob.getFilename().endsWith("html")) {
184                String mimeTpye = getMimeType(blob);
185                blob.setMimeType(mimeTpye);
186            }
187        }
188    }
189
190    protected String getDefaultContentDiffFieldXPath() {
191        return defaultFieldXPath;
192    }
193
194    /**
195     * Returns a blob holder converted using the specified converter name.
196     *
197     * @param blobHolder the blob holder
198     * @param converterName the converter name
199     * @return the converted blob holder
200     * @throws ConversionException if an error occurs while converting the blob holder
201     */
202    protected BlobHolder getConvertedBlobHolder(BlobHolder blobHolder, String converterName) throws ConversionException {
203
204        if (converterName == null) {
205            log.debug(String.format("No converter parameter, using generic one: '%s'.", DEFAULT_CONVERTER_NAME));
206            converterName = DEFAULT_CONVERTER_NAME;
207        }
208
209        BlobHolder convertedBlobHolder = getConversionService().convert(converterName, blobHolder, null);
210        setMimeType(convertedBlobHolder);
211        return convertedBlobHolder;
212    }
213
214    protected Blob getHtmlStringBlob(Blob blob) throws ContentDiffException {
215        try {
216            Blob htmlStringBlob = Blobs.createBlob(StringEscapeUtils.escapeHtml(
217                    new String(blob.getByteArray(), "UTF-8")).replace("\r\n", "\n").replace("\n", "<br />"));
218            htmlStringBlob.setFilename(blob.getFilename());
219            return htmlStringBlob;
220        } catch (IOException ioe) {
221            throw new ContentDiffException(String.format("Could not get string from blob %s", blob.getFilename()), ioe);
222        }
223    }
224
225    protected void addSecondaryBlobs(List<Blob> blobResults, BlobHolder blobHolder, String mainBlobFilename) {
226
227        for (Blob blob : blobHolder.getBlobs()) {
228            String blobFilename = blob.getFilename();
229            if (blobFilename != null && !blobFilename.equals(mainBlobFilename)) {
230                blobResults.add(blob);
231            }
232        }
233    }
234
235    /**
236     * Gets the conversion service.
237     *
238     * @return the conversion service
239     */
240    protected final ConversionService getConversionService() {
241        ConversionService conversionService = Framework.getService(ConversionService.class);
242        if (conversionService == null) {
243            throw new NuxeoException("ConversionService service is null.");
244        }
245        return conversionService;
246    }
247
248}