001/*
002 * (C) Copyright 2020 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Kevin Leturc <kleturc@nuxeo.com>
018 */
019package org.nuxeo.ecm.platform.preview.adapter;
020
021import static javax.ws.rs.core.MediaType.TEXT_HTML;
022import static javax.ws.rs.core.MediaType.TEXT_PLAIN;
023import static javax.ws.rs.core.MediaType.TEXT_XML;
024import static org.apache.commons.lang3.ObjectUtils.defaultIfNull;
025
026import java.io.IOException;
027import java.io.InputStream;
028import java.util.List;
029import java.util.Set;
030import java.util.stream.Collectors;
031
032import org.apache.commons.io.IOUtils;
033import org.apache.commons.io.LineIterator;
034import org.apache.logging.log4j.LogManager;
035import org.apache.logging.log4j.Logger;
036import org.nuxeo.ecm.core.api.Blob;
037import org.nuxeo.ecm.core.api.Blobs;
038import org.nuxeo.ecm.core.api.DocumentModel;
039import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
040import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
041import org.nuxeo.ecm.core.convert.api.ConversionException;
042import org.nuxeo.ecm.core.convert.api.ConversionService;
043import org.nuxeo.ecm.platform.htmlsanitizer.HtmlSanitizerService;
044import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
045import org.nuxeo.ecm.platform.preview.api.PreviewException;
046import org.nuxeo.ecm.platform.preview.helper.PreviewHelper;
047import org.nuxeo.runtime.api.Framework;
048import org.nuxeo.runtime.services.config.ConfigurationService;
049
050/**
051 * Previewer for Zip blobs.
052 * <p>
053 * It sanitizes each zip entry if needed.
054 *
055 * @since 11.5
056 */
057public class ZipPreviewer implements MimeTypePreviewer {
058
059    private static final Logger log = LogManager.getLogger(ZipPreviewer.class);
060
061    protected static final String SANITIZE_ZIP_PREVIEW = "nuxeo.preview.zip.sanitize.enabled";
062
063    protected static final Set<String> HTML_MIME_TYPES = Set.of(TEXT_HTML, TEXT_XML, TEXT_PLAIN);
064
065    @Override
066    public List<Blob> getPreview(Blob blob, DocumentModel dm) throws PreviewException {
067        ConversionService conversionService = Framework.getService(ConversionService.class);
068        String converterName = conversionService.getConverterName("application/zip", "text/html");
069        if (converterName == null) {
070            throw new PreviewException("Unable to find converter from application/zip to text/html");
071        }
072
073        try {
074            BlobHolder result = conversionService.convert(converterName, new SimpleBlobHolder(blob), null);
075            List<Blob> blobs = result.getBlobs();
076            if (Framework.getService(ConfigurationService.class).isBooleanTrue(SANITIZE_ZIP_PREVIEW)) {
077                blobs.replaceAll(this::sanitize);
078            }
079            return blobs;
080        } catch (ConversionException e) {
081            throw new PreviewException(e.getMessage(), e);
082        }
083    }
084
085    protected Blob sanitize(Blob blob) {
086        String filename = blob.getFilename();
087        if (!isSanitizable(blob)) {
088            log.debug("ZipEntryBlob: {} is not sanitizable", filename);
089            return blob;
090        }
091        HtmlSanitizerService sanitizerService = Framework.getService(HtmlSanitizerService.class);
092        try {
093            String content = blob.getString();
094            content = sanitizerService.sanitizeString(content, null);
095            content = PreviewHelper.makeHtmlPage(content);
096            return Blobs.createBlob(content, "text/html", null, filename);
097        } catch (IOException e) {
098            throw new PreviewException("Cannot read ZipEntryBlob content with filename: " + filename, e);
099        }
100    }
101
102    protected boolean isSanitizable(Blob blob) {
103        MimetypeRegistry registry = Framework.getService(MimetypeRegistry.class);
104        String mimeType = registry.getMimetypeFromFilenameWithBlobMimetypeFallback(blob.getFilename(), blob, null);
105        if (mimeType == null) {
106            try (InputStream stream = blob.getStream()) {
107                // last chance introspect the content
108                LineIterator lineIt = IOUtils.lineIterator(stream, defaultIfNull(blob.getEncoding(), "UTF-8"));
109                while (lineIt.hasNext()) {
110                    String line = lineIt.nextLine();
111                    if (line.contains("<script")) {
112                        return true;
113                    }
114                }
115                return false;
116            } catch (IOException e) {
117                throw new PreviewException("Unable to introspect content");
118            }
119        } else {
120            return HTML_MIME_TYPES.contains(mimeType);
121        }
122    }
123}