001/* 002 * (C) Copyright 2020 Nuxeo (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Kevin Leturc <kleturc@nuxeo.com> 018 */ 019package org.nuxeo.ecm.platform.preview.adapter; 020 021import static javax.ws.rs.core.MediaType.TEXT_HTML; 022import static javax.ws.rs.core.MediaType.TEXT_PLAIN; 023import static javax.ws.rs.core.MediaType.TEXT_XML; 024import static org.apache.commons.lang3.ObjectUtils.defaultIfNull; 025 026import java.io.IOException; 027import java.io.InputStream; 028import java.util.List; 029import java.util.Set; 030import java.util.stream.Collectors; 031 032import org.apache.commons.io.IOUtils; 033import org.apache.commons.io.LineIterator; 034import org.apache.logging.log4j.LogManager; 035import org.apache.logging.log4j.Logger; 036import org.nuxeo.ecm.core.api.Blob; 037import org.nuxeo.ecm.core.api.Blobs; 038import org.nuxeo.ecm.core.api.DocumentModel; 039import org.nuxeo.ecm.core.api.blobholder.BlobHolder; 040import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder; 041import org.nuxeo.ecm.core.convert.api.ConversionException; 042import org.nuxeo.ecm.core.convert.api.ConversionService; 043import org.nuxeo.ecm.platform.htmlsanitizer.HtmlSanitizerService; 044import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry; 045import org.nuxeo.ecm.platform.preview.api.PreviewException; 046import org.nuxeo.ecm.platform.preview.helper.PreviewHelper; 047import org.nuxeo.runtime.api.Framework; 048import org.nuxeo.runtime.services.config.ConfigurationService; 049 050/** 051 * Previewer for Zip blobs. 052 * <p> 053 * It sanitizes each zip entry if needed. 054 * 055 * @since 11.5 056 */ 057public class ZipPreviewer implements MimeTypePreviewer { 058 059 private static final Logger log = LogManager.getLogger(ZipPreviewer.class); 060 061 protected static final String SANITIZE_ZIP_PREVIEW = "nuxeo.preview.zip.sanitize.enabled"; 062 063 protected static final Set<String> HTML_MIME_TYPES = Set.of(TEXT_HTML, TEXT_XML, TEXT_PLAIN); 064 065 @Override 066 public List<Blob> getPreview(Blob blob, DocumentModel dm) throws PreviewException { 067 ConversionService conversionService = Framework.getService(ConversionService.class); 068 String converterName = conversionService.getConverterName("application/zip", "text/html"); 069 if (converterName == null) { 070 throw new PreviewException("Unable to find converter from application/zip to text/html"); 071 } 072 073 try { 074 BlobHolder result = conversionService.convert(converterName, new SimpleBlobHolder(blob), null); 075 List<Blob> blobs = result.getBlobs(); 076 if (Framework.getService(ConfigurationService.class).isBooleanTrue(SANITIZE_ZIP_PREVIEW)) { 077 blobs.replaceAll(this::sanitize); 078 } 079 return blobs; 080 } catch (ConversionException e) { 081 throw new PreviewException(e.getMessage(), e); 082 } 083 } 084 085 protected Blob sanitize(Blob blob) { 086 String filename = blob.getFilename(); 087 if (!isSanitizable(blob)) { 088 log.debug("ZipEntryBlob: {} is not sanitizable", filename); 089 return blob; 090 } 091 HtmlSanitizerService sanitizerService = Framework.getService(HtmlSanitizerService.class); 092 try { 093 String content = blob.getString(); 094 content = sanitizerService.sanitizeString(content, null); 095 content = PreviewHelper.makeHtmlPage(content); 096 return Blobs.createBlob(content, "text/html", null, filename); 097 } catch (IOException e) { 098 throw new PreviewException("Cannot read ZipEntryBlob content with filename: " + filename, e); 099 } 100 } 101 102 protected boolean isSanitizable(Blob blob) { 103 MimetypeRegistry registry = Framework.getService(MimetypeRegistry.class); 104 String mimeType = registry.getMimetypeFromFilenameWithBlobMimetypeFallback(blob.getFilename(), blob, null); 105 if (mimeType == null) { 106 try (InputStream stream = blob.getStream()) { 107 // last chance introspect the content 108 LineIterator lineIt = IOUtils.lineIterator(stream, defaultIfNull(blob.getEncoding(), "UTF-8")); 109 while (lineIt.hasNext()) { 110 String line = lineIt.nextLine(); 111 if (line.contains("<script")) { 112 return true; 113 } 114 } 115 return false; 116 } catch (IOException e) { 117 throw new PreviewException("Unable to introspect content"); 118 } 119 } else { 120 return HTML_MIME_TYPES.contains(mimeType); 121 } 122 } 123}