001/*
002 * (C) Copyright 2006-2019 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 */
019package org.nuxeo.ecm.platform.mimetype.service;
020
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.model.ComponentContext;
import org.nuxeo.runtime.model.ComponentName;
import org.nuxeo.runtime.model.DefaultComponent;
import org.nuxeo.runtime.model.Extension;
import org.nuxeo.runtime.model.RuntimeContext;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;
056
057/**
058 * MimetypeEntry registry service.
059 * <p>
 * Singleton that holds a registry of mimetype entries and exposes an API to retrieve information about these
 * mimetypes. The same API can also be used to request a magic-based mimetype detection on a stream or a file.
062 *
063 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a>
064 */
065public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry {
066
067    public static final ComponentName NAME = new ComponentName(
068            "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService");
069
070    // 10 MB is the max size to allow full file scan
071    public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024;
072
073    public static final String TMP_EXTENSION = "tmp";
074
075    public static final String MSOFFICE_TMP_PREFIX = "~$";
076
077    private static final Log log = LogFactory.getLog(MimetypeRegistryService.class);
078
079    protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry;
080
081    protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry;
082
083    protected Map<String, ExtensionDescriptor> extensionRegistry;
084
085    private RuntimeContext bundle;
086
087    public MimetypeRegistryService() {
088        initializeRegistries();
089    }
090
091    protected void initializeRegistries() {
092        mimetypeByNormalisedRegistry = new HashMap<>();
093        mimetypeByExtensionRegistry = new HashMap<>();
094        extensionRegistry = new HashMap<>();
095    }
096
097    /**
098     * @deprecated since 11.1. Use {@link #isMimeTypeNormalized(String)} instead.
099     */
100    @Deprecated(since = "11.1", forRemoval = true)
101    protected boolean isMimetypeEntry(String mimetypeName) {
102        return mimetypeByNormalisedRegistry.containsKey(mimetypeName);
103    }
104
105    @Override
106    public void activate(ComponentContext context) {
107        bundle = context.getRuntimeContext();
108        initializeRegistries();
109    }
110
111    @Override
112    public void deactivate(ComponentContext context) {
113        mimetypeByNormalisedRegistry = null;
114        mimetypeByExtensionRegistry = null;
115        extensionRegistry = null;
116    }
117
118    @Override
119    public void registerExtension(Extension extension) {
120        Object[] contribs = extension.getContributions();
121        if (contribs == null) {
122            return;
123        }
124        for (Object contrib : contribs) {
125            if (contrib instanceof MimetypeDescriptor) {
126                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
127                registerMimetype(mimetypeDescriptor.getMimetype());
128            } else if (contrib instanceof ExtensionDescriptor) {
129                registerFileExtension((ExtensionDescriptor) contrib);
130            }
131        }
132    }
133
134    public void registerMimetype(MimetypeEntry mimetype) {
135        log.debug("Registering mimetype: " + mimetype.getNormalized());
136        mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype);
137        for (String extension : mimetype.getExtensions()) {
138            mimetypeByExtensionRegistry.put(extension, mimetype);
139        }
140    }
141
142    public void registerFileExtension(ExtensionDescriptor extensionDescriptor) {
143        log.debug("Registering file extension: " + extensionDescriptor.getName());
144        extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor);
145    }
146
147    @Override
148    public void unregisterExtension(Extension extension) {
149        Object[] contribs = extension.getContributions();
150        if (contribs == null) {
151            return;
152        }
153        for (Object contrib : contribs) {
154            if (contrib instanceof MimetypeDescriptor) {
155                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
156                unregisterMimetype(mimetypeDescriptor.getNormalized());
157            } else if (contrib instanceof ExtensionDescriptor) {
158                ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib;
159                unregisterFileExtension(extensionDescriptor);
160            }
161        }
162    }
163
164    public void unregisterMimetype(String mimetypeName) {
165        log.debug("Unregistering mimetype: " + mimetypeName);
166        MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName);
167        if (mimetype == null) {
168            return;
169        }
170        List<String> extensions = mimetype.getExtensions();
171        mimetypeByNormalisedRegistry.remove(mimetypeName);
172        for (String extension : extensions) {
173            mimetypeByExtensionRegistry.remove(extension);
174        }
175    }
176
177    public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) {
178        log.debug("Unregistering file extension: " + extensionDescriptor.getName());
179        extensionRegistry.remove(extensionDescriptor.getName());
180    }
181
182    public RuntimeContext getContext() {
183        return bundle;
184    }
185
186    @Override
187    public List<String> getExtensionsFromMimetypeName(String mimetypeName) {
188        return mimetypeByNormalisedRegistry.entrySet()
189                                           .stream()
190                                           .filter(e -> e.getValue().getMimetypes().contains(mimetypeName))
191                                           .flatMap(e -> e.getValue().getExtensions().stream())
192                                           .collect(Collectors.toList());
193    }
194
195    @Override
196    public MimetypeEntry getMimetypeEntryByName(String name) {
197        return mimetypeByNormalisedRegistry.get(name);
198    }
199
200    @Override
201    public String getMimetypeFromFile(File file) {
202        if (file.length() > MAX_SIZE_FOR_SCAN) {
203            String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan.";
204            if (file.getAbsolutePath() == null) {
205                throw new MimetypeNotFoundException(exceptionMessage);
206            }
207            try {
208                return getMimetypeFromFilename(file.getAbsolutePath());
209            } catch (MimetypeNotFoundException e) {
210                throw new MimetypeNotFoundException(exceptionMessage, e);
211            }
212        }
213        try {
214            MagicMatch match = Magic.getMagicMatch(file, true, false);
215            String mimeType;
216
217            if (match.getSubMatches().isEmpty()) {
218                mimeType = match.getMimeType();
219            } else {
220                // Submatches found
221                // TODO: we only take the first here
222                // what to do with other possible responses ?
223                // b.t.w., multiple responses denotes a non-accuracy problem in
224                // magic.xml but be careful to nested possible
225                // sub-sub-...-submatches make this as recursive ?
226                Collection<MagicMatch> possibilities = match.getSubMatches();
227                Iterator<MagicMatch> iter = possibilities.iterator();
228                MagicMatch m = iter.next();
229                mimeType = m.getMimeType();
230                // need to clean for subsequent calls
231                possibilities.clear();
232                match.setSubMatches(possibilities);
233            }
234            if ("text/plain".equals(mimeType)) {
235                // check we didn't mis-detect files with zeroes
236                // check first 16 bytes
237                byte[] bytes = new byte[16];
238                int n = 0;
239                try (FileInputStream is = new FileInputStream(file)) {
240                    n = is.read(bytes);
241                }
242                for (int i = 0; i < n; i++) {
243                    if (bytes[i] == 0) {
244                        mimeType = DEFAULT_MIMETYPE;
245                        break;
246                    }
247                }
248            }
249            return mimeType;
250        } catch (MagicMatchNotFoundException e) {
251            if (file.getAbsolutePath() != null) {
252                return getMimetypeFromFilename(file.getAbsolutePath());
253            }
254            throw new MimetypeNotFoundException(e.getMessage(), e);
255        } catch (MagicException | MagicParseException | IOException e) {
256            throw new MimetypeDetectionException(e.getMessage(), e);
257        }
258    }
259
260    @Override
261    public String getMimetypeFromExtension(String extension) {
262        String lowerCaseExtension = extension.toLowerCase();
263        ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension);
264        if (extensionDescriptor == null) {
265            // no explicit extension rule, analyse the inverted mimetype
266            // registry
267            MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension);
268            if (mimetype == null) {
269                throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension);
270            } else {
271                return mimetype.getNormalized();
272            }
273        } else {
274            if (extensionDescriptor.isAmbiguous()) {
275                throw new MimetypeNotFoundException(
276                        String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension));
277            } else {
278                return extensionDescriptor.getMimetype();
279            }
280        }
281    }
282
283    @Override
284    public String getMimetypeFromFilename(String filename) {
285        if (filename == null) {
286            throw new MimetypeNotFoundException("filename is null");
287        }
288        if (isTemporaryFile(filename)) {
289            return DEFAULT_MIMETYPE;
290        }
291        String extension = FilenameUtils.getExtension(filename);
292        if (StringUtils.isBlank(extension)) {
293            throw new MimetypeNotFoundException(filename + "has no extension");
294        }
295        return getMimetypeFromExtension(extension);
296    }
297
298    protected boolean isTemporaryFile(String filename) {
299        return FilenameUtils.getExtension(filename).equalsIgnoreCase(TMP_EXTENSION)
300                || FilenameUtils.getName(filename).startsWith(MSOFFICE_TMP_PREFIX);
301    }
302
303    @Override
304    public String getMimetypeFromBlob(Blob blob) {
305        File file;
306        try {
307            file = Framework.createTempFile("NXMimetypeBean", ".bin");
308            try (InputStream is = blob.getStream()) {
309                FileUtils.copyInputStreamToFile(is, file);
310                return getMimetypeFromFile(file);
311            } finally {
312                file.delete();
313            }
314        } catch (IOException e) {
315            throw new MimetypeDetectionException(e.getMessage(), e);
316        }
317    }
318
319    @Override
320    public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) {
321        String normalized = getNormalizedMimeType(mimetype).orElse(DEFAULT_MIMETYPE);
322        return mimetypeByNormalisedRegistry.get(normalized);
323    }
324
325    @Override
326    public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) {
327        try {
328            return getMimetypeFromBlob(blob);
329        } catch (MimetypeNotFoundException e) {
330            return defaultMimetype;
331        }
332    }
333
334    @Override
335    public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype) {
336        try {
337            return getMimetypeFromFilename(filename);
338        } catch (MimetypeNotFoundException e) {
339            // failed to detect mimetype on extension:
340            // fallback to calculate mimetype from blob content
341            return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
342        }
343    }
344
345    @Override
346    public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype) {
347        try {
348            return getMimetypeFromFilename(filename);
349        } catch (MimetypeNotFoundException e) {
350            // failed to detect mimetype on extension:
351            // fallback to the blob defined mimetype
352            String mimeTypeName = blob.getMimeType();
353            if (isMimeTypeNormalized(mimeTypeName)) {
354                return mimeTypeName;
355            } else {
356                // failed to detect mimetype on blob:
357                // fallback to calculate mimetype from blob content
358                return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
359            }
360        }
361    }
362
363    @Override
364    public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback) {
365        if (filename == null) {
366            filename = blob.getFilename();
367        } else if (blob.getFilename() == null) {
368            blob.setFilename(filename);
369        }
370        if (withBlobMimetypeFallback) {
371            blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE));
372        } else {
373            blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE));
374        }
375        return blob;
376    }
377
378    @Override
379    public Blob updateMimetype(Blob blob, String filename) {
380        return updateMimetype(blob, filename, false);
381    }
382
383    @Override
384    public Blob updateMimetype(Blob blob) {
385        return updateMimetype(blob, null);
386    }
387
388    @Override
389    public Optional<String> getNormalizedMimeType(String mimeType) {
390        if (mimeType == null) {
391            return Optional.empty();
392        }
393
394        Set<Map.Entry<String, MimetypeEntry>> entries = mimetypeByNormalisedRegistry.entrySet();
395        return entries.stream()
396                      .filter(e -> e.getKey().equals(mimeType) || e.getValue().getMimetypes().contains(mimeType))
397                      .findAny()
398                      .map(Map.Entry::getKey);
399    }
400
401    @Override
402    public boolean isMimeTypeNormalized(String mimeType) {
403        return mimetypeByNormalisedRegistry.containsKey(mimeType);
404    }
405}