001/*
002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 */
019package org.nuxeo.ecm.platform.mimetype.service;
020
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.model.ComponentContext;
import org.nuxeo.runtime.model.ComponentName;
import org.nuxeo.runtime.model.DefaultComponent;
import org.nuxeo.runtime.model.Extension;
import org.nuxeo.runtime.model.RuntimeContext;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;
054
055/**
056 * MimetypeEntry registry service.
057 * <p>
058 * Singleton holding a registry of mimetype entries and exposes an API to grab information related to these mimetypes.
059 * As well, this is possible to ask for a mimetype magic detection from a stream or file using the API.
060 *
061 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a>
062 */
063public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry {
064
065    public static final ComponentName NAME = new ComponentName(
066            "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService");
067
068    // 10 MB is the max size to allow full file scan
069    public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024;
070
071    public static final String TMP_EXTENSION = "tmp";
072
073    public static final String MSOFFICE_TMP_PREFIX = "~$";
074
075    private static final Log log = LogFactory.getLog(MimetypeRegistryService.class);
076
077    protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry;
078
079    protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry;
080
081    protected Map<String, ExtensionDescriptor> extensionRegistry;
082
083    private RuntimeContext bundle;
084
085    public MimetypeRegistryService() {
086        initializeRegistries();
087    }
088
089    protected void initializeRegistries() {
090        mimetypeByNormalisedRegistry = new HashMap<>();
091        mimetypeByExtensionRegistry = new HashMap<>();
092        extensionRegistry = new HashMap<>();
093    }
094
095    protected boolean isMimetypeEntry(String mimetypeName) {
096        return mimetypeByNormalisedRegistry.containsKey(mimetypeName);
097    }
098
099    @Override
100    public void activate(ComponentContext context) {
101        bundle = context.getRuntimeContext();
102        initializeRegistries();
103    }
104
105    @Override
106    public void deactivate(ComponentContext context) {
107        mimetypeByNormalisedRegistry = null;
108        mimetypeByExtensionRegistry = null;
109        extensionRegistry = null;
110    }
111
112    @Override
113    public void registerExtension(Extension extension) {
114        Object[] contribs = extension.getContributions();
115        if (contribs == null) {
116            return;
117        }
118        for (Object contrib : contribs) {
119            if (contrib instanceof MimetypeDescriptor) {
120                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
121                registerMimetype(mimetypeDescriptor.getMimetype());
122            } else if (contrib instanceof ExtensionDescriptor) {
123                registerFileExtension((ExtensionDescriptor) contrib);
124            }
125        }
126    }
127
128    public void registerMimetype(MimetypeEntry mimetype) {
129        log.debug("Registering mimetype: " + mimetype.getNormalized());
130        mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype);
131        for (String extension : mimetype.getExtensions()) {
132            mimetypeByExtensionRegistry.put(extension, mimetype);
133        }
134    }
135
136    public void registerFileExtension(ExtensionDescriptor extensionDescriptor) {
137        log.debug("Registering file extension: " + extensionDescriptor.getName());
138        extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor);
139    }
140
141    @Override
142    public void unregisterExtension(Extension extension) {
143        Object[] contribs = extension.getContributions();
144        if (contribs == null) {
145            return;
146        }
147        for (Object contrib : contribs) {
148            if (contrib instanceof MimetypeDescriptor) {
149                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
150                unregisterMimetype(mimetypeDescriptor.getNormalized());
151            } else if (contrib instanceof ExtensionDescriptor) {
152                ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib;
153                unregisterFileExtension(extensionDescriptor);
154            }
155        }
156    }
157
158    public void unregisterMimetype(String mimetypeName) {
159        log.debug("Unregistering mimetype: " + mimetypeName);
160        MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName);
161        if (mimetype == null) {
162            return;
163        }
164        List<String> extensions = mimetype.getExtensions();
165        mimetypeByNormalisedRegistry.remove(mimetypeName);
166        for (String extension : extensions) {
167            mimetypeByExtensionRegistry.remove(extension);
168        }
169    }
170
171    public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) {
172        log.debug("Unregistering file extension: " + extensionDescriptor.getName());
173        extensionRegistry.remove(extensionDescriptor.getName());
174    }
175
176    public RuntimeContext getContext() {
177        return bundle;
178    }
179
180    @Override
181    public List<String> getExtensionsFromMimetypeName(String mimetypeName) {
182        List<String> extensions = new ArrayList<>();
183        for (String key : mimetypeByNormalisedRegistry.keySet()) {
184            MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key);
185            if (mimetypeEntry.getMimetypes().contains(mimetypeName)) {
186                extensions.addAll(mimetypeEntry.getExtensions());
187            }
188        }
189        return extensions;
190    }
191
192    @Override
193    public MimetypeEntry getMimetypeEntryByName(String name) {
194        return mimetypeByNormalisedRegistry.get(name);
195    }
196
197    @Override
198    public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException {
199        if (file.length() > MAX_SIZE_FOR_SCAN) {
200            String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan.";
201            if (file.getAbsolutePath() == null) {
202                throw new MimetypeNotFoundException(exceptionMessage);
203            }
204            try {
205                return getMimetypeFromFilename(file.getAbsolutePath());
206            } catch (MimetypeNotFoundException e) {
207                throw new MimetypeNotFoundException(exceptionMessage, e);
208            }
209        }
210        try {
211            MagicMatch match = Magic.getMagicMatch(file, true, false);
212            String mimeType;
213
214            if (match.getSubMatches().isEmpty()) {
215                mimeType = match.getMimeType();
216            } else {
217                // Submatches found
218                // TODO: we only take the first here
219                // what to do with other possible responses ?
220                // b.t.w., multiple responses denotes a non-accuracy problem in
221                // magic.xml but be careful to nested possible
222                // sub-sub-...-submatches make this as recursive ?
223                Collection<MagicMatch> possibilities = match.getSubMatches();
224                Iterator<MagicMatch> iter = possibilities.iterator();
225                MagicMatch m = iter.next();
226                mimeType = m.getMimeType();
227                // need to clean for subsequent calls
228                possibilities.clear();
229                match.setSubMatches(possibilities);
230            }
231            if ("text/plain".equals(mimeType)) {
232                // check we didn't mis-detect files with zeroes
233                // check first 16 bytes
234                byte[] bytes = new byte[16];
235                int n = 0;
236                try (FileInputStream is = new FileInputStream(file)) {
237                    n = is.read(bytes);
238                }
239                for (int i = 0; i < n; i++) {
240                    if (bytes[i] == 0) {
241                        mimeType = "application/octet-stream";
242                        break;
243                    }
244                }
245            }
246            return mimeType;
247        } catch (MagicMatchNotFoundException e) {
248            if (file.getAbsolutePath() != null) {
249                return getMimetypeFromFilename(file.getAbsolutePath());
250            }
251            throw new MimetypeNotFoundException(e.getMessage(), e);
252        } catch (MagicException | MagicParseException | IOException e) {
253            throw new MimetypeDetectionException(e.getMessage(), e);
254        }
255    }
256
257    @Override
258    public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException {
259        String lowerCaseExtension = extension.toLowerCase();
260        ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension);
261        if (extensionDescriptor == null) {
262            // no explicit extension rule, analyse the inverted mimetype
263            // registry
264            MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension);
265            if (mimetype == null) {
266                throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension);
267            } else {
268                return mimetype.getNormalized();
269            }
270        } else {
271            if (extensionDescriptor.isAmbiguous()) {
272                throw new MimetypeNotFoundException(
273                        String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension));
274            } else {
275                return extensionDescriptor.getMimetype();
276            }
277        }
278    }
279
280    @Override
281    public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException {
282        if (filename == null) {
283            throw new MimetypeNotFoundException("filename is null");
284        }
285        if (isTemporaryFile(filename)) {
286            return DEFAULT_MIMETYPE;
287        }
288        String extension = FilenameUtils.getExtension(filename);
289        if (StringUtils.isBlank(extension)) {
290            throw new MimetypeNotFoundException(filename + "has no extension");
291        }
292        return getMimetypeFromExtension(extension);
293    }
294
295    protected boolean isTemporaryFile(String filename) {
296        return FilenameUtils.getExtension(filename).equalsIgnoreCase(TMP_EXTENSION)
297                || FilenameUtils.getName(filename).startsWith(MSOFFICE_TMP_PREFIX);
298    }
299
300    @Override
301    public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException {
302        File file;
303        try {
304            file = Framework.createTempFile("NXMimetypeBean", ".bin");
305            try (InputStream is = blob.getStream()) {
306                FileUtils.copyInputStreamToFile(is, file);
307                return getMimetypeFromFile(file);
308            } finally {
309                file.delete();
310            }
311        } catch (IOException e) {
312            throw new MimetypeDetectionException(e.getMessage(), e);
313        }
314    }
315
316    @Override
317    public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) {
318        MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream");
319        if (mimetype != null) {
320            for (String key : mimetypeByNormalisedRegistry.keySet()) {
321                MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key);
322                if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) {
323                    mtype = entry;
324                    break;
325                }
326            }
327        }
328        return mtype;
329    }
330
331    @Override
332    public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException {
333        try {
334            return getMimetypeFromBlob(blob);
335        } catch (MimetypeNotFoundException e) {
336            return defaultMimetype;
337        }
338    }
339
340    @Override
341    public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype)
342            throws MimetypeDetectionException {
343        try {
344            return getMimetypeFromFilename(filename);
345        } catch (MimetypeNotFoundException e) {
346            // failed to detect mimetype on extension:
347            // fallback to calculate mimetype from blob content
348            return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
349        }
350    }
351
352    @Override
353    public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype)
354            throws MimetypeDetectionException {
355        try {
356            return getMimetypeFromFilename(filename);
357        } catch (MimetypeNotFoundException e) {
358            // failed to detect mimetype on extension:
359            // fallback to the blob defined mimetype
360            String mimeTypeName = blob.getMimeType();
361            if (isMimetypeEntry(mimeTypeName)) {
362                return mimeTypeName;
363            } else {
364                // failed to detect mimetype on blob:
365                // fallback to calculate mimetype from blob content
366                return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
367            }
368        }
369    }
370
371    @Override
372    public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback)
373            throws MimetypeDetectionException {
374        if (filename == null) {
375            filename = blob.getFilename();
376        } else if (blob.getFilename() == null) {
377            blob.setFilename(filename);
378        }
379        if (withBlobMimetypeFallback) {
380            blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE));
381        } else {
382            blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE));
383        }
384        return blob;
385    }
386
387    @Override
388    public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException {
389        return updateMimetype(blob, filename, false);
390    }
391
392    @Override
393    public Blob updateMimetype(Blob blob) throws MimetypeDetectionException {
394        return updateMimetype(blob, null);
395    }
396
397}