001/*
002 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 *
019 */
020package org.nuxeo.ecm.platform.mimetype.service;
021
022import java.io.File;
023import java.io.FileInputStream;
024import java.io.IOException;
025import java.io.InputStream;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.HashMap;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032
033import net.sf.jmimemagic.Magic;
034import net.sf.jmimemagic.MagicException;
035import net.sf.jmimemagic.MagicMatch;
036import net.sf.jmimemagic.MagicMatchNotFoundException;
037import net.sf.jmimemagic.MagicParseException;
038
039import org.apache.commons.io.FilenameUtils;
040import org.apache.commons.lang.StringUtils;
041import org.apache.commons.logging.Log;
042import org.apache.commons.logging.LogFactory;
043
044import org.nuxeo.common.utils.FileUtils;
045import org.nuxeo.ecm.core.api.Blob;
046import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
047import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
048import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
049import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
050import org.nuxeo.runtime.api.Framework;
051import org.nuxeo.runtime.model.ComponentContext;
052import org.nuxeo.runtime.model.ComponentName;
053import org.nuxeo.runtime.model.DefaultComponent;
054import org.nuxeo.runtime.model.Extension;
055import org.nuxeo.runtime.model.RuntimeContext;
056
057/**
058 * MimetypeEntry registry service.
059 * <p>
060 * Singleton holding a registry of mimetype entries and exposes an API to grab information related to these mimetypes.
061 * As well, this is possible to ask for a mimetype magic detection from a stream or file using the API.
062 *
063 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a>
064 */
065public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry {
066
067    public static final ComponentName NAME = new ComponentName(
068            "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService");
069
070    // 10 MB is the max size to allow full file scan
071    public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024;
072
073    private static final Log log = LogFactory.getLog(MimetypeRegistryService.class);
074
075    protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry;
076
077    protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry;
078
079    protected Map<String, ExtensionDescriptor> extensionRegistry;
080
081    private RuntimeContext bundle;
082
083    public MimetypeRegistryService() {
084        initializeRegistries();
085    }
086
087    protected void initializeRegistries() {
088        mimetypeByNormalisedRegistry = new HashMap<>();
089        mimetypeByExtensionRegistry = new HashMap<>();
090        extensionRegistry = new HashMap<>();
091    }
092
093    @Override
094    public void activate(ComponentContext context) {
095        bundle = context.getRuntimeContext();
096        initializeRegistries();
097    }
098
099    @Override
100    public void deactivate(ComponentContext context) {
101        mimetypeByNormalisedRegistry = null;
102        mimetypeByExtensionRegistry = null;
103        extensionRegistry = null;
104    }
105
106    @Override
107    public void registerExtension(Extension extension) {
108        Object[] contribs = extension.getContributions();
109        if (contribs == null) {
110            return;
111        }
112        for (Object contrib : contribs) {
113            if (contrib instanceof MimetypeDescriptor) {
114                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
115                registerMimetype(mimetypeDescriptor.getMimetype());
116            } else if (contrib instanceof ExtensionDescriptor) {
117                registerFileExtension((ExtensionDescriptor) contrib);
118            }
119        }
120    }
121
122    public void registerMimetype(MimetypeEntry mimetype) {
123        log.debug("Registering mimetype: " + mimetype.getNormalized());
124        mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype);
125        for (String extension : mimetype.getExtensions()) {
126            mimetypeByExtensionRegistry.put(extension, mimetype);
127        }
128    }
129
130    public void registerFileExtension(ExtensionDescriptor extensionDescriptor) {
131        log.debug("Registering file extension: " + extensionDescriptor.getName());
132        extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor);
133    }
134
135    @Override
136    public void unregisterExtension(Extension extension) {
137        Object[] contribs = extension.getContributions();
138        if (contribs == null) {
139            return;
140        }
141        for (Object contrib : contribs) {
142            if (contrib instanceof MimetypeDescriptor) {
143                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
144                unregisterMimetype(mimetypeDescriptor.getNormalized());
145            } else if (contrib instanceof ExtensionDescriptor) {
146                ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib;
147                unregisterFileExtension(extensionDescriptor);
148            }
149        }
150    }
151
152    public void unregisterMimetype(String mimetypeName) {
153        log.debug("Unregistering mimetype: " + mimetypeName);
154        MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName);
155        if (mimetype == null) {
156            return;
157        }
158        List<String> extensions = mimetype.getExtensions();
159        mimetypeByNormalisedRegistry.remove(mimetypeName);
160        for (String extension : extensions) {
161            // FIXME: equals always fails because types are incompatible.
162            if (mimetype.getNormalized().equals(mimetypeByExtensionRegistry.get(extension))) {
163                mimetypeByExtensionRegistry.remove(extension);
164            }
165        }
166    }
167
168    public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) {
169        log.debug("Unregistering file extension: " + extensionDescriptor.getName());
170        extensionRegistry.remove(extensionDescriptor.getName());
171    }
172
173    public RuntimeContext getContext() {
174        return bundle;
175    }
176
177    @Override
178    public List<String> getExtensionsFromMimetypeName(String mimetypeName) {
179        List<String> extensions = new ArrayList<>();
180        for (String key : mimetypeByNormalisedRegistry.keySet()) {
181            MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key);
182            if (mimetypeEntry.getMimetypes().contains(mimetypeName)) {
183                extensions.addAll(mimetypeEntry.getExtensions());
184            }
185        }
186        return extensions;
187    }
188
189    @Override
190    public MimetypeEntry getMimetypeEntryByName(String name) {
191        return mimetypeByNormalisedRegistry.get(name);
192    }
193
194    @Override
195    public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException {
196        if (file.length() > MAX_SIZE_FOR_SCAN) {
197            String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan.";
198            if (file.getAbsolutePath() == null) {
199                throw new MimetypeNotFoundException(exceptionMessage);
200            }
201            try {
202                return getMimetypeFromFilename(file.getAbsolutePath());
203            } catch (MimetypeNotFoundException e) {
204                throw new MimetypeNotFoundException(exceptionMessage, e);
205            }
206        }
207        try {
208            MagicMatch match = Magic.getMagicMatch(file, true, false);
209            String mimeType;
210
211            if (match.getSubMatches().isEmpty()) {
212                mimeType = match.getMimeType();
213            } else {
214                // Submatches found
215                // TODO: we only take the first here
216                // what to do with other possible responses ?
217                // b.t.w., multiple responses denotes a non-accuracy problem in
218                // magic.xml but be careful to nested possible
219                // sub-sub-...-submatches make this as recursive ?
220                Collection<MagicMatch> possibilities = match.getSubMatches();
221                Iterator<MagicMatch> iter = possibilities.iterator();
222                MagicMatch m = iter.next();
223                mimeType = m.getMimeType();
224                // need to clean for subsequent calls
225                possibilities.clear();
226                match.setSubMatches(possibilities);
227            }
228            if ("text/plain".equals(mimeType)) {
229                // check we didn't mis-detect files with zeroes
230                // check first 16 bytes
231                byte[] bytes = new byte[16];
232                FileInputStream is = new FileInputStream(file);
233                int n = 0;
234                try {
235                    n = is.read(bytes);
236                } finally {
237                    is.close();
238                }
239                for (int i = 0; i < n; i++) {
240                    if (bytes[i] == 0) {
241                        mimeType = "application/octet-stream";
242                        break;
243                    }
244                }
245            }
246            return mimeType;
247        } catch (MagicMatchNotFoundException e) {
248            if (file.getAbsolutePath() != null) {
249                return getMimetypeFromFilename(file.getAbsolutePath());
250            }
251            throw new MimetypeNotFoundException(e.getMessage(), e);
252        } catch (MagicException | MagicParseException | IOException e) {
253            throw new MimetypeDetectionException(e.getMessage(), e);
254        }
255    }
256
257    @Override
258    public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException {
259        String lowerCaseExtension = extension.toLowerCase();
260        ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension);
261        if (extensionDescriptor == null) {
262            // no explicit extension rule, analyse the inverted mimetype
263            // registry
264            MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension);
265            if (mimetype == null) {
266                throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension);
267            } else {
268                return mimetype.getNormalized();
269            }
270        } else {
271            if (extensionDescriptor.isAmbiguous()) {
272                throw new MimetypeNotFoundException(String.format(
273                        "mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension));
274            } else {
275                return extensionDescriptor.getMimetype();
276            }
277        }
278    }
279
280    @Override
281    public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException {
282        if (filename == null) {
283            throw new MimetypeNotFoundException("filename is null");
284        }
285        String extension = FilenameUtils.getExtension(filename);
286        if (StringUtils.isBlank(extension)) {
287            throw new MimetypeNotFoundException(filename + "has no extension");
288        }
289        return getMimetypeFromExtension(extension);
290    }
291
292    // the stream based detection is deprecated and should be replaced by
293    // StreamingBlob detection instead to make serialization efficient for
294    // remote call
295    @Override
296    @Deprecated
297    public String getMimetypeFromStream(InputStream stream) throws MimetypeNotFoundException,
298            MimetypeDetectionException {
299        File file = null;
300        try {
301            file = Framework.createTempFile("NXMimetypeBean", ".bin");
302            try {
303                FileUtils.copyToFile(stream, file);
304                return getMimetypeFromFile(file);
305            } finally {
306                file.delete();
307            }
308        } catch (IOException e) {
309            throw new MimetypeDetectionException(e.getMessage(), e);
310        }
311    }
312
313    /**
314     * Finds the mimetype of a stream content and returns provided default if not possible.
315     *
316     * @param is content to be analyzed
317     * @param defaultMimetype default mimetype to be used if no found
318     * @return the string mimetype
319     * @throws MimetypeDetectionException
320     * @author lgodard
321     */
322    @Override
323    @Deprecated
324    // use getMimetypeFromBlobWithDefault instead
325    public String getMimetypeFromStreamWithDefault(InputStream is, String defaultMimetype)
326            throws MimetypeDetectionException {
327        try {
328            return getMimetypeFromStream(is);
329        } catch (MimetypeNotFoundException e) {
330            return defaultMimetype;
331        }
332    }
333
334    @Override
335    public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException {
336        File file = null;
337        try {
338            file = Framework.createTempFile("NXMimetypeBean", ".bin");
339            try {
340                InputStream is = blob.getStream();
341                try {
342                    FileUtils.copyToFile(is, file);
343                } finally {
344                    is.close();
345                }
346                return getMimetypeFromFile(file);
347            } finally {
348                file.delete();
349            }
350        } catch (IOException e) {
351            throw new MimetypeDetectionException(e.getMessage(), e);
352        }
353    }
354
355    @Override
356    public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) {
357        MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream");
358        if (mimetype != null) {
359            for (String key : mimetypeByNormalisedRegistry.keySet()) {
360                MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key);
361                if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) {
362                    mtype = entry;
363                    break;
364                }
365            }
366        }
367        return mtype;
368    }
369
370    /**
371     * Finds the mimetype of a Blob content and returns provided default if not possible.
372     *
373     * @param blob content to be analyzed
374     * @param defaultMimetype defaultMimeType to be used if no found
375     * @return the string mimetype
376     * @author lgodard
377     * @throws MimetypeDetectionException
378     */
379    @Override
380    public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException {
381        try {
382            return getMimetypeFromBlob(blob);
383        } catch (MimetypeNotFoundException e) {
384            return defaultMimetype;
385        }
386    }
387
388    /**
389     * Finds the mimetype of some content according to its filename and / or binary content.
390     *
391     * @param filename extension to analyze
392     * @param blob content to be analyzed if filename is ambiguous
393     * @param defaultMimetype defaultMimeType to be used if no found
394     * @return the string mimetype
395     * @throws MimetypeDetectionException
396     * @author lgodard
397     */
398    @Override
399    public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype)
400            throws MimetypeDetectionException {
401        try {
402            return getMimetypeFromFilename(filename);
403        } catch (MimetypeNotFoundException e) {
404            // failed to detect mimetype on extension: fallback to Blob based
405            // detection
406            try {
407                return getMimetypeFromBlob(blob);
408            } catch (MimetypeNotFoundException mtnfe) {
409                return defaultMimetype;
410            }
411        }
412    }
413
414    @Override
415    public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException {
416        if (filename == null) {
417            filename = blob.getFilename();
418        } else if (blob.getFilename() == null) {
419            blob.setFilename(filename);
420        }
421        String mimetype = getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE);
422        blob.setMimeType(mimetype);
423        return blob;
424    }
425
426    @Override
427    public Blob updateMimetype(Blob blob) throws MimetypeDetectionException {
428        return updateMimetype(blob, null);
429    }
430
431}