001/*
002 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 *
019 */
020package org.nuxeo.ecm.platform.mimetype.service;
021
022import java.io.File;
023import java.io.FileInputStream;
024import java.io.IOException;
025import java.io.InputStream;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.HashMap;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032
033import net.sf.jmimemagic.Magic;
034import net.sf.jmimemagic.MagicException;
035import net.sf.jmimemagic.MagicMatch;
036import net.sf.jmimemagic.MagicMatchNotFoundException;
037import net.sf.jmimemagic.MagicParseException;
038
039import org.apache.commons.io.FilenameUtils;
040import org.apache.commons.lang.StringUtils;
041import org.apache.commons.logging.Log;
042import org.apache.commons.logging.LogFactory;
043
044import org.nuxeo.common.utils.FileUtils;
045import org.nuxeo.ecm.core.api.Blob;
046import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
047import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
048import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
049import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
050import org.nuxeo.runtime.api.Framework;
051import org.nuxeo.runtime.model.ComponentContext;
052import org.nuxeo.runtime.model.ComponentName;
053import org.nuxeo.runtime.model.DefaultComponent;
054import org.nuxeo.runtime.model.Extension;
055import org.nuxeo.runtime.model.RuntimeContext;
056
057/**
058 * MimetypeEntry registry service.
059 * <p>
 * Singleton that holds a registry of mimetype entries and exposes an API to retrieve information about these
 * mimetypes. It is also possible to request a magic (content-based) mimetype detection on a stream or a file
 * through this API.
062 *
063 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a>
064 */
065public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry {
066
067    public static final ComponentName NAME = new ComponentName(
068            "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService");
069
070    // 10 MB is the max size to allow full file scan
071    public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024;
072
073    private static final Log log = LogFactory.getLog(MimetypeRegistryService.class);
074
075    protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry;
076
077    protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry;
078
079    protected Map<String, ExtensionDescriptor> extensionRegistry;
080
081    private RuntimeContext bundle;
082
083    public MimetypeRegistryService() {
084        initializeRegistries();
085    }
086
087    protected void initializeRegistries() {
088        mimetypeByNormalisedRegistry = new HashMap<>();
089        mimetypeByExtensionRegistry = new HashMap<>();
090        extensionRegistry = new HashMap<>();
091    }
092
093    protected boolean isMimetypeEntry(String mimetypeName) {
094        return mimetypeByNormalisedRegistry.containsKey(mimetypeName);
095    }
096
097    @Override
098    public void activate(ComponentContext context) {
099        bundle = context.getRuntimeContext();
100        initializeRegistries();
101    }
102
103    @Override
104    public void deactivate(ComponentContext context) {
105        mimetypeByNormalisedRegistry = null;
106        mimetypeByExtensionRegistry = null;
107        extensionRegistry = null;
108    }
109
110    @Override
111    public void registerExtension(Extension extension) {
112        Object[] contribs = extension.getContributions();
113        if (contribs == null) {
114            return;
115        }
116        for (Object contrib : contribs) {
117            if (contrib instanceof MimetypeDescriptor) {
118                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
119                registerMimetype(mimetypeDescriptor.getMimetype());
120            } else if (contrib instanceof ExtensionDescriptor) {
121                registerFileExtension((ExtensionDescriptor) contrib);
122            }
123        }
124    }
125
126    public void registerMimetype(MimetypeEntry mimetype) {
127        log.debug("Registering mimetype: " + mimetype.getNormalized());
128        mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype);
129        for (String extension : mimetype.getExtensions()) {
130            mimetypeByExtensionRegistry.put(extension, mimetype);
131        }
132    }
133
134    public void registerFileExtension(ExtensionDescriptor extensionDescriptor) {
135        log.debug("Registering file extension: " + extensionDescriptor.getName());
136        extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor);
137    }
138
139    @Override
140    public void unregisterExtension(Extension extension) {
141        Object[] contribs = extension.getContributions();
142        if (contribs == null) {
143            return;
144        }
145        for (Object contrib : contribs) {
146            if (contrib instanceof MimetypeDescriptor) {
147                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
148                unregisterMimetype(mimetypeDescriptor.getNormalized());
149            } else if (contrib instanceof ExtensionDescriptor) {
150                ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib;
151                unregisterFileExtension(extensionDescriptor);
152            }
153        }
154    }
155
156    public void unregisterMimetype(String mimetypeName) {
157        log.debug("Unregistering mimetype: " + mimetypeName);
158        MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName);
159        if (mimetype == null) {
160            return;
161        }
162        List<String> extensions = mimetype.getExtensions();
163        mimetypeByNormalisedRegistry.remove(mimetypeName);
164        for (String extension : extensions) {
165            // FIXME: equals always fails because types are incompatible.
166            if (mimetype.getNormalized().equals(mimetypeByExtensionRegistry.get(extension))) {
167                mimetypeByExtensionRegistry.remove(extension);
168            }
169        }
170    }
171
172    public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) {
173        log.debug("Unregistering file extension: " + extensionDescriptor.getName());
174        extensionRegistry.remove(extensionDescriptor.getName());
175    }
176
177    public RuntimeContext getContext() {
178        return bundle;
179    }
180
181    @Override
182    public List<String> getExtensionsFromMimetypeName(String mimetypeName) {
183        List<String> extensions = new ArrayList<>();
184        for (String key : mimetypeByNormalisedRegistry.keySet()) {
185            MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key);
186            if (mimetypeEntry.getMimetypes().contains(mimetypeName)) {
187                extensions.addAll(mimetypeEntry.getExtensions());
188            }
189        }
190        return extensions;
191    }
192
193    @Override
194    public MimetypeEntry getMimetypeEntryByName(String name) {
195        return mimetypeByNormalisedRegistry.get(name);
196    }
197
198    @Override
199    public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException {
200        if (file.length() > MAX_SIZE_FOR_SCAN) {
201            String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan.";
202            if (file.getAbsolutePath() == null) {
203                throw new MimetypeNotFoundException(exceptionMessage);
204            }
205            try {
206                return getMimetypeFromFilename(file.getAbsolutePath());
207            } catch (MimetypeNotFoundException e) {
208                throw new MimetypeNotFoundException(exceptionMessage, e);
209            }
210        }
211        try {
212            MagicMatch match = Magic.getMagicMatch(file, true, false);
213            String mimeType;
214
215            if (match.getSubMatches().isEmpty()) {
216                mimeType = match.getMimeType();
217            } else {
218                // Submatches found
219                // TODO: we only take the first here
220                // what to do with other possible responses ?
221                // b.t.w., multiple responses denotes a non-accuracy problem in
222                // magic.xml but be careful to nested possible
223                // sub-sub-...-submatches make this as recursive ?
224                Collection<MagicMatch> possibilities = match.getSubMatches();
225                Iterator<MagicMatch> iter = possibilities.iterator();
226                MagicMatch m = iter.next();
227                mimeType = m.getMimeType();
228                // need to clean for subsequent calls
229                possibilities.clear();
230                match.setSubMatches(possibilities);
231            }
232            if ("text/plain".equals(mimeType)) {
233                // check we didn't mis-detect files with zeroes
234                // check first 16 bytes
235                byte[] bytes = new byte[16];
236                FileInputStream is = new FileInputStream(file);
237                int n = 0;
238                try {
239                    n = is.read(bytes);
240                } finally {
241                    is.close();
242                }
243                for (int i = 0; i < n; i++) {
244                    if (bytes[i] == 0) {
245                        mimeType = "application/octet-stream";
246                        break;
247                    }
248                }
249            }
250            return mimeType;
251        } catch (MagicMatchNotFoundException e) {
252            if (file.getAbsolutePath() != null) {
253                return getMimetypeFromFilename(file.getAbsolutePath());
254            }
255            throw new MimetypeNotFoundException(e.getMessage(), e);
256        } catch (MagicException | MagicParseException | IOException e) {
257            throw new MimetypeDetectionException(e.getMessage(), e);
258        }
259    }
260
261    @Override
262    public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException {
263        String lowerCaseExtension = extension.toLowerCase();
264        ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension);
265        if (extensionDescriptor == null) {
266            // no explicit extension rule, analyse the inverted mimetype
267            // registry
268            MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension);
269            if (mimetype == null) {
270                throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension);
271            } else {
272                return mimetype.getNormalized();
273            }
274        } else {
275            if (extensionDescriptor.isAmbiguous()) {
276                throw new MimetypeNotFoundException(
277                        String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension));
278            } else {
279                return extensionDescriptor.getMimetype();
280            }
281        }
282    }
283
284    @Override
285    public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException {
286        if (filename == null) {
287            throw new MimetypeNotFoundException("filename is null");
288        }
289        String extension = FilenameUtils.getExtension(filename);
290        if (StringUtils.isBlank(extension)) {
291            throw new MimetypeNotFoundException(filename + "has no extension");
292        }
293        return getMimetypeFromExtension(extension);
294    }
295
296    // the stream based detection is deprecated and should be replaced by
297    // StreamingBlob detection instead to make serialization efficient for
298    // remote call
299    @Override
300    @Deprecated
301    public String getMimetypeFromStream(InputStream stream)
302            throws MimetypeNotFoundException, MimetypeDetectionException {
303        File file = null;
304        try {
305            file = Framework.createTempFile("NXMimetypeBean", ".bin");
306            try {
307                FileUtils.copyToFile(stream, file);
308                return getMimetypeFromFile(file);
309            } finally {
310                file.delete();
311            }
312        } catch (IOException e) {
313            throw new MimetypeDetectionException(e.getMessage(), e);
314        }
315    }
316
317    /**
318     * Finds the mimetype of a stream content and returns provided default if not possible.
319     *
320     * @param is content to be analyzed
321     * @param defaultMimetype default mimetype to be used if no found
322     * @return the string mimetype
323     * @throws MimetypeDetectionException
324     * @author lgodard
325     */
326    @Override
327    @Deprecated
328    // use getMimetypeFromBlobWithDefault instead
329    public String getMimetypeFromStreamWithDefault(InputStream is, String defaultMimetype)
330            throws MimetypeDetectionException {
331        try {
332            return getMimetypeFromStream(is);
333        } catch (MimetypeNotFoundException e) {
334            return defaultMimetype;
335        }
336    }
337
338    @Override
339    public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException {
340        File file = null;
341        try {
342            file = Framework.createTempFile("NXMimetypeBean", ".bin");
343            try {
344                InputStream is = blob.getStream();
345                try {
346                    FileUtils.copyToFile(is, file);
347                } finally {
348                    is.close();
349                }
350                return getMimetypeFromFile(file);
351            } finally {
352                file.delete();
353            }
354        } catch (IOException e) {
355            throw new MimetypeDetectionException(e.getMessage(), e);
356        }
357    }
358
359    @Override
360    public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) {
361        MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream");
362        if (mimetype != null) {
363            for (String key : mimetypeByNormalisedRegistry.keySet()) {
364                MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key);
365                if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) {
366                    mtype = entry;
367                    break;
368                }
369            }
370        }
371        return mtype;
372    }
373
374    @Override
375    public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException {
376        try {
377            return getMimetypeFromBlob(blob);
378        } catch (MimetypeNotFoundException e) {
379            return defaultMimetype;
380        }
381    }
382
383    @Override
384    public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype)
385            throws MimetypeDetectionException {
386        try {
387            return getMimetypeFromFilename(filename);
388        } catch (MimetypeNotFoundException e) {
389            // failed to detect mimetype on extension:
390            // fallback to calculate mimetype from blob content
391            return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
392        }
393    }
394
395    @Override
396    public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype)
397            throws MimetypeDetectionException {
398        try {
399            return getMimetypeFromFilename(filename);
400        } catch (MimetypeNotFoundException e) {
401            // failed to detect mimetype on extension:
402            // fallback to the blob defined mimetype
403            String mimeTypeName = blob.getMimeType();
404            if (isMimetypeEntry(mimeTypeName)) {
405                return mimeTypeName;
406            } else {
407                // failed to detect mimetype on blob:
408                // fallback to calculate mimetype from blob content
409                return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
410            }
411        }
412    }
413
414    @Override
415    public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback)
416            throws MimetypeDetectionException {
417        if (filename == null) {
418            filename = blob.getFilename();
419        } else if (blob.getFilename() == null) {
420            blob.setFilename(filename);
421        }
422        if (withBlobMimetypeFallback) {
423            blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE));
424        } else {
425            blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE));
426        }
427        return blob;
428    }
429
430    @Override
431    public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException {
432        return updateMimetype(blob, filename, false);
433    }
434
435    @Override
436    public Blob updateMimetype(Blob blob) throws MimetypeDetectionException {
437        return updateMimetype(blob, null);
438    }
439
440}