001/*
002 * (C) Copyright 2006-2007 Nuxeo SAS (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 * Contributors:
015 *     Nuxeo - initial API and implementation
016 *
017 * $Id: MimetypeEntry.java 2920 2006-09-15 13:28:15Z janguenot $
018 */
019package org.nuxeo.ecm.platform.mimetype.service;
020
021import java.io.File;
022import java.io.FileInputStream;
023import java.io.IOException;
024import java.io.InputStream;
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.HashMap;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031
032import net.sf.jmimemagic.Magic;
033import net.sf.jmimemagic.MagicException;
034import net.sf.jmimemagic.MagicMatch;
035import net.sf.jmimemagic.MagicMatchNotFoundException;
036import net.sf.jmimemagic.MagicParseException;
037
038import org.apache.commons.io.FilenameUtils;
039import org.apache.commons.logging.Log;
040import org.apache.commons.logging.LogFactory;
041import org.nuxeo.common.utils.FileUtils;
042import org.nuxeo.ecm.core.api.Blob;
043import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
044import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
045import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
046import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
047import org.nuxeo.runtime.model.ComponentContext;
048import org.nuxeo.runtime.model.ComponentName;
049import org.nuxeo.runtime.model.DefaultComponent;
050import org.nuxeo.runtime.model.Extension;
051import org.nuxeo.runtime.model.RuntimeContext;
052
053/**
054 * MimetypeEntry registry service.
055 * <p>
056 * Singleton holding a registry of mimetype entries and exposes an API to grab information related to these mimetypes.
057 * As well, this is possible to ask for a mimetype magic detection from a stream or file using the API.
058 *
059 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a>
060 */
061public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry {
062
063    public static final ComponentName NAME = new ComponentName(
064            "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService");
065
066    // 10 MB is the max size to allow full file scan
067    public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024;
068
069    private static final Log log = LogFactory.getLog(MimetypeRegistryService.class);
070
071    protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry;
072
073    protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry;
074
075    protected Map<String, ExtensionDescriptor> extensionRegistry;
076
077    private RuntimeContext bundle;
078
079    public MimetypeRegistryService() {
080        initializeRegistries();
081    }
082
083    protected void initializeRegistries() {
084        mimetypeByNormalisedRegistry = new HashMap<String, MimetypeEntry>();
085        mimetypeByExtensionRegistry = new HashMap<String, MimetypeEntry>();
086        extensionRegistry = new HashMap<String, ExtensionDescriptor>();
087    }
088
089    @Override
090    public void activate(ComponentContext context) {
091        bundle = context.getRuntimeContext();
092        initializeRegistries();
093    }
094
095    @Override
096    public void deactivate(ComponentContext context) {
097        mimetypeByNormalisedRegistry = null;
098        mimetypeByExtensionRegistry = null;
099        extensionRegistry = null;
100    }
101
102    @Override
103    public void registerExtension(Extension extension) {
104        Object[] contribs = extension.getContributions();
105        if (contribs == null) {
106            return;
107        }
108        for (Object contrib : contribs) {
109            if (contrib instanceof MimetypeDescriptor) {
110                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
111                registerMimetype(mimetypeDescriptor.getMimetype());
112            } else if (contrib instanceof ExtensionDescriptor) {
113                registerFileExtension((ExtensionDescriptor) contrib);
114            }
115        }
116    }
117
118    public void registerMimetype(MimetypeEntry mimetype) {
119        log.debug("Registering mimetype: " + mimetype.getNormalized());
120        mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype);
121        for (String extension : mimetype.getExtensions()) {
122            mimetypeByExtensionRegistry.put(extension, mimetype);
123        }
124    }
125
126    public void registerFileExtension(ExtensionDescriptor extensionDescriptor) {
127        log.debug("Registering file extension: " + extensionDescriptor.getName());
128        extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor);
129    }
130
131    @Override
132    public void unregisterExtension(Extension extension) {
133        Object[] contribs = extension.getContributions();
134        if (contribs == null) {
135            return;
136        }
137        for (Object contrib : contribs) {
138            if (contrib instanceof MimetypeDescriptor) {
139                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
140                unregisterMimetype(mimetypeDescriptor.getNormalized());
141            } else if (contrib instanceof ExtensionDescriptor) {
142                ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib;
143                unregisterFileExtension(extensionDescriptor);
144            }
145        }
146    }
147
148    public void unregisterMimetype(String mimetypeName) {
149        log.debug("Unregistering mimetype: " + mimetypeName);
150        MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName);
151        if (mimetype == null) {
152            return;
153        }
154        List<String> extensions = mimetype.getExtensions();
155        mimetypeByNormalisedRegistry.remove(mimetypeName);
156        for (String extension : extensions) {
157            // FIXME: equals always fails because types are incompatible.
158            if (mimetype.getNormalized().equals(mimetypeByExtensionRegistry.get(extension))) {
159                mimetypeByExtensionRegistry.remove(extension);
160            }
161        }
162    }
163
164    public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) {
165        log.debug("Unregistering file extension: " + extensionDescriptor.getName());
166        extensionRegistry.remove(extensionDescriptor.getName());
167    }
168
169    public RuntimeContext getContext() {
170        return bundle;
171    }
172
173    public List<String> getExtensionsFromMimetypeName(String mimetypeName) {
174        List<String> extensions = new ArrayList<String>();
175        for (String key : mimetypeByNormalisedRegistry.keySet()) {
176            MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key);
177            if (mimetypeEntry.getMimetypes().contains(mimetypeName)) {
178                extensions.addAll(mimetypeEntry.getExtensions());
179            }
180        }
181        return extensions;
182    }
183
184    public MimetypeEntry getMimetypeEntryByName(String name) {
185        return mimetypeByNormalisedRegistry.get(name);
186    }
187
188    @SuppressWarnings({ "unchecked" })
189    public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException {
190        if (file.length() > MAX_SIZE_FOR_SCAN) {
191            String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan.";
192            if (file.getAbsolutePath() == null) {
193                throw new MimetypeNotFoundException(exceptionMessage);
194            }
195            try {
196                return getMimetypeFromFilename(file.getAbsolutePath());
197            } catch (MimetypeNotFoundException e) {
198                throw new MimetypeNotFoundException(exceptionMessage, e);
199            }
200        }
201        try {
202            MagicMatch match = Magic.getMagicMatch(file, true, false);
203            String mimeType;
204
205            if (match.getSubMatches().isEmpty()) {
206                mimeType = match.getMimeType();
207            } else {
208                // Submatches found
209                // TODO: we only take the first here
210                // what to do with other possible responses ?
211                // b.t.w., multiple responses denotes a non-accuracy problem in
212                // magic.xml but be careful to nested possible
213                // sub-sub-...-submatches make this as recursive ?
214                Collection<MagicMatch> possibilities = match.getSubMatches();
215                Iterator<MagicMatch> iter = possibilities.iterator();
216                MagicMatch m = iter.next();
217                mimeType = m.getMimeType();
218                // need to clean for subsequent calls
219                possibilities.clear();
220                match.setSubMatches(possibilities);
221            }
222            if ("text/plain".equals(mimeType)) {
223                // check we didn't mis-detect files with zeroes
224                // check first 16 bytes
225                byte[] bytes = new byte[16];
226                FileInputStream is = new FileInputStream(file);
227                int n = 0;
228                try {
229                    n = is.read(bytes);
230                } finally {
231                    is.close();
232                }
233                for (int i = 0; i < n; i++) {
234                    if (bytes[i] == 0) {
235                        mimeType = "application/octet-stream";
236                        break;
237                    }
238                }
239            }
240            return mimeType;
241        } catch (MagicMatchNotFoundException e) {
242            if (file.getAbsolutePath() != null) {
243                return getMimetypeFromFilename(file.getAbsolutePath());
244            }
245            throw new MimetypeNotFoundException(e.getMessage(), e);
246        } catch (MagicException | MagicParseException | IOException e) {
247            throw new MimetypeDetectionException(e.getMessage(), e);
248        }
249    }
250
251    public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException {
252        String lowerCaseExtension = extension.toLowerCase();
253        ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension);
254        if (extensionDescriptor == null) {
255            // no explicit extension rule, analyse the inverted mimetype
256            // registry
257            MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension);
258            if (mimetype == null) {
259                throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension);
260            } else {
261                return mimetype.getNormalized();
262            }
263        } else {
264            if (extensionDescriptor.isAmbiguous()) {
265                throw new MimetypeNotFoundException(String.format(
266                        "mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension));
267            } else {
268                return extensionDescriptor.getMimetype();
269            }
270        }
271    }
272
273    public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException {
274        if (filename == null) {
275            throw new MimetypeNotFoundException("filename is null");
276        }
277        String extension = FilenameUtils.getExtension(filename);
278        String[] parts = filename.split("\\.");
279        if (parts.length < 2) {
280            throw new MimetypeNotFoundException(filename + "has no extension");
281        }
282        return getMimetypeFromExtension(parts[parts.length - 1]);
283    }
284
285    // the stream based detection is deprecated and should be replaced by
286    // StreamingBlob detection instead to make serialization efficient for
287    // remote call
288    @Deprecated
289    public String getMimetypeFromStream(InputStream stream) throws MimetypeNotFoundException,
290            MimetypeDetectionException {
291        File file = null;
292        try {
293            file = File.createTempFile("NXMimetypeBean", ".bin");
294            try {
295                FileUtils.copyToFile(stream, file);
296                return getMimetypeFromFile(file);
297            } finally {
298                file.delete();
299            }
300        } catch (IOException e) {
301            throw new MimetypeDetectionException(e.getMessage(), e);
302        }
303    }
304
305    /**
306     * Finds the mimetype of a stream content and returns provided default if not possible.
307     *
308     * @param is content to be analyzed
309     * @param defaultMimetype default mimetype to be used if no found
310     * @return the string mimetype
311     * @throws MimetypeDetectionException
312     * @author lgodard
313     */
314    @Deprecated
315    // use getMimetypeFromBlobWithDefault instead
316    public String getMimetypeFromStreamWithDefault(InputStream is, String defaultMimetype)
317            throws MimetypeDetectionException {
318        try {
319            return getMimetypeFromStream(is);
320        } catch (MimetypeNotFoundException e) {
321            return defaultMimetype;
322        }
323    }
324
325    public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException {
326        File file = null;
327        try {
328            file = File.createTempFile("NXMimetypeBean", ".bin");
329            try {
330                InputStream is = blob.getStream();
331                try {
332                    FileUtils.copyToFile(is, file);
333                } finally {
334                    is.close();
335                }
336                return getMimetypeFromFile(file);
337            } finally {
338                file.delete();
339            }
340        } catch (IOException e) {
341            throw new MimetypeDetectionException(e.getMessage(), e);
342        }
343    }
344
345    public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) {
346        MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream");
347        if (mimetype != null) {
348            for (String key : mimetypeByNormalisedRegistry.keySet()) {
349                MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key);
350                if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) {
351                    mtype = entry;
352                    break;
353                }
354            }
355        }
356        return mtype;
357    }
358
359    /**
360     * Finds the mimetype of a Blob content and returns provided default if not possible.
361     *
362     * @param blob content to be analyzed
363     * @param defaultMimetype defaultMimeType to be used if no found
364     * @return the string mimetype
365     * @author lgodard
366     * @throws MimetypeDetectionException
367     */
368    public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException {
369        try {
370            return getMimetypeFromBlob(blob);
371        } catch (MimetypeNotFoundException e) {
372            return defaultMimetype;
373        }
374    }
375
376    /**
377     * Finds the mimetype of some content according to its filename and / or binary content.
378     *
379     * @param filename extension to analyze
380     * @param blob content to be analyzed if filename is ambiguous
381     * @param defaultMimetype defaultMimeType to be used if no found
382     * @return the string mimetype
383     * @throws MimetypeDetectionException
384     * @author lgodard
385     */
386    public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype)
387            throws MimetypeDetectionException {
388        try {
389            return getMimetypeFromFilename(filename);
390        } catch (MimetypeNotFoundException e) {
391            // failed to detect mimetype on extension: fallback to Blob based
392            // detection
393            try {
394                return getMimetypeFromBlob(blob);
395            } catch (MimetypeNotFoundException mtnfe) {
396                return defaultMimetype;
397            }
398        }
399    }
400
401    public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException {
402        if (filename == null) {
403            filename = blob.getFilename();
404        } else if (blob.getFilename() == null) {
405            blob.setFilename(filename);
406        }
407        String mimetype = getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE);
408        blob.setMimeType(mimetype);
409        return blob;
410    }
411
412    public Blob updateMimetype(Blob blob) throws MimetypeDetectionException {
413        return updateMimetype(blob, null);
414    }
415
416}