001/*
002 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 */
019package org.nuxeo.ecm.platform.mimetype.service;
020
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
031
032import org.apache.commons.io.FileUtils;
033import org.apache.commons.io.FilenameUtils;
034import org.apache.commons.lang.StringUtils;
035import org.apache.commons.logging.Log;
036import org.apache.commons.logging.LogFactory;
037import org.nuxeo.ecm.core.api.Blob;
038import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
039import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
040import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
041import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
042import org.nuxeo.runtime.api.Framework;
043import org.nuxeo.runtime.model.ComponentContext;
044import org.nuxeo.runtime.model.ComponentName;
045import org.nuxeo.runtime.model.DefaultComponent;
046import org.nuxeo.runtime.model.Extension;
047import org.nuxeo.runtime.model.RuntimeContext;
048
049import net.sf.jmimemagic.Magic;
050import net.sf.jmimemagic.MagicException;
051import net.sf.jmimemagic.MagicMatch;
052import net.sf.jmimemagic.MagicMatchNotFoundException;
053import net.sf.jmimemagic.MagicParseException;
054
055/**
056 * MimetypeEntry registry service.
057 * <p>
058 * Singleton holding a registry of mimetype entries and exposes an API to grab information related to these mimetypes.
059 * As well, this is possible to ask for a mimetype magic detection from a stream or file using the API.
060 *
061 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a>
062 */
063public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry {
064
065    public static final ComponentName NAME = new ComponentName(
066            "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService");
067
068    // 10 MB is the max size to allow full file scan
069    public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024;
070
071    private static final Log log = LogFactory.getLog(MimetypeRegistryService.class);
072
073    protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry;
074
075    protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry;
076
077    protected Map<String, ExtensionDescriptor> extensionRegistry;
078
079    private RuntimeContext bundle;
080
081    public MimetypeRegistryService() {
082        initializeRegistries();
083    }
084
085    protected void initializeRegistries() {
086        mimetypeByNormalisedRegistry = new HashMap<>();
087        mimetypeByExtensionRegistry = new HashMap<>();
088        extensionRegistry = new HashMap<>();
089    }
090
091    protected boolean isMimetypeEntry(String mimetypeName) {
092        return mimetypeByNormalisedRegistry.containsKey(mimetypeName);
093    }
094
095    @Override
096    public void activate(ComponentContext context) {
097        bundle = context.getRuntimeContext();
098        initializeRegistries();
099    }
100
101    @Override
102    public void deactivate(ComponentContext context) {
103        mimetypeByNormalisedRegistry = null;
104        mimetypeByExtensionRegistry = null;
105        extensionRegistry = null;
106    }
107
108    @Override
109    public void registerExtension(Extension extension) {
110        Object[] contribs = extension.getContributions();
111        if (contribs == null) {
112            return;
113        }
114        for (Object contrib : contribs) {
115            if (contrib instanceof MimetypeDescriptor) {
116                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
117                registerMimetype(mimetypeDescriptor.getMimetype());
118            } else if (contrib instanceof ExtensionDescriptor) {
119                registerFileExtension((ExtensionDescriptor) contrib);
120            }
121        }
122    }
123
124    public void registerMimetype(MimetypeEntry mimetype) {
125        log.debug("Registering mimetype: " + mimetype.getNormalized());
126        mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype);
127        for (String extension : mimetype.getExtensions()) {
128            mimetypeByExtensionRegistry.put(extension, mimetype);
129        }
130    }
131
132    public void registerFileExtension(ExtensionDescriptor extensionDescriptor) {
133        log.debug("Registering file extension: " + extensionDescriptor.getName());
134        extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor);
135    }
136
137    @Override
138    public void unregisterExtension(Extension extension) {
139        Object[] contribs = extension.getContributions();
140        if (contribs == null) {
141            return;
142        }
143        for (Object contrib : contribs) {
144            if (contrib instanceof MimetypeDescriptor) {
145                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
146                unregisterMimetype(mimetypeDescriptor.getNormalized());
147            } else if (contrib instanceof ExtensionDescriptor) {
148                ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib;
149                unregisterFileExtension(extensionDescriptor);
150            }
151        }
152    }
153
154    public void unregisterMimetype(String mimetypeName) {
155        log.debug("Unregistering mimetype: " + mimetypeName);
156        MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName);
157        if (mimetype == null) {
158            return;
159        }
160        List<String> extensions = mimetype.getExtensions();
161        mimetypeByNormalisedRegistry.remove(mimetypeName);
162        for (String extension : extensions) {
163            // FIXME: equals always fails because types are incompatible.
164            if (mimetype.getNormalized().equals(mimetypeByExtensionRegistry.get(extension))) {
165                mimetypeByExtensionRegistry.remove(extension);
166            }
167        }
168    }
169
170    public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) {
171        log.debug("Unregistering file extension: " + extensionDescriptor.getName());
172        extensionRegistry.remove(extensionDescriptor.getName());
173    }
174
175    public RuntimeContext getContext() {
176        return bundle;
177    }
178
179    @Override
180    public List<String> getExtensionsFromMimetypeName(String mimetypeName) {
181        List<String> extensions = new ArrayList<>();
182        for (String key : mimetypeByNormalisedRegistry.keySet()) {
183            MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key);
184            if (mimetypeEntry.getMimetypes().contains(mimetypeName)) {
185                extensions.addAll(mimetypeEntry.getExtensions());
186            }
187        }
188        return extensions;
189    }
190
191    @Override
192    public MimetypeEntry getMimetypeEntryByName(String name) {
193        return mimetypeByNormalisedRegistry.get(name);
194    }
195
196    @Override
197    public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException {
198        if (file.length() > MAX_SIZE_FOR_SCAN) {
199            String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan.";
200            if (file.getAbsolutePath() == null) {
201                throw new MimetypeNotFoundException(exceptionMessage);
202            }
203            try {
204                return getMimetypeFromFilename(file.getAbsolutePath());
205            } catch (MimetypeNotFoundException e) {
206                throw new MimetypeNotFoundException(exceptionMessage, e);
207            }
208        }
209        try {
210            MagicMatch match = Magic.getMagicMatch(file, true, false);
211            String mimeType;
212
213            if (match.getSubMatches().isEmpty()) {
214                mimeType = match.getMimeType();
215            } else {
216                // Submatches found
217                // TODO: we only take the first here
218                // what to do with other possible responses ?
219                // b.t.w., multiple responses denotes a non-accuracy problem in
220                // magic.xml but be careful to nested possible
221                // sub-sub-...-submatches make this as recursive ?
222                Collection<MagicMatch> possibilities = match.getSubMatches();
223                Iterator<MagicMatch> iter = possibilities.iterator();
224                MagicMatch m = iter.next();
225                mimeType = m.getMimeType();
226                // need to clean for subsequent calls
227                possibilities.clear();
228                match.setSubMatches(possibilities);
229            }
230            if ("text/plain".equals(mimeType)) {
231                // check we didn't mis-detect files with zeroes
232                // check first 16 bytes
233                byte[] bytes = new byte[16];
234                int n = 0;
235                try (FileInputStream is = new FileInputStream(file)) {
236                    n = is.read(bytes);
237                }
238                for (int i = 0; i < n; i++) {
239                    if (bytes[i] == 0) {
240                        mimeType = "application/octet-stream";
241                        break;
242                    }
243                }
244            }
245            return mimeType;
246        } catch (MagicMatchNotFoundException e) {
247            if (file.getAbsolutePath() != null) {
248                return getMimetypeFromFilename(file.getAbsolutePath());
249            }
250            throw new MimetypeNotFoundException(e.getMessage(), e);
251        } catch (MagicException | MagicParseException | IOException e) {
252            throw new MimetypeDetectionException(e.getMessage(), e);
253        }
254    }
255
256    @Override
257    public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException {
258        String lowerCaseExtension = extension.toLowerCase();
259        ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension);
260        if (extensionDescriptor == null) {
261            // no explicit extension rule, analyse the inverted mimetype
262            // registry
263            MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension);
264            if (mimetype == null) {
265                throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension);
266            } else {
267                return mimetype.getNormalized();
268            }
269        } else {
270            if (extensionDescriptor.isAmbiguous()) {
271                throw new MimetypeNotFoundException(
272                        String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension));
273            } else {
274                return extensionDescriptor.getMimetype();
275            }
276        }
277    }
278
279    @Override
280    public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException {
281        if (filename == null) {
282            throw new MimetypeNotFoundException("filename is null");
283        }
284        String extension = FilenameUtils.getExtension(filename);
285        if (StringUtils.isBlank(extension)) {
286            throw new MimetypeNotFoundException(filename + "has no extension");
287        }
288        return getMimetypeFromExtension(extension);
289    }
290
291    @Override
292    public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException {
293        File file;
294        try {
295            file = Framework.createTempFile("NXMimetypeBean", ".bin");
296            try (InputStream is = blob.getStream()) {
297                FileUtils.copyInputStreamToFile(is, file);
298                return getMimetypeFromFile(file);
299            } finally {
300                file.delete();
301            }
302        } catch (IOException e) {
303            throw new MimetypeDetectionException(e.getMessage(), e);
304        }
305    }
306
307    @Override
308    public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) {
309        MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream");
310        if (mimetype != null) {
311            for (String key : mimetypeByNormalisedRegistry.keySet()) {
312                MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key);
313                if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) {
314                    mtype = entry;
315                    break;
316                }
317            }
318        }
319        return mtype;
320    }
321
322    @Override
323    public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException {
324        try {
325            return getMimetypeFromBlob(blob);
326        } catch (MimetypeNotFoundException e) {
327            return defaultMimetype;
328        }
329    }
330
331    @Override
332    public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype)
333            throws MimetypeDetectionException {
334        try {
335            return getMimetypeFromFilename(filename);
336        } catch (MimetypeNotFoundException e) {
337            // failed to detect mimetype on extension:
338            // fallback to calculate mimetype from blob content
339            return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
340        }
341    }
342
343    @Override
344    public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype)
345            throws MimetypeDetectionException {
346        try {
347            return getMimetypeFromFilename(filename);
348        } catch (MimetypeNotFoundException e) {
349            // failed to detect mimetype on extension:
350            // fallback to the blob defined mimetype
351            String mimeTypeName = blob.getMimeType();
352            if (isMimetypeEntry(mimeTypeName)) {
353                return mimeTypeName;
354            } else {
355                // failed to detect mimetype on blob:
356                // fallback to calculate mimetype from blob content
357                return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
358            }
359        }
360    }
361
362    @Override
363    public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback)
364            throws MimetypeDetectionException {
365        if (filename == null) {
366            filename = blob.getFilename();
367        } else if (blob.getFilename() == null) {
368            blob.setFilename(filename);
369        }
370        if (withBlobMimetypeFallback) {
371            blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE));
372        } else {
373            blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE));
374        }
375        return blob;
376    }
377
378    @Override
379    public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException {
380        return updateMimetype(blob, filename, false);
381    }
382
383    @Override
384    public Blob updateMimetype(Blob blob) throws MimetypeDetectionException {
385        return updateMimetype(blob, null);
386    }
387
388}