001/*
002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 */
019package org.nuxeo.ecm.platform.mimetype.service;
020
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.model.ComponentContext;
import org.nuxeo.runtime.model.ComponentName;
import org.nuxeo.runtime.model.DefaultComponent;
import org.nuxeo.runtime.model.Extension;
import org.nuxeo.runtime.model.RuntimeContext;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;
054
055/**
056 * MimetypeEntry registry service.
057 * <p>
 * Singleton that holds a registry of mimetype entries and exposes an API to retrieve information about these
 * mimetypes. The API can also be used to request magic-based mimetype detection from a stream or a file.
060 *
061 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a>
062 */
063public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry {
064
    /** Component name of this service, built from the fully qualified name of this class. */
    public static final ComponentName NAME = new ComponentName(
            "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService");

    // 10 MB is the max size to allow full file scan
    // (files larger than this are resolved from their file name only, see getMimetypeFromFile)
    public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024;

    private static final Log log = LogFactory.getLog(MimetypeRegistryService.class);

    /** Registered mimetype entries, keyed by their normalized mimetype name. */
    protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry;

    /** Registered mimetype entries, keyed by each of the file extensions they declare. */
    protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry;

    /** Explicitly contributed file extension descriptors, keyed by descriptor name. */
    protected Map<String, ExtensionDescriptor> extensionRegistry;

    /** Runtime context captured when the component is activated. */
    private RuntimeContext bundle;
080
081    public MimetypeRegistryService() {
082        initializeRegistries();
083    }
084
085    protected void initializeRegistries() {
086        mimetypeByNormalisedRegistry = new HashMap<>();
087        mimetypeByExtensionRegistry = new HashMap<>();
088        extensionRegistry = new HashMap<>();
089    }
090
091    protected boolean isMimetypeEntry(String mimetypeName) {
092        return mimetypeByNormalisedRegistry.containsKey(mimetypeName);
093    }
094
095    @Override
096    public void activate(ComponentContext context) {
097        bundle = context.getRuntimeContext();
098        initializeRegistries();
099    }
100
101    @Override
102    public void deactivate(ComponentContext context) {
103        mimetypeByNormalisedRegistry = null;
104        mimetypeByExtensionRegistry = null;
105        extensionRegistry = null;
106    }
107
108    @Override
109    public void registerExtension(Extension extension) {
110        Object[] contribs = extension.getContributions();
111        if (contribs == null) {
112            return;
113        }
114        for (Object contrib : contribs) {
115            if (contrib instanceof MimetypeDescriptor) {
116                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
117                registerMimetype(mimetypeDescriptor.getMimetype());
118            } else if (contrib instanceof ExtensionDescriptor) {
119                registerFileExtension((ExtensionDescriptor) contrib);
120            }
121        }
122    }
123
124    public void registerMimetype(MimetypeEntry mimetype) {
125        log.debug("Registering mimetype: " + mimetype.getNormalized());
126        mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype);
127        for (String extension : mimetype.getExtensions()) {
128            mimetypeByExtensionRegistry.put(extension, mimetype);
129        }
130    }
131
132    public void registerFileExtension(ExtensionDescriptor extensionDescriptor) {
133        log.debug("Registering file extension: " + extensionDescriptor.getName());
134        extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor);
135    }
136
137    @Override
138    public void unregisterExtension(Extension extension) {
139        Object[] contribs = extension.getContributions();
140        if (contribs == null) {
141            return;
142        }
143        for (Object contrib : contribs) {
144            if (contrib instanceof MimetypeDescriptor) {
145                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
146                unregisterMimetype(mimetypeDescriptor.getNormalized());
147            } else if (contrib instanceof ExtensionDescriptor) {
148                ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib;
149                unregisterFileExtension(extensionDescriptor);
150            }
151        }
152    }
153
154    public void unregisterMimetype(String mimetypeName) {
155        log.debug("Unregistering mimetype: " + mimetypeName);
156        MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName);
157        if (mimetype == null) {
158            return;
159        }
160        List<String> extensions = mimetype.getExtensions();
161        mimetypeByNormalisedRegistry.remove(mimetypeName);
162        for (String extension : extensions) {
163            mimetypeByExtensionRegistry.remove(extension);
164        }
165    }
166
167    public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) {
168        log.debug("Unregistering file extension: " + extensionDescriptor.getName());
169        extensionRegistry.remove(extensionDescriptor.getName());
170    }
171
172    public RuntimeContext getContext() {
173        return bundle;
174    }
175
176    @Override
177    public List<String> getExtensionsFromMimetypeName(String mimetypeName) {
178        List<String> extensions = new ArrayList<>();
179        for (String key : mimetypeByNormalisedRegistry.keySet()) {
180            MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key);
181            if (mimetypeEntry.getMimetypes().contains(mimetypeName)) {
182                extensions.addAll(mimetypeEntry.getExtensions());
183            }
184        }
185        return extensions;
186    }
187
188    @Override
189    public MimetypeEntry getMimetypeEntryByName(String name) {
190        return mimetypeByNormalisedRegistry.get(name);
191    }
192
193    @Override
194    public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException {
195        if (file.length() > MAX_SIZE_FOR_SCAN) {
196            String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan.";
197            if (file.getAbsolutePath() == null) {
198                throw new MimetypeNotFoundException(exceptionMessage);
199            }
200            try {
201                return getMimetypeFromFilename(file.getAbsolutePath());
202            } catch (MimetypeNotFoundException e) {
203                throw new MimetypeNotFoundException(exceptionMessage, e);
204            }
205        }
206        try {
207            MagicMatch match = Magic.getMagicMatch(file, true, false);
208            String mimeType;
209
210            if (match.getSubMatches().isEmpty()) {
211                mimeType = match.getMimeType();
212            } else {
213                // Submatches found
214                // TODO: we only take the first here
215                // what to do with other possible responses ?
216                // b.t.w., multiple responses denotes a non-accuracy problem in
217                // magic.xml but be careful to nested possible
218                // sub-sub-...-submatches make this as recursive ?
219                Collection<MagicMatch> possibilities = match.getSubMatches();
220                Iterator<MagicMatch> iter = possibilities.iterator();
221                MagicMatch m = iter.next();
222                mimeType = m.getMimeType();
223                // need to clean for subsequent calls
224                possibilities.clear();
225                match.setSubMatches(possibilities);
226            }
227            if ("text/plain".equals(mimeType)) {
228                // check we didn't mis-detect files with zeroes
229                // check first 16 bytes
230                byte[] bytes = new byte[16];
231                int n = 0;
232                try (FileInputStream is = new FileInputStream(file)) {
233                    n = is.read(bytes);
234                }
235                for (int i = 0; i < n; i++) {
236                    if (bytes[i] == 0) {
237                        mimeType = "application/octet-stream";
238                        break;
239                    }
240                }
241            }
242            return mimeType;
243        } catch (MagicMatchNotFoundException e) {
244            if (file.getAbsolutePath() != null) {
245                return getMimetypeFromFilename(file.getAbsolutePath());
246            }
247            throw new MimetypeNotFoundException(e.getMessage(), e);
248        } catch (MagicException | MagicParseException | IOException e) {
249            throw new MimetypeDetectionException(e.getMessage(), e);
250        }
251    }
252
253    @Override
254    public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException {
255        String lowerCaseExtension = extension.toLowerCase();
256        ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension);
257        if (extensionDescriptor == null) {
258            // no explicit extension rule, analyse the inverted mimetype
259            // registry
260            MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension);
261            if (mimetype == null) {
262                throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension);
263            } else {
264                return mimetype.getNormalized();
265            }
266        } else {
267            if (extensionDescriptor.isAmbiguous()) {
268                throw new MimetypeNotFoundException(
269                        String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension));
270            } else {
271                return extensionDescriptor.getMimetype();
272            }
273        }
274    }
275
276    @Override
277    public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException {
278        if (filename == null) {
279            throw new MimetypeNotFoundException("filename is null");
280        }
281        String extension = FilenameUtils.getExtension(filename);
282        if (StringUtils.isBlank(extension)) {
283            throw new MimetypeNotFoundException(filename + "has no extension");
284        }
285        return getMimetypeFromExtension(extension);
286    }
287
288    @Override
289    public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException {
290        File file;
291        try {
292            file = Framework.createTempFile("NXMimetypeBean", ".bin");
293            try (InputStream is = blob.getStream()) {
294                FileUtils.copyInputStreamToFile(is, file);
295                return getMimetypeFromFile(file);
296            } finally {
297                file.delete();
298            }
299        } catch (IOException e) {
300            throw new MimetypeDetectionException(e.getMessage(), e);
301        }
302    }
303
304    @Override
305    public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) {
306        MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream");
307        if (mimetype != null) {
308            for (String key : mimetypeByNormalisedRegistry.keySet()) {
309                MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key);
310                if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) {
311                    mtype = entry;
312                    break;
313                }
314            }
315        }
316        return mtype;
317    }
318
319    @Override
320    public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException {
321        try {
322            return getMimetypeFromBlob(blob);
323        } catch (MimetypeNotFoundException e) {
324            return defaultMimetype;
325        }
326    }
327
328    @Override
329    public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype)
330            throws MimetypeDetectionException {
331        try {
332            return getMimetypeFromFilename(filename);
333        } catch (MimetypeNotFoundException e) {
334            // failed to detect mimetype on extension:
335            // fallback to calculate mimetype from blob content
336            return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
337        }
338    }
339
340    @Override
341    public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype)
342            throws MimetypeDetectionException {
343        try {
344            return getMimetypeFromFilename(filename);
345        } catch (MimetypeNotFoundException e) {
346            // failed to detect mimetype on extension:
347            // fallback to the blob defined mimetype
348            String mimeTypeName = blob.getMimeType();
349            if (isMimetypeEntry(mimeTypeName)) {
350                return mimeTypeName;
351            } else {
352                // failed to detect mimetype on blob:
353                // fallback to calculate mimetype from blob content
354                return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
355            }
356        }
357    }
358
359    @Override
360    public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback)
361            throws MimetypeDetectionException {
362        if (filename == null) {
363            filename = blob.getFilename();
364        } else if (blob.getFilename() == null) {
365            blob.setFilename(filename);
366        }
367        if (withBlobMimetypeFallback) {
368            blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE));
369        } else {
370            blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE));
371        }
372        return blob;
373    }
374
375    @Override
376    public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException {
377        return updateMimetype(blob, filename, false);
378    }
379
380    @Override
381    public Blob updateMimetype(Blob blob) throws MimetypeDetectionException {
382        return updateMimetype(blob, null);
383    }
384
385}