001/* 002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Nuxeo - initial API and implementation 018 */ 019package org.nuxeo.ecm.platform.mimetype.service; 020 021import java.io.File; 022import java.io.FileInputStream; 023import java.io.IOException; 024import java.io.InputStream; 025import java.util.ArrayList; 026import java.util.Collection; 027import java.util.HashMap; 028import java.util.Iterator; 029import java.util.List; 030import java.util.Map; 031 032import org.apache.commons.io.FileUtils; 033import org.apache.commons.io.FilenameUtils; 034import org.apache.commons.lang3.StringUtils; 035import org.apache.commons.logging.Log; 036import org.apache.commons.logging.LogFactory; 037import org.nuxeo.ecm.core.api.Blob; 038import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException; 039import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException; 040import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry; 041import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry; 042import org.nuxeo.runtime.api.Framework; 043import org.nuxeo.runtime.model.ComponentContext; 044import org.nuxeo.runtime.model.ComponentName; 045import org.nuxeo.runtime.model.DefaultComponent; 046import org.nuxeo.runtime.model.Extension; 047import org.nuxeo.runtime.model.RuntimeContext; 048 049import net.sf.jmimemagic.Magic; 050import net.sf.jmimemagic.MagicException; 051import net.sf.jmimemagic.MagicMatch; 052import net.sf.jmimemagic.MagicMatchNotFoundException; 053import net.sf.jmimemagic.MagicParseException; 054 055/** 056 * MimetypeEntry registry service. 057 * <p> 058 * Singleton holding a registry of mimetype entries and exposes an API to grab information related to these mimetypes. 059 * As well, this is possible to ask for a mimetype magic detection from a stream or file using the API. 060 * 061 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a> 062 */ 063public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry { 064 065 public static final ComponentName NAME = new ComponentName( 066 "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService"); 067 068 // 10 MB is the max size to allow full file scan 069 public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024; 070 071 private static final Log log = LogFactory.getLog(MimetypeRegistryService.class); 072 073 protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry; 074 075 protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry; 076 077 protected Map<String, ExtensionDescriptor> extensionRegistry; 078 079 private RuntimeContext bundle; 080 081 public MimetypeRegistryService() { 082 initializeRegistries(); 083 } 084 085 protected void initializeRegistries() { 086 mimetypeByNormalisedRegistry = new HashMap<>(); 087 mimetypeByExtensionRegistry = new HashMap<>(); 088 extensionRegistry = new HashMap<>(); 089 } 090 091 protected boolean isMimetypeEntry(String mimetypeName) { 092 return mimetypeByNormalisedRegistry.containsKey(mimetypeName); 093 } 094 095 @Override 096 public void activate(ComponentContext context) { 097 bundle = context.getRuntimeContext(); 098 initializeRegistries(); 099 } 100 101 @Override 102 public void deactivate(ComponentContext context) { 103 mimetypeByNormalisedRegistry = null; 104 mimetypeByExtensionRegistry = null; 105 extensionRegistry = null; 106 } 107 108 @Override 109 public void registerExtension(Extension extension) { 110 Object[] contribs = extension.getContributions(); 111 if (contribs == null) { 112 return; 113 } 114 for (Object contrib : contribs) { 115 if (contrib instanceof MimetypeDescriptor) { 116 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 117 registerMimetype(mimetypeDescriptor.getMimetype()); 118 } else if (contrib instanceof ExtensionDescriptor) { 119 registerFileExtension((ExtensionDescriptor) contrib); 120 } 121 } 122 } 123 124 public void registerMimetype(MimetypeEntry mimetype) { 125 log.debug("Registering mimetype: " + mimetype.getNormalized()); 126 mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype); 127 for (String extension : mimetype.getExtensions()) { 128 mimetypeByExtensionRegistry.put(extension, mimetype); 129 } 130 } 131 132 public void registerFileExtension(ExtensionDescriptor extensionDescriptor) { 133 log.debug("Registering file extension: " + extensionDescriptor.getName()); 134 extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor); 135 } 136 137 @Override 138 public void unregisterExtension(Extension extension) { 139 Object[] contribs = extension.getContributions(); 140 if (contribs == null) { 141 return; 142 } 143 for (Object contrib : contribs) { 144 if (contrib instanceof MimetypeDescriptor) { 145 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 146 unregisterMimetype(mimetypeDescriptor.getNormalized()); 147 } else if (contrib instanceof ExtensionDescriptor) { 148 ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib; 149 unregisterFileExtension(extensionDescriptor); 150 } 151 } 152 } 153 154 public void unregisterMimetype(String mimetypeName) { 155 log.debug("Unregistering mimetype: " + mimetypeName); 156 MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName); 157 if (mimetype == null) { 158 return; 159 } 160 List<String> extensions = mimetype.getExtensions(); 161 mimetypeByNormalisedRegistry.remove(mimetypeName); 162 for (String extension : extensions) { 163 mimetypeByExtensionRegistry.remove(extension); 164 } 165 } 166 167 public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) { 168 log.debug("Unregistering file extension: " + extensionDescriptor.getName()); 169 extensionRegistry.remove(extensionDescriptor.getName()); 170 } 171 172 public RuntimeContext getContext() { 173 return bundle; 174 } 175 176 @Override 177 public List<String> getExtensionsFromMimetypeName(String mimetypeName) { 178 List<String> extensions = new ArrayList<>(); 179 for (String key : mimetypeByNormalisedRegistry.keySet()) { 180 MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key); 181 if (mimetypeEntry.getMimetypes().contains(mimetypeName)) { 182 extensions.addAll(mimetypeEntry.getExtensions()); 183 } 184 } 185 return extensions; 186 } 187 188 @Override 189 public MimetypeEntry getMimetypeEntryByName(String name) { 190 return mimetypeByNormalisedRegistry.get(name); 191 } 192 193 @Override 194 public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException { 195 if (file.length() > MAX_SIZE_FOR_SCAN) { 196 String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan."; 197 if (file.getAbsolutePath() == null) { 198 throw new MimetypeNotFoundException(exceptionMessage); 199 } 200 try { 201 return getMimetypeFromFilename(file.getAbsolutePath()); 202 } catch (MimetypeNotFoundException e) { 203 throw new MimetypeNotFoundException(exceptionMessage, e); 204 } 205 } 206 try { 207 MagicMatch match = Magic.getMagicMatch(file, true, false); 208 String mimeType; 209 210 if (match.getSubMatches().isEmpty()) { 211 mimeType = match.getMimeType(); 212 } else { 213 // Submatches found 214 // TODO: we only take the first here 215 // what to do with other possible responses ? 216 // b.t.w., multiple responses denotes a non-accuracy problem in 217 // magic.xml but be careful to nested possible 218 // sub-sub-...-submatches make this as recursive ? 219 Collection<MagicMatch> possibilities = match.getSubMatches(); 220 Iterator<MagicMatch> iter = possibilities.iterator(); 221 MagicMatch m = iter.next(); 222 mimeType = m.getMimeType(); 223 // need to clean for subsequent calls 224 possibilities.clear(); 225 match.setSubMatches(possibilities); 226 } 227 if ("text/plain".equals(mimeType)) { 228 // check we didn't mis-detect files with zeroes 229 // check first 16 bytes 230 byte[] bytes = new byte[16]; 231 int n = 0; 232 try (FileInputStream is = new FileInputStream(file)) { 233 n = is.read(bytes); 234 } 235 for (int i = 0; i < n; i++) { 236 if (bytes[i] == 0) { 237 mimeType = "application/octet-stream"; 238 break; 239 } 240 } 241 } 242 return mimeType; 243 } catch (MagicMatchNotFoundException e) { 244 if (file.getAbsolutePath() != null) { 245 return getMimetypeFromFilename(file.getAbsolutePath()); 246 } 247 throw new MimetypeNotFoundException(e.getMessage(), e); 248 } catch (MagicException | MagicParseException | IOException e) { 249 throw new MimetypeDetectionException(e.getMessage(), e); 250 } 251 } 252 253 @Override 254 public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException { 255 String lowerCaseExtension = extension.toLowerCase(); 256 ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension); 257 if (extensionDescriptor == null) { 258 // no explicit extension rule, analyse the inverted mimetype 259 // registry 260 MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension); 261 if (mimetype == null) { 262 throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension); 263 } else { 264 return mimetype.getNormalized(); 265 } 266 } else { 267 if (extensionDescriptor.isAmbiguous()) { 268 throw new MimetypeNotFoundException( 269 String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension)); 270 } else { 271 return extensionDescriptor.getMimetype(); 272 } 273 } 274 } 275 276 @Override 277 public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException { 278 if (filename == null) { 279 throw new MimetypeNotFoundException("filename is null"); 280 } 281 String extension = FilenameUtils.getExtension(filename); 282 if (StringUtils.isBlank(extension)) { 283 throw new MimetypeNotFoundException(filename + "has no extension"); 284 } 285 return getMimetypeFromExtension(extension); 286 } 287 288 @Override 289 public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException { 290 File file; 291 try { 292 file = Framework.createTempFile("NXMimetypeBean", ".bin"); 293 try (InputStream is = blob.getStream()) { 294 FileUtils.copyInputStreamToFile(is, file); 295 return getMimetypeFromFile(file); 296 } finally { 297 file.delete(); 298 } 299 } catch (IOException e) { 300 throw new MimetypeDetectionException(e.getMessage(), e); 301 } 302 } 303 304 @Override 305 public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) { 306 MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream"); 307 if (mimetype != null) { 308 for (String key : mimetypeByNormalisedRegistry.keySet()) { 309 MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key); 310 if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) { 311 mtype = entry; 312 break; 313 } 314 } 315 } 316 return mtype; 317 } 318 319 @Override 320 public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException { 321 try { 322 return getMimetypeFromBlob(blob); 323 } catch (MimetypeNotFoundException e) { 324 return defaultMimetype; 325 } 326 } 327 328 @Override 329 public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype) 330 throws MimetypeDetectionException { 331 try { 332 return getMimetypeFromFilename(filename); 333 } catch (MimetypeNotFoundException e) { 334 // failed to detect mimetype on extension: 335 // fallback to calculate mimetype from blob content 336 return getMimetypeFromBlobWithDefault(blob, defaultMimetype); 337 } 338 } 339 340 @Override 341 public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype) 342 throws MimetypeDetectionException { 343 try { 344 return getMimetypeFromFilename(filename); 345 } catch (MimetypeNotFoundException e) { 346 // failed to detect mimetype on extension: 347 // fallback to the blob defined mimetype 348 String mimeTypeName = blob.getMimeType(); 349 if (isMimetypeEntry(mimeTypeName)) { 350 return mimeTypeName; 351 } else { 352 // failed to detect mimetype on blob: 353 // fallback to calculate mimetype from blob content 354 return getMimetypeFromBlobWithDefault(blob, defaultMimetype); 355 } 356 } 357 } 358 359 @Override 360 public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback) 361 throws MimetypeDetectionException { 362 if (filename == null) { 363 filename = blob.getFilename(); 364 } else if (blob.getFilename() == null) { 365 blob.setFilename(filename); 366 } 367 if (withBlobMimetypeFallback) { 368 blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE)); 369 } else { 370 blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE)); 371 } 372 return blob; 373 } 374 375 @Override 376 public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException { 377 return updateMimetype(blob, filename, false); 378 } 379 380 @Override 381 public Blob updateMimetype(Blob blob) throws MimetypeDetectionException { 382 return updateMimetype(blob, null); 383 } 384 385}