/*
 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 */
package org.nuxeo.ecm.platform.mimetype.service;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.model.ComponentContext;
import org.nuxeo.runtime.model.ComponentName;
import org.nuxeo.runtime.model.DefaultComponent;
import org.nuxeo.runtime.model.Extension;
import org.nuxeo.runtime.model.RuntimeContext;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;

/**
 * MimetypeEntry registry service.
 * <p>
 * Holds a registry of mimetype entries and exposes an API to retrieve information related to these mimetypes. It is
 * also possible to ask for a magic (content based) mimetype detection from a blob or a file using this API.
 *
 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a>
 */
public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry {

    public static final ComponentName NAME = new ComponentName(
            "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService");

    // 10 MB is the max size to allow full file scan
    public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024;

    private static final Log log = LogFactory.getLog(MimetypeRegistryService.class);

    /** Registered entries, keyed by their normalized mimetype name. */
    protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry;

    /** Inverted registry: file extension to the entry that declares it. */
    protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry;

    /** Explicit file extension rules, keyed by extension name. */
    protected Map<String, ExtensionDescriptor> extensionRegistry;

    private RuntimeContext bundle;

    /**
     * Creates the service with empty registries.
     */
    public MimetypeRegistryService() {
        // NOTE(review): overridable method called from the constructor; kept as-is for subclass compatibility.
        initializeRegistries();
    }

    /**
     * Replaces the three registries with fresh, empty maps.
     */
    protected void initializeRegistries() {
        mimetypeByNormalisedRegistry = new HashMap<>();
        mimetypeByExtensionRegistry = new HashMap<>();
        extensionRegistry = new HashMap<>();
    }

    /**
     * Tells whether an entry is registered under the given normalized mimetype name.
     *
     * @param mimetypeName the normalized mimetype name to look up
     * @return {@code true} if the normalized registry has this key
     */
    protected boolean isMimetypeEntry(String mimetypeName) {
        return mimetypeByNormalisedRegistry.containsKey(mimetypeName);
    }

    /**
     * Remembers the runtime context of the component and resets the registries.
     */
    @Override
    public void activate(ComponentContext context) {
        bundle = context.getRuntimeContext();
        initializeRegistries();
    }

    /**
     * Drops the registries. They are recreated by {@link #activate(ComponentContext)}.
     */
    @Override
    public void deactivate(ComponentContext context) {
        mimetypeByNormalisedRegistry = null;
        mimetypeByExtensionRegistry = null;
        extensionRegistry = null;
    }

    /**
     * Registers every {@link MimetypeDescriptor} and {@link ExtensionDescriptor} contributed by the extension.
     * Contributions of any other type are ignored.
     */
    @Override
    public void registerExtension(Extension extension) {
        Object[] contribs = extension.getContributions();
        if (contribs == null) {
            return;
        }
        for (Object contrib : contribs) {
            if (contrib instanceof MimetypeDescriptor) {
                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
                registerMimetype(mimetypeDescriptor.getMimetype());
            } else if (contrib instanceof ExtensionDescriptor) {
                registerFileExtension((ExtensionDescriptor) contrib);
            }
        }
    }

    /**
     * Registers an entry under its normalized name and under each of its file extensions. A previously registered
     * entry using the same normalized name or the same extension is overwritten.
     *
     * @param mimetype the entry to register
     */
    public void registerMimetype(MimetypeEntry mimetype) {
        log.debug("Registering mimetype: " + mimetype.getNormalized());
        mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype);
        for (String extension : mimetype.getExtensions()) {
            mimetypeByExtensionRegistry.put(extension, mimetype);
        }
    }

    /**
     * Registers an explicit file extension rule under the name of the descriptor.
     *
     * @param extensionDescriptor the rule to register
     */
    public void registerFileExtension(ExtensionDescriptor extensionDescriptor) {
        log.debug("Registering file extension: " + extensionDescriptor.getName());
        extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor);
    }

    /**
     * Unregisters every {@link MimetypeDescriptor} and {@link ExtensionDescriptor} contributed by the extension.
     * Contributions of any other type are ignored.
     */
    @Override
    public void unregisterExtension(Extension extension) {
        Object[] contribs = extension.getContributions();
        if (contribs == null) {
            return;
        }
        for (Object contrib : contribs) {
            if (contrib instanceof MimetypeDescriptor) {
                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
                unregisterMimetype(mimetypeDescriptor.getNormalized());
            } else if (contrib instanceof ExtensionDescriptor) {
                ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib;
                unregisterFileExtension(extensionDescriptor);
            }
        }
    }

    /**
     * Removes the entry registered under the given normalized name, together with the extension mappings that still
     * point to an entry having that normalized name. Does nothing if no such entry is registered.
     *
     * @param mimetypeName the normalized mimetype name of the entry to remove
     */
    public void unregisterMimetype(String mimetypeName) {
        log.debug("Unregistering mimetype: " + mimetypeName);
        MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName);
        if (mimetype == null) {
            return;
        }
        List<String> extensions = mimetype.getExtensions();
        mimetypeByNormalisedRegistry.remove(mimetypeName);
        for (String extension : extensions) {
            // An extension may have been taken over by another entry registered later: only drop the mapping when
            // it still designates the mimetype being unregistered. Compare normalized names, not a name to an entry.
            MimetypeEntry registered = mimetypeByExtensionRegistry.get(extension);
            if (registered != null && mimetype.getNormalized().equals(registered.getNormalized())) {
                mimetypeByExtensionRegistry.remove(extension);
            }
        }
    }

    /**
     * Removes the explicit file extension rule registered under the name of the descriptor.
     *
     * @param extensionDescriptor the rule to remove
     */
    public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) {
        log.debug("Unregistering file extension: " + extensionDescriptor.getName());
        extensionRegistry.remove(extensionDescriptor.getName());
    }

    /**
     * @return the runtime context captured at activation time, or {@code null} if the component was never activated
     */
    public RuntimeContext getContext() {
        return bundle;
    }

    /**
     * Collects the extensions of every registered entry whose mimetype list contains the given name.
     *
     * @param mimetypeName the mimetype name to search for
     * @return a new list of extensions, empty if no entry matches
     */
    @Override
    public List<String> getExtensionsFromMimetypeName(String mimetypeName) {
        List<String> extensions = new ArrayList<>();
        for (MimetypeEntry mimetypeEntry : mimetypeByNormalisedRegistry.values()) {
            if (mimetypeEntry.getMimetypes().contains(mimetypeName)) {
                extensions.addAll(mimetypeEntry.getExtensions());
            }
        }
        return extensions;
    }

    /**
     * @param name a normalized mimetype name
     * @return the entry registered under this normalized name, or {@code null} if there is none
     */
    @Override
    public MimetypeEntry getMimetypeEntryByName(String name) {
        return mimetypeByNormalisedRegistry.get(name);
    }

    /**
     * Detects the mimetype of a file.
     * <p>
     * Files bigger than {@link #MAX_SIZE_FOR_SCAN} are not scanned: their mimetype is resolved from the file name
     * only. Other files are submitted to the magic detection; when the detection reports sub-matches, the first one
     * is used. A {@code text/plain} result is turned into {@code application/octet-stream} if one of the first 16
     * bytes of the file is zero. When the magic detection finds no match, the file name is used as a fallback.
     *
     * @param file the file to analyse
     * @return the detected mimetype
     * @throws MimetypeNotFoundException if no mimetype could be determined
     * @throws MimetypeDetectionException if the magic detection or the reading of the file failed
     */
    @Override
    public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException {
        if (file.length() > MAX_SIZE_FOR_SCAN) {
            String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan.";
            if (file.getAbsolutePath() == null) {
                throw new MimetypeNotFoundException(exceptionMessage);
            }
            try {
                return getMimetypeFromFilename(file.getAbsolutePath());
            } catch (MimetypeNotFoundException e) {
                throw new MimetypeNotFoundException(exceptionMessage, e);
            }
        }
        try {
            MagicMatch match = Magic.getMagicMatch(file, true, false);
            String mimeType;

            if (match.getSubMatches().isEmpty()) {
                mimeType = match.getMimeType();
            } else {
                // Submatches found
                // TODO: we only take the first here
                // what to do with other possible responses ?
                // b.t.w., multiple responses denotes a non-accuracy problem in
                // magic.xml but be careful to nested possible
                // sub-sub-...-submatches make this as recursive ?
                Collection<MagicMatch> possibilities = match.getSubMatches();
                Iterator<MagicMatch> iter = possibilities.iterator();
                MagicMatch m = iter.next();
                mimeType = m.getMimeType();
                // need to clean for subsequent calls
                possibilities.clear();
                match.setSubMatches(possibilities);
            }
            if ("text/plain".equals(mimeType)) {
                // check we didn't mis-detect files with zeroes
                // check first 16 bytes
                byte[] bytes = new byte[16];
                int n = 0;
                try (FileInputStream is = new FileInputStream(file)) {
                    // n is -1 for an empty file, in which case the loop below does not run
                    n = is.read(bytes);
                }
                for (int i = 0; i < n; i++) {
                    if (bytes[i] == 0) {
                        mimeType = "application/octet-stream";
                        break;
                    }
                }
            }
            return mimeType;
        } catch (MagicMatchNotFoundException e) {
            if (file.getAbsolutePath() != null) {
                return getMimetypeFromFilename(file.getAbsolutePath());
            }
            throw new MimetypeNotFoundException(e.getMessage(), e);
        } catch (MagicException | MagicParseException | IOException e) {
            throw new MimetypeDetectionException(e.getMessage(), e);
        }
    }

    /**
     * Resolves a mimetype from a file extension.
     * <p>
     * The extension is lower-cased, then looked up in the explicit extension rules first and, if no rule exists, in
     * the extensions declared by the registered mimetype entries.
     *
     * @param extension the file extension, without the dot
     * @return the mimetype given by the explicit rule, or the normalized name of the entry declaring the extension
     * @throws MimetypeNotFoundException if the extension is {@code null}, unknown, or flagged as ambiguous by its
     *             explicit rule
     */
    @Override
    public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException {
        if (extension == null) {
            throw new MimetypeNotFoundException("extension is null");
        }
        // Locale.ROOT keeps the lookup key stable whatever the default locale of the JVM is
        String lowerCaseExtension = extension.toLowerCase(Locale.ROOT);
        ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension);
        if (extensionDescriptor == null) {
            // no explicit extension rule, analyse the inverted mimetype
            // registry
            MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension);
            if (mimetype == null) {
                throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension);
            }
            return mimetype.getNormalized();
        }
        if (extensionDescriptor.isAmbiguous()) {
            throw new MimetypeNotFoundException(
                    String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension));
        }
        return extensionDescriptor.getMimetype();
    }

    /**
     * Resolves a mimetype from the extension of a file name.
     *
     * @param filename the file name or path
     * @return the mimetype associated with the extension of the file name
     * @throws MimetypeNotFoundException if the file name is {@code null}, has no extension, or if its extension
     *             cannot be resolved
     */
    @Override
    public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException {
        if (filename == null) {
            throw new MimetypeNotFoundException("filename is null");
        }
        String extension = FilenameUtils.getExtension(filename);
        if (StringUtils.isBlank(extension)) {
            throw new MimetypeNotFoundException(filename + " has no extension");
        }
        return getMimetypeFromExtension(extension);
    }

    /**
     * Detects the mimetype of a blob by copying its content to a temporary file and analysing that file. The
     * temporary file is deleted before returning.
     *
     * @param blob the blob to analyse
     * @return the detected mimetype
     * @throws MimetypeNotFoundException if no mimetype could be determined
     * @throws MimetypeDetectionException if the detection failed or the content could not be copied
     */
    @Override
    public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException {
        File file;
        try {
            file = Framework.createTempFile("NXMimetypeBean", ".bin");
            try (InputStream is = blob.getStream()) {
                FileUtils.copyInputStreamToFile(is, file);
                return getMimetypeFromFile(file);
            } finally {
                file.delete();
            }
        } catch (IOException e) {
            throw new MimetypeDetectionException(e.getMessage(), e);
        }
    }

    /**
     * Finds the first registered entry whose normalized name equals the given mimetype or whose mimetype list
     * contains it.
     *
     * @param mimetype the mimetype to search for, may be {@code null}
     * @return the matching entry; otherwise the entry registered as {@code application/octet-stream}, which is
     *         {@code null} when no such entry is registered
     */
    @Override
    public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) {
        MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream");
        if (mimetype != null) {
            for (MimetypeEntry entry : mimetypeByNormalisedRegistry.values()) {
                if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) {
                    mtype = entry;
                    break;
                }
            }
        }
        return mtype;
    }

    /**
     * Same as {@link #getMimetypeFromBlob(Blob)} but returns the given default instead of failing when no mimetype is
     * found.
     *
     * @param blob the blob to analyse
     * @param defaultMimetype the value returned when no mimetype is found
     * @return the detected mimetype, or {@code defaultMimetype}
     * @throws MimetypeDetectionException if the detection itself failed
     */
    @Override
    public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException {
        try {
            return getMimetypeFromBlob(blob);
        } catch (MimetypeNotFoundException e) {
            return defaultMimetype;
        }
    }

    /**
     * Resolves the mimetype from the file name, then from the blob content, then falls back on the given default.
     *
     * @param filename the file name, may be {@code null}
     * @param blob the blob analysed when the file name is not conclusive
     * @param defaultMimetype the value returned when nothing is found
     * @return the resolved mimetype, or {@code defaultMimetype}
     * @throws MimetypeDetectionException if the content detection failed
     */
    @Override
    public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype)
            throws MimetypeDetectionException {
        try {
            return getMimetypeFromFilename(filename);
        } catch (MimetypeNotFoundException e) {
            // failed to detect mimetype on extension:
            // fallback to calculate mimetype from blob content
            return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
        }
    }

    /**
     * Resolves the mimetype from the file name, then from the mimetype carried by the blob if it is a registered
     * normalized mimetype, then from the blob content, then falls back on the given default.
     *
     * @param filename the file name, may be {@code null}
     * @param blob the blob providing the fallback mimetype and content
     * @param defaultMimetype the value returned when nothing is found
     * @return the resolved mimetype, or {@code defaultMimetype}
     * @throws MimetypeDetectionException if the content detection failed
     */
    @Override
    public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype)
            throws MimetypeDetectionException {
        try {
            return getMimetypeFromFilename(filename);
        } catch (MimetypeNotFoundException e) {
            // failed to detect mimetype on extension:
            // fallback to the blob defined mimetype
            String mimeTypeName = blob.getMimeType();
            if (isMimetypeEntry(mimeTypeName)) {
                return mimeTypeName;
            }
            // failed to detect mimetype on blob:
            // fallback to calculate mimetype from blob content
            return getMimetypeFromBlobWithDefault(blob, defaultMimetype);
        }
    }

    /**
     * Updates the mimetype of the blob, and its file name when the blob has none.
     *
     * @param blob the blob to update
     * @param filename the file name to use; when {@code null} the file name of the blob is used
     * @param withBlobMimetypeFallback whether the mimetype already carried by the blob may be used before sniffing
     *            its content; {@code null} is treated as {@code false}
     * @return the same blob instance, updated
     * @throws MimetypeDetectionException if the content detection failed
     */
    @Override
    public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback)
            throws MimetypeDetectionException {
        if (filename == null) {
            filename = blob.getFilename();
        } else if (blob.getFilename() == null) {
            blob.setFilename(filename);
        }
        // Boolean.TRUE.equals() avoids a NullPointerException on unboxing a null flag
        if (Boolean.TRUE.equals(withBlobMimetypeFallback)) {
            blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE));
        } else {
            blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE));
        }
        return blob;
    }

    /**
     * Updates the mimetype of the blob without using the mimetype it already carries as a fallback.
     *
     * @param blob the blob to update
     * @param filename the file name to use; when {@code null} the file name of the blob is used
     * @return the same blob instance, updated
     * @throws MimetypeDetectionException if the content detection failed
     */
    @Override
    public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException {
        return updateMimetype(blob, filename, false);
    }

    /**
     * Updates the mimetype of the blob using the file name of the blob itself.
     *
     * @param blob the blob to update
     * @return the same blob instance, updated
     * @throws MimetypeDetectionException if the content detection failed
     */
    @Override
    public Blob updateMimetype(Blob blob) throws MimetypeDetectionException {
        return updateMimetype(blob, null);
    }

}