001/* 002 * (C) Copyright 2006-2007 Nuxeo SAS (http://nuxeo.com/) and contributors. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the GNU Lesser General Public License 006 * (LGPL) version 2.1 which accompanies this distribution, and is available at 007 * http://www.gnu.org/licenses/lgpl.html 008 * 009 * This library is distributed in the hope that it will be useful, 010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 012 * Lesser General Public License for more details. 013 * 014 * Contributors: 015 * Nuxeo - initial API and implementation 016 * 017 * $Id: MimetypeEntry.java 2920 2006-09-15 13:28:15Z janguenot $ 018 */ 019package org.nuxeo.ecm.platform.mimetype.service; 020 021import java.io.File; 022import java.io.FileInputStream; 023import java.io.IOException; 024import java.io.InputStream; 025import java.util.ArrayList; 026import java.util.Collection; 027import java.util.HashMap; 028import java.util.Iterator; 029import java.util.List; 030import java.util.Map; 031 032import net.sf.jmimemagic.Magic; 033import net.sf.jmimemagic.MagicException; 034import net.sf.jmimemagic.MagicMatch; 035import net.sf.jmimemagic.MagicMatchNotFoundException; 036import net.sf.jmimemagic.MagicParseException; 037 038import org.apache.commons.io.FilenameUtils; 039import org.apache.commons.logging.Log; 040import org.apache.commons.logging.LogFactory; 041import org.nuxeo.common.utils.FileUtils; 042import org.nuxeo.ecm.core.api.Blob; 043import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException; 044import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException; 045import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry; 046import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry; 047import org.nuxeo.runtime.model.ComponentContext; 048import org.nuxeo.runtime.model.ComponentName; 049import org.nuxeo.runtime.model.DefaultComponent; 050import org.nuxeo.runtime.model.Extension; 051import org.nuxeo.runtime.model.RuntimeContext; 052 053/** 054 * MimetypeEntry registry service. 055 * <p> 056 * Singleton holding a registry of mimetype entries and exposes an API to grab information related to these mimetypes. 057 * As well, this is possible to ask for a mimetype magic detection from a stream or file using the API. 058 * 059 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a> 060 */ 061public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry { 062 063 public static final ComponentName NAME = new ComponentName( 064 "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService"); 065 066 // 10 MB is the max size to allow full file scan 067 public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024; 068 069 private static final Log log = LogFactory.getLog(MimetypeRegistryService.class); 070 071 protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry; 072 073 protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry; 074 075 protected Map<String, ExtensionDescriptor> extensionRegistry; 076 077 private RuntimeContext bundle; 078 079 public MimetypeRegistryService() { 080 initializeRegistries(); 081 } 082 083 protected void initializeRegistries() { 084 mimetypeByNormalisedRegistry = new HashMap<String, MimetypeEntry>(); 085 mimetypeByExtensionRegistry = new HashMap<String, MimetypeEntry>(); 086 extensionRegistry = new HashMap<String, ExtensionDescriptor>(); 087 } 088 089 @Override 090 public void activate(ComponentContext context) { 091 bundle = context.getRuntimeContext(); 092 initializeRegistries(); 093 } 094 095 @Override 096 public void deactivate(ComponentContext context) { 097 mimetypeByNormalisedRegistry = null; 098 mimetypeByExtensionRegistry = null; 099 extensionRegistry = null; 100 } 101 102 @Override 103 public void registerExtension(Extension extension) { 104 Object[] contribs = extension.getContributions(); 105 if (contribs == null) { 106 return; 107 } 108 for (Object contrib : contribs) { 109 if (contrib instanceof MimetypeDescriptor) { 110 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 111 registerMimetype(mimetypeDescriptor.getMimetype()); 112 } else if (contrib instanceof ExtensionDescriptor) { 113 registerFileExtension((ExtensionDescriptor) contrib); 114 } 115 } 116 } 117 118 public void registerMimetype(MimetypeEntry mimetype) { 119 log.debug("Registering mimetype: " + mimetype.getNormalized()); 120 mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype); 121 for (String extension : mimetype.getExtensions()) { 122 mimetypeByExtensionRegistry.put(extension, mimetype); 123 } 124 } 125 126 public void registerFileExtension(ExtensionDescriptor extensionDescriptor) { 127 log.debug("Registering file extension: " + extensionDescriptor.getName()); 128 extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor); 129 } 130 131 @Override 132 public void unregisterExtension(Extension extension) { 133 Object[] contribs = extension.getContributions(); 134 if (contribs == null) { 135 return; 136 } 137 for (Object contrib : contribs) { 138 if (contrib instanceof MimetypeDescriptor) { 139 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 140 unregisterMimetype(mimetypeDescriptor.getNormalized()); 141 } else if (contrib instanceof ExtensionDescriptor) { 142 ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib; 143 unregisterFileExtension(extensionDescriptor); 144 } 145 } 146 } 147 148 public void unregisterMimetype(String mimetypeName) { 149 log.debug("Unregistering mimetype: " + mimetypeName); 150 MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName); 151 if (mimetype == null) { 152 return; 153 } 154 List<String> extensions = mimetype.getExtensions(); 155 mimetypeByNormalisedRegistry.remove(mimetypeName); 156 for (String extension : extensions) { 157 // FIXME: equals always fails because types are incompatible. 158 if (mimetype.getNormalized().equals(mimetypeByExtensionRegistry.get(extension))) { 159 mimetypeByExtensionRegistry.remove(extension); 160 } 161 } 162 } 163 164 public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) { 165 log.debug("Unregistering file extension: " + extensionDescriptor.getName()); 166 extensionRegistry.remove(extensionDescriptor.getName()); 167 } 168 169 public RuntimeContext getContext() { 170 return bundle; 171 } 172 173 public List<String> getExtensionsFromMimetypeName(String mimetypeName) { 174 List<String> extensions = new ArrayList<String>(); 175 for (String key : mimetypeByNormalisedRegistry.keySet()) { 176 MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key); 177 if (mimetypeEntry.getMimetypes().contains(mimetypeName)) { 178 extensions.addAll(mimetypeEntry.getExtensions()); 179 } 180 } 181 return extensions; 182 } 183 184 public MimetypeEntry getMimetypeEntryByName(String name) { 185 return mimetypeByNormalisedRegistry.get(name); 186 } 187 188 @SuppressWarnings({ "unchecked" }) 189 public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException { 190 if (file.length() > MAX_SIZE_FOR_SCAN) { 191 String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan."; 192 if (file.getAbsolutePath() == null) { 193 throw new MimetypeNotFoundException(exceptionMessage); 194 } 195 try { 196 return getMimetypeFromFilename(file.getAbsolutePath()); 197 } catch (MimetypeNotFoundException e) { 198 throw new MimetypeNotFoundException(exceptionMessage, e); 199 } 200 } 201 try { 202 MagicMatch match = Magic.getMagicMatch(file, true, false); 203 String mimeType; 204 205 if (match.getSubMatches().isEmpty()) { 206 mimeType = match.getMimeType(); 207 } else { 208 // Submatches found 209 // TODO: we only take the first here 210 // what to do with other possible responses ? 211 // b.t.w., multiple responses denotes a non-accuracy problem in 212 // magic.xml but be careful to nested possible 213 // sub-sub-...-submatches make this as recursive ? 214 Collection<MagicMatch> possibilities = match.getSubMatches(); 215 Iterator<MagicMatch> iter = possibilities.iterator(); 216 MagicMatch m = iter.next(); 217 mimeType = m.getMimeType(); 218 // need to clean for subsequent calls 219 possibilities.clear(); 220 match.setSubMatches(possibilities); 221 } 222 if ("text/plain".equals(mimeType)) { 223 // check we didn't mis-detect files with zeroes 224 // check first 16 bytes 225 byte[] bytes = new byte[16]; 226 FileInputStream is = new FileInputStream(file); 227 int n = 0; 228 try { 229 n = is.read(bytes); 230 } finally { 231 is.close(); 232 } 233 for (int i = 0; i < n; i++) { 234 if (bytes[i] == 0) { 235 mimeType = "application/octet-stream"; 236 break; 237 } 238 } 239 } 240 return mimeType; 241 } catch (MagicMatchNotFoundException e) { 242 if (file.getAbsolutePath() != null) { 243 return getMimetypeFromFilename(file.getAbsolutePath()); 244 } 245 throw new MimetypeNotFoundException(e.getMessage(), e); 246 } catch (MagicException | MagicParseException | IOException e) { 247 throw new MimetypeDetectionException(e.getMessage(), e); 248 } 249 } 250 251 public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException { 252 String lowerCaseExtension = extension.toLowerCase(); 253 ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension); 254 if (extensionDescriptor == null) { 255 // no explicit extension rule, analyse the inverted mimetype 256 // registry 257 MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension); 258 if (mimetype == null) { 259 throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension); 260 } else { 261 return mimetype.getNormalized(); 262 } 263 } else { 264 if (extensionDescriptor.isAmbiguous()) { 265 throw new MimetypeNotFoundException(String.format( 266 "mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension)); 267 } else { 268 return extensionDescriptor.getMimetype(); 269 } 270 } 271 } 272 273 public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException { 274 if (filename == null) { 275 throw new MimetypeNotFoundException("filename is null"); 276 } 277 String extension = FilenameUtils.getExtension(filename); 278 String[] parts = filename.split("\\."); 279 if (parts.length < 2) { 280 throw new MimetypeNotFoundException(filename + "has no extension"); 281 } 282 return getMimetypeFromExtension(parts[parts.length - 1]); 283 } 284 285 // the stream based detection is deprecated and should be replaced by 286 // StreamingBlob detection instead to make serialization efficient for 287 // remote call 288 @Deprecated 289 public String getMimetypeFromStream(InputStream stream) throws MimetypeNotFoundException, 290 MimetypeDetectionException { 291 File file = null; 292 try { 293 file = File.createTempFile("NXMimetypeBean", ".bin"); 294 try { 295 FileUtils.copyToFile(stream, file); 296 return getMimetypeFromFile(file); 297 } finally { 298 file.delete(); 299 } 300 } catch (IOException e) { 301 throw new MimetypeDetectionException(e.getMessage(), e); 302 } 303 } 304 305 /** 306 * Finds the mimetype of a stream content and returns provided default if not possible. 307 * 308 * @param is content to be analyzed 309 * @param defaultMimetype default mimetype to be used if no found 310 * @return the string mimetype 311 * @throws MimetypeDetectionException 312 * @author lgodard 313 */ 314 @Deprecated 315 // use getMimetypeFromBlobWithDefault instead 316 public String getMimetypeFromStreamWithDefault(InputStream is, String defaultMimetype) 317 throws MimetypeDetectionException { 318 try { 319 return getMimetypeFromStream(is); 320 } catch (MimetypeNotFoundException e) { 321 return defaultMimetype; 322 } 323 } 324 325 public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException { 326 File file = null; 327 try { 328 file = File.createTempFile("NXMimetypeBean", ".bin"); 329 try { 330 InputStream is = blob.getStream(); 331 try { 332 FileUtils.copyToFile(is, file); 333 } finally { 334 is.close(); 335 } 336 return getMimetypeFromFile(file); 337 } finally { 338 file.delete(); 339 } 340 } catch (IOException e) { 341 throw new MimetypeDetectionException(e.getMessage(), e); 342 } 343 } 344 345 public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) { 346 MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream"); 347 if (mimetype != null) { 348 for (String key : mimetypeByNormalisedRegistry.keySet()) { 349 MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key); 350 if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) { 351 mtype = entry; 352 break; 353 } 354 } 355 } 356 return mtype; 357 } 358 359 /** 360 * Finds the mimetype of a Blob content and returns provided default if not possible. 361 * 362 * @param blob content to be analyzed 363 * @param defaultMimetype defaultMimeType to be used if no found 364 * @return the string mimetype 365 * @author lgodard 366 * @throws MimetypeDetectionException 367 */ 368 public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException { 369 try { 370 return getMimetypeFromBlob(blob); 371 } catch (MimetypeNotFoundException e) { 372 return defaultMimetype; 373 } 374 } 375 376 /** 377 * Finds the mimetype of some content according to its filename and / or binary content. 378 * 379 * @param filename extension to analyze 380 * @param blob content to be analyzed if filename is ambiguous 381 * @param defaultMimetype defaultMimeType to be used if no found 382 * @return the string mimetype 383 * @throws MimetypeDetectionException 384 * @author lgodard 385 */ 386 public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype) 387 throws MimetypeDetectionException { 388 try { 389 return getMimetypeFromFilename(filename); 390 } catch (MimetypeNotFoundException e) { 391 // failed to detect mimetype on extension: fallback to Blob based 392 // detection 393 try { 394 return getMimetypeFromBlob(blob); 395 } catch (MimetypeNotFoundException mtnfe) { 396 return defaultMimetype; 397 } 398 } 399 } 400 401 public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException { 402 if (filename == null) { 403 filename = blob.getFilename(); 404 } else if (blob.getFilename() == null) { 405 blob.setFilename(filename); 406 } 407 String mimetype = getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE); 408 blob.setMimeType(mimetype); 409 return blob; 410 } 411 412 public Blob updateMimetype(Blob blob) throws MimetypeDetectionException { 413 return updateMimetype(blob, null); 414 } 415 416}