/*
 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 */
package org.nuxeo.ecm.platform.mimetype.service;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.nuxeo.common.utils.FileUtils;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.model.ComponentContext;
import org.nuxeo.runtime.model.ComponentName;
import org.nuxeo.runtime.model.DefaultComponent;
import org.nuxeo.runtime.model.Extension;
import org.nuxeo.runtime.model.RuntimeContext;

/**
 * MimetypeEntry registry service.
 * <p>
 * Component holding a registry of mimetype entries and exposing an API to grab information related to these
 * mimetypes. It is also possible to ask for a mimetype magic detection from a stream, a blob or a file using the API.
 *
 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a>
 */
public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry {

    public static final ComponentName NAME = new ComponentName(
            "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService");

    // 10 MB is the max size to allow full file scan
    public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024;

    // number of leading bytes inspected to catch binary files mis-detected as text/plain
    private static final int ZERO_BYTE_CHECK_LENGTH = 16;

    private static final Log log = LogFactory.getLog(MimetypeRegistryService.class);

    /** Mimetype entries keyed by their normalized mimetype name. */
    protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry;

    /** Mimetype entries keyed by each of the file extensions they declare. */
    protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry;

    /** Explicit file extension rules keyed by extension name. */
    protected Map<String, ExtensionDescriptor> extensionRegistry;

    private RuntimeContext bundle;

    public MimetypeRegistryService() {
        initializeRegistries();
    }

    /**
     * (Re)creates the three registries as empty maps.
     */
    protected void initializeRegistries() {
        mimetypeByNormalisedRegistry = new HashMap<>();
        mimetypeByExtensionRegistry = new HashMap<>();
        extensionRegistry = new HashMap<>();
    }

    /**
     * Remembers the runtime context of the component and starts from empty registries.
     */
    @Override
    public void activate(ComponentContext context) {
        bundle = context.getRuntimeContext();
        initializeRegistries();
    }

    /**
     * Drops the registries; they are recreated by the next {@link #activate(ComponentContext)}.
     */
    @Override
    public void deactivate(ComponentContext context) {
        mimetypeByNormalisedRegistry = null;
        mimetypeByExtensionRegistry = null;
        extensionRegistry = null;
    }

    /**
     * Registers every {@link MimetypeDescriptor} and {@link ExtensionDescriptor} contributed by the extension.
     * Contributions of any other type are ignored.
     */
    @Override
    public void registerExtension(Extension extension) {
        Object[] contribs = extension.getContributions();
        if (contribs == null) {
            return;
        }
        for (Object contrib : contribs) {
            if (contrib instanceof MimetypeDescriptor) {
                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
                registerMimetype(mimetypeDescriptor.getMimetype());
            } else if (contrib instanceof ExtensionDescriptor) {
                registerFileExtension((ExtensionDescriptor) contrib);
            }
        }
    }

    /**
     * Registers a mimetype entry under its normalized name and under each of its extensions.
     * <p>
     * An already registered entry with the same normalized name or extension is replaced.
     *
     * @param mimetype the entry to register
     */
    public void registerMimetype(MimetypeEntry mimetype) {
        log.debug("Registering mimetype: " + mimetype.getNormalized());
        mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype);
        for (String extension : mimetype.getExtensions()) {
            mimetypeByExtensionRegistry.put(extension, mimetype);
        }
    }

    /**
     * Registers an explicit file extension rule under the name of the descriptor.
     *
     * @param extensionDescriptor the rule to register
     */
    public void registerFileExtension(ExtensionDescriptor extensionDescriptor) {
        log.debug("Registering file extension: " + extensionDescriptor.getName());
        extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor);
    }

    /**
     * Unregisters every {@link MimetypeDescriptor} and {@link ExtensionDescriptor} contributed by the extension.
     */
    @Override
    public void unregisterExtension(Extension extension) {
        Object[] contribs = extension.getContributions();
        if (contribs == null) {
            return;
        }
        for (Object contrib : contribs) {
            if (contrib instanceof MimetypeDescriptor) {
                MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib;
                unregisterMimetype(mimetypeDescriptor.getNormalized());
            } else if (contrib instanceof ExtensionDescriptor) {
                ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib;
                unregisterFileExtension(extensionDescriptor);
            }
        }
    }

    /**
     * Removes the mimetype entry registered under the given normalized name, together with the extension mappings
     * that still point to it. Does nothing if no such entry is registered.
     *
     * @param mimetypeName the normalized name of the mimetype to remove
     */
    public void unregisterMimetype(String mimetypeName) {
        log.debug("Unregistering mimetype: " + mimetypeName);
        MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName);
        if (mimetype == null) {
            return;
        }
        mimetypeByNormalisedRegistry.remove(mimetypeName);
        for (String extension : mimetype.getExtensions()) {
            // an extension may have been re-mapped by a mimetype registered later: only drop the mapping when it
            // still designates the mimetype being removed
            MimetypeEntry registered = mimetypeByExtensionRegistry.get(extension);
            if (registered != null && mimetype.getNormalized().equals(registered.getNormalized())) {
                mimetypeByExtensionRegistry.remove(extension);
            }
        }
    }

    /**
     * Removes the explicit file extension rule registered under the name of the descriptor.
     *
     * @param extensionDescriptor the rule to remove
     */
    public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) {
        log.debug("Unregistering file extension: " + extensionDescriptor.getName());
        extensionRegistry.remove(extensionDescriptor.getName());
    }

    /**
     * @return the runtime context captured at activation time, or {@code null} if the component was never activated
     */
    public RuntimeContext getContext() {
        return bundle;
    }

    /**
     * Collects the extensions of every registered entry whose mimetype list contains the given name.
     *
     * @param mimetypeName the mimetype name to look for
     * @return the matching extensions, empty if no entry matches
     */
    @Override
    public List<String> getExtensionsFromMimetypeName(String mimetypeName) {
        List<String> extensions = new ArrayList<>();
        for (MimetypeEntry mimetypeEntry : mimetypeByNormalisedRegistry.values()) {
            if (mimetypeEntry.getMimetypes().contains(mimetypeName)) {
                extensions.addAll(mimetypeEntry.getExtensions());
            }
        }
        return extensions;
    }

    /**
     * @param name a normalized mimetype name
     * @return the entry registered under that name, or {@code null} if there is none
     */
    @Override
    public MimetypeEntry getMimetypeEntryByName(String name) {
        return mimetypeByNormalisedRegistry.get(name);
    }

    /**
     * Detects the mimetype of a file.
     * <p>
     * Files bigger than {@link #MAX_SIZE_FOR_SCAN} are never scanned: only their name is used. Smaller files go
     * through jmimemagic detection, falling back on the file name when no magic match is found.
     *
     * @param file the file to analyze
     * @return the detected mimetype
     * @throws MimetypeNotFoundException if neither the content nor the file name allows to find a mimetype
     * @throws MimetypeDetectionException if the detection itself fails (magic or I/O error)
     */
    @Override
    public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException {
        if (file.length() > MAX_SIZE_FOR_SCAN) {
            String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan.";
            if (file.getAbsolutePath() == null) {
                throw new MimetypeNotFoundException(exceptionMessage);
            }
            try {
                return getMimetypeFromFilename(file.getAbsolutePath());
            } catch (MimetypeNotFoundException e) {
                throw new MimetypeNotFoundException(exceptionMessage, e);
            }
        }
        try {
            MagicMatch match = Magic.getMagicMatch(file, true, false);
            String mimeType;

            if (match.getSubMatches().isEmpty()) {
                mimeType = match.getMimeType();
            } else {
                // Submatches found
                // TODO: we only take the first here
                // what to do with other possible responses ?
                // b.t.w., multiple responses denotes a non-accuracy problem in
                // magic.xml but be careful to nested possible
                // sub-sub-...-submatches make this as recursive ?
                Collection<MagicMatch> possibilities = match.getSubMatches();
                Iterator<MagicMatch> iter = possibilities.iterator();
                MagicMatch m = iter.next();
                mimeType = m.getMimeType();
                // need to clean for subsequent calls
                possibilities.clear();
                match.setSubMatches(possibilities);
            }
            // check we didn't mis-detect files with zeroes
            if ("text/plain".equals(mimeType) && hasZeroByteInHeader(file)) {
                mimeType = "application/octet-stream";
            }
            return mimeType;
        } catch (MagicMatchNotFoundException e) {
            if (file.getAbsolutePath() != null) {
                return getMimetypeFromFilename(file.getAbsolutePath());
            }
            throw new MimetypeNotFoundException(e.getMessage(), e);
        } catch (MagicException | MagicParseException | IOException e) {
            throw new MimetypeDetectionException(e.getMessage(), e);
        }
    }

    /**
     * Tells whether one of the bytes returned by a single read of at most {@link #ZERO_BYTE_CHECK_LENGTH} bytes from
     * the start of the file is zero.
     */
    private static boolean hasZeroByteInHeader(File file) throws IOException {
        byte[] bytes = new byte[ZERO_BYTE_CHECK_LENGTH];
        int n;
        try (FileInputStream is = new FileInputStream(file)) {
            // n is -1 on an empty file, in which case the loop below is skipped
            n = is.read(bytes);
        }
        for (int i = 0; i < n; i++) {
            if (bytes[i] == 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds the mimetype associated with a file extension.
     * <p>
     * Explicit extension rules take precedence over the extensions declared by the registered mimetype entries. The
     * lookup is done on the lower-cased extension.
     *
     * @param extension the file extension, without the leading dot
     * @return the mimetype for that extension
     * @throws MimetypeNotFoundException if the extension is {@code null}, unknown, or flagged as ambiguous by its
     *             explicit rule
     */
    @Override
    public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException {
        if (extension == null) {
            throw new MimetypeNotFoundException("extension is null");
        }
        // locale-independent lower-casing: the default locale must not alter the registry keys (e.g. Turkish i)
        String lowerCaseExtension = extension.toLowerCase(Locale.ROOT);
        ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension);
        if (extensionDescriptor == null) {
            // no explicit extension rule, analyse the inverted mimetype
            // registry
            MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension);
            if (mimetype == null) {
                throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension);
            }
            return mimetype.getNormalized();
        }
        if (extensionDescriptor.isAmbiguous()) {
            throw new MimetypeNotFoundException(String.format(
                    "mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension));
        }
        return extensionDescriptor.getMimetype();
    }

    /**
     * Finds the mimetype associated with the extension of a file name.
     *
     * @param filename the file name or path
     * @return the mimetype for the extension of the file name
     * @throws MimetypeNotFoundException if the file name is {@code null}, has no extension, or if no mimetype can be
     *             found for its extension
     */
    @Override
    public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException {
        if (filename == null) {
            throw new MimetypeNotFoundException("filename is null");
        }
        String extension = FilenameUtils.getExtension(filename);
        if (StringUtils.isBlank(extension)) {
            throw new MimetypeNotFoundException(filename + " has no extension");
        }
        return getMimetypeFromExtension(extension);
    }

    /**
     * Detects the mimetype of a stream by copying it to a temporary file which is deleted afterwards.
     *
     * @param stream the content to analyze
     * @return the detected mimetype
     * @throws MimetypeNotFoundException if no mimetype can be found
     * @throws MimetypeDetectionException if the detection or the copy to the temporary file fails
     * @deprecated the stream based detection should be replaced by blob based detection, see
     *             {@link #getMimetypeFromBlob(Blob)}
     */
    @Override
    @Deprecated
    public String getMimetypeFromStream(InputStream stream) throws MimetypeNotFoundException,
            MimetypeDetectionException {
        try {
            File file = Framework.createTempFile("NXMimetypeBean", ".bin");
            try {
                FileUtils.copyToFile(stream, file);
                return getMimetypeFromFile(file);
            } finally {
                file.delete();
            }
        } catch (IOException e) {
            throw new MimetypeDetectionException(e.getMessage(), e);
        }
    }

    /**
     * Finds the mimetype of a stream content and returns provided default if not possible.
     *
     * @param is content to be analyzed
     * @param defaultMimetype default mimetype to be used if none is found
     * @return the string mimetype
     * @throws MimetypeDetectionException if the detection itself fails
     * @author lgodard
     * @deprecated use {@link #getMimetypeFromBlobWithDefault(Blob, String)} instead
     */
    @Override
    @Deprecated
    public String getMimetypeFromStreamWithDefault(InputStream is, String defaultMimetype)
            throws MimetypeDetectionException {
        try {
            return getMimetypeFromStream(is);
        } catch (MimetypeNotFoundException e) {
            return defaultMimetype;
        }
    }

    /**
     * Detects the mimetype of a blob by copying its content to a temporary file which is deleted afterwards.
     *
     * @param blob the content to analyze
     * @return the detected mimetype
     * @throws MimetypeNotFoundException if no mimetype can be found
     * @throws MimetypeDetectionException if the detection or the copy to the temporary file fails
     */
    @Override
    public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException {
        try {
            File file = Framework.createTempFile("NXMimetypeBean", ".bin");
            try {
                try (InputStream is = blob.getStream()) {
                    FileUtils.copyToFile(is, file);
                }
                return getMimetypeFromFile(file);
            } finally {
                file.delete();
            }
        } catch (IOException e) {
            throw new MimetypeDetectionException(e.getMessage(), e);
        }
    }

    /**
     * Finds the first registered entry whose normalized name equals the given mimetype or whose mimetype list
     * contains it.
     *
     * @param mimetype the mimetype to look for, may be {@code null}
     * @return the matching entry, or the entry registered under {@code application/octet-stream} (possibly
     *         {@code null}) when there is no match
     */
    @Override
    public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) {
        MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream");
        if (mimetype != null) {
            for (MimetypeEntry entry : mimetypeByNormalisedRegistry.values()) {
                if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) {
                    mtype = entry;
                    break;
                }
            }
        }
        return mtype;
    }

    /**
     * Finds the mimetype of a Blob content and returns provided default if not possible.
     *
     * @param blob content to be analyzed
     * @param defaultMimetype default mimetype to be used if none is found
     * @return the string mimetype
     * @author lgodard
     * @throws MimetypeDetectionException if the detection itself fails
     */
    @Override
    public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException {
        try {
            return getMimetypeFromBlob(blob);
        } catch (MimetypeNotFoundException e) {
            return defaultMimetype;
        }
    }

    /**
     * Finds the mimetype of some content according to its filename and / or binary content.
     *
     * @param filename file name whose extension is analyzed first
     * @param blob content to be analyzed if the filename does not allow to find a mimetype
     * @param defaultMimetype default mimetype to be used if none is found
     * @return the string mimetype
     * @throws MimetypeDetectionException if the blob based detection fails
     * @author lgodard
     */
    @Override
    public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype)
            throws MimetypeDetectionException {
        try {
            return getMimetypeFromFilename(filename);
        } catch (MimetypeNotFoundException e) {
            // failed to detect mimetype on extension: fallback to Blob based
            // detection
            try {
                return getMimetypeFromBlob(blob);
            } catch (MimetypeNotFoundException mtnfe) {
                return defaultMimetype;
            }
        }
    }

    /**
     * Detects the mimetype of the blob from the file name and / or the blob content and sets it on the blob.
     * <p>
     * When the given filename is {@code null} the filename of the blob is used; when the blob has no filename it
     * receives the given one.
     *
     * @param blob the blob to update
     * @param filename the file name to use for the detection, may be {@code null}
     * @return the same blob instance, with its mimetype updated
     * @throws MimetypeDetectionException if the blob based detection fails
     */
    @Override
    public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException {
        if (filename == null) {
            filename = blob.getFilename();
        } else if (blob.getFilename() == null) {
            blob.setFilename(filename);
        }
        String mimetype = getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE);
        blob.setMimeType(mimetype);
        return blob;
    }

    /**
     * Same as {@link #updateMimetype(Blob, String)} using the filename carried by the blob.
     */
    @Override
    public Blob updateMimetype(Blob blob) throws MimetypeDetectionException {
        return updateMimetype(blob, null);
    }

}