001/* 002 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Nuxeo - initial API and implementation 018 * 019 */ 020package org.nuxeo.ecm.platform.mimetype.service; 021 022import java.io.File; 023import java.io.FileInputStream; 024import java.io.IOException; 025import java.io.InputStream; 026import java.util.ArrayList; 027import java.util.Collection; 028import java.util.HashMap; 029import java.util.Iterator; 030import java.util.List; 031import java.util.Map; 032 033import net.sf.jmimemagic.Magic; 034import net.sf.jmimemagic.MagicException; 035import net.sf.jmimemagic.MagicMatch; 036import net.sf.jmimemagic.MagicMatchNotFoundException; 037import net.sf.jmimemagic.MagicParseException; 038 039import org.apache.commons.io.FilenameUtils; 040import org.apache.commons.lang.StringUtils; 041import org.apache.commons.logging.Log; 042import org.apache.commons.logging.LogFactory; 043 044import org.nuxeo.common.utils.FileUtils; 045import org.nuxeo.ecm.core.api.Blob; 046import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException; 047import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException; 048import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry; 049import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry; 050import org.nuxeo.runtime.api.Framework; 051import org.nuxeo.runtime.model.ComponentContext; 052import org.nuxeo.runtime.model.ComponentName; 053import org.nuxeo.runtime.model.DefaultComponent; 054import org.nuxeo.runtime.model.Extension; 055import org.nuxeo.runtime.model.RuntimeContext; 056 057/** 058 * MimetypeEntry registry service. 059 * <p> 060 * Singleton holding a registry of mimetype entries and exposes an API to grab information related to these mimetypes. 061 * As well, this is possible to ask for a mimetype magic detection from a stream or file using the API. 062 * 063 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a> 064 */ 065public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry { 066 067 public static final ComponentName NAME = new ComponentName( 068 "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService"); 069 070 // 10 MB is the max size to allow full file scan 071 public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024; 072 073 private static final Log log = LogFactory.getLog(MimetypeRegistryService.class); 074 075 protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry; 076 077 protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry; 078 079 protected Map<String, ExtensionDescriptor> extensionRegistry; 080 081 private RuntimeContext bundle; 082 083 public MimetypeRegistryService() { 084 initializeRegistries(); 085 } 086 087 protected void initializeRegistries() { 088 mimetypeByNormalisedRegistry = new HashMap<>(); 089 mimetypeByExtensionRegistry = new HashMap<>(); 090 extensionRegistry = new HashMap<>(); 091 } 092 093 protected boolean isMimetypeEntry(String mimetypeName) { 094 return mimetypeByNormalisedRegistry.containsKey(mimetypeName); 095 } 096 097 @Override 098 public void activate(ComponentContext context) { 099 bundle = context.getRuntimeContext(); 100 initializeRegistries(); 101 } 102 103 @Override 104 public void deactivate(ComponentContext context) { 105 mimetypeByNormalisedRegistry = null; 106 mimetypeByExtensionRegistry = null; 107 extensionRegistry = null; 108 } 109 110 @Override 111 public void registerExtension(Extension extension) { 112 Object[] contribs = extension.getContributions(); 113 if (contribs == null) { 114 return; 115 } 116 for (Object contrib : contribs) { 117 if (contrib instanceof MimetypeDescriptor) { 118 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 119 registerMimetype(mimetypeDescriptor.getMimetype()); 120 } else if (contrib instanceof ExtensionDescriptor) { 121 registerFileExtension((ExtensionDescriptor) contrib); 122 } 123 } 124 } 125 126 public void registerMimetype(MimetypeEntry mimetype) { 127 log.debug("Registering mimetype: " + mimetype.getNormalized()); 128 mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype); 129 for (String extension : mimetype.getExtensions()) { 130 mimetypeByExtensionRegistry.put(extension, mimetype); 131 } 132 } 133 134 public void registerFileExtension(ExtensionDescriptor extensionDescriptor) { 135 log.debug("Registering file extension: " + extensionDescriptor.getName()); 136 extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor); 137 } 138 139 @Override 140 public void unregisterExtension(Extension extension) { 141 Object[] contribs = extension.getContributions(); 142 if (contribs == null) { 143 return; 144 } 145 for (Object contrib : contribs) { 146 if (contrib instanceof MimetypeDescriptor) { 147 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 148 unregisterMimetype(mimetypeDescriptor.getNormalized()); 149 } else if (contrib instanceof ExtensionDescriptor) { 150 ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib; 151 unregisterFileExtension(extensionDescriptor); 152 } 153 } 154 } 155 156 public void unregisterMimetype(String mimetypeName) { 157 log.debug("Unregistering mimetype: " + mimetypeName); 158 MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName); 159 if (mimetype == null) { 160 return; 161 } 162 List<String> extensions = mimetype.getExtensions(); 163 mimetypeByNormalisedRegistry.remove(mimetypeName); 164 for (String extension : extensions) { 165 // FIXME: equals always fails because types are incompatible. 166 if (mimetype.getNormalized().equals(mimetypeByExtensionRegistry.get(extension))) { 167 mimetypeByExtensionRegistry.remove(extension); 168 } 169 } 170 } 171 172 public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) { 173 log.debug("Unregistering file extension: " + extensionDescriptor.getName()); 174 extensionRegistry.remove(extensionDescriptor.getName()); 175 } 176 177 public RuntimeContext getContext() { 178 return bundle; 179 } 180 181 @Override 182 public List<String> getExtensionsFromMimetypeName(String mimetypeName) { 183 List<String> extensions = new ArrayList<>(); 184 for (String key : mimetypeByNormalisedRegistry.keySet()) { 185 MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key); 186 if (mimetypeEntry.getMimetypes().contains(mimetypeName)) { 187 extensions.addAll(mimetypeEntry.getExtensions()); 188 } 189 } 190 return extensions; 191 } 192 193 @Override 194 public MimetypeEntry getMimetypeEntryByName(String name) { 195 return mimetypeByNormalisedRegistry.get(name); 196 } 197 198 @Override 199 public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException { 200 if (file.length() > MAX_SIZE_FOR_SCAN) { 201 String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan."; 202 if (file.getAbsolutePath() == null) { 203 throw new MimetypeNotFoundException(exceptionMessage); 204 } 205 try { 206 return getMimetypeFromFilename(file.getAbsolutePath()); 207 } catch (MimetypeNotFoundException e) { 208 throw new MimetypeNotFoundException(exceptionMessage, e); 209 } 210 } 211 try { 212 MagicMatch match = Magic.getMagicMatch(file, true, false); 213 String mimeType; 214 215 if (match.getSubMatches().isEmpty()) { 216 mimeType = match.getMimeType(); 217 } else { 218 // Submatches found 219 // TODO: we only take the first here 220 // what to do with other possible responses ? 221 // b.t.w., multiple responses denotes a non-accuracy problem in 222 // magic.xml but be careful to nested possible 223 // sub-sub-...-submatches make this as recursive ? 224 Collection<MagicMatch> possibilities = match.getSubMatches(); 225 Iterator<MagicMatch> iter = possibilities.iterator(); 226 MagicMatch m = iter.next(); 227 mimeType = m.getMimeType(); 228 // need to clean for subsequent calls 229 possibilities.clear(); 230 match.setSubMatches(possibilities); 231 } 232 if ("text/plain".equals(mimeType)) { 233 // check we didn't mis-detect files with zeroes 234 // check first 16 bytes 235 byte[] bytes = new byte[16]; 236 FileInputStream is = new FileInputStream(file); 237 int n = 0; 238 try { 239 n = is.read(bytes); 240 } finally { 241 is.close(); 242 } 243 for (int i = 0; i < n; i++) { 244 if (bytes[i] == 0) { 245 mimeType = "application/octet-stream"; 246 break; 247 } 248 } 249 } 250 return mimeType; 251 } catch (MagicMatchNotFoundException e) { 252 if (file.getAbsolutePath() != null) { 253 return getMimetypeFromFilename(file.getAbsolutePath()); 254 } 255 throw new MimetypeNotFoundException(e.getMessage(), e); 256 } catch (MagicException | MagicParseException | IOException e) { 257 throw new MimetypeDetectionException(e.getMessage(), e); 258 } 259 } 260 261 @Override 262 public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException { 263 String lowerCaseExtension = extension.toLowerCase(); 264 ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension); 265 if (extensionDescriptor == null) { 266 // no explicit extension rule, analyse the inverted mimetype 267 // registry 268 MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension); 269 if (mimetype == null) { 270 throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension); 271 } else { 272 return mimetype.getNormalized(); 273 } 274 } else { 275 if (extensionDescriptor.isAmbiguous()) { 276 throw new MimetypeNotFoundException( 277 String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension)); 278 } else { 279 return extensionDescriptor.getMimetype(); 280 } 281 } 282 } 283 284 @Override 285 public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException { 286 if (filename == null) { 287 throw new MimetypeNotFoundException("filename is null"); 288 } 289 String extension = FilenameUtils.getExtension(filename); 290 if (StringUtils.isBlank(extension)) { 291 throw new MimetypeNotFoundException(filename + "has no extension"); 292 } 293 return getMimetypeFromExtension(extension); 294 } 295 296 // the stream based detection is deprecated and should be replaced by 297 // StreamingBlob detection instead to make serialization efficient for 298 // remote call 299 @Override 300 @Deprecated 301 public String getMimetypeFromStream(InputStream stream) 302 throws MimetypeNotFoundException, MimetypeDetectionException { 303 File file = null; 304 try { 305 file = Framework.createTempFile("NXMimetypeBean", ".bin"); 306 try { 307 FileUtils.copyToFile(stream, file); 308 return getMimetypeFromFile(file); 309 } finally { 310 file.delete(); 311 } 312 } catch (IOException e) { 313 throw new MimetypeDetectionException(e.getMessage(), e); 314 } 315 } 316 317 /** 318 * Finds the mimetype of a stream content and returns provided default if not possible. 319 * 320 * @param is content to be analyzed 321 * @param defaultMimetype default mimetype to be used if no found 322 * @return the string mimetype 323 * @throws MimetypeDetectionException 324 * @author lgodard 325 */ 326 @Override 327 @Deprecated 328 // use getMimetypeFromBlobWithDefault instead 329 public String getMimetypeFromStreamWithDefault(InputStream is, String defaultMimetype) 330 throws MimetypeDetectionException { 331 try { 332 return getMimetypeFromStream(is); 333 } catch (MimetypeNotFoundException e) { 334 return defaultMimetype; 335 } 336 } 337 338 @Override 339 public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException { 340 File file = null; 341 try { 342 file = Framework.createTempFile("NXMimetypeBean", ".bin"); 343 try { 344 InputStream is = blob.getStream(); 345 try { 346 FileUtils.copyToFile(is, file); 347 } finally { 348 is.close(); 349 } 350 return getMimetypeFromFile(file); 351 } finally { 352 file.delete(); 353 } 354 } catch (IOException e) { 355 throw new MimetypeDetectionException(e.getMessage(), e); 356 } 357 } 358 359 @Override 360 public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) { 361 MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream"); 362 if (mimetype != null) { 363 for (String key : mimetypeByNormalisedRegistry.keySet()) { 364 MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key); 365 if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) { 366 mtype = entry; 367 break; 368 } 369 } 370 } 371 return mtype; 372 } 373 374 @Override 375 public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException { 376 try { 377 return getMimetypeFromBlob(blob); 378 } catch (MimetypeNotFoundException e) { 379 return defaultMimetype; 380 } 381 } 382 383 @Override 384 public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype) 385 throws MimetypeDetectionException { 386 try { 387 return getMimetypeFromFilename(filename); 388 } catch (MimetypeNotFoundException e) { 389 // failed to detect mimetype on extension: 390 // fallback to calculate mimetype from blob content 391 return getMimetypeFromBlobWithDefault(blob, defaultMimetype); 392 } 393 } 394 395 @Override 396 public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype) 397 throws MimetypeDetectionException { 398 try { 399 return getMimetypeFromFilename(filename); 400 } catch (MimetypeNotFoundException e) { 401 // failed to detect mimetype on extension: 402 // fallback to the blob defined mimetype 403 String mimeTypeName = blob.getMimeType(); 404 if (isMimetypeEntry(mimeTypeName)) { 405 return mimeTypeName; 406 } else { 407 // failed to detect mimetype on blob: 408 // fallback to calculate mimetype from blob content 409 return getMimetypeFromBlobWithDefault(blob, defaultMimetype); 410 } 411 } 412 } 413 414 @Override 415 public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback) 416 throws MimetypeDetectionException { 417 if (filename == null) { 418 filename = blob.getFilename(); 419 } else if (blob.getFilename() == null) { 420 blob.setFilename(filename); 421 } 422 if (withBlobMimetypeFallback) { 423 blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE)); 424 } else { 425 blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE)); 426 } 427 return blob; 428 } 429 430 @Override 431 public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException { 432 return updateMimetype(blob, filename, false); 433 } 434 435 @Override 436 public Blob updateMimetype(Blob blob) throws MimetypeDetectionException { 437 return updateMimetype(blob, null); 438 } 439 440}