001/* 002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Nuxeo - initial API and implementation 018 */ 019package org.nuxeo.ecm.platform.mimetype.service; 020 021import java.io.File; 022import java.io.FileInputStream; 023import java.io.IOException; 024import java.io.InputStream; 025import java.util.ArrayList; 026import java.util.Collection; 027import java.util.HashMap; 028import java.util.Iterator; 029import java.util.List; 030import java.util.Map; 031 032import org.apache.commons.io.FileUtils; 033import org.apache.commons.io.FilenameUtils; 034import org.apache.commons.lang3.StringUtils; 035import org.apache.commons.logging.Log; 036import org.apache.commons.logging.LogFactory; 037import org.nuxeo.ecm.core.api.Blob; 038import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException; 039import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException; 040import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry; 041import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry; 042import org.nuxeo.runtime.api.Framework; 043import org.nuxeo.runtime.model.ComponentContext; 044import org.nuxeo.runtime.model.ComponentName; 045import org.nuxeo.runtime.model.DefaultComponent; 046import org.nuxeo.runtime.model.Extension; 047import org.nuxeo.runtime.model.RuntimeContext; 048 049import net.sf.jmimemagic.Magic; 050import net.sf.jmimemagic.MagicException; 051import net.sf.jmimemagic.MagicMatch; 052import net.sf.jmimemagic.MagicMatchNotFoundException; 053import net.sf.jmimemagic.MagicParseException; 054 055/** 056 * MimetypeEntry registry service. 057 * <p> 058 * Singleton holding a registry of mimetype entries and exposes an API to grab information related to these mimetypes. 059 * As well, this is possible to ask for a mimetype magic detection from a stream or file using the API. 060 * 061 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a> 062 */ 063public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry { 064 065 public static final ComponentName NAME = new ComponentName( 066 "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService"); 067 068 // 10 MB is the max size to allow full file scan 069 public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024; 070 071 public static final String TMP_EXTENSION = "tmp"; 072 073 public static final String MSOFFICE_TMP_PREFIX = "~$"; 074 075 private static final Log log = LogFactory.getLog(MimetypeRegistryService.class); 076 077 protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry; 078 079 protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry; 080 081 protected Map<String, ExtensionDescriptor> extensionRegistry; 082 083 private RuntimeContext bundle; 084 085 public MimetypeRegistryService() { 086 initializeRegistries(); 087 } 088 089 protected void initializeRegistries() { 090 mimetypeByNormalisedRegistry = new HashMap<>(); 091 mimetypeByExtensionRegistry = new HashMap<>(); 092 extensionRegistry = new HashMap<>(); 093 } 094 095 protected boolean isMimetypeEntry(String mimetypeName) { 096 return mimetypeByNormalisedRegistry.containsKey(mimetypeName); 097 } 098 099 @Override 100 public void activate(ComponentContext context) { 101 bundle = context.getRuntimeContext(); 102 initializeRegistries(); 103 } 104 105 @Override 106 public void deactivate(ComponentContext context) { 107 mimetypeByNormalisedRegistry = null; 108 mimetypeByExtensionRegistry = null; 109 extensionRegistry = null; 110 } 111 112 @Override 113 public void registerExtension(Extension extension) { 114 Object[] contribs = extension.getContributions(); 115 if (contribs == null) { 116 return; 117 } 118 for (Object contrib : contribs) { 119 if (contrib instanceof MimetypeDescriptor) { 120 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 121 registerMimetype(mimetypeDescriptor.getMimetype()); 122 } else if (contrib instanceof ExtensionDescriptor) { 123 registerFileExtension((ExtensionDescriptor) contrib); 124 } 125 } 126 } 127 128 public void registerMimetype(MimetypeEntry mimetype) { 129 log.debug("Registering mimetype: " + mimetype.getNormalized()); 130 mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype); 131 for (String extension : mimetype.getExtensions()) { 132 mimetypeByExtensionRegistry.put(extension, mimetype); 133 } 134 } 135 136 public void registerFileExtension(ExtensionDescriptor extensionDescriptor) { 137 log.debug("Registering file extension: " + extensionDescriptor.getName()); 138 extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor); 139 } 140 141 @Override 142 public void unregisterExtension(Extension extension) { 143 Object[] contribs = extension.getContributions(); 144 if (contribs == null) { 145 return; 146 } 147 for (Object contrib : contribs) { 148 if (contrib instanceof MimetypeDescriptor) { 149 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 150 unregisterMimetype(mimetypeDescriptor.getNormalized()); 151 } else if (contrib instanceof ExtensionDescriptor) { 152 ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib; 153 unregisterFileExtension(extensionDescriptor); 154 } 155 } 156 } 157 158 public void unregisterMimetype(String mimetypeName) { 159 log.debug("Unregistering mimetype: " + mimetypeName); 160 MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName); 161 if (mimetype == null) { 162 return; 163 } 164 List<String> extensions = mimetype.getExtensions(); 165 mimetypeByNormalisedRegistry.remove(mimetypeName); 166 for (String extension : extensions) { 167 mimetypeByExtensionRegistry.remove(extension); 168 } 169 } 170 171 public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) { 172 log.debug("Unregistering file extension: " + extensionDescriptor.getName()); 173 extensionRegistry.remove(extensionDescriptor.getName()); 174 } 175 176 public RuntimeContext getContext() { 177 return bundle; 178 } 179 180 @Override 181 public List<String> getExtensionsFromMimetypeName(String mimetypeName) { 182 List<String> extensions = new ArrayList<>(); 183 for (String key : mimetypeByNormalisedRegistry.keySet()) { 184 MimetypeEntry mimetypeEntry = mimetypeByNormalisedRegistry.get(key); 185 if (mimetypeEntry.getMimetypes().contains(mimetypeName)) { 186 extensions.addAll(mimetypeEntry.getExtensions()); 187 } 188 } 189 return extensions; 190 } 191 192 @Override 193 public MimetypeEntry getMimetypeEntryByName(String name) { 194 return mimetypeByNormalisedRegistry.get(name); 195 } 196 197 @Override 198 public String getMimetypeFromFile(File file) throws MimetypeNotFoundException, MimetypeDetectionException { 199 if (file.length() > MAX_SIZE_FOR_SCAN) { 200 String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan."; 201 if (file.getAbsolutePath() == null) { 202 throw new MimetypeNotFoundException(exceptionMessage); 203 } 204 try { 205 return getMimetypeFromFilename(file.getAbsolutePath()); 206 } catch (MimetypeNotFoundException e) { 207 throw new MimetypeNotFoundException(exceptionMessage, e); 208 } 209 } 210 try { 211 MagicMatch match = Magic.getMagicMatch(file, true, false); 212 String mimeType; 213 214 if (match.getSubMatches().isEmpty()) { 215 mimeType = match.getMimeType(); 216 } else { 217 // Submatches found 218 // TODO: we only take the first here 219 // what to do with other possible responses ? 220 // b.t.w., multiple responses denotes a non-accuracy problem in 221 // magic.xml but be careful to nested possible 222 // sub-sub-...-submatches make this as recursive ? 223 Collection<MagicMatch> possibilities = match.getSubMatches(); 224 Iterator<MagicMatch> iter = possibilities.iterator(); 225 MagicMatch m = iter.next(); 226 mimeType = m.getMimeType(); 227 // need to clean for subsequent calls 228 possibilities.clear(); 229 match.setSubMatches(possibilities); 230 } 231 if ("text/plain".equals(mimeType)) { 232 // check we didn't mis-detect files with zeroes 233 // check first 16 bytes 234 byte[] bytes = new byte[16]; 235 int n = 0; 236 try (FileInputStream is = new FileInputStream(file)) { 237 n = is.read(bytes); 238 } 239 for (int i = 0; i < n; i++) { 240 if (bytes[i] == 0) { 241 mimeType = "application/octet-stream"; 242 break; 243 } 244 } 245 } 246 return mimeType; 247 } catch (MagicMatchNotFoundException e) { 248 if (file.getAbsolutePath() != null) { 249 return getMimetypeFromFilename(file.getAbsolutePath()); 250 } 251 throw new MimetypeNotFoundException(e.getMessage(), e); 252 } catch (MagicException | MagicParseException | IOException e) { 253 throw new MimetypeDetectionException(e.getMessage(), e); 254 } 255 } 256 257 @Override 258 public String getMimetypeFromExtension(String extension) throws MimetypeNotFoundException { 259 String lowerCaseExtension = extension.toLowerCase(); 260 ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension); 261 if (extensionDescriptor == null) { 262 // no explicit extension rule, analyse the inverted mimetype 263 // registry 264 MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension); 265 if (mimetype == null) { 266 throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension); 267 } else { 268 return mimetype.getNormalized(); 269 } 270 } else { 271 if (extensionDescriptor.isAmbiguous()) { 272 throw new MimetypeNotFoundException( 273 String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension)); 274 } else { 275 return extensionDescriptor.getMimetype(); 276 } 277 } 278 } 279 280 @Override 281 public String getMimetypeFromFilename(String filename) throws MimetypeNotFoundException { 282 if (filename == null) { 283 throw new MimetypeNotFoundException("filename is null"); 284 } 285 if (isTemporaryFile(filename)) { 286 return DEFAULT_MIMETYPE; 287 } 288 String extension = FilenameUtils.getExtension(filename); 289 if (StringUtils.isBlank(extension)) { 290 throw new MimetypeNotFoundException(filename + "has no extension"); 291 } 292 return getMimetypeFromExtension(extension); 293 } 294 295 protected boolean isTemporaryFile(String filename) { 296 return FilenameUtils.getExtension(filename).equalsIgnoreCase(TMP_EXTENSION) 297 || FilenameUtils.getName(filename).startsWith(MSOFFICE_TMP_PREFIX); 298 } 299 300 @Override 301 public String getMimetypeFromBlob(Blob blob) throws MimetypeNotFoundException, MimetypeDetectionException { 302 File file; 303 try { 304 file = Framework.createTempFile("NXMimetypeBean", ".bin"); 305 try (InputStream is = blob.getStream()) { 306 FileUtils.copyInputStreamToFile(is, file); 307 return getMimetypeFromFile(file); 308 } finally { 309 file.delete(); 310 } 311 } catch (IOException e) { 312 throw new MimetypeDetectionException(e.getMessage(), e); 313 } 314 } 315 316 @Override 317 public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) { 318 MimetypeEntry mtype = mimetypeByNormalisedRegistry.get("application/octet-stream"); 319 if (mimetype != null) { 320 for (String key : mimetypeByNormalisedRegistry.keySet()) { 321 MimetypeEntry entry = mimetypeByNormalisedRegistry.get(key); 322 if (mimetype.equals(entry.getNormalized()) || entry.getMimetypes().contains(mimetype)) { 323 mtype = entry; 324 break; 325 } 326 } 327 } 328 return mtype; 329 } 330 331 @Override 332 public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) throws MimetypeDetectionException { 333 try { 334 return getMimetypeFromBlob(blob); 335 } catch (MimetypeNotFoundException e) { 336 return defaultMimetype; 337 } 338 } 339 340 @Override 341 public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype) 342 throws MimetypeDetectionException { 343 try { 344 return getMimetypeFromFilename(filename); 345 } catch (MimetypeNotFoundException e) { 346 // failed to detect mimetype on extension: 347 // fallback to calculate mimetype from blob content 348 return getMimetypeFromBlobWithDefault(blob, defaultMimetype); 349 } 350 } 351 352 @Override 353 public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype) 354 throws MimetypeDetectionException { 355 try { 356 return getMimetypeFromFilename(filename); 357 } catch (MimetypeNotFoundException e) { 358 // failed to detect mimetype on extension: 359 // fallback to the blob defined mimetype 360 String mimeTypeName = blob.getMimeType(); 361 if (isMimetypeEntry(mimeTypeName)) { 362 return mimeTypeName; 363 } else { 364 // failed to detect mimetype on blob: 365 // fallback to calculate mimetype from blob content 366 return getMimetypeFromBlobWithDefault(blob, defaultMimetype); 367 } 368 } 369 } 370 371 @Override 372 public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback) 373 throws MimetypeDetectionException { 374 if (filename == null) { 375 filename = blob.getFilename(); 376 } else if (blob.getFilename() == null) { 377 blob.setFilename(filename); 378 } 379 if (withBlobMimetypeFallback) { 380 blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE)); 381 } else { 382 blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE)); 383 } 384 return blob; 385 } 386 387 @Override 388 public Blob updateMimetype(Blob blob, String filename) throws MimetypeDetectionException { 389 return updateMimetype(blob, filename, false); 390 } 391 392 @Override 393 public Blob updateMimetype(Blob blob) throws MimetypeDetectionException { 394 return updateMimetype(blob, null); 395 } 396 397}