/*
 * (C) Copyright 2006-2019 Nuxeo (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 */
package org.nuxeo.ecm.platform.mimetype.service;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
import org.nuxeo.ecm.platform.mimetype.MimetypeNotFoundException;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.model.ComponentContext;
import org.nuxeo.runtime.model.ComponentName;
import org.nuxeo.runtime.model.DefaultComponent;
import org.nuxeo.runtime.model.Extension;
import org.nuxeo.runtime.model.RuntimeContext;

051import net.sf.jmimemagic.Magic; 052import net.sf.jmimemagic.MagicException; 053import net.sf.jmimemagic.MagicMatch; 054import net.sf.jmimemagic.MagicMatchNotFoundException; 055import net.sf.jmimemagic.MagicParseException; 056 057/** 058 * MimetypeEntry registry service. 059 * <p> 060 * Singleton holding a registry of mimetype entries and exposes an API to grab information related to these mimetypes. 061 * As well, this is possible to ask for a mimetype magic detection from a stream or file using the API. 062 * 063 * @author <a href="mailto:ja@nuxeo.com">Julien Anguenot</a> 064 */ 065public class MimetypeRegistryService extends DefaultComponent implements MimetypeRegistry { 066 067 public static final ComponentName NAME = new ComponentName( 068 "org.nuxeo.ecm.platform.mimetype.service.MimetypeRegistryService"); 069 070 // 10 MB is the max size to allow full file scan 071 public static final long MAX_SIZE_FOR_SCAN = 10 * 1024 * 1024; 072 073 public static final String TMP_EXTENSION = "tmp"; 074 075 public static final String MSOFFICE_TMP_PREFIX = "~$"; 076 077 private static final Log log = LogFactory.getLog(MimetypeRegistryService.class); 078 079 protected Map<String, MimetypeEntry> mimetypeByNormalisedRegistry; 080 081 protected Map<String, MimetypeEntry> mimetypeByExtensionRegistry; 082 083 protected Map<String, ExtensionDescriptor> extensionRegistry; 084 085 private RuntimeContext bundle; 086 087 public MimetypeRegistryService() { 088 initializeRegistries(); 089 } 090 091 protected void initializeRegistries() { 092 mimetypeByNormalisedRegistry = new HashMap<>(); 093 mimetypeByExtensionRegistry = new HashMap<>(); 094 extensionRegistry = new HashMap<>(); 095 } 096 097 /** 098 * @deprecated since 11.1. Use {@link #isMimeTypeNormalized(String)} instead. 
099 */ 100 @Deprecated(since = "11.1", forRemoval = true) 101 protected boolean isMimetypeEntry(String mimetypeName) { 102 return mimetypeByNormalisedRegistry.containsKey(mimetypeName); 103 } 104 105 @Override 106 public void activate(ComponentContext context) { 107 bundle = context.getRuntimeContext(); 108 initializeRegistries(); 109 } 110 111 @Override 112 public void deactivate(ComponentContext context) { 113 mimetypeByNormalisedRegistry = null; 114 mimetypeByExtensionRegistry = null; 115 extensionRegistry = null; 116 } 117 118 @Override 119 public void registerExtension(Extension extension) { 120 Object[] contribs = extension.getContributions(); 121 if (contribs == null) { 122 return; 123 } 124 for (Object contrib : contribs) { 125 if (contrib instanceof MimetypeDescriptor) { 126 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 127 registerMimetype(mimetypeDescriptor.getMimetype()); 128 } else if (contrib instanceof ExtensionDescriptor) { 129 registerFileExtension((ExtensionDescriptor) contrib); 130 } 131 } 132 } 133 134 public void registerMimetype(MimetypeEntry mimetype) { 135 log.debug("Registering mimetype: " + mimetype.getNormalized()); 136 mimetypeByNormalisedRegistry.put(mimetype.getNormalized(), mimetype); 137 for (String extension : mimetype.getExtensions()) { 138 mimetypeByExtensionRegistry.put(extension, mimetype); 139 } 140 } 141 142 public void registerFileExtension(ExtensionDescriptor extensionDescriptor) { 143 log.debug("Registering file extension: " + extensionDescriptor.getName()); 144 extensionRegistry.put(extensionDescriptor.getName(), extensionDescriptor); 145 } 146 147 @Override 148 public void unregisterExtension(Extension extension) { 149 Object[] contribs = extension.getContributions(); 150 if (contribs == null) { 151 return; 152 } 153 for (Object contrib : contribs) { 154 if (contrib instanceof MimetypeDescriptor) { 155 MimetypeDescriptor mimetypeDescriptor = (MimetypeDescriptor) contrib; 156 
unregisterMimetype(mimetypeDescriptor.getNormalized()); 157 } else if (contrib instanceof ExtensionDescriptor) { 158 ExtensionDescriptor extensionDescriptor = (ExtensionDescriptor) contrib; 159 unregisterFileExtension(extensionDescriptor); 160 } 161 } 162 } 163 164 public void unregisterMimetype(String mimetypeName) { 165 log.debug("Unregistering mimetype: " + mimetypeName); 166 MimetypeEntry mimetype = mimetypeByNormalisedRegistry.get(mimetypeName); 167 if (mimetype == null) { 168 return; 169 } 170 List<String> extensions = mimetype.getExtensions(); 171 mimetypeByNormalisedRegistry.remove(mimetypeName); 172 for (String extension : extensions) { 173 mimetypeByExtensionRegistry.remove(extension); 174 } 175 } 176 177 public void unregisterFileExtension(ExtensionDescriptor extensionDescriptor) { 178 log.debug("Unregistering file extension: " + extensionDescriptor.getName()); 179 extensionRegistry.remove(extensionDescriptor.getName()); 180 } 181 182 public RuntimeContext getContext() { 183 return bundle; 184 } 185 186 @Override 187 public List<String> getExtensionsFromMimetypeName(String mimetypeName) { 188 return mimetypeByNormalisedRegistry.entrySet() 189 .stream() 190 .filter(e -> e.getValue().getMimetypes().contains(mimetypeName)) 191 .flatMap(e -> e.getValue().getExtensions().stream()) 192 .collect(Collectors.toList()); 193 } 194 195 @Override 196 public MimetypeEntry getMimetypeEntryByName(String name) { 197 return mimetypeByNormalisedRegistry.get(name); 198 } 199 200 @Override 201 public String getMimetypeFromFile(File file) { 202 if (file.length() > MAX_SIZE_FOR_SCAN) { 203 String exceptionMessage = "Not able to determine mime type from filename and file is too big for binary scan."; 204 if (file.getAbsolutePath() == null) { 205 throw new MimetypeNotFoundException(exceptionMessage); 206 } 207 try { 208 return getMimetypeFromFilename(file.getAbsolutePath()); 209 } catch (MimetypeNotFoundException e) { 210 throw new MimetypeNotFoundException(exceptionMessage, e); 
211 } 212 } 213 try { 214 MagicMatch match = Magic.getMagicMatch(file, true, false); 215 String mimeType; 216 217 if (match.getSubMatches().isEmpty()) { 218 mimeType = match.getMimeType(); 219 } else { 220 // Submatches found 221 // TODO: we only take the first here 222 // what to do with other possible responses ? 223 // b.t.w., multiple responses denotes a non-accuracy problem in 224 // magic.xml but be careful to nested possible 225 // sub-sub-...-submatches make this as recursive ? 226 Collection<MagicMatch> possibilities = match.getSubMatches(); 227 Iterator<MagicMatch> iter = possibilities.iterator(); 228 MagicMatch m = iter.next(); 229 mimeType = m.getMimeType(); 230 // need to clean for subsequent calls 231 possibilities.clear(); 232 match.setSubMatches(possibilities); 233 } 234 if ("text/plain".equals(mimeType)) { 235 // check we didn't mis-detect files with zeroes 236 // check first 16 bytes 237 byte[] bytes = new byte[16]; 238 int n = 0; 239 try (FileInputStream is = new FileInputStream(file)) { 240 n = is.read(bytes); 241 } 242 for (int i = 0; i < n; i++) { 243 if (bytes[i] == 0) { 244 mimeType = DEFAULT_MIMETYPE; 245 break; 246 } 247 } 248 } 249 return mimeType; 250 } catch (MagicMatchNotFoundException e) { 251 if (file.getAbsolutePath() != null) { 252 return getMimetypeFromFilename(file.getAbsolutePath()); 253 } 254 throw new MimetypeNotFoundException(e.getMessage(), e); 255 } catch (MagicException | MagicParseException | IOException e) { 256 throw new MimetypeDetectionException(e.getMessage(), e); 257 } 258 } 259 260 @Override 261 public String getMimetypeFromExtension(String extension) { 262 String lowerCaseExtension = extension.toLowerCase(); 263 ExtensionDescriptor extensionDescriptor = extensionRegistry.get(lowerCaseExtension); 264 if (extensionDescriptor == null) { 265 // no explicit extension rule, analyse the inverted mimetype 266 // registry 267 MimetypeEntry mimetype = mimetypeByExtensionRegistry.get(lowerCaseExtension); 268 if (mimetype == 
null) { 269 throw new MimetypeNotFoundException("no registered mimetype has extension: " + lowerCaseExtension); 270 } else { 271 return mimetype.getNormalized(); 272 } 273 } else { 274 if (extensionDescriptor.isAmbiguous()) { 275 throw new MimetypeNotFoundException( 276 String.format("mimetype for %s is ambiguous, binary sniffing needed", lowerCaseExtension)); 277 } else { 278 return extensionDescriptor.getMimetype(); 279 } 280 } 281 } 282 283 @Override 284 public String getMimetypeFromFilename(String filename) { 285 if (filename == null) { 286 throw new MimetypeNotFoundException("filename is null"); 287 } 288 if (isTemporaryFile(filename)) { 289 return DEFAULT_MIMETYPE; 290 } 291 String extension = FilenameUtils.getExtension(filename); 292 if (StringUtils.isBlank(extension)) { 293 throw new MimetypeNotFoundException(filename + "has no extension"); 294 } 295 return getMimetypeFromExtension(extension); 296 } 297 298 protected boolean isTemporaryFile(String filename) { 299 return FilenameUtils.getExtension(filename).equalsIgnoreCase(TMP_EXTENSION) 300 || FilenameUtils.getName(filename).startsWith(MSOFFICE_TMP_PREFIX); 301 } 302 303 @Override 304 public String getMimetypeFromBlob(Blob blob) { 305 File file; 306 try { 307 file = Framework.createTempFile("NXMimetypeBean", ".bin"); 308 try (InputStream is = blob.getStream()) { 309 FileUtils.copyInputStreamToFile(is, file); 310 return getMimetypeFromFile(file); 311 } finally { 312 file.delete(); 313 } 314 } catch (IOException e) { 315 throw new MimetypeDetectionException(e.getMessage(), e); 316 } 317 } 318 319 @Override 320 public MimetypeEntry getMimetypeEntryByMimeType(String mimetype) { 321 String normalized = getNormalizedMimeType(mimetype).orElse(DEFAULT_MIMETYPE); 322 return mimetypeByNormalisedRegistry.get(normalized); 323 } 324 325 @Override 326 public String getMimetypeFromBlobWithDefault(Blob blob, String defaultMimetype) { 327 try { 328 return getMimetypeFromBlob(blob); 329 } catch (MimetypeNotFoundException e) 
{ 330 return defaultMimetype; 331 } 332 } 333 334 @Override 335 public String getMimetypeFromFilenameAndBlobWithDefault(String filename, Blob blob, String defaultMimetype) { 336 try { 337 return getMimetypeFromFilename(filename); 338 } catch (MimetypeNotFoundException e) { 339 // failed to detect mimetype on extension: 340 // fallback to calculate mimetype from blob content 341 return getMimetypeFromBlobWithDefault(blob, defaultMimetype); 342 } 343 } 344 345 @Override 346 public String getMimetypeFromFilenameWithBlobMimetypeFallback(String filename, Blob blob, String defaultMimetype) { 347 try { 348 return getMimetypeFromFilename(filename); 349 } catch (MimetypeNotFoundException e) { 350 // failed to detect mimetype on extension: 351 // fallback to the blob defined mimetype 352 String mimeTypeName = blob.getMimeType(); 353 if (isMimeTypeNormalized(mimeTypeName)) { 354 return mimeTypeName; 355 } else { 356 // failed to detect mimetype on blob: 357 // fallback to calculate mimetype from blob content 358 return getMimetypeFromBlobWithDefault(blob, defaultMimetype); 359 } 360 } 361 } 362 363 @Override 364 public Blob updateMimetype(Blob blob, String filename, Boolean withBlobMimetypeFallback) { 365 if (filename == null) { 366 filename = blob.getFilename(); 367 } else if (blob.getFilename() == null) { 368 blob.setFilename(filename); 369 } 370 if (withBlobMimetypeFallback) { 371 blob.setMimeType(getMimetypeFromFilenameWithBlobMimetypeFallback(filename, blob, DEFAULT_MIMETYPE)); 372 } else { 373 blob.setMimeType(getMimetypeFromFilenameAndBlobWithDefault(filename, blob, DEFAULT_MIMETYPE)); 374 } 375 return blob; 376 } 377 378 @Override 379 public Blob updateMimetype(Blob blob, String filename) { 380 return updateMimetype(blob, filename, false); 381 } 382 383 @Override 384 public Blob updateMimetype(Blob blob) { 385 return updateMimetype(blob, null); 386 } 387 388 @Override 389 public Optional<String> getNormalizedMimeType(String mimeType) { 390 if (mimeType == null) { 391 
return Optional.empty(); 392 } 393 394 Set<Map.Entry<String, MimetypeEntry>> entries = mimetypeByNormalisedRegistry.entrySet(); 395 return entries.stream() 396 .filter(e -> e.getKey().equals(mimeType) || e.getValue().getMimetypes().contains(mimeType)) 397 .findAny() 398 .map(Map.Entry::getKey); 399 } 400 401 @Override 402 public boolean isMimeTypeNormalized(String mimeType) { 403 return mimetypeByNormalisedRegistry.containsKey(mimeType); 404 } 405}