001/* 002 * Copyright (c) 2006-2011 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the Eclipse Public License v1.0 006 * which accompanies this distribution, and is available at 007 * http://www.eclipse.org/legal/epl-v10.html 008 * 009 * Contributors: 010 * Florent Guillaume 011 */ 012package org.nuxeo.ecm.core.utils; 013 014import java.util.ArrayList; 015import java.util.Arrays; 016import java.util.HashMap; 017import java.util.List; 018import java.util.Map; 019import java.util.Set; 020 021import org.apache.commons.logging.Log; 022import org.apache.commons.logging.LogFactory; 023import org.nuxeo.ecm.core.api.Blob; 024import org.nuxeo.ecm.core.api.DocumentModel; 025import org.nuxeo.ecm.core.api.model.Property; 026import org.nuxeo.ecm.core.schema.DocumentType; 027import org.nuxeo.ecm.core.schema.SchemaManager; 028import org.nuxeo.ecm.core.schema.TypeConstants; 029import org.nuxeo.ecm.core.schema.types.ComplexType; 030import org.nuxeo.ecm.core.schema.types.Field; 031import org.nuxeo.ecm.core.schema.types.ListType; 032import org.nuxeo.ecm.core.schema.types.Schema; 033import org.nuxeo.ecm.core.schema.types.Type; 034import org.nuxeo.runtime.api.Framework; 035 036/** 037 * Extractor for all the blobs of a document. 038 * 039 * @author Florent Guillaume 040 * @author Benjamin Jalon 041 */ 042public class BlobsExtractor { 043 044 protected static final Log log = LogFactory.getLog(BlobsExtractor.class); 045 046 protected final Map<String, Map<String, List<String>>> blobFieldPaths = new HashMap<String, Map<String, List<String>>>(); 047 048 protected List<String> docTypeCached = new ArrayList<String>(); 049 050 protected SchemaManager schemaManager; 051 052 private Set<String> pathProperties; 053 054 private Set<String> excludedPathProperties; 055 056 private boolean indexAllBinary = false; 057 058 private boolean isDefaultConfiguration = true; 059 060 protected SchemaManager getSchemaManager() { 061 if (schemaManager == null) { 062 schemaManager = Framework.getService(SchemaManager.class); 063 } 064 return schemaManager; 065 } 066 067 /** 068 * Get properties of the given document that contain a blob value. This method uses the cache engine to find these 069 * properties. 070 */ 071 public List<Property> getBlobsProperties(DocumentModel doc) { 072 073 List<Property> result = new ArrayList<Property>(); 074 for (String schema : getBlobFieldPathForDocumentType(doc.getType()).keySet()) { 075 List<String> pathsList = getBlobFieldPathForDocumentType(doc.getType()).get(schema); 076 for (String path : pathsList) { 077 if (!isInterestingBlobProperty(path, schemaManager.getSchema(schema).getNamespace().prefix)) { 078 continue; 079 } 080 List<String> pathSplitted = Arrays.asList(path.split("/[*]/")); 081 if (pathSplitted.size() == 0) { 082 throw new IllegalStateException("Path detected not wellformed: " + pathsList); 083 } 084 Property prop = doc.getProperty(schema + ":" + pathSplitted.get(0)); 085 086 if (pathSplitted.size() >= 1) { 087 List<String> subPath = pathSplitted.subList(1, pathSplitted.size()); 088 getBlobValue(prop, subPath, path, result); 089 } 090 } 091 } 092 093 return result; 094 } 095 096 /** 097 * Get path list of properties that may contain a blob for the given document type. 098 * 099 * @param documentType document type name 100 * @return return the property names that contain blob 101 */ 102 public Map<String, List<String>> getBlobFieldPathForDocumentType(String documentType) { 103 DocumentType docType = getSchemaManager().getDocumentType(documentType); 104 105 if (!docTypeCached.contains(documentType)) { 106 Map<String, List<String>> paths = new HashMap<String, List<String>>(); 107 blobFieldPaths.put(docType.getName(), paths); 108 109 createCacheForDocumentType(docType); 110 } 111 112 return blobFieldPaths.get(documentType); 113 } 114 115 public void invalidateDocumentTypeCache(String docType) { 116 if (docTypeCached.contains(docType)) { 117 docTypeCached.remove(docType); 118 } 119 } 120 121 public void invalidateCache() { 122 docTypeCached = new ArrayList<String>(); 123 } 124 125 protected void createCacheForDocumentType(DocumentType docType) { 126 127 for (Schema schema : docType.getSchemas()) { 128 findInteresting(docType, schema, "", schema); 129 } 130 131 if (!docTypeCached.contains(docType.getName())) { 132 docTypeCached.add(docType.getName()); 133 } 134 } 135 136 /** 137 * Analyzes the document's schemas to find which fields and complex types contain blobs. For each blob fields type 138 * found, {@link BlobsExtractor#blobMatched(DocumentType, Schema, String, Field)} is called and for each property 139 * that contains a subProperty containing a Blob, 140 * {@link BlobsExtractor#containsBlob(DocumentType, Schema, String, Field)} is called 141 * 142 * @param schema The parent schema that contains the field 143 * @param ct Current type parsed 144 * @return {@code true} if the passed complex type contains at least one blob field 145 */ 146 protected boolean findInteresting(DocumentType docType, Schema schema, String path, ComplexType ct) { 147 boolean interesting = false; 148 for (Field field : ct.getFields()) { 149 Type type = field.getType(); 150 if (type.isSimpleType()) { 151 continue; // not binary text 152 } else if (type.isListType()) { 153 Type ftype = ((ListType) type).getField().getType(); 154 if (ftype.isComplexType()) { 155 String blobMatchedPath = path + String.format("/%s/*", field.getName().getLocalName()); 156 if (findInteresting(docType, schema, blobMatchedPath, (ComplexType) ftype)) { 157 containsBlob(docType, schema, blobMatchedPath, field); 158 interesting |= true; 159 } 160 } else { 161 continue; // not binary text 162 } 163 } else { // complex type 164 ComplexType ctype = (ComplexType) type; 165 if (type.getName().equals(TypeConstants.CONTENT)) { 166 // CB: Fix for NXP-3847 - do not accumulate field name in 167 // the path 168 String blobMatchedPath = path + String.format("/%s", field.getName().getLocalName()); 169 blobMatched(docType, schema, blobMatchedPath, field); 170 interesting = true; 171 } else { 172 String blobMatchedPath = path + String.format("/%s", field.getName().getLocalName()); 173 interesting |= findInteresting(docType, schema, blobMatchedPath, ctype); 174 } 175 } 176 } 177 if (interesting) { 178 containsBlob(docType, schema, path, null); 179 } 180 return interesting; 181 } 182 183 /** 184 * Call during the parsing of the schema structure in {@link BlobsExtractor#findInteresting} if field is a Blob 185 * Type. This method stores the path to that Field. 186 * 187 * @param schema The parent schema that contains the field 188 * @param field Field that is a BlobType 189 */ 190 protected void blobMatched(DocumentType docType, Schema schema, String path, Field field) { 191 Map<String, List<String>> blobPathsForDocType = blobFieldPaths.get(docType.getName()); 192 List<String> pathsList = blobPathsForDocType.get(schema.getName()); 193 if (pathsList == null) { 194 pathsList = new ArrayList<String>(); 195 blobPathsForDocType.put(schema.getName(), pathsList); 196 blobFieldPaths.put(docType.getName(), blobPathsForDocType); 197 } 198 pathsList.add(path); 199 } 200 201 /** 202 * Called during the parsing of the schema structure in {@link BlobsExtractor#findInteresting} if field contains a 203 * subfield of type Blob. This method does nothing. 204 * 205 * @param schema The parent schema that contains the field 206 * @param field Field that contains a subField of type BlobType 207 */ 208 protected void containsBlob(DocumentType docType, Schema schema, String path, Field field) { 209 } 210 211 protected void getBlobValue(Property prop, List<String> subPath, String completePath, List<Property> result) { 212 if (subPath.size() == 0) { 213 if (!(prop.getValue() instanceof Blob)) { 214 log.debug("Path Field not contains a blob value: " + completePath); 215 return; 216 } 217 result.add(prop); 218 return; 219 } 220 221 for (Property childProp : prop.getChildren()) { 222 if ("/*".equals(subPath.get(0))) { 223 log.debug("TODO : BLOB IN A LIST NOT IMPLEMENTED for this path " + completePath); 224 } 225 Property childSubProp = childProp.get(subPath.get(0)); 226 getBlobValue(childSubProp, subPath.subList(1, subPath.size()), completePath, result); 227 } 228 } 229 230 /** 231 * Finds all the blobs of the document. 232 * <p> 233 * This method is not thread-safe. 234 * 235 * @param doc the document 236 * @return the list of blobs in the document 237 */ 238 public List<Blob> getBlobs(DocumentModel doc) { 239 List<Blob> result = new ArrayList<Blob>(); 240 for (Property blobField : getBlobsProperties(doc)) { 241 Blob blob = (Blob) blobField.getValue(); 242 result.add(blob); 243 } 244 return result; 245 } 246 247 public void setExtractorProperties(Set<String> pathProps, Set<String> excludedPathProps, boolean indexBlobs) { 248 pathProperties = pathProps; 249 excludedPathProperties = excludedPathProps; 250 indexAllBinary = indexBlobs; 251 isDefaultConfiguration = (pathProps == null && excludedPathProps == null && Boolean.TRUE.equals(indexBlobs)); 252 } 253 254 private boolean isInterestingBlobProperty(String path, String prefix) { 255 if (isDefaultConfiguration) { 256 return true; 257 } else if (pathProperties != null && matchProperty(prefix, path, pathProperties)) { 258 return true; 259 } else if (excludedPathProperties != null && matchProperty(prefix, path, excludedPathProperties)) { 260 return false; 261 } else if (Boolean.TRUE.equals(indexAllBinary)) { 262 return true; 263 } 264 return false; 265 } 266 267 private boolean matchProperty(String prefix, String fieldPath, Set<String> propPaths) { 268 if (!prefix.equals("")) { 269 prefix += ":"; 270 } 271 String pathToMatch = prefix + fieldPath.substring(1); 272 for (String propPath : propPaths) { 273 if (propPath.startsWith(pathToMatch)) { 274 return true; 275 } 276 } 277 return false; 278 } 279}