001/* 002 * (C) Copyright 2006-2011 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019package org.nuxeo.ecm.core.utils; 020 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Map; 026import java.util.Set; 027 028import org.apache.commons.logging.Log; 029import org.apache.commons.logging.LogFactory; 030import org.nuxeo.ecm.core.api.Blob; 031import org.nuxeo.ecm.core.api.DocumentModel; 032import org.nuxeo.ecm.core.api.model.Property; 033import org.nuxeo.ecm.core.schema.DocumentType; 034import org.nuxeo.ecm.core.schema.SchemaManager; 035import org.nuxeo.ecm.core.schema.TypeConstants; 036import org.nuxeo.ecm.core.schema.types.ComplexType; 037import org.nuxeo.ecm.core.schema.types.Field; 038import org.nuxeo.ecm.core.schema.types.ListType; 039import org.nuxeo.ecm.core.schema.types.Schema; 040import org.nuxeo.ecm.core.schema.types.Type; 041import org.nuxeo.runtime.api.Framework; 042 043/** 044 * Extractor for all the blobs of a document. 045 * 046 * @author Florent Guillaume 047 * @author Benjamin Jalon 048 */ 049public class BlobsExtractor { 050 051 protected static final Log log = LogFactory.getLog(BlobsExtractor.class); 052 053 protected final Map<String, Map<String, List<String>>> blobFieldPaths = new HashMap<String, Map<String, List<String>>>(); 054 055 protected List<String> docTypeCached = new ArrayList<String>(); 056 057 protected SchemaManager schemaManager; 058 059 private Set<String> pathProperties; 060 061 private Set<String> excludedPathProperties; 062 063 private boolean indexAllBinary = false; 064 065 private boolean isDefaultConfiguration = true; 066 067 protected SchemaManager getSchemaManager() { 068 if (schemaManager == null) { 069 schemaManager = Framework.getService(SchemaManager.class); 070 } 071 return schemaManager; 072 } 073 074 /** 075 * Get properties of the given document that contain a blob value. This method uses the cache engine to find these 076 * properties. 077 */ 078 public List<Property> getBlobsProperties(DocumentModel doc) { 079 080 List<Property> result = new ArrayList<Property>(); 081 for (String schema : getBlobFieldPathForDocumentType(doc.getType()).keySet()) { 082 List<String> pathsList = getBlobFieldPathForDocumentType(doc.getType()).get(schema); 083 for (String path : pathsList) { 084 if (!isInterestingBlobProperty(path, schemaManager.getSchema(schema).getNamespace().prefix)) { 085 continue; 086 } 087 List<String> pathSplitted = Arrays.asList(path.split("/[*]/")); 088 if (pathSplitted.size() == 0) { 089 throw new IllegalStateException("Path detected not wellformed: " + pathsList); 090 } 091 Property prop = doc.getProperty(schema + ":" + pathSplitted.get(0)); 092 093 if (pathSplitted.size() >= 1) { 094 List<String> subPath = pathSplitted.subList(1, pathSplitted.size()); 095 getBlobValue(prop, subPath, path, result); 096 } 097 } 098 } 099 100 return result; 101 } 102 103 /** 104 * Get path list of properties that may contain a blob for the given document type. 105 * 106 * @param documentType document type name 107 * @return return the property names that contain blob 108 */ 109 public Map<String, List<String>> getBlobFieldPathForDocumentType(String documentType) { 110 DocumentType docType = getSchemaManager().getDocumentType(documentType); 111 112 if (!docTypeCached.contains(documentType)) { 113 Map<String, List<String>> paths = new HashMap<String, List<String>>(); 114 blobFieldPaths.put(docType.getName(), paths); 115 116 createCacheForDocumentType(docType); 117 } 118 119 return blobFieldPaths.get(documentType); 120 } 121 122 public void invalidateDocumentTypeCache(String docType) { 123 if (docTypeCached.contains(docType)) { 124 docTypeCached.remove(docType); 125 } 126 } 127 128 public void invalidateCache() { 129 docTypeCached = new ArrayList<String>(); 130 } 131 132 protected void createCacheForDocumentType(DocumentType docType) { 133 134 for (Schema schema : docType.getSchemas()) { 135 findInteresting(docType, schema, "", schema); 136 } 137 138 if (!docTypeCached.contains(docType.getName())) { 139 docTypeCached.add(docType.getName()); 140 } 141 } 142 143 /** 144 * Analyzes the document's schemas to find which fields and complex types contain blobs. For each blob fields type 145 * found, {@link BlobsExtractor#blobMatched(DocumentType, Schema, String, Field)} is called and for each property 146 * that contains a subProperty containing a Blob, 147 * {@link BlobsExtractor#containsBlob(DocumentType, Schema, String, Field)} is called 148 * 149 * @param schema The parent schema that contains the field 150 * @param ct Current type parsed 151 * @return {@code true} if the passed complex type contains at least one blob field 152 */ 153 protected boolean findInteresting(DocumentType docType, Schema schema, String path, ComplexType ct) { 154 boolean interesting = false; 155 for (Field field : ct.getFields()) { 156 Type type = field.getType(); 157 if (type.isSimpleType()) { 158 continue; // not binary text 159 } else if (type.isListType()) { 160 Type ftype = ((ListType) type).getField().getType(); 161 if (ftype.isComplexType()) { 162 String blobMatchedPath = path + String.format("/%s/*", field.getName().getLocalName()); 163 if (findInteresting(docType, schema, blobMatchedPath, (ComplexType) ftype)) { 164 containsBlob(docType, schema, blobMatchedPath, field); 165 interesting |= true; 166 } 167 } else { 168 continue; // not binary text 169 } 170 } else { // complex type 171 ComplexType ctype = (ComplexType) type; 172 if (type.getName().equals(TypeConstants.CONTENT)) { 173 // CB: Fix for NXP-3847 - do not accumulate field name in 174 // the path 175 String blobMatchedPath = path + String.format("/%s", field.getName().getLocalName()); 176 blobMatched(docType, schema, blobMatchedPath, field); 177 interesting = true; 178 } else { 179 String blobMatchedPath = path + String.format("/%s", field.getName().getLocalName()); 180 interesting |= findInteresting(docType, schema, blobMatchedPath, ctype); 181 } 182 } 183 } 184 if (interesting) { 185 containsBlob(docType, schema, path, null); 186 } 187 return interesting; 188 } 189 190 /** 191 * Call during the parsing of the schema structure in {@link BlobsExtractor#findInteresting} if field is a Blob 192 * Type. This method stores the path to that Field. 193 * 194 * @param schema The parent schema that contains the field 195 * @param field Field that is a BlobType 196 */ 197 protected void blobMatched(DocumentType docType, Schema schema, String path, Field field) { 198 Map<String, List<String>> blobPathsForDocType = blobFieldPaths.get(docType.getName()); 199 List<String> pathsList = blobPathsForDocType.get(schema.getName()); 200 if (pathsList == null) { 201 pathsList = new ArrayList<String>(); 202 blobPathsForDocType.put(schema.getName(), pathsList); 203 blobFieldPaths.put(docType.getName(), blobPathsForDocType); 204 } 205 pathsList.add(path); 206 } 207 208 /** 209 * Called during the parsing of the schema structure in {@link BlobsExtractor#findInteresting} if field contains a 210 * subfield of type Blob. This method does nothing. 211 * 212 * @param schema The parent schema that contains the field 213 * @param field Field that contains a subField of type BlobType 214 */ 215 protected void containsBlob(DocumentType docType, Schema schema, String path, Field field) { 216 } 217 218 protected void getBlobValue(Property prop, List<String> subPath, String completePath, List<Property> result) { 219 if (subPath.size() == 0) { 220 if (!(prop.getValue() instanceof Blob)) { 221 log.debug("Path Field not contains a blob value: " + completePath); 222 return; 223 } 224 result.add(prop); 225 return; 226 } 227 228 for (Property childProp : prop.getChildren()) { 229 if ("/*".equals(subPath.get(0))) { 230 log.debug("TODO : BLOB IN A LIST NOT IMPLEMENTED for this path " + completePath); 231 } 232 Property childSubProp = childProp.get(subPath.get(0)); 233 getBlobValue(childSubProp, subPath.subList(1, subPath.size()), completePath, result); 234 } 235 } 236 237 /** 238 * Finds all the blobs of the document. 239 * <p> 240 * This method is not thread-safe. 241 * 242 * @param doc the document 243 * @return the list of blobs in the document 244 */ 245 public List<Blob> getBlobs(DocumentModel doc) { 246 List<Blob> result = new ArrayList<Blob>(); 247 for (Property blobField : getBlobsProperties(doc)) { 248 Blob blob = (Blob) blobField.getValue(); 249 result.add(blob); 250 } 251 return result; 252 } 253 254 public void setExtractorProperties(Set<String> pathProps, Set<String> excludedPathProps, boolean indexBlobs) { 255 pathProperties = pathProps; 256 excludedPathProperties = excludedPathProps; 257 indexAllBinary = indexBlobs; 258 isDefaultConfiguration = (pathProps == null && excludedPathProps == null && Boolean.TRUE.equals(indexBlobs)); 259 } 260 261 private boolean isInterestingBlobProperty(String path, String prefix) { 262 if (isDefaultConfiguration) { 263 return true; 264 } else if (pathProperties != null && matchProperty(prefix, path, pathProperties)) { 265 return true; 266 } else if (excludedPathProperties != null && matchProperty(prefix, path, excludedPathProperties)) { 267 return false; 268 } else if (Boolean.TRUE.equals(indexAllBinary)) { 269 return true; 270 } 271 return false; 272 } 273 274 private boolean matchProperty(String prefix, String fieldPath, Set<String> propPaths) { 275 if (!prefix.equals("")) { 276 prefix += ":"; 277 } 278 String pathToMatch = prefix + fieldPath.substring(1); 279 for (String propPath : propPaths) { 280 if (propPath.startsWith(pathToMatch)) { 281 return true; 282 } 283 } 284 return false; 285 } 286}