001/*
002 * Copyright (c) 2006-2011 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the Eclipse Public License v1.0
006 * which accompanies this distribution, and is available at
007 * http://www.eclipse.org/legal/epl-v10.html
008 *
009 * Contributors:
010 *     Florent Guillaume
011 */
012package org.nuxeo.ecm.core.utils;
013
014import java.util.ArrayList;
015import java.util.Arrays;
016import java.util.HashMap;
017import java.util.List;
018import java.util.Map;
019import java.util.Set;
020
021import org.apache.commons.logging.Log;
022import org.apache.commons.logging.LogFactory;
023import org.nuxeo.ecm.core.api.Blob;
024import org.nuxeo.ecm.core.api.DocumentModel;
025import org.nuxeo.ecm.core.api.model.Property;
026import org.nuxeo.ecm.core.schema.DocumentType;
027import org.nuxeo.ecm.core.schema.SchemaManager;
028import org.nuxeo.ecm.core.schema.TypeConstants;
029import org.nuxeo.ecm.core.schema.types.ComplexType;
030import org.nuxeo.ecm.core.schema.types.Field;
031import org.nuxeo.ecm.core.schema.types.ListType;
032import org.nuxeo.ecm.core.schema.types.Schema;
033import org.nuxeo.ecm.core.schema.types.Type;
034import org.nuxeo.runtime.api.Framework;
035
036/**
037 * Extractor for all the blobs of a document.
038 *
039 * @author Florent Guillaume
040 * @author Benjamin Jalon
041 */
042public class BlobsExtractor {
043
044    protected static final Log log = LogFactory.getLog(BlobsExtractor.class);
045
046    protected final Map<String, Map<String, List<String>>> blobFieldPaths = new HashMap<String, Map<String, List<String>>>();
047
048    protected List<String> docTypeCached = new ArrayList<String>();
049
050    protected SchemaManager schemaManager;
051
052    private Set<String> pathProperties;
053
054    private Set<String> excludedPathProperties;
055
056    private boolean indexAllBinary = false;
057
058    private boolean isDefaultConfiguration = true;
059
060    protected SchemaManager getSchemaManager() {
061        if (schemaManager == null) {
062            schemaManager = Framework.getService(SchemaManager.class);
063        }
064        return schemaManager;
065    }
066
067    /**
068     * Get properties of the given document that contain a blob value. This method uses the cache engine to find these
069     * properties.
070     */
071    public List<Property> getBlobsProperties(DocumentModel doc) {
072
073        List<Property> result = new ArrayList<Property>();
074        for (String schema : getBlobFieldPathForDocumentType(doc.getType()).keySet()) {
075            List<String> pathsList = getBlobFieldPathForDocumentType(doc.getType()).get(schema);
076            for (String path : pathsList) {
077                if (!isInterestingBlobProperty(path, schemaManager.getSchema(schema).getNamespace().prefix)) {
078                    continue;
079                }
080                List<String> pathSplitted = Arrays.asList(path.split("/[*]/"));
081                if (pathSplitted.size() == 0) {
082                    throw new IllegalStateException("Path detected not wellformed: " + pathsList);
083                }
084                Property prop = doc.getProperty(schema + ":" + pathSplitted.get(0));
085
086                if (pathSplitted.size() >= 1) {
087                    List<String> subPath = pathSplitted.subList(1, pathSplitted.size());
088                    getBlobValue(prop, subPath, path, result);
089                }
090            }
091        }
092
093        return result;
094    }
095
096    /**
097     * Get path list of properties that may contain a blob for the given document type.
098     *
099     * @param documentType document type name
100     * @return return the property names that contain blob
101     */
102    public Map<String, List<String>> getBlobFieldPathForDocumentType(String documentType) {
103        DocumentType docType = getSchemaManager().getDocumentType(documentType);
104
105        if (!docTypeCached.contains(documentType)) {
106            Map<String, List<String>> paths = new HashMap<String, List<String>>();
107            blobFieldPaths.put(docType.getName(), paths);
108
109            createCacheForDocumentType(docType);
110        }
111
112        return blobFieldPaths.get(documentType);
113    }
114
115    public void invalidateDocumentTypeCache(String docType) {
116        if (docTypeCached.contains(docType)) {
117            docTypeCached.remove(docType);
118        }
119    }
120
121    public void invalidateCache() {
122        docTypeCached = new ArrayList<String>();
123    }
124
125    protected void createCacheForDocumentType(DocumentType docType) {
126
127        for (Schema schema : docType.getSchemas()) {
128            findInteresting(docType, schema, "", schema);
129        }
130
131        if (!docTypeCached.contains(docType.getName())) {
132            docTypeCached.add(docType.getName());
133        }
134    }
135
136    /**
137     * Analyzes the document's schemas to find which fields and complex types contain blobs. For each blob fields type
138     * found, {@link BlobsExtractor#blobMatched(DocumentType, Schema, String, Field)} is called and for each property
139     * that contains a subProperty containing a Blob,
140     * {@link BlobsExtractor#containsBlob(DocumentType, Schema, String, Field)} is called
141     *
142     * @param schema The parent schema that contains the field
143     * @param ct Current type parsed
144     * @return {@code true} if the passed complex type contains at least one blob field
145     */
146    protected boolean findInteresting(DocumentType docType, Schema schema, String path, ComplexType ct) {
147        boolean interesting = false;
148        for (Field field : ct.getFields()) {
149            Type type = field.getType();
150            if (type.isSimpleType()) {
151                continue; // not binary text
152            } else if (type.isListType()) {
153                Type ftype = ((ListType) type).getField().getType();
154                if (ftype.isComplexType()) {
155                    String blobMatchedPath = path + String.format("/%s/*", field.getName().getLocalName());
156                    if (findInteresting(docType, schema, blobMatchedPath, (ComplexType) ftype)) {
157                        containsBlob(docType, schema, blobMatchedPath, field);
158                        interesting |= true;
159                    }
160                } else {
161                    continue; // not binary text
162                }
163            } else { // complex type
164                ComplexType ctype = (ComplexType) type;
165                if (type.getName().equals(TypeConstants.CONTENT)) {
166                    // CB: Fix for NXP-3847 - do not accumulate field name in
167                    // the path
168                    String blobMatchedPath = path + String.format("/%s", field.getName().getLocalName());
169                    blobMatched(docType, schema, blobMatchedPath, field);
170                    interesting = true;
171                } else {
172                    String blobMatchedPath = path + String.format("/%s", field.getName().getLocalName());
173                    interesting |= findInteresting(docType, schema, blobMatchedPath, ctype);
174                }
175            }
176        }
177        if (interesting) {
178            containsBlob(docType, schema, path, null);
179        }
180        return interesting;
181    }
182
183    /**
184     * Call during the parsing of the schema structure in {@link BlobsExtractor#findInteresting} if field is a Blob
185     * Type. This method stores the path to that Field.
186     *
187     * @param schema The parent schema that contains the field
188     * @param field Field that is a BlobType
189     */
190    protected void blobMatched(DocumentType docType, Schema schema, String path, Field field) {
191        Map<String, List<String>> blobPathsForDocType = blobFieldPaths.get(docType.getName());
192        List<String> pathsList = blobPathsForDocType.get(schema.getName());
193        if (pathsList == null) {
194            pathsList = new ArrayList<String>();
195            blobPathsForDocType.put(schema.getName(), pathsList);
196            blobFieldPaths.put(docType.getName(), blobPathsForDocType);
197        }
198        pathsList.add(path);
199    }
200
201    /**
202     * Called during the parsing of the schema structure in {@link BlobsExtractor#findInteresting} if field contains a
203     * subfield of type Blob. This method does nothing.
204     *
205     * @param schema The parent schema that contains the field
206     * @param field Field that contains a subField of type BlobType
207     */
208    protected void containsBlob(DocumentType docType, Schema schema, String path, Field field) {
209    }
210
211    protected void getBlobValue(Property prop, List<String> subPath, String completePath, List<Property> result) {
212        if (subPath.size() == 0) {
213            if (!(prop.getValue() instanceof Blob)) {
214                log.debug("Path Field not contains a blob value: " + completePath);
215                return;
216            }
217            result.add(prop);
218            return;
219        }
220
221        for (Property childProp : prop.getChildren()) {
222            if ("/*".equals(subPath.get(0))) {
223                log.debug("TODO : BLOB IN A LIST NOT IMPLEMENTED for this path " + completePath);
224            }
225            Property childSubProp = childProp.get(subPath.get(0));
226            getBlobValue(childSubProp, subPath.subList(1, subPath.size()), completePath, result);
227        }
228    }
229
230    /**
231     * Finds all the blobs of the document.
232     * <p>
233     * This method is not thread-safe.
234     *
235     * @param doc the document
236     * @return the list of blobs in the document
237     */
238    public List<Blob> getBlobs(DocumentModel doc) {
239        List<Blob> result = new ArrayList<Blob>();
240        for (Property blobField : getBlobsProperties(doc)) {
241            Blob blob = (Blob) blobField.getValue();
242            result.add(blob);
243        }
244        return result;
245    }
246
247    public void setExtractorProperties(Set<String> pathProps, Set<String> excludedPathProps, boolean indexBlobs) {
248        pathProperties = pathProps;
249        excludedPathProperties = excludedPathProps;
250        indexAllBinary = indexBlobs;
251        isDefaultConfiguration = (pathProps == null && excludedPathProps == null && Boolean.TRUE.equals(indexBlobs));
252    }
253
254    private boolean isInterestingBlobProperty(String path, String prefix) {
255        if (isDefaultConfiguration) {
256            return true;
257        } else if (pathProperties != null && matchProperty(prefix, path, pathProperties)) {
258            return true;
259        } else if (excludedPathProperties != null && matchProperty(prefix, path, excludedPathProperties)) {
260            return false;
261        } else if (Boolean.TRUE.equals(indexAllBinary)) {
262            return true;
263        }
264        return false;
265    }
266
267    private boolean matchProperty(String prefix, String fieldPath, Set<String> propPaths) {
268        if (!prefix.equals("")) {
269            prefix += ":";
270        }
271        String pathToMatch = prefix + fieldPath.substring(1);
272        for (String propPath : propPaths) {
273            if (propPath.startsWith(pathToMatch)) {
274                return true;
275            }
276        }
277        return false;
278    }
279}