001/*
002 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019
020package org.nuxeo.ecm.core.storage;
021
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.LinkedHashSet;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030
031import org.apache.commons.lang.StringUtils;
032import org.apache.commons.logging.Log;
033import org.apache.commons.logging.LogFactory;
034import org.nuxeo.ecm.core.schema.DocumentType;
035import org.nuxeo.ecm.core.schema.FacetNames;
036import org.nuxeo.ecm.core.schema.Namespace;
037import org.nuxeo.ecm.core.schema.SchemaManager;
038import org.nuxeo.ecm.core.schema.TypeConstants;
039import org.nuxeo.ecm.core.schema.types.ComplexType;
040import org.nuxeo.ecm.core.schema.types.Field;
041import org.nuxeo.ecm.core.schema.types.ListType;
042import org.nuxeo.ecm.core.schema.types.Schema;
043import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
044import org.nuxeo.ecm.core.schema.types.Type;
045import org.nuxeo.ecm.core.schema.types.primitives.BinaryType;
046import org.nuxeo.ecm.core.schema.types.primitives.StringType;
047import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor;
048import org.nuxeo.runtime.api.Framework;
049
050/**
051 * Info about the fulltext configuration.
052 */
053public class FulltextConfiguration {
054
055    private static final Log log = LogFactory.getLog(FulltextConfiguration.class);
056
057    public static final String ROOT_TYPE = "Root";
058
059    public static final String PROP_TYPE_STRING = "string";
060
061    public static final String PROP_TYPE_BLOB = "blob";
062
063    public static final String FULLTEXT_DEFAULT_INDEX = "default";
064
065    /** All index names. */
066    public final Set<String> indexNames = new LinkedHashSet<String>();
067
068    /** Indexes holding exactly one field. */
069    public final Map<String, String> fieldToIndexName = new HashMap<String, String>();
070
071    /** Indexes containing all simple properties. */
072    public final Set<String> indexesAllSimple = new HashSet<String>();
073
074    /** Indexes containing all binaries properties. */
075    public final Set<String> indexesAllBinary = new HashSet<String>();
076
077    /** Indexes for each specific simple property path. */
078    public final Map<String, Set<String>> indexesByPropPathSimple = new HashMap<String, Set<String>>();
079
080    /** Indexes for each specific binary property path. */
081    // DBSTransactionState.findDirtyDocuments expects this to contain unprefixed versions for schemas
082    // without prefix, like "content/data".
083    public final Map<String, Set<String>> indexesByPropPathBinary = new HashMap<String, Set<String>>();
084
085    /** Indexes for each specific simple property path excluded. */
086    public final Map<String, Set<String>> indexesByPropPathExcludedSimple = new HashMap<String, Set<String>>();
087
088    /** Indexes for each specific binary property path excluded. */
089    public final Map<String, Set<String>> indexesByPropPathExcludedBinary = new HashMap<String, Set<String>>();
090
091    // inverse of above maps
092    public final Map<String, Set<String>> propPathsByIndexSimple = new HashMap<String, Set<String>>();
093
094    public final Map<String, Set<String>> propPathsByIndexBinary = new HashMap<String, Set<String>>();
095
096    public final Map<String, Set<String>> propPathsExcludedByIndexSimple = new HashMap<String, Set<String>>();
097
098    public final Map<String, Set<String>> propPathsExcludedByIndexBinary = new HashMap<String, Set<String>>();
099
100    public final Set<String> excludedTypes = new HashSet<String>();
101
102    public final Set<String> includedTypes = new HashSet<String>();
103
104    public final boolean fulltextSearchDisabled;
105
106    public FulltextConfiguration(FulltextDescriptor fulltextDescriptor) {
107        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
108
109        fulltextSearchDisabled = fulltextDescriptor.getFulltextSearchDisabled();
110
111        // find what paths we mean by "all"
112        // for schemas without prefix, we add both the unprefixed and the prefixed version
113        Set<String> allSimplePaths = new HashSet<>();
114        Set<String> allBinaryPaths = new HashSet<>();
115        PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths);
116        for (Schema schema : schemaManager.getSchemas()) {
117            pathsFinder.walkSchema(schema);
118        }
119
120        List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes();
121        if (descs == null) {
122            descs = new ArrayList<FulltextIndexDescriptor>(1);
123        }
124        if (descs.isEmpty()) {
125            descs.add(new FulltextIndexDescriptor());
126        }
127        for (FulltextIndexDescriptor desc : descs) {
128            String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name;
129            indexNames.add(name);
130            if (desc.fields == null) {
131                desc.fields = new HashSet<String>();
132            }
133            if (desc.excludeFields == null) {
134                desc.excludeFields = new HashSet<String>();
135            }
136            if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) {
137                fieldToIndexName.put(desc.fields.iterator().next(), name);
138            }
139
140            if (desc.fieldType != null) {
141                if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_STRING)) {
142                    indexesAllSimple.add(name);
143                } else if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_BLOB)) {
144                    indexesAllBinary.add(name);
145                } else {
146                    log.error("Ignoring unknow repository fulltext configuration fieldType: " + desc.fieldType);
147                }
148
149            }
150            if (desc.fields.isEmpty() && desc.fieldType == null) {
151                // no fields specified and no field type -> all of them
152                indexesAllSimple.add(name);
153                indexesAllBinary.add(name);
154            }
155
156            if (indexesAllSimple.contains(name)) {
157                propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths));
158                for (String path : allSimplePaths) {
159                    indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name);
160                }
161            }
162            if (indexesAllBinary.contains(name)) {
163                propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths));
164                for (String path : allBinaryPaths) {
165                    indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name);
166                }
167            }
168
169            if (fulltextDescriptor.getFulltextExcludedTypes() != null) {
170                excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes());
171            }
172            if (fulltextDescriptor.getFulltextIncludedTypes() != null) {
173                includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes());
174            }
175
176            for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) {
177                boolean include = fields == desc.fields;
178                for (String path : fields) {
179                    Field field = schemaManager.getField(path);
180                    if (field == null && !path.contains(":")) {
181                        // check without prefix
182                        // TODO precompute this in SchemaManagerImpl
183                        int slash = path.indexOf('/');
184                        String first = slash == -1 ? path : path.substring(0, slash);
185                        for (Schema schema : schemaManager.getSchemas()) {
186                            if (!schema.getNamespace().hasPrefix()) {
187                                // schema without prefix, try it
188                                if (schema.getField(first) != null) {
189                                    path = schema.getName() + ":" + path;
190                                    field = schemaManager.getField(path);
191                                    break;
192                                }
193                            }
194                        }
195                    }
196                    if (field == null) {
197                        log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path,
198                                name));
199                        continue;
200                    }
201                    Type baseType = getBaseType(field.getType());
202                    Map<String, Set<String>> indexesByPropPath;
203                    Map<String, Set<String>> propPathsByIndex;
204                    if (baseType instanceof ComplexType && TypeConstants.isContentType(baseType)) {
205                        baseType = ((ComplexType) baseType).getField(BaseDocument.BLOB_DATA).getType(); // BinaryType
206                    }
207                    if (baseType instanceof StringType) {
208                        indexesByPropPath = include ? indexesByPropPathSimple : indexesByPropPathExcludedSimple;
209                        propPathsByIndex = include ? propPathsByIndexSimple : propPathsExcludedByIndexSimple;
210                    } else if (baseType instanceof BinaryType) {
211                        indexesByPropPath = include ? indexesByPropPathBinary : indexesByPropPathExcludedBinary;
212                        propPathsByIndex = include ? propPathsByIndexBinary : propPathsExcludedByIndexBinary;
213                        if (!path.endsWith("/" + BaseDocument.BLOB_DATA)) {
214                            path += "/" + BaseDocument.BLOB_DATA;
215                            // needed for indexesByPropPathBinary as DBSTransactionState.findDirtyDocuments expects this
216                            // to be in the same format as what DirtyPathsFinder expects, like "content/data".
217                        }
218                    } else {
219                        log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s",
220                                path, field.getType(), name));
221                        continue;
222                    }
223                    indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name);
224                    propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path);
225                }
226            }
227        }
228
229        // Add document types with the NotFulltextIndexable facet
230        for (DocumentType documentType : schemaManager.getDocumentTypes()) {
231            if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) {
232                excludedTypes.add(documentType.getName());
233            }
234        }
235    }
236
237    protected Type getBaseType(Type type) {
238        if (type instanceof SimpleTypeImpl) {
239            return getBaseType(type.getSuperType());
240        }
241        if (type instanceof ListType) {
242            return getBaseType(((ListType) type).getFieldType());
243        }
244        return type;
245    }
246
247    /**
248     * Accumulates paths for string and binary properties in schemas passed to {@link #walkSchema}.
249     * <p>
250     * For schemas without prefix the path is accumulated both with and without prefix.
251     * <p>
252     * For binaries the path includes the final "/data" part.
253     */
254    // TODO precompute this in SchemaManagerImpl
255    public static class PathsFinder {
256
257        protected final Set<String> simplePaths;
258
259        protected final Set<String> binaryPaths;
260
261        public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) {
262            this.simplePaths = simplePaths;
263            this.binaryPaths = binaryPaths;
264        }
265
266        public void walkSchema(Schema schema) {
267            String addPrefix = schema.getNamespace().hasPrefix() ? null : schema.getName();
268            walkComplexType(schema, null, addPrefix);
269        }
270
271        protected void walkComplexType(ComplexType complexType, String path, String addPrefix) {
272            for (Field field : complexType.getFields()) {
273                String name = field.getName().getPrefixedName();
274                String fieldPath = path == null ? name : path + '/' + name;
275                walkType(field.getType(), fieldPath, addPrefix);
276            }
277        }
278
279        protected void walkType(Type type, String path, String addPrefix) {
280            if (type.isSimpleType()) {
281                walkSimpleType(type, path, addPrefix);
282            } else if (type.isListType()) {
283                String listPath = path + "/*";
284                Type ftype = ((ListType) type).getField().getType();
285                if (ftype.isComplexType()) {
286                    // complex list
287                    walkComplexType((ComplexType) ftype, listPath, addPrefix);
288                } else {
289                    // array
290                    walkSimpleType(ftype, listPath, addPrefix);
291                }
292            } else {
293                // complex type
294                ComplexType ctype = (ComplexType) type;
295                walkComplexType(ctype, path, addPrefix);
296            }
297        }
298
299        protected void walkSimpleType(Type type, String path, String addPrefix) {
300            while (type instanceof SimpleTypeImpl) {
301                // type with constraint
302                type = type.getSuperType();
303            }
304            if (type instanceof StringType) {
305                simplePaths.add(path);
306                if (addPrefix != null) {
307                    simplePaths.add(addPrefix + ":" + path);
308                }
309            } else if (type instanceof BinaryType) {
310                binaryPaths.add(path);
311                if (addPrefix != null) {
312                    binaryPaths.add(addPrefix + ":" + path);
313                }
314            }
315        }
316    }
317
318    public boolean isFulltextIndexable(String typeName) {
319        if (ROOT_TYPE.equals(typeName)) {
320            return false;
321        }
322        if (includedTypes.contains(typeName) || (includedTypes.isEmpty() && !excludedTypes.contains(typeName))) {
323            return true;
324        }
325        return false;
326    }
327
328}