001/*
002 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019
020package org.nuxeo.ecm.core.storage;
021
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.LinkedHashSet;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030
031import org.apache.commons.lang.StringUtils;
032import org.apache.commons.logging.Log;
033import org.apache.commons.logging.LogFactory;
034import org.nuxeo.ecm.core.schema.DocumentType;
035import org.nuxeo.ecm.core.schema.FacetNames;
036import org.nuxeo.ecm.core.schema.Namespace;
037import org.nuxeo.ecm.core.schema.SchemaManager;
038import org.nuxeo.ecm.core.schema.TypeConstants;
039import org.nuxeo.ecm.core.schema.types.ComplexType;
040import org.nuxeo.ecm.core.schema.types.Field;
041import org.nuxeo.ecm.core.schema.types.ListType;
042import org.nuxeo.ecm.core.schema.types.Schema;
043import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
044import org.nuxeo.ecm.core.schema.types.Type;
045import org.nuxeo.ecm.core.schema.types.primitives.BinaryType;
046import org.nuxeo.ecm.core.schema.types.primitives.StringType;
047import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor;
048import org.nuxeo.runtime.api.Framework;
049
050/**
051 * Info about the fulltext configuration.
052 */
053public class FulltextConfiguration {
054
055    private static final Log log = LogFactory.getLog(FulltextConfiguration.class);
056
057    public static final String ROOT_TYPE = "Root";
058
059    public static final String PROP_TYPE_STRING = "string";
060
061    public static final String PROP_TYPE_BLOB = "blob";
062
063    public static final String FULLTEXT_DEFAULT_INDEX = "default";
064
065    /** All index names. */
066    public final Set<String> indexNames = new LinkedHashSet<String>();
067
068    /** Indexes holding exactly one field. */
069    public final Map<String, String> fieldToIndexName = new HashMap<String, String>();
070
071    /** Indexes containing all simple properties. */
072    public final Set<String> indexesAllSimple = new HashSet<String>();
073
074    /** Indexes containing all binaries properties. */
075    public final Set<String> indexesAllBinary = new HashSet<String>();
076
077    /** Indexes for each specific simple property path. */
078    public final Map<String, Set<String>> indexesByPropPathSimple = new HashMap<String, Set<String>>();
079
080    /** Indexes for each specific binary property path. */
081    // DBSTransactionState.findDirtyDocuments expects this to contain unprefixed versions for schemas
082    // without prefix, like "content/data".
083    public final Map<String, Set<String>> indexesByPropPathBinary = new HashMap<String, Set<String>>();
084
085    /** Indexes for each specific simple property path excluded. */
086    public final Map<String, Set<String>> indexesByPropPathExcludedSimple = new HashMap<String, Set<String>>();
087
088    /** Indexes for each specific binary property path excluded. */
089    public final Map<String, Set<String>> indexesByPropPathExcludedBinary = new HashMap<String, Set<String>>();
090
091    // inverse of above maps
092    public final Map<String, Set<String>> propPathsByIndexSimple = new HashMap<String, Set<String>>();
093
094    public final Map<String, Set<String>> propPathsByIndexBinary = new HashMap<String, Set<String>>();
095
096    public final Map<String, Set<String>> propPathsExcludedByIndexSimple = new HashMap<String, Set<String>>();
097
098    public final Map<String, Set<String>> propPathsExcludedByIndexBinary = new HashMap<String, Set<String>>();
099
100    public final Set<String> excludedTypes = new HashSet<String>();
101
102    public final Set<String> includedTypes = new HashSet<String>();
103
104    public final boolean fulltextSearchDisabled;
105
106    public final int fulltextFieldSizeLimit;
107
108    public FulltextConfiguration(FulltextDescriptor fulltextDescriptor) {
109        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
110
111        fulltextFieldSizeLimit = fulltextDescriptor.getFulltextFieldSizeLimit();
112
113        fulltextSearchDisabled = fulltextDescriptor.getFulltextSearchDisabled();
114
115        // find what paths we mean by "all"
116        // for schemas without prefix, we add both the unprefixed and the prefixed version
117        Set<String> allSimplePaths = new HashSet<>();
118        Set<String> allBinaryPaths = new HashSet<>();
119        PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths);
120        for (Schema schema : schemaManager.getSchemas()) {
121            pathsFinder.walkSchema(schema);
122        }
123
124        List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes();
125        if (descs == null) {
126            descs = new ArrayList<FulltextIndexDescriptor>(1);
127        }
128        if (descs.isEmpty()) {
129            descs.add(new FulltextIndexDescriptor());
130        }
131        for (FulltextIndexDescriptor desc : descs) {
132            String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name;
133            indexNames.add(name);
134            if (desc.fields == null) {
135                desc.fields = new HashSet<String>();
136            }
137            if (desc.excludeFields == null) {
138                desc.excludeFields = new HashSet<String>();
139            }
140            if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) {
141                fieldToIndexName.put(desc.fields.iterator().next(), name);
142            }
143
144            if (desc.fieldType != null) {
145                if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_STRING)) {
146                    indexesAllSimple.add(name);
147                } else if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_BLOB)) {
148                    indexesAllBinary.add(name);
149                } else {
150                    log.error("Ignoring unknow repository fulltext configuration fieldType: " + desc.fieldType);
151                }
152
153            }
154            if (desc.fields.isEmpty() && desc.fieldType == null) {
155                // no fields specified and no field type -> all of them
156                indexesAllSimple.add(name);
157                indexesAllBinary.add(name);
158            }
159
160            if (indexesAllSimple.contains(name)) {
161                propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths));
162                for (String path : allSimplePaths) {
163                    indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name);
164                }
165            }
166            if (indexesAllBinary.contains(name)) {
167                propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths));
168                for (String path : allBinaryPaths) {
169                    indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name);
170                }
171            }
172
173            if (fulltextDescriptor.getFulltextExcludedTypes() != null) {
174                excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes());
175            }
176            if (fulltextDescriptor.getFulltextIncludedTypes() != null) {
177                includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes());
178            }
179
180            for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) {
181                boolean include = fields == desc.fields;
182                for (String path : fields) {
183                    Field field = schemaManager.getField(path);
184                    if (field == null && !path.contains(":")) {
185                        // check without prefix
186                        // TODO precompute this in SchemaManagerImpl
187                        int slash = path.indexOf('/');
188                        String first = slash == -1 ? path : path.substring(0, slash);
189                        for (Schema schema : schemaManager.getSchemas()) {
190                            if (!schema.getNamespace().hasPrefix()) {
191                                // schema without prefix, try it
192                                if (schema.getField(first) != null) {
193                                    path = schema.getName() + ":" + path;
194                                    field = schemaManager.getField(path);
195                                    break;
196                                }
197                            }
198                        }
199                    }
200                    if (field == null) {
201                        log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path,
202                                name));
203                        continue;
204                    }
205                    Type baseType = getBaseType(field.getType());
206                    Map<String, Set<String>> indexesByPropPath;
207                    Map<String, Set<String>> propPathsByIndex;
208                    if (baseType instanceof ComplexType && TypeConstants.isContentType(baseType)) {
209                        baseType = ((ComplexType) baseType).getField(BaseDocument.BLOB_DATA).getType(); // BinaryType
210                    }
211                    if (baseType instanceof StringType) {
212                        indexesByPropPath = include ? indexesByPropPathSimple : indexesByPropPathExcludedSimple;
213                        propPathsByIndex = include ? propPathsByIndexSimple : propPathsExcludedByIndexSimple;
214                    } else if (baseType instanceof BinaryType) {
215                        indexesByPropPath = include ? indexesByPropPathBinary : indexesByPropPathExcludedBinary;
216                        propPathsByIndex = include ? propPathsByIndexBinary : propPathsExcludedByIndexBinary;
217                        if (!path.endsWith("/" + BaseDocument.BLOB_DATA)) {
218                            path += "/" + BaseDocument.BLOB_DATA;
219                            // needed for indexesByPropPathBinary as DBSTransactionState.findDirtyDocuments expects this
220                            // to be in the same format as what DirtyPathsFinder expects, like "content/data".
221                        }
222                    } else {
223                        log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s",
224                                path, field.getType(), name));
225                        continue;
226                    }
227                    indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name);
228                    propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path);
229                }
230            }
231        }
232
233        // Add document types with the NotFulltextIndexable facet
234        for (DocumentType documentType : schemaManager.getDocumentTypes()) {
235            if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) {
236                excludedTypes.add(documentType.getName());
237            }
238        }
239    }
240
241    protected Type getBaseType(Type type) {
242        if (type instanceof SimpleTypeImpl) {
243            return getBaseType(type.getSuperType());
244        }
245        if (type instanceof ListType) {
246            return getBaseType(((ListType) type).getFieldType());
247        }
248        return type;
249    }
250
251    /**
252     * Accumulates paths for string and binary properties in schemas passed to {@link #walkSchema}.
253     * <p>
254     * For schemas without prefix the path is accumulated both with and without prefix.
255     * <p>
256     * For binaries the path includes the final "/data" part.
257     */
258    // TODO precompute this in SchemaManagerImpl
259    public static class PathsFinder {
260
261        protected final Set<String> simplePaths;
262
263        protected final Set<String> binaryPaths;
264
265        public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) {
266            this.simplePaths = simplePaths;
267            this.binaryPaths = binaryPaths;
268        }
269
270        public void walkSchema(Schema schema) {
271            String addPrefix = schema.getNamespace().hasPrefix() ? null : schema.getName();
272            walkComplexType(schema, null, addPrefix);
273        }
274
275        protected void walkComplexType(ComplexType complexType, String path, String addPrefix) {
276            for (Field field : complexType.getFields()) {
277                String name = field.getName().getPrefixedName();
278                String fieldPath = path == null ? name : path + '/' + name;
279                walkType(field.getType(), fieldPath, addPrefix);
280            }
281        }
282
283        protected void walkType(Type type, String path, String addPrefix) {
284            if (type.isSimpleType()) {
285                walkSimpleType(type, path, addPrefix);
286            } else if (type.isListType()) {
287                String listPath = path + "/*";
288                Type ftype = ((ListType) type).getField().getType();
289                if (ftype.isComplexType()) {
290                    // complex list
291                    walkComplexType((ComplexType) ftype, listPath, addPrefix);
292                } else {
293                    // array
294                    walkSimpleType(ftype, listPath, addPrefix);
295                }
296            } else {
297                // complex type
298                ComplexType ctype = (ComplexType) type;
299                walkComplexType(ctype, path, addPrefix);
300            }
301        }
302
303        protected void walkSimpleType(Type type, String path, String addPrefix) {
304            while (type instanceof SimpleTypeImpl) {
305                // type with constraint
306                type = type.getSuperType();
307            }
308            if (type instanceof StringType) {
309                simplePaths.add(path);
310                if (addPrefix != null) {
311                    simplePaths.add(addPrefix + ":" + path);
312                }
313            } else if (type instanceof BinaryType) {
314                binaryPaths.add(path);
315                if (addPrefix != null) {
316                    binaryPaths.add(addPrefix + ":" + path);
317                }
318            }
319        }
320    }
321
322    public boolean isFulltextIndexable(String typeName) {
323        if (ROOT_TYPE.equals(typeName)) {
324            return false;
325        }
326        if (includedTypes.contains(typeName) || (includedTypes.isEmpty() && !excludedTypes.contains(typeName))) {
327            return true;
328        }
329        return false;
330    }
331
332}