001/*
002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019
020package org.nuxeo.ecm.core.storage;
021
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.LinkedHashSet;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030
031import org.apache.commons.logging.Log;
032import org.apache.commons.logging.LogFactory;
033import org.nuxeo.ecm.core.schema.DocumentType;
034import org.nuxeo.ecm.core.schema.FacetNames;
035import org.nuxeo.ecm.core.schema.SchemaManager;
036import org.nuxeo.ecm.core.schema.TypeConstants;
037import org.nuxeo.ecm.core.schema.types.ComplexType;
038import org.nuxeo.ecm.core.schema.types.Field;
039import org.nuxeo.ecm.core.schema.types.ListType;
040import org.nuxeo.ecm.core.schema.types.Schema;
041import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
042import org.nuxeo.ecm.core.schema.types.Type;
043import org.nuxeo.ecm.core.schema.types.primitives.BinaryType;
044import org.nuxeo.ecm.core.schema.types.primitives.StringType;
045import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor;
046import org.nuxeo.runtime.api.Framework;
047
048/**
049 * Info about the fulltext configuration.
050 */
051public class FulltextConfiguration {
052
053    private static final Log log = LogFactory.getLog(FulltextConfiguration.class);
054
055    public static final String ROOT_TYPE = "Root";
056
057    public static final String PROP_TYPE_STRING = "string";
058
059    public static final String PROP_TYPE_BLOB = "blob";
060
061    public static final String FULLTEXT_DEFAULT_INDEX = "default";
062
063    /** All index names. */
064    public final Set<String> indexNames = new LinkedHashSet<>();
065
066    /** Indexes holding exactly one field. */
067    public final Map<String, String> fieldToIndexName = new HashMap<>();
068
069    /** Indexes containing all simple properties. */
070    public final Set<String> indexesAllSimple = new HashSet<>();
071
072    /** Indexes containing all binaries properties. */
073    public final Set<String> indexesAllBinary = new HashSet<>();
074
075    /** Indexes for each specific simple property path. */
076    public final Map<String, Set<String>> indexesByPropPathSimple = new HashMap<>();
077
078    /** Indexes for each specific binary property path. */
079    // DBSTransactionState.findDirtyDocuments expects this to contain unprefixed versions for schemas
080    // without prefix, like "content/data".
081    public final Map<String, Set<String>> indexesByPropPathBinary = new HashMap<>();
082
083    /** Indexes for each specific simple property path excluded. */
084    public final Map<String, Set<String>> indexesByPropPathExcludedSimple = new HashMap<>();
085
086    /** Indexes for each specific binary property path excluded. */
087    public final Map<String, Set<String>> indexesByPropPathExcludedBinary = new HashMap<>();
088
089    // inverse of above maps
090    public final Map<String, Set<String>> propPathsByIndexSimple = new HashMap<>();
091
092    public final Map<String, Set<String>> propPathsByIndexBinary = new HashMap<>();
093
094    public final Map<String, Set<String>> propPathsExcludedByIndexSimple = new HashMap<>();
095
096    public final Map<String, Set<String>> propPathsExcludedByIndexBinary = new HashMap<>();
097
098    public final Set<String> excludedTypes = new HashSet<>();
099
100    public final Set<String> includedTypes = new HashSet<>();
101
102    public final boolean fulltextSearchDisabled;
103
104    public final int fulltextFieldSizeLimit;
105
106    public FulltextConfiguration(FulltextDescriptor fulltextDescriptor) {
107        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
108
109        fulltextFieldSizeLimit = fulltextDescriptor.getFulltextFieldSizeLimit();
110
111        fulltextSearchDisabled = fulltextDescriptor.getFulltextSearchDisabled();
112
113        // find what paths we mean by "all"
114        // for schemas without prefix, we add both the unprefixed and the prefixed version
115        Set<String> allSimplePaths = new HashSet<>();
116        Set<String> allBinaryPaths = new HashSet<>();
117        PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths);
118        for (Schema schema : schemaManager.getSchemas()) {
119            pathsFinder.walkSchema(schema);
120        }
121
122        List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes();
123        if (descs == null) {
124            descs = new ArrayList<>(1);
125        }
126        if (descs.isEmpty()) {
127            descs.add(new FulltextIndexDescriptor());
128        }
129        for (FulltextIndexDescriptor desc : descs) {
130            String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name;
131            indexNames.add(name);
132            if (desc.fields == null) {
133                desc.fields = new HashSet<>();
134            }
135            if (desc.excludeFields == null) {
136                desc.excludeFields = new HashSet<>();
137            }
138            if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) {
139                fieldToIndexName.put(desc.fields.iterator().next(), name);
140            }
141
142            if (desc.fieldType != null) {
143                if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_STRING)) {
144                    indexesAllSimple.add(name);
145                } else if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_BLOB)) {
146                    indexesAllBinary.add(name);
147                } else {
148                    log.error("Ignoring unknow repository fulltext configuration fieldType: " + desc.fieldType);
149                }
150
151            }
152            if (desc.fields.isEmpty() && desc.fieldType == null) {
153                // no fields specified and no field type -> all of them
154                indexesAllSimple.add(name);
155                indexesAllBinary.add(name);
156            }
157
158            if (indexesAllSimple.contains(name)) {
159                propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths));
160                for (String path : allSimplePaths) {
161                    indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name);
162                }
163            }
164            if (indexesAllBinary.contains(name)) {
165                propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths));
166                for (String path : allBinaryPaths) {
167                    indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name);
168                }
169            }
170
171            if (fulltextDescriptor.getFulltextExcludedTypes() != null) {
172                excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes());
173            }
174            if (fulltextDescriptor.getFulltextIncludedTypes() != null) {
175                includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes());
176            }
177
178            for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) {
179                boolean include = fields == desc.fields;
180                for (String path : fields) {
181                    Field field = schemaManager.getField(path);
182                    if (field == null && !path.contains(":")) {
183                        // check without prefix
184                        // TODO precompute this in SchemaManagerImpl
185                        int slash = path.indexOf('/');
186                        String first = slash == -1 ? path : path.substring(0, slash);
187                        for (Schema schema : schemaManager.getSchemas()) {
188                            if (!schema.getNamespace().hasPrefix()) {
189                                // schema without prefix, try it
190                                if (schema.getField(first) != null) {
191                                    path = schema.getName() + ":" + path;
192                                    field = schemaManager.getField(path);
193                                    break;
194                                }
195                            }
196                        }
197                    }
198                    if (field == null) {
199                        log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path,
200                                name));
201                        continue;
202                    }
203                    Type baseType = getBaseType(field.getType());
204                    Map<String, Set<String>> indexesByPropPath;
205                    Map<String, Set<String>> propPathsByIndex;
206                    if (baseType instanceof ComplexType && TypeConstants.isContentType(baseType)) {
207                        baseType = ((ComplexType) baseType).getField(BaseDocument.BLOB_DATA).getType(); // BinaryType
208                    }
209                    if (baseType instanceof StringType) {
210                        indexesByPropPath = include ? indexesByPropPathSimple : indexesByPropPathExcludedSimple;
211                        propPathsByIndex = include ? propPathsByIndexSimple : propPathsExcludedByIndexSimple;
212                    } else if (baseType instanceof BinaryType) {
213                        indexesByPropPath = include ? indexesByPropPathBinary : indexesByPropPathExcludedBinary;
214                        propPathsByIndex = include ? propPathsByIndexBinary : propPathsExcludedByIndexBinary;
215                        if (!path.endsWith("/" + BaseDocument.BLOB_DATA)) {
216                            path += "/" + BaseDocument.BLOB_DATA;
217                            // needed for indexesByPropPathBinary as DBSTransactionState.findDirtyDocuments expects this
218                            // to be in the same format as what DirtyPathsFinder expects, like "content/data".
219                        }
220                    } else {
221                        log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s",
222                                path, field.getType(), name));
223                        continue;
224                    }
225                    indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name);
226                    propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path);
227                }
228            }
229        }
230
231        // Add document types with the NotFulltextIndexable facet
232        for (DocumentType documentType : schemaManager.getDocumentTypes()) {
233            if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) {
234                excludedTypes.add(documentType.getName());
235            }
236        }
237    }
238
239    protected Type getBaseType(Type type) {
240        if (type instanceof SimpleTypeImpl) {
241            return getBaseType(type.getSuperType());
242        }
243        if (type instanceof ListType) {
244            return getBaseType(((ListType) type).getFieldType());
245        }
246        return type;
247    }
248
249    /**
250     * Accumulates paths for string and binary properties in schemas passed to {@link #walkSchema}.
251     * <p>
252     * For schemas without prefix the path is accumulated both with and without prefix.
253     * <p>
254     * For binaries the path includes the final "/data" part.
255     */
256    // TODO precompute this in SchemaManagerImpl
257    public static class PathsFinder {
258
259        protected final Set<String> simplePaths;
260
261        protected final Set<String> binaryPaths;
262
263        public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) {
264            this.simplePaths = simplePaths;
265            this.binaryPaths = binaryPaths;
266        }
267
268        public void walkSchema(Schema schema) {
269            String addPrefix = schema.getNamespace().hasPrefix() ? null : schema.getName();
270            walkComplexType(schema, null, addPrefix);
271        }
272
273        protected void walkComplexType(ComplexType complexType, String path, String addPrefix) {
274            for (Field field : complexType.getFields()) {
275                String name = field.getName().getPrefixedName();
276                String fieldPath = path == null ? name : path + '/' + name;
277                walkType(field.getType(), fieldPath, addPrefix);
278            }
279        }
280
281        protected void walkType(Type type, String path, String addPrefix) {
282            if (type.isSimpleType()) {
283                walkSimpleType(type, path, addPrefix);
284            } else if (type.isListType()) {
285                String listPath = path + "/*";
286                Type ftype = ((ListType) type).getField().getType();
287                if (ftype.isComplexType()) {
288                    // complex list
289                    walkComplexType((ComplexType) ftype, listPath, addPrefix);
290                } else {
291                    // array
292                    walkSimpleType(ftype, listPath, addPrefix);
293                }
294            } else {
295                // complex type
296                ComplexType ctype = (ComplexType) type;
297                walkComplexType(ctype, path, addPrefix);
298            }
299        }
300
301        protected void walkSimpleType(Type type, String path, String addPrefix) {
302            while (type instanceof SimpleTypeImpl) {
303                // type with constraint
304                type = type.getSuperType();
305            }
306            if (type instanceof StringType) {
307                simplePaths.add(path);
308                if (addPrefix != null) {
309                    simplePaths.add(addPrefix + ":" + path);
310                }
311            } else if (type instanceof BinaryType) {
312                binaryPaths.add(path);
313                if (addPrefix != null) {
314                    binaryPaths.add(addPrefix + ":" + path);
315                }
316            }
317        }
318    }
319
320    public boolean isFulltextIndexable(String typeName) {
321        if (ROOT_TYPE.equals(typeName)) {
322            return false;
323        }
324        if (includedTypes.contains(typeName) || (includedTypes.isEmpty() && !excludedTypes.contains(typeName))) {
325            return true;
326        }
327        return false;
328    }
329
330}