001/*
002 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019
020package org.nuxeo.ecm.core.storage;
021
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.LinkedHashSet;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030
031import org.apache.commons.logging.Log;
032import org.apache.commons.logging.LogFactory;
033import org.nuxeo.ecm.core.schema.DocumentType;
034import org.nuxeo.ecm.core.schema.FacetNames;
035import org.nuxeo.ecm.core.schema.SchemaManager;
036import org.nuxeo.ecm.core.schema.types.ComplexType;
037import org.nuxeo.ecm.core.schema.types.Field;
038import org.nuxeo.ecm.core.schema.types.ListType;
039import org.nuxeo.ecm.core.schema.types.Schema;
040import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
041import org.nuxeo.ecm.core.schema.types.Type;
042import org.nuxeo.ecm.core.schema.types.primitives.BinaryType;
043import org.nuxeo.ecm.core.schema.types.primitives.StringType;
044import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor;
045import org.nuxeo.runtime.api.Framework;
046
047/**
048 * Info about the fulltext configuration.
049 */
050public class FulltextConfiguration {
051
052    private static final Log log = LogFactory.getLog(FulltextConfiguration.class);
053
054    public static final String ROOT_TYPE = "Root";
055
056    public static final String PROP_TYPE_STRING = "string";
057
058    public static final String PROP_TYPE_BLOB = "blob";
059
060    public static final String FULLTEXT_DEFAULT_INDEX = "default";
061
062    /** All index names. */
063    public final Set<String> indexNames = new LinkedHashSet<String>();
064
065    /** Indexes holding exactly one field. */
066    public final Map<String, String> fieldToIndexName = new HashMap<String, String>();
067
068    /** Indexes containing all simple properties. */
069    public final Set<String> indexesAllSimple = new HashSet<String>();
070
071    /** Indexes containing all binaries properties. */
072    public final Set<String> indexesAllBinary = new HashSet<String>();
073
074    /** Indexes for each specific simple property path. */
075    public final Map<String, Set<String>> indexesByPropPathSimple = new HashMap<String, Set<String>>();
076
077    /** Indexes for each specific binary property path. */
078    public final Map<String, Set<String>> indexesByPropPathBinary = new HashMap<String, Set<String>>();
079
080    /** Indexes for each specific simple property path excluded. */
081    public final Map<String, Set<String>> indexesByPropPathExcludedSimple = new HashMap<String, Set<String>>();
082
083    /** Indexes for each specific binary property path excluded. */
084    public final Map<String, Set<String>> indexesByPropPathExcludedBinary = new HashMap<String, Set<String>>();
085
086    // inverse of above maps
087    public final Map<String, Set<String>> propPathsByIndexSimple = new HashMap<String, Set<String>>();
088
089    public final Map<String, Set<String>> propPathsByIndexBinary = new HashMap<String, Set<String>>();
090
091    public final Map<String, Set<String>> propPathsExcludedByIndexSimple = new HashMap<String, Set<String>>();
092
093    public final Map<String, Set<String>> propPathsExcludedByIndexBinary = new HashMap<String, Set<String>>();
094
095    public final Set<String> excludedTypes = new HashSet<String>();
096
097    public final Set<String> includedTypes = new HashSet<String>();
098
099    public FulltextConfiguration(FulltextDescriptor fulltextDescriptor) {
100        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
101
102        // find what paths we mean by "all"
103        Set<String> allSimplePaths = new HashSet<>();
104        Set<String> allBinaryPaths = new HashSet<>();
105        PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths);
106        for (Schema schema : schemaManager.getSchemas()) {
107            pathsFinder.walkSchema(schema);
108        }
109
110        List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes();
111        if (descs == null) {
112            descs = new ArrayList<FulltextIndexDescriptor>(1);
113        }
114        if (descs.isEmpty()) {
115            descs.add(new FulltextIndexDescriptor());
116        }
117        for (FulltextIndexDescriptor desc : descs) {
118            String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name;
119            indexNames.add(name);
120            if (desc.fields == null) {
121                desc.fields = new HashSet<String>();
122            }
123            if (desc.excludeFields == null) {
124                desc.excludeFields = new HashSet<String>();
125            }
126            if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) {
127                fieldToIndexName.put(desc.fields.iterator().next(), name);
128            }
129
130            if (desc.fieldType != null) {
131                if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_STRING)) {
132                    indexesAllSimple.add(name);
133                } else if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_BLOB)) {
134                    indexesAllBinary.add(name);
135                } else {
136                    log.error("Ignoring unknow repository fulltext configuration fieldType: " + desc.fieldType);
137                }
138
139            }
140            if (desc.fields.isEmpty() && desc.fieldType == null) {
141                // no fields specified and no field type -> all of them
142                indexesAllSimple.add(name);
143                indexesAllBinary.add(name);
144            }
145
146            if (indexesAllSimple.contains(name)) {
147                propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths));
148                for (String path : allSimplePaths) {
149                    indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name);
150                }
151            }
152            if (indexesAllBinary.contains(name)) {
153                propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths));
154                for (String path : allBinaryPaths) {
155                    indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name);
156                }
157            }
158
159            if (fulltextDescriptor.getFulltextExcludedTypes() != null) {
160                excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes());
161            }
162            if (fulltextDescriptor.getFulltextIncludedTypes() != null) {
163                includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes());
164            }
165
166            for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) {
167                for (String path : fields) {
168                    boolean include = fields == desc.fields;
169                    Field field = schemaManager.getField(path);
170                    if (field == null) {
171                        log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path,
172                                name));
173                        continue;
174                    }
175                    Type baseType = getBaseType(field.getType());
176                    Map<String, Set<String>> indexesByPropPath;
177                    Map<String, Set<String>> propPathsByIndex;
178                    if (baseType instanceof StringType) {
179                        indexesByPropPath = include ? indexesByPropPathSimple : indexesByPropPathExcludedSimple;
180                        propPathsByIndex = include ? propPathsByIndexSimple : propPathsExcludedByIndexSimple;
181                    } else if (baseType instanceof BinaryType) {
182                        indexesByPropPath = include ? indexesByPropPathBinary : indexesByPropPathExcludedBinary;
183                        propPathsByIndex = include ? propPathsByIndexBinary : propPathsExcludedByIndexBinary;
184                    } else {
185                        log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s",
186                                path, field.getType(), name));
187                        continue;
188                    }
189                    indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name);
190                    propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path);
191                }
192            }
193        }
194
195        // Add document types with the NotFulltextIndexable facet
196        for (DocumentType documentType : schemaManager.getDocumentTypes()) {
197            if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) {
198                excludedTypes.add(documentType.getName());
199            }
200        }
201    }
202
203    protected Type getBaseType(Type type) {
204        if (type instanceof SimpleTypeImpl) {
205            return getBaseType(type.getSuperType());
206        }
207        if (type instanceof ListType) {
208            return getBaseType(((ListType) type).getFieldType());
209        }
210        return type;
211    }
212
213    public static class PathsFinder {
214
215        protected final Set<String> simplePaths;
216
217        protected final Set<String> binaryPaths;
218
219        public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) {
220            this.simplePaths = simplePaths;
221            this.binaryPaths = binaryPaths;
222        }
223
224        public void walkSchema(Schema schema) {
225            walkComplexType(schema, null);
226        }
227
228        protected void walkComplexType(ComplexType complexType, String path) {
229            for (Field field : complexType.getFields()) {
230                String name = field.getName().getPrefixedName();
231                String fieldPath = path == null ? name : path + '/' + name;
232                walkType(field.getType(), fieldPath);
233            }
234        }
235
236        protected void walkType(Type type, String path) {
237            if (type.isSimpleType()) {
238                walkSimpleType(type, path);
239            } else if (type.isListType()) {
240                String listPath = path + "/*";
241                Type ftype = ((ListType) type).getField().getType();
242                if (ftype.isComplexType()) {
243                    // complex list
244                    walkComplexType((ComplexType) ftype, listPath);
245                } else {
246                    // array
247                    walkSimpleType(ftype, listPath);
248                }
249            } else {
250                // complex type
251                ComplexType ctype = (ComplexType) type;
252                walkComplexType(ctype, path);
253            }
254        }
255
256        protected void walkSimpleType(Type type, String path) {
257            while (type instanceof SimpleTypeImpl) {
258                // type with constraint
259                type = type.getSuperType();
260            }
261            if (type instanceof StringType) {
262                simplePaths.add(path);
263            } else if (type instanceof BinaryType) {
264                binaryPaths.add(path);
265            }
266        }
267    }
268
269    public boolean isFulltextIndexable(String typeName) {
270        if (ROOT_TYPE.equals(typeName)) {
271            return false;
272        }
273        if (includedTypes.contains(typeName) || (includedTypes.isEmpty() && !excludedTypes.contains(typeName))) {
274            return true;
275        }
276        return false;
277    }
278
279}