001/*
002 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019
020package org.nuxeo.ecm.core.storage;
021
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.LinkedHashSet;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030
031import org.apache.commons.logging.Log;
032import org.apache.commons.logging.LogFactory;
033import org.nuxeo.ecm.core.schema.DocumentType;
034import org.nuxeo.ecm.core.schema.FacetNames;
035import org.nuxeo.ecm.core.schema.SchemaManager;
036import org.nuxeo.ecm.core.schema.types.ComplexType;
037import org.nuxeo.ecm.core.schema.types.Field;
038import org.nuxeo.ecm.core.schema.types.ListType;
039import org.nuxeo.ecm.core.schema.types.Schema;
040import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
041import org.nuxeo.ecm.core.schema.types.Type;
042import org.nuxeo.ecm.core.schema.types.primitives.BinaryType;
043import org.nuxeo.ecm.core.schema.types.primitives.StringType;
044import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor;
045import org.nuxeo.runtime.api.Framework;
046
047/**
048 * Info about the fulltext configuration.
049 */
050public class FulltextConfiguration {
051
052    private static final Log log = LogFactory.getLog(FulltextConfiguration.class);
053
054    public static final String ROOT_TYPE = "Root";
055
056    public static final String PROP_TYPE_STRING = "string";
057
058    public static final String PROP_TYPE_BLOB = "blob";
059
060    public static final String FULLTEXT_DEFAULT_INDEX = "default";
061
062    /** All index names. */
063    public final Set<String> indexNames = new LinkedHashSet<String>();
064
065    /** Indexes holding exactly one field. */
066    public final Map<String, String> fieldToIndexName = new HashMap<String, String>();
067
068    /** Indexes containing all simple properties. */
069    public final Set<String> indexesAllSimple = new HashSet<String>();
070
071    /** Indexes containing all binaries properties. */
072    public final Set<String> indexesAllBinary = new HashSet<String>();
073
074    /** Indexes for each specific simple property path. */
075    public final Map<String, Set<String>> indexesByPropPathSimple = new HashMap<String, Set<String>>();
076
077    /** Indexes for each specific binary property path. */
078    public final Map<String, Set<String>> indexesByPropPathBinary = new HashMap<String, Set<String>>();
079
080    /** Indexes for each specific simple property path excluded. */
081    public final Map<String, Set<String>> indexesByPropPathExcludedSimple = new HashMap<String, Set<String>>();
082
083    /** Indexes for each specific binary property path excluded. */
084    public final Map<String, Set<String>> indexesByPropPathExcludedBinary = new HashMap<String, Set<String>>();
085
086    // inverse of above maps
087    public final Map<String, Set<String>> propPathsByIndexSimple = new HashMap<String, Set<String>>();
088
089    public final Map<String, Set<String>> propPathsByIndexBinary = new HashMap<String, Set<String>>();
090
091    public final Map<String, Set<String>> propPathsExcludedByIndexSimple = new HashMap<String, Set<String>>();
092
093    public final Map<String, Set<String>> propPathsExcludedByIndexBinary = new HashMap<String, Set<String>>();
094
095    public final Set<String> excludedTypes = new HashSet<String>();
096
097    public final Set<String> includedTypes = new HashSet<String>();
098
099    public final boolean fulltextSearchDisabled;
100
101    public FulltextConfiguration(FulltextDescriptor fulltextDescriptor) {
102        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
103
104        fulltextSearchDisabled = fulltextDescriptor.getFulltextSearchDisabled();
105
106        // find what paths we mean by "all"
107        Set<String> allSimplePaths = new HashSet<>();
108        Set<String> allBinaryPaths = new HashSet<>();
109        PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths);
110        for (Schema schema : schemaManager.getSchemas()) {
111            pathsFinder.walkSchema(schema);
112        }
113
114        List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes();
115        if (descs == null) {
116            descs = new ArrayList<FulltextIndexDescriptor>(1);
117        }
118        if (descs.isEmpty()) {
119            descs.add(new FulltextIndexDescriptor());
120        }
121        for (FulltextIndexDescriptor desc : descs) {
122            String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name;
123            indexNames.add(name);
124            if (desc.fields == null) {
125                desc.fields = new HashSet<String>();
126            }
127            if (desc.excludeFields == null) {
128                desc.excludeFields = new HashSet<String>();
129            }
130            if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) {
131                fieldToIndexName.put(desc.fields.iterator().next(), name);
132            }
133
134            if (desc.fieldType != null) {
135                if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_STRING)) {
136                    indexesAllSimple.add(name);
137                } else if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_BLOB)) {
138                    indexesAllBinary.add(name);
139                } else {
140                    log.error("Ignoring unknow repository fulltext configuration fieldType: " + desc.fieldType);
141                }
142
143            }
144            if (desc.fields.isEmpty() && desc.fieldType == null) {
145                // no fields specified and no field type -> all of them
146                indexesAllSimple.add(name);
147                indexesAllBinary.add(name);
148            }
149
150            if (indexesAllSimple.contains(name)) {
151                propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths));
152                for (String path : allSimplePaths) {
153                    indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name);
154                }
155            }
156            if (indexesAllBinary.contains(name)) {
157                propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths));
158                for (String path : allBinaryPaths) {
159                    indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name);
160                }
161            }
162
163            if (fulltextDescriptor.getFulltextExcludedTypes() != null) {
164                excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes());
165            }
166            if (fulltextDescriptor.getFulltextIncludedTypes() != null) {
167                includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes());
168            }
169
170            for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) {
171                for (String path : fields) {
172                    boolean include = fields == desc.fields;
173                    Field field = schemaManager.getField(path);
174                    if (field == null) {
175                        log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path,
176                                name));
177                        continue;
178                    }
179                    Type baseType = getBaseType(field.getType());
180                    Map<String, Set<String>> indexesByPropPath;
181                    Map<String, Set<String>> propPathsByIndex;
182                    if (baseType instanceof StringType) {
183                        indexesByPropPath = include ? indexesByPropPathSimple : indexesByPropPathExcludedSimple;
184                        propPathsByIndex = include ? propPathsByIndexSimple : propPathsExcludedByIndexSimple;
185                    } else if (baseType instanceof BinaryType) {
186                        indexesByPropPath = include ? indexesByPropPathBinary : indexesByPropPathExcludedBinary;
187                        propPathsByIndex = include ? propPathsByIndexBinary : propPathsExcludedByIndexBinary;
188                    } else {
189                        log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s",
190                                path, field.getType(), name));
191                        continue;
192                    }
193                    indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name);
194                    propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path);
195                }
196            }
197        }
198
199        // Add document types with the NotFulltextIndexable facet
200        for (DocumentType documentType : schemaManager.getDocumentTypes()) {
201            if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) {
202                excludedTypes.add(documentType.getName());
203            }
204        }
205    }
206
207    protected Type getBaseType(Type type) {
208        if (type instanceof SimpleTypeImpl) {
209            return getBaseType(type.getSuperType());
210        }
211        if (type instanceof ListType) {
212            return getBaseType(((ListType) type).getFieldType());
213        }
214        return type;
215    }
216
217    public static class PathsFinder {
218
219        protected final Set<String> simplePaths;
220
221        protected final Set<String> binaryPaths;
222
223        public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) {
224            this.simplePaths = simplePaths;
225            this.binaryPaths = binaryPaths;
226        }
227
228        public void walkSchema(Schema schema) {
229            walkComplexType(schema, null);
230        }
231
232        protected void walkComplexType(ComplexType complexType, String path) {
233            for (Field field : complexType.getFields()) {
234                String name = field.getName().getPrefixedName();
235                String fieldPath = path == null ? name : path + '/' + name;
236                walkType(field.getType(), fieldPath);
237            }
238        }
239
240        protected void walkType(Type type, String path) {
241            if (type.isSimpleType()) {
242                walkSimpleType(type, path);
243            } else if (type.isListType()) {
244                String listPath = path + "/*";
245                Type ftype = ((ListType) type).getField().getType();
246                if (ftype.isComplexType()) {
247                    // complex list
248                    walkComplexType((ComplexType) ftype, listPath);
249                } else {
250                    // array
251                    walkSimpleType(ftype, listPath);
252                }
253            } else {
254                // complex type
255                ComplexType ctype = (ComplexType) type;
256                walkComplexType(ctype, path);
257            }
258        }
259
260        protected void walkSimpleType(Type type, String path) {
261            while (type instanceof SimpleTypeImpl) {
262                // type with constraint
263                type = type.getSuperType();
264            }
265            if (type instanceof StringType) {
266                simplePaths.add(path);
267            } else if (type instanceof BinaryType) {
268                binaryPaths.add(path);
269            }
270        }
271    }
272
273    public boolean isFulltextIndexable(String typeName) {
274        if (ROOT_TYPE.equals(typeName)) {
275            return false;
276        }
277        if (includedTypes.contains(typeName) || (includedTypes.isEmpty() && !excludedTypes.contains(typeName))) {
278            return true;
279        }
280        return false;
281    }
282
283}