001/*
002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage;
020
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Map;
026import java.util.Set;
027
028import org.apache.commons.logging.Log;
029import org.apache.commons.logging.LogFactory;
030import org.nuxeo.ecm.core.api.repository.FulltextConfiguration;
031import org.nuxeo.ecm.core.schema.DocumentType;
032import org.nuxeo.ecm.core.schema.FacetNames;
033import org.nuxeo.ecm.core.schema.SchemaManager;
034import org.nuxeo.ecm.core.schema.TypeConstants;
035import org.nuxeo.ecm.core.schema.types.ComplexType;
036import org.nuxeo.ecm.core.schema.types.Field;
037import org.nuxeo.ecm.core.schema.types.ListType;
038import org.nuxeo.ecm.core.schema.types.Schema;
039import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
040import org.nuxeo.ecm.core.schema.types.Type;
041import org.nuxeo.ecm.core.schema.types.primitives.BinaryType;
042import org.nuxeo.ecm.core.schema.types.primitives.StringType;
043import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor;
044import org.nuxeo.runtime.api.Framework;
045
046/**
047 * Factory building a {@link FulltextConfiguration} from a {@link FulltextDescriptor}.
048 *
049 * @since 10.3
050 */
051public class FulltextConfigurationFactory {
052
    private static final Log log = LogFactory.getLog(FulltextConfigurationFactory.class);

    /**
     * Value of an index descriptor's field type for which {@link #make} registers the index as covering all string
     * properties.
     */
    public static final String PROP_TYPE_STRING = "string";

    /**
     * Value of an index descriptor's field type for which {@link #make} registers the index as covering all binary
     * properties.
     */
    public static final String PROP_TYPE_BLOB = "blob";

    /** Index name used by {@link #make} for an index descriptor that has no name. */
    public static final String FULLTEXT_DEFAULT_INDEX = "default";
060
061    public static FulltextConfiguration make(FulltextDescriptor fulltextDescriptor) {
062        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
063        FulltextConfiguration ftc = new FulltextConfiguration();
064        ftc.fulltextFieldSizeLimit = fulltextDescriptor.getFulltextFieldSizeLimit();
065        ftc.fulltextSearchDisabled = fulltextDescriptor.getFulltextSearchDisabled();
066
067        // find what paths we mean by "all"
068        // for schemas without prefix, we add both the unprefixed and the prefixed version
069        Set<String> allSimplePaths = new HashSet<>();
070        Set<String> allBinaryPaths = new HashSet<>();
071        PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths);
072        for (Schema schema : schemaManager.getSchemas()) {
073            pathsFinder.walkSchema(schema);
074        }
075
076        List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes();
077        if (descs == null) {
078            descs = new ArrayList<>(1);
079        }
080        if (descs.isEmpty()) {
081            descs.add(new FulltextIndexDescriptor());
082        }
083        for (FulltextIndexDescriptor desc : descs) {
084            String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name;
085            ftc.indexNames.add(name);
086            if (desc.fields == null) {
087                desc.fields = new HashSet<>();
088            }
089            if (desc.excludeFields == null) {
090                desc.excludeFields = new HashSet<>();
091            }
092            if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) {
093                ftc.fieldToIndexName.put(desc.fields.iterator().next(), name);
094            }
095
096            if (desc.fieldType != null) {
097                if (desc.fieldType.equals(FulltextConfigurationFactory.PROP_TYPE_STRING)) {
098                    ftc.indexesAllSimple.add(name);
099                } else if (desc.fieldType.equals(FulltextConfigurationFactory.PROP_TYPE_BLOB)) {
100                    ftc.indexesAllBinary.add(name);
101                } else {
102                    log.error("Ignoring unknow repository fulltext configuration fieldType: " + desc.fieldType);
103                }
104
105            }
106            if (desc.fields.isEmpty() && desc.fieldType == null) {
107                // no fields specified and no field type -> all of them
108                ftc.indexesAllSimple.add(name);
109                ftc.indexesAllBinary.add(name);
110            }
111
112            if (ftc.indexesAllSimple.contains(name)) {
113                ftc.propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths));
114                for (String path : allSimplePaths) {
115                    ftc.indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name);
116                }
117            }
118            if (ftc.indexesAllBinary.contains(name)) {
119                ftc.propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths));
120                for (String path : allBinaryPaths) {
121                    ftc.indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name);
122                }
123            }
124
125            if (fulltextDescriptor.getFulltextExcludedTypes() != null) {
126                ftc.excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes());
127            }
128            if (fulltextDescriptor.getFulltextIncludedTypes() != null) {
129                ftc.includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes());
130            }
131
132            for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) {
133                boolean include = fields == desc.fields;
134                for (String path : fields) {
135                    Field field = schemaManager.getField(path);
136                    if (field == null && !path.contains(":")) {
137                        // check without prefix
138                        // TODO precompute this in SchemaManagerImpl
139                        int slash = path.indexOf('/');
140                        String first = slash == -1 ? path : path.substring(0, slash);
141                        for (Schema schema : schemaManager.getSchemas()) {
142                            if (!schema.getNamespace().hasPrefix()) {
143                                // schema without prefix, try it
144                                if (schema.getField(first) != null) {
145                                    path = schema.getName() + ":" + path;
146                                    field = schemaManager.getField(path);
147                                    break;
148                                }
149                            }
150                        }
151                    }
152                    if (field == null) {
153                        log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path,
154                                name));
155                        continue;
156                    }
157                    Type baseType = getBaseType(field.getType());
158                    Map<String, Set<String>> indexesByPropPath;
159                    Map<String, Set<String>> propPathsByIndex;
160                    if (baseType instanceof ComplexType && TypeConstants.isContentType(baseType)) {
161                        baseType = ((ComplexType) baseType).getField(BaseDocument.BLOB_DATA).getType(); // BinaryType
162                    }
163                    if (baseType instanceof StringType) {
164                        indexesByPropPath = include ? ftc.indexesByPropPathSimple : ftc.indexesByPropPathExcludedSimple;
165                        propPathsByIndex = include ? ftc.propPathsByIndexSimple : ftc.propPathsExcludedByIndexSimple;
166                    } else if (baseType instanceof BinaryType) {
167                        indexesByPropPath = include ? ftc.indexesByPropPathBinary : ftc.indexesByPropPathExcludedBinary;
168                        propPathsByIndex = include ? ftc.propPathsByIndexBinary : ftc.propPathsExcludedByIndexBinary;
169                        if (!path.endsWith("/" + BaseDocument.BLOB_DATA)) {
170                            path += "/" + BaseDocument.BLOB_DATA;
171                            // needed for indexesByPropPathBinary as DBSTransactionState.findDirtyDocuments expects this
172                            // to be in the same format as what DirtyPathsFinder expects, like "content/data".
173                        }
174                    } else {
175                        log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s",
176                                path, field.getType(), name));
177                        continue;
178                    }
179                    indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name);
180                    propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path);
181                }
182            }
183        }
184
185        // Add document types with the NotFulltextIndexable facet
186        for (DocumentType documentType : schemaManager.getDocumentTypes()) {
187            if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) {
188                ftc.excludedTypes.add(documentType.getName());
189            }
190        }
191
192        return ftc;
193    }
194
195    protected static Type getBaseType(Type type) {
196        if (type instanceof SimpleTypeImpl) {
197            return getBaseType(type.getSuperType());
198        }
199        if (type instanceof ListType) {
200            return getBaseType(((ListType) type).getFieldType());
201        }
202        return type;
203    }
204
205    /**
206     * Accumulates paths for string and binary properties in schemas passed to {@link #walkSchema}.
207     * <p>
208     * For schemas without prefix the path is accumulated both with and without prefix.
209     * <p>
210     * For binaries the path includes the final "/data" part.
211     */
212    // TODO precompute this in SchemaManagerImpl
213    public static class PathsFinder {
214
215        protected final Set<String> simplePaths;
216
217        protected final Set<String> binaryPaths;
218
219        public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) {
220            this.simplePaths = simplePaths;
221            this.binaryPaths = binaryPaths;
222        }
223
224        public void walkSchema(Schema schema) {
225            String addPrefix = schema.getNamespace().hasPrefix() ? null : schema.getName();
226            walkComplexType(schema, null, addPrefix);
227        }
228
229        protected void walkComplexType(ComplexType complexType, String path, String addPrefix) {
230            for (Field field : complexType.getFields()) {
231                String name = field.getName().getPrefixedName();
232                String fieldPath = path == null ? name : path + '/' + name;
233                walkType(field.getType(), fieldPath, addPrefix);
234            }
235        }
236
237        protected void walkType(Type type, String path, String addPrefix) {
238            if (type.isSimpleType()) {
239                walkSimpleType(type, path, addPrefix);
240            } else if (type.isListType()) {
241                String listPath = path + "/*";
242                Type ftype = ((ListType) type).getField().getType();
243                if (ftype.isComplexType()) {
244                    // complex list
245                    walkComplexType((ComplexType) ftype, listPath, addPrefix);
246                } else {
247                    // array
248                    walkSimpleType(ftype, listPath, addPrefix);
249                }
250            } else {
251                // complex type
252                ComplexType ctype = (ComplexType) type;
253                walkComplexType(ctype, path, addPrefix);
254            }
255        }
256
257        protected void walkSimpleType(Type type, String path, String addPrefix) {
258            while (type instanceof SimpleTypeImpl) {
259                // type with constraint
260                type = type.getSuperType();
261            }
262            if (type instanceof StringType) {
263                simplePaths.add(path);
264                if (addPrefix != null) {
265                    simplePaths.add(addPrefix + ":" + path);
266                }
267            } else if (type instanceof BinaryType) {
268                binaryPaths.add(path);
269                if (addPrefix != null) {
270                    binaryPaths.add(addPrefix + ":" + path);
271                }
272            }
273        }
274    }
275
276}