001/*
002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage;
020
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Map;
026import java.util.Set;
027
028import org.apache.commons.logging.Log;
029import org.apache.commons.logging.LogFactory;
030import org.nuxeo.ecm.core.api.repository.FulltextConfiguration;
031import org.nuxeo.ecm.core.schema.DocumentType;
032import org.nuxeo.ecm.core.schema.FacetNames;
033import org.nuxeo.ecm.core.schema.SchemaManager;
034import org.nuxeo.ecm.core.schema.TypeConstants;
035import org.nuxeo.ecm.core.schema.types.ComplexType;
036import org.nuxeo.ecm.core.schema.types.Field;
037import org.nuxeo.ecm.core.schema.types.ListType;
038import org.nuxeo.ecm.core.schema.types.Schema;
039import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
040import org.nuxeo.ecm.core.schema.types.Type;
041import org.nuxeo.ecm.core.schema.types.primitives.BinaryType;
042import org.nuxeo.ecm.core.schema.types.primitives.StringType;
043import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor;
044import org.nuxeo.runtime.api.Framework;
045
046/**
047 * Factory building a {@link FulltextConfiguration} from a {@link FulltextDescriptor}.
048 *
049 * @since 10.3
050 */
051public class FulltextConfigurationFactory {
052
053    private static final Log log = LogFactory.getLog(FulltextConfigurationFactory.class);
054
055    public static final String PROP_TYPE_STRING = "string";
056
057    public static final String PROP_TYPE_BLOB = "blob";
058
059    public static final String FULLTEXT_DEFAULT_INDEX = "default";
060
061    public static FulltextConfiguration make(FulltextDescriptor fulltextDescriptor) {
062        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
063        FulltextConfiguration ftc = new FulltextConfiguration();
064        ftc.fulltextFieldSizeLimit = fulltextDescriptor.getFulltextFieldSizeLimit();
065        ftc.fulltextStoredInBlob = fulltextDescriptor.getFulltextStoredInBlob();
066        ftc.fulltextSearchDisabled = fulltextDescriptor.getFulltextSearchDisabled();
067
068        // find what paths we mean by "all"
069        // for schemas without prefix, we add both the unprefixed and the prefixed version
070        Set<String> allSimplePaths = new HashSet<>();
071        Set<String> allBinaryPaths = new HashSet<>();
072        PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths);
073        for (Schema schema : schemaManager.getSchemas()) {
074            pathsFinder.walkSchema(schema);
075        }
076
077        List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes();
078        if (descs == null) {
079            descs = new ArrayList<>(1);
080        }
081        if (descs.isEmpty()) {
082            descs.add(new FulltextIndexDescriptor());
083        }
084        for (FulltextIndexDescriptor desc : descs) {
085            String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name;
086            ftc.indexNames.add(name);
087            if (desc.fields == null) {
088                desc.fields = new HashSet<>();
089            }
090            if (desc.excludeFields == null) {
091                desc.excludeFields = new HashSet<>();
092            }
093            if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) {
094                ftc.fieldToIndexName.put(desc.fields.iterator().next(), name);
095            }
096
097            if (desc.fieldType != null) {
098                if (desc.fieldType.equals(FulltextConfigurationFactory.PROP_TYPE_STRING)) {
099                    ftc.indexesAllSimple.add(name);
100                } else if (desc.fieldType.equals(FulltextConfigurationFactory.PROP_TYPE_BLOB)) {
101                    ftc.indexesAllBinary.add(name);
102                } else {
103                    log.error("Ignoring unknow repository fulltext configuration fieldType: " + desc.fieldType);
104                }
105
106            }
107            if (desc.fields.isEmpty() && desc.fieldType == null) {
108                // no fields specified and no field type -> all of them
109                ftc.indexesAllSimple.add(name);
110                ftc.indexesAllBinary.add(name);
111            }
112
113            if (ftc.indexesAllSimple.contains(name)) {
114                ftc.propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths));
115                for (String path : allSimplePaths) {
116                    ftc.indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name);
117                }
118            }
119            if (ftc.indexesAllBinary.contains(name)) {
120                ftc.propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths));
121                for (String path : allBinaryPaths) {
122                    ftc.indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name);
123                }
124            }
125
126            if (fulltextDescriptor.getFulltextExcludedTypes() != null) {
127                ftc.excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes());
128            }
129            if (fulltextDescriptor.getFulltextIncludedTypes() != null) {
130                ftc.includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes());
131            }
132
133            for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) {
134                boolean include = fields == desc.fields;
135                for (String path : fields) {
136                    Field field = schemaManager.getField(path);
137                    if (field == null && !path.contains(":")) {
138                        // check without prefix
139                        // TODO precompute this in SchemaManagerImpl
140                        int slash = path.indexOf('/');
141                        String first = slash == -1 ? path : path.substring(0, slash);
142                        for (Schema schema : schemaManager.getSchemas()) {
143                            if (!schema.getNamespace().hasPrefix()) {
144                                // schema without prefix, try it
145                                if (schema.getField(first) != null) {
146                                    path = schema.getName() + ":" + path;
147                                    field = schemaManager.getField(path);
148                                    break;
149                                }
150                            }
151                        }
152                    }
153                    if (field == null) {
154                        log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path,
155                                name));
156                        continue;
157                    }
158                    Type baseType = getBaseType(field.getType());
159                    Map<String, Set<String>> indexesByPropPath;
160                    Map<String, Set<String>> propPathsByIndex;
161                    if (baseType instanceof ComplexType && TypeConstants.isContentType(baseType)) {
162                        baseType = ((ComplexType) baseType).getField(BaseDocument.BLOB_DATA).getType(); // BinaryType
163                    }
164                    if (baseType instanceof StringType) {
165                        indexesByPropPath = include ? ftc.indexesByPropPathSimple : ftc.indexesByPropPathExcludedSimple;
166                        propPathsByIndex = include ? ftc.propPathsByIndexSimple : ftc.propPathsExcludedByIndexSimple;
167                    } else if (baseType instanceof BinaryType) {
168                        indexesByPropPath = include ? ftc.indexesByPropPathBinary : ftc.indexesByPropPathExcludedBinary;
169                        propPathsByIndex = include ? ftc.propPathsByIndexBinary : ftc.propPathsExcludedByIndexBinary;
170                        if (!path.endsWith("/" + BaseDocument.BLOB_DATA)) {
171                            path += "/" + BaseDocument.BLOB_DATA;
172                            // needed for indexesByPropPathBinary as DBSTransactionState.findDirtyDocuments expects this
173                            // to be in the same format as what DirtyPathsFinder expects, like "content/data".
174                        }
175                    } else {
176                        log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s",
177                                path, field.getType(), name));
178                        continue;
179                    }
180                    indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name);
181                    propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path);
182                }
183            }
184        }
185
186        // Add document types with the NotFulltextIndexable facet
187        for (DocumentType documentType : schemaManager.getDocumentTypes()) {
188            if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) {
189                ftc.excludedTypes.add(documentType.getName());
190            }
191        }
192
193        return ftc;
194    }
195
196    protected static Type getBaseType(Type type) {
197        if (type instanceof SimpleTypeImpl) {
198            return getBaseType(type.getSuperType());
199        }
200        if (type instanceof ListType) {
201            return getBaseType(((ListType) type).getFieldType());
202        }
203        return type;
204    }
205
206    /**
207     * Accumulates paths for string and binary properties in schemas passed to {@link #walkSchema}.
208     * <p>
209     * For schemas without prefix the path is accumulated both with and without prefix.
210     * <p>
211     * For binaries the path includes the final "/data" part.
212     */
213    // TODO precompute this in SchemaManagerImpl
214    public static class PathsFinder {
215
216        protected final Set<String> simplePaths;
217
218        protected final Set<String> binaryPaths;
219
220        public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) {
221            this.simplePaths = simplePaths;
222            this.binaryPaths = binaryPaths;
223        }
224
225        public void walkSchema(Schema schema) {
226            String addPrefix = schema.getNamespace().hasPrefix() ? null : schema.getName();
227            walkComplexType(schema, null, addPrefix);
228        }
229
230        protected void walkComplexType(ComplexType complexType, String path, String addPrefix) {
231            for (Field field : complexType.getFields()) {
232                String name = field.getName().getPrefixedName();
233                String fieldPath = path == null ? name : path + '/' + name;
234                walkType(field.getType(), fieldPath, addPrefix);
235            }
236        }
237
238        protected void walkType(Type type, String path, String addPrefix) {
239            if (type.isSimpleType()) {
240                walkSimpleType(type, path, addPrefix);
241            } else if (type.isListType()) {
242                String listPath = path + "/*";
243                Type ftype = ((ListType) type).getField().getType();
244                if (ftype.isComplexType()) {
245                    // complex list
246                    walkComplexType((ComplexType) ftype, listPath, addPrefix);
247                } else {
248                    // array
249                    walkSimpleType(ftype, listPath, addPrefix);
250                }
251            } else {
252                // complex type
253                ComplexType ctype = (ComplexType) type;
254                walkComplexType(ctype, path, addPrefix);
255            }
256        }
257
258        protected void walkSimpleType(Type type, String path, String addPrefix) {
259            while (type instanceof SimpleTypeImpl) {
260                // type with constraint
261                type = type.getSuperType();
262            }
263            if (type instanceof StringType) {
264                simplePaths.add(path);
265                if (addPrefix != null) {
266                    simplePaths.add(addPrefix + ":" + path);
267                }
268            } else if (type instanceof BinaryType) {
269                binaryPaths.add(path);
270                if (addPrefix != null) {
271                    binaryPaths.add(addPrefix + ":" + path);
272                }
273            }
274        }
275    }
276
277}