001/* 002 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019 020package org.nuxeo.ecm.core.storage; 021 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.HashMap; 025import java.util.HashSet; 026import java.util.LinkedHashSet; 027import java.util.List; 028import java.util.Map; 029import java.util.Set; 030 031import org.apache.commons.logging.Log; 032import org.apache.commons.logging.LogFactory; 033import org.nuxeo.ecm.core.schema.DocumentType; 034import org.nuxeo.ecm.core.schema.FacetNames; 035import org.nuxeo.ecm.core.schema.SchemaManager; 036import org.nuxeo.ecm.core.schema.TypeConstants; 037import org.nuxeo.ecm.core.schema.types.ComplexType; 038import org.nuxeo.ecm.core.schema.types.Field; 039import org.nuxeo.ecm.core.schema.types.ListType; 040import org.nuxeo.ecm.core.schema.types.Schema; 041import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl; 042import org.nuxeo.ecm.core.schema.types.Type; 043import org.nuxeo.ecm.core.schema.types.primitives.BinaryType; 044import org.nuxeo.ecm.core.schema.types.primitives.StringType; 045import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor; 046import org.nuxeo.runtime.api.Framework; 047 048/** 049 * Info about the fulltext configuration. 050 */ 051public class FulltextConfiguration { 052 053 private static final Log log = LogFactory.getLog(FulltextConfiguration.class); 054 055 public static final String ROOT_TYPE = "Root"; 056 057 public static final String PROP_TYPE_STRING = "string"; 058 059 public static final String PROP_TYPE_BLOB = "blob"; 060 061 public static final String FULLTEXT_DEFAULT_INDEX = "default"; 062 063 /** All index names. */ 064 public final Set<String> indexNames = new LinkedHashSet<>(); 065 066 /** Indexes holding exactly one field. */ 067 public final Map<String, String> fieldToIndexName = new HashMap<>(); 068 069 /** Indexes containing all simple properties. */ 070 public final Set<String> indexesAllSimple = new HashSet<>(); 071 072 /** Indexes containing all binaries properties. */ 073 public final Set<String> indexesAllBinary = new HashSet<>(); 074 075 /** Indexes for each specific simple property path. */ 076 public final Map<String, Set<String>> indexesByPropPathSimple = new HashMap<>(); 077 078 /** Indexes for each specific binary property path. */ 079 // DBSTransactionState.findDirtyDocuments expects this to contain unprefixed versions for schemas 080 // without prefix, like "content/data". 081 public final Map<String, Set<String>> indexesByPropPathBinary = new HashMap<>(); 082 083 /** Indexes for each specific simple property path excluded. */ 084 public final Map<String, Set<String>> indexesByPropPathExcludedSimple = new HashMap<>(); 085 086 /** Indexes for each specific binary property path excluded. */ 087 public final Map<String, Set<String>> indexesByPropPathExcludedBinary = new HashMap<>(); 088 089 // inverse of above maps 090 public final Map<String, Set<String>> propPathsByIndexSimple = new HashMap<>(); 091 092 public final Map<String, Set<String>> propPathsByIndexBinary = new HashMap<>(); 093 094 public final Map<String, Set<String>> propPathsExcludedByIndexSimple = new HashMap<>(); 095 096 public final Map<String, Set<String>> propPathsExcludedByIndexBinary = new HashMap<>(); 097 098 public final Set<String> excludedTypes = new HashSet<>(); 099 100 public final Set<String> includedTypes = new HashSet<>(); 101 102 public final boolean fulltextSearchDisabled; 103 104 public final int fulltextFieldSizeLimit; 105 106 public FulltextConfiguration(FulltextDescriptor fulltextDescriptor) { 107 SchemaManager schemaManager = Framework.getService(SchemaManager.class); 108 109 fulltextFieldSizeLimit = fulltextDescriptor.getFulltextFieldSizeLimit(); 110 111 fulltextSearchDisabled = fulltextDescriptor.getFulltextSearchDisabled(); 112 113 // find what paths we mean by "all" 114 // for schemas without prefix, we add both the unprefixed and the prefixed version 115 Set<String> allSimplePaths = new HashSet<>(); 116 Set<String> allBinaryPaths = new HashSet<>(); 117 PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths); 118 for (Schema schema : schemaManager.getSchemas()) { 119 pathsFinder.walkSchema(schema); 120 } 121 122 List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes(); 123 if (descs == null) { 124 descs = new ArrayList<>(1); 125 } 126 if (descs.isEmpty()) { 127 descs.add(new FulltextIndexDescriptor()); 128 } 129 for (FulltextIndexDescriptor desc : descs) { 130 String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name; 131 indexNames.add(name); 132 if (desc.fields == null) { 133 desc.fields = new HashSet<>(); 134 } 135 if (desc.excludeFields == null) { 136 desc.excludeFields = new HashSet<>(); 137 } 138 if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) { 139 fieldToIndexName.put(desc.fields.iterator().next(), name); 140 } 141 142 if (desc.fieldType != null) { 143 if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_STRING)) { 144 indexesAllSimple.add(name); 145 } else if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_BLOB)) { 146 indexesAllBinary.add(name); 147 } else { 148 log.error("Ignoring unknow repository fulltext configuration fieldType: " + desc.fieldType); 149 } 150 151 } 152 if (desc.fields.isEmpty() && desc.fieldType == null) { 153 // no fields specified and no field type -> all of them 154 indexesAllSimple.add(name); 155 indexesAllBinary.add(name); 156 } 157 158 if (indexesAllSimple.contains(name)) { 159 propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths)); 160 for (String path : allSimplePaths) { 161 indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name); 162 } 163 } 164 if (indexesAllBinary.contains(name)) { 165 propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths)); 166 for (String path : allBinaryPaths) { 167 indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name); 168 } 169 } 170 171 if (fulltextDescriptor.getFulltextExcludedTypes() != null) { 172 excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes()); 173 } 174 if (fulltextDescriptor.getFulltextIncludedTypes() != null) { 175 includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes()); 176 } 177 178 for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) { 179 boolean include = fields == desc.fields; 180 for (String path : fields) { 181 Field field = schemaManager.getField(path); 182 if (field == null && !path.contains(":")) { 183 // check without prefix 184 // TODO precompute this in SchemaManagerImpl 185 int slash = path.indexOf('/'); 186 String first = slash == -1 ? path : path.substring(0, slash); 187 for (Schema schema : schemaManager.getSchemas()) { 188 if (!schema.getNamespace().hasPrefix()) { 189 // schema without prefix, try it 190 if (schema.getField(first) != null) { 191 path = schema.getName() + ":" + path; 192 field = schemaManager.getField(path); 193 break; 194 } 195 } 196 } 197 } 198 if (field == null) { 199 log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path, 200 name)); 201 continue; 202 } 203 Type baseType = getBaseType(field.getType()); 204 Map<String, Set<String>> indexesByPropPath; 205 Map<String, Set<String>> propPathsByIndex; 206 if (baseType instanceof ComplexType && TypeConstants.isContentType(baseType)) { 207 baseType = ((ComplexType) baseType).getField(BaseDocument.BLOB_DATA).getType(); // BinaryType 208 } 209 if (baseType instanceof StringType) { 210 indexesByPropPath = include ? indexesByPropPathSimple : indexesByPropPathExcludedSimple; 211 propPathsByIndex = include ? propPathsByIndexSimple : propPathsExcludedByIndexSimple; 212 } else if (baseType instanceof BinaryType) { 213 indexesByPropPath = include ? indexesByPropPathBinary : indexesByPropPathExcludedBinary; 214 propPathsByIndex = include ? propPathsByIndexBinary : propPathsExcludedByIndexBinary; 215 if (!path.endsWith("/" + BaseDocument.BLOB_DATA)) { 216 path += "/" + BaseDocument.BLOB_DATA; 217 // needed for indexesByPropPathBinary as DBSTransactionState.findDirtyDocuments expects this 218 // to be in the same format as what DirtyPathsFinder expects, like "content/data". 219 } 220 } else { 221 log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s", 222 path, field.getType(), name)); 223 continue; 224 } 225 indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name); 226 propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path); 227 } 228 } 229 } 230 231 // Add document types with the NotFulltextIndexable facet 232 for (DocumentType documentType : schemaManager.getDocumentTypes()) { 233 if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) { 234 excludedTypes.add(documentType.getName()); 235 } 236 } 237 } 238 239 protected Type getBaseType(Type type) { 240 if (type instanceof SimpleTypeImpl) { 241 return getBaseType(type.getSuperType()); 242 } 243 if (type instanceof ListType) { 244 return getBaseType(((ListType) type).getFieldType()); 245 } 246 return type; 247 } 248 249 /** 250 * Accumulates paths for string and binary properties in schemas passed to {@link #walkSchema}. 251 * <p> 252 * For schemas without prefix the path is accumulated both with and without prefix. 253 * <p> 254 * For binaries the path includes the final "/data" part. 255 */ 256 // TODO precompute this in SchemaManagerImpl 257 public static class PathsFinder { 258 259 protected final Set<String> simplePaths; 260 261 protected final Set<String> binaryPaths; 262 263 public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) { 264 this.simplePaths = simplePaths; 265 this.binaryPaths = binaryPaths; 266 } 267 268 public void walkSchema(Schema schema) { 269 String addPrefix = schema.getNamespace().hasPrefix() ? null : schema.getName(); 270 walkComplexType(schema, null, addPrefix); 271 } 272 273 protected void walkComplexType(ComplexType complexType, String path, String addPrefix) { 274 for (Field field : complexType.getFields()) { 275 String name = field.getName().getPrefixedName(); 276 String fieldPath = path == null ? name : path + '/' + name; 277 walkType(field.getType(), fieldPath, addPrefix); 278 } 279 } 280 281 protected void walkType(Type type, String path, String addPrefix) { 282 if (type.isSimpleType()) { 283 walkSimpleType(type, path, addPrefix); 284 } else if (type.isListType()) { 285 String listPath = path + "/*"; 286 Type ftype = ((ListType) type).getField().getType(); 287 if (ftype.isComplexType()) { 288 // complex list 289 walkComplexType((ComplexType) ftype, listPath, addPrefix); 290 } else { 291 // array 292 walkSimpleType(ftype, listPath, addPrefix); 293 } 294 } else { 295 // complex type 296 ComplexType ctype = (ComplexType) type; 297 walkComplexType(ctype, path, addPrefix); 298 } 299 } 300 301 protected void walkSimpleType(Type type, String path, String addPrefix) { 302 while (type instanceof SimpleTypeImpl) { 303 // type with constraint 304 type = type.getSuperType(); 305 } 306 if (type instanceof StringType) { 307 simplePaths.add(path); 308 if (addPrefix != null) { 309 simplePaths.add(addPrefix + ":" + path); 310 } 311 } else if (type instanceof BinaryType) { 312 binaryPaths.add(path); 313 if (addPrefix != null) { 314 binaryPaths.add(addPrefix + ":" + path); 315 } 316 } 317 } 318 } 319 320 public boolean isFulltextIndexable(String typeName) { 321 if (ROOT_TYPE.equals(typeName)) { 322 return false; 323 } 324 if (includedTypes.contains(typeName) || (includedTypes.isEmpty() && !excludedTypes.contains(typeName))) { 325 return true; 326 } 327 return false; 328 } 329 330}