/*
 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */

package org.nuxeo.ecm.core.storage;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.schema.DocumentType;
import org.nuxeo.ecm.core.schema.FacetNames;
import org.nuxeo.ecm.core.schema.Namespace;
import org.nuxeo.ecm.core.schema.SchemaManager;
import org.nuxeo.ecm.core.schema.TypeConstants;
import org.nuxeo.ecm.core.schema.types.ComplexType;
import org.nuxeo.ecm.core.schema.types.Field;
import org.nuxeo.ecm.core.schema.types.ListType;
import org.nuxeo.ecm.core.schema.types.Schema;
import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
import org.nuxeo.ecm.core.schema.types.Type;
import org.nuxeo.ecm.core.schema.types.primitives.BinaryType;
import org.nuxeo.ecm.core.schema.types.primitives.StringType;
import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor;
import org.nuxeo.runtime.api.Framework;

/**
 * Info about the fulltext configuration.
 * <p>
 * An instance is computed once, in the constructor, from a {@link FulltextDescriptor} and from the schemas and
 * document types known to the {@link SchemaManager}. The result is exposed through public collections mapping index
 * names to property paths and back.
 */
public class FulltextConfiguration {

    private static final Log log = LogFactory.getLog(FulltextConfiguration.class);

    public static final String ROOT_TYPE = "Root";

    public static final String PROP_TYPE_STRING = "string";

    public static final String PROP_TYPE_BLOB = "blob";

    public static final String FULLTEXT_DEFAULT_INDEX = "default";

    /** All index names. */
    public final Set<String> indexNames = new LinkedHashSet<>();

    /** Indexes holding exactly one field. */
    public final Map<String, String> fieldToIndexName = new HashMap<>();

    /** Indexes containing all simple properties. */
    public final Set<String> indexesAllSimple = new HashSet<>();

    /** Indexes containing all binaries properties. */
    public final Set<String> indexesAllBinary = new HashSet<>();

    /** Indexes for each specific simple property path. */
    public final Map<String, Set<String>> indexesByPropPathSimple = new HashMap<>();

    /** Indexes for each specific binary property path. */
    // DBSTransactionState.findDirtyDocuments expects this to contain unprefixed versions for schemas
    // without prefix, like "content/data".
    public final Map<String, Set<String>> indexesByPropPathBinary = new HashMap<>();

    /** Indexes for each specific simple property path excluded. */
    public final Map<String, Set<String>> indexesByPropPathExcludedSimple = new HashMap<>();

    /** Indexes for each specific binary property path excluded. */
    public final Map<String, Set<String>> indexesByPropPathExcludedBinary = new HashMap<>();

    // inverse of above maps

    /** Simple property paths for each index (inverse of {@link #indexesByPropPathSimple}). */
    public final Map<String, Set<String>> propPathsByIndexSimple = new HashMap<>();

    /** Binary property paths for each index (inverse of {@link #indexesByPropPathBinary}). */
    public final Map<String, Set<String>> propPathsByIndexBinary = new HashMap<>();

    /** Excluded simple property paths for each index (inverse of {@link #indexesByPropPathExcludedSimple}). */
    public final Map<String, Set<String>> propPathsExcludedByIndexSimple = new HashMap<>();

    /** Excluded binary property paths for each index (inverse of {@link #indexesByPropPathExcludedBinary}). */
    public final Map<String, Set<String>> propPathsExcludedByIndexBinary = new HashMap<>();

    /** Document types for which {@link #isFulltextIndexable} returns {@code false} when no type is included. */
    public final Set<String> excludedTypes = new HashSet<>();

    /** Document types explicitly included; when non-empty, only these are fulltext indexable. */
    public final Set<String> includedTypes = new HashSet<>();

    public final boolean fulltextSearchDisabled;

    public final int fulltextFieldSizeLimit;

    /**
     * Computes the fulltext configuration from the given descriptor.
     * <p>
     * NOTE: defaults are written back into the descriptor: if the list returned by
     * {@code getFulltextIndexes()} is empty, a default index descriptor is added to it, and {@code null}
     * {@code fields} / {@code excludeFields} sets of an index descriptor are replaced by empty sets.
     *
     * @param fulltextDescriptor the fulltext descriptor to read the configuration from
     */
    public FulltextConfiguration(FulltextDescriptor fulltextDescriptor) {
        SchemaManager schemaManager = Framework.getService(SchemaManager.class);

        fulltextFieldSizeLimit = fulltextDescriptor.getFulltextFieldSizeLimit();

        fulltextSearchDisabled = fulltextDescriptor.getFulltextSearchDisabled();

        // find what paths we mean by "all"
        // for schemas without prefix, we add both the unprefixed and the prefixed version
        Set<String> allSimplePaths = new HashSet<>();
        Set<String> allBinaryPaths = new HashSet<>();
        PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths);
        for (Schema schema : schemaManager.getSchemas()) {
            pathsFinder.walkSchema(schema);
        }

        // included / excluded types are global to the descriptor, not per index,
        // so they are collected once here instead of once per index
        if (fulltextDescriptor.getFulltextExcludedTypes() != null) {
            excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes());
        }
        if (fulltextDescriptor.getFulltextIncludedTypes() != null) {
            includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes());
        }

        List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes();
        if (descs == null) {
            descs = new ArrayList<>(1);
        }
        if (descs.isEmpty()) {
            // no index configured -> a single index with default settings
            descs.add(new FulltextIndexDescriptor());
        }
        for (FulltextIndexDescriptor desc : descs) {
            String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name;
            indexNames.add(name);
            if (desc.fields == null) {
                desc.fields = new HashSet<String>();
            }
            if (desc.excludeFields == null) {
                desc.excludeFields = new HashSet<String>();
            }
            if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) {
                fieldToIndexName.put(desc.fields.iterator().next(), name);
            }

            if (desc.fieldType != null) {
                if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_STRING)) {
                    indexesAllSimple.add(name);
                } else if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_BLOB)) {
                    indexesAllBinary.add(name);
                } else {
                    log.error("Ignoring unknown repository fulltext configuration fieldType: " + desc.fieldType);
                }
            }
            if (desc.fields.isEmpty() && desc.fieldType == null) {
                // no fields specified and no field type -> all of them
                indexesAllSimple.add(name);
                indexesAllBinary.add(name);
            }

            if (indexesAllSimple.contains(name)) {
                propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths));
                for (String path : allSimplePaths) {
                    indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name);
                }
            }
            if (indexesAllBinary.contains(name)) {
                propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths));
                for (String path : allBinaryPaths) {
                    indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name);
                }
            }

            // first pass registers the included fields, second pass the excluded ones
            for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) {
                boolean include = fields == desc.fields;
                for (String path : fields) {
                    Field field = schemaManager.getField(path);
                    if (field == null && !path.contains(":")) {
                        // check without prefix
                        // TODO precompute this in SchemaManagerImpl
                        int slash = path.indexOf('/');
                        String first = slash == -1 ? path : path.substring(0, slash);
                        for (Schema schema : schemaManager.getSchemas()) {
                            if (!schema.getNamespace().hasPrefix()) {
                                // schema without prefix, try it
                                if (schema.getField(first) != null) {
                                    path = schema.getName() + ":" + path;
                                    field = schemaManager.getField(path);
                                    break;
                                }
                            }
                        }
                    }
                    if (field == null) {
                        log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path,
                                name));
                        continue;
                    }
                    Type baseType = getBaseType(field.getType());
                    Map<String, Set<String>> indexesByPropPath;
                    Map<String, Set<String>> propPathsByIndex;
                    if (baseType instanceof ComplexType && TypeConstants.isContentType(baseType)) {
                        // a blob property: what gets indexed is its binary data sub-field
                        baseType = ((ComplexType) baseType).getField(BaseDocument.BLOB_DATA).getType(); // BinaryType
                    }
                    if (baseType instanceof StringType) {
                        indexesByPropPath = include ? indexesByPropPathSimple : indexesByPropPathExcludedSimple;
                        propPathsByIndex = include ? propPathsByIndexSimple : propPathsExcludedByIndexSimple;
                    } else if (baseType instanceof BinaryType) {
                        indexesByPropPath = include ? indexesByPropPathBinary : indexesByPropPathExcludedBinary;
                        propPathsByIndex = include ? propPathsByIndexBinary : propPathsExcludedByIndexBinary;
                        if (!path.endsWith("/" + BaseDocument.BLOB_DATA)) {
                            path += "/" + BaseDocument.BLOB_DATA;
                            // needed for indexesByPropPathBinary as DBSTransactionState.findDirtyDocuments expects this
                            // to be in the same format as what DirtyPathsFinder expects, like "content/data".
                        }
                    } else {
                        log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s",
                                path, field.getType(), name));
                        continue;
                    }
                    indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name);
                    propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path);
                }
            }
        }

        // Add document types with the NotFulltextIndexable facet
        for (DocumentType documentType : schemaManager.getDocumentTypes()) {
            if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) {
                excludedTypes.add(documentType.getName());
            }
        }
    }

    /**
     * Unwraps a type down to the type used to decide how a property is indexed.
     * <p>
     * A {@link SimpleTypeImpl} is replaced by its super type and a {@link ListType} by its field type, recursively;
     * any other type is returned unchanged.
     *
     * @param type the type to unwrap
     * @return the first type reached that is neither a {@link SimpleTypeImpl} nor a {@link ListType}
     */
    protected Type getBaseType(Type type) {
        if (type instanceof SimpleTypeImpl) {
            return getBaseType(type.getSuperType());
        }
        if (type instanceof ListType) {
            return getBaseType(((ListType) type).getFieldType());
        }
        return type;
    }

    /**
     * Accumulates paths for string and binary properties in schemas passed to {@link #walkSchema}.
     * <p>
     * For schemas without prefix the path is accumulated both with and without prefix.
     * <p>
     * For binaries the path includes the final "/data" part.
     */
    // TODO precompute this in SchemaManagerImpl
    public static class PathsFinder {

        /** Receives the paths of string properties. */
        protected final Set<String> simplePaths;

        /** Receives the paths of binary properties. */
        protected final Set<String> binaryPaths;

        /**
         * @param simplePaths the set to which string property paths are added
         * @param binaryPaths the set to which binary property paths are added
         */
        public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) {
            this.simplePaths = simplePaths;
            this.binaryPaths = binaryPaths;
        }

        /**
         * Walks all the fields of a schema and accumulates the paths of its string and binary properties.
         *
         * @param schema the schema to walk
         */
        public void walkSchema(Schema schema) {
            // for a schema without prefix, paths are also recorded prefixed with the schema name
            String addPrefix = schema.getNamespace().hasPrefix() ? null : schema.getName();
            walkComplexType(schema, null, addPrefix);
        }

        /**
         * Walks each field of a complex type.
         *
         * @param complexType the complex type whose fields are walked
         * @param path the path of the complex type itself, or {@code null} at the schema level
         * @param addPrefix the schema name to additionally prefix paths with, or {@code null}
         */
        protected void walkComplexType(ComplexType complexType, String path, String addPrefix) {
            for (Field field : complexType.getFields()) {
                String name = field.getName().getPrefixedName();
                String fieldPath = path == null ? name : path + '/' + name;
                walkType(field.getType(), fieldPath, addPrefix);
            }
        }

        /**
         * Dispatches on the kind of type: simple, list (elements are walked under {@code path + "/*"}) or complex.
         *
         * @param type the type to walk
         * @param path the path of the property having this type
         * @param addPrefix the schema name to additionally prefix paths with, or {@code null}
         */
        protected void walkType(Type type, String path, String addPrefix) {
            if (type.isSimpleType()) {
                walkSimpleType(type, path, addPrefix);
            } else if (type.isListType()) {
                String listPath = path + "/*";
                Type ftype = ((ListType) type).getField().getType();
                if (ftype.isComplexType()) {
                    // complex list
                    walkComplexType((ComplexType) ftype, listPath, addPrefix);
                } else {
                    // array
                    walkSimpleType(ftype, listPath, addPrefix);
                }
            } else {
                // complex type
                ComplexType ctype = (ComplexType) type;
                walkComplexType(ctype, path, addPrefix);
            }
        }

        /**
         * Records the path if the type is, after unwrapping constrained types, a string or a binary type; other
         * types are ignored.
         *
         * @param type the simple type of the property
         * @param path the path of the property
         * @param addPrefix the schema name to additionally prefix the path with, or {@code null}
         */
        protected void walkSimpleType(Type type, String path, String addPrefix) {
            while (type instanceof SimpleTypeImpl) {
                // type with constraint
                type = type.getSuperType();
            }
            if (type instanceof StringType) {
                simplePaths.add(path);
                if (addPrefix != null) {
                    simplePaths.add(addPrefix + ":" + path);
                }
            } else if (type instanceof BinaryType) {
                binaryPaths.add(path);
                if (addPrefix != null) {
                    binaryPaths.add(addPrefix + ":" + path);
                }
            }
        }
    }

    /**
     * Checks whether documents of the given type are fulltext indexable.
     * <p>
     * The {@value #ROOT_TYPE} type never is. Otherwise a type is indexable if it is explicitly included, or if no
     * type is explicitly included and the type is not excluded.
     *
     * @param typeName the document type name
     * @return {@code true} if the type is fulltext indexable
     */
    public boolean isFulltextIndexable(String typeName) {
        if (ROOT_TYPE.equals(typeName)) {
            return false;
        }
        return includedTypes.contains(typeName) || (includedTypes.isEmpty() && !excludedTypes.contains(typeName));
    }

}