001/* 002 * (C) Copyright 2014-2016 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019package org.nuxeo.ecm.core.storage; 020 021import static java.util.stream.Collectors.groupingBy; 022import static java.util.stream.Collectors.toList; 023 024import java.util.ArrayList; 025import java.util.Arrays; 026import java.util.Collection; 027import java.util.Collections; 028import java.util.HashMap; 029import java.util.HashSet; 030import java.util.Iterator; 031import java.util.LinkedHashMap; 032import java.util.List; 033import java.util.Map; 034import java.util.Map.Entry; 035import java.util.Set; 036import java.util.stream.Collector; 037 038import org.nuxeo.ecm.core.api.impl.FacetFilter; 039import org.nuxeo.ecm.core.query.QueryParseException; 040import org.nuxeo.ecm.core.query.sql.NXQL; 041import org.nuxeo.ecm.core.query.sql.model.DefaultQueryVisitor; 042import org.nuxeo.ecm.core.query.sql.model.Expression; 043import org.nuxeo.ecm.core.query.sql.model.FromClause; 044import org.nuxeo.ecm.core.query.sql.model.FromList; 045import org.nuxeo.ecm.core.query.sql.model.IdentityQueryTransformer; 046import org.nuxeo.ecm.core.query.sql.model.Literal; 047import org.nuxeo.ecm.core.query.sql.model.LiteralList; 048import org.nuxeo.ecm.core.query.sql.model.MultiExpression; 049import org.nuxeo.ecm.core.query.sql.model.Operand; 050import org.nuxeo.ecm.core.query.sql.model.Operator; 051import org.nuxeo.ecm.core.query.sql.model.OrderByClause; 052import org.nuxeo.ecm.core.query.sql.model.Predicate; 053import org.nuxeo.ecm.core.query.sql.model.Reference; 054import org.nuxeo.ecm.core.query.sql.model.SQLQuery; 055import org.nuxeo.ecm.core.query.sql.model.SelectClause; 056import org.nuxeo.ecm.core.query.sql.model.StringLiteral; 057import org.nuxeo.ecm.core.query.sql.model.WhereClause; 058import org.nuxeo.ecm.core.schema.SchemaManager; 059import org.nuxeo.ecm.core.schema.types.Type; 060import org.nuxeo.runtime.api.Framework; 061 062/** 063 * Generic optimizer for a NXQL query. 064 * 065 * @since 5.9.4 066 */ 067public abstract class QueryOptimizer { 068 069 public static final String TYPE_ROOT = "Root"; 070 071 public static final String TYPE_DOCUMENT = "Document"; 072 073 public static final String TYPE_RELATION = "Relation"; 074 075 protected static final int CORR_BASE = 100_000; 076 077 protected FacetFilter facetFilter; 078 079 protected final SchemaManager schemaManager; 080 081 protected final Set<String> neverPerInstanceMixins; 082 083 protected int correlationCounter; 084 085 /** Do we match only relations? */ 086 protected boolean onlyRelations; 087 088 // group by prefix, keeping order 089 protected static Collector<Predicate, ?, Map<String, List<Predicate>>> GROUPING_BY_EXPR_PREFIX = groupingBy( 090 QueryOptimizer::getPredicatePrefix, LinkedHashMap::new, toList()); 091 092 public QueryOptimizer() { 093 // schemaManager may be null in unit tests 094 schemaManager = Framework.getService(SchemaManager.class); 095 Set<String> facets = schemaManager == null ? Collections.emptySet() 096 : schemaManager.getNoPerDocumentQueryFacets(); 097 neverPerInstanceMixins = new HashSet<>(facets); 098 } 099 100 public QueryOptimizer withFacetFilter(FacetFilter facetFilter) { 101 this.facetFilter = facetFilter; 102 return this; 103 } 104 105 /** 106 * Optimizes a query to provide a WHERE clause containing facet filters, primary and mixin types. In addition, the 107 * top-level AND clauses are analyzed to provide prefix info. 108 */ 109 public SQLQuery optimize(SQLQuery query) { 110 // rewrite some uncorrelated wildcards and add NOT NULL clauses for projection ones 111 query = addWildcardNotNullClauses(query); 112 113 List<Predicate> clauses = new ArrayList<>(); 114 addFacetFilters(clauses, facetFilter); 115 addTypes(clauses, query.from); 116 addWhere(clauses, query.where); 117 simplifyTypes(clauses); 118 MultiExpression multiExpression = new MultiExpression(Operator.AND, clauses); 119 120 // collect information about common reference prefixes (stored on Reference and Expression) 121 new ReferencePrefixAnalyzer().visitMultiExpression(multiExpression); 122 123 Predicate whereExpr; 124 PrefixInfo info = (PrefixInfo) multiExpression.getInfo(); 125 if (!info.prefix.isEmpty()) { 126 // all references have a common prefix 127 whereExpr = multiExpression; 128 } else { 129 // do grouping by common prefix 130 Map<String, List<Predicate>> grouped = clauses.stream().collect(GROUPING_BY_EXPR_PREFIX); 131 Map<String, Predicate> groupedExpressions = new LinkedHashMap<>(); 132 for (Entry<String, List<Predicate>> en : grouped.entrySet()) { 133 String prefix = en.getKey(); 134 List<Predicate> list = en.getValue(); 135 groupedExpressions.put(prefix, makeSingleAndPredicate(prefix, list)); 136 } 137 138 // potentially reorganize into nested grouping 139 reorganizeGroupedExpressions(groupedExpressions); 140 141 List<Predicate> expressions = new ArrayList<>(groupedExpressions.values()); 142 whereExpr = makeSingleAndPredicate("", expressions); 143 } 144 145 return query.withPredicate(whereExpr); 146 } 147 148 /** 149 * Makes a single AND predicate from several expressions known to have a common prefix. 150 */ 151 public static Predicate makeSingleAndPredicate(String prefix, List<Predicate> predicates) { 152 if (predicates.size() == 1) { 153 return predicates.get(0); 154 } else { 155 int count = prefix.isEmpty() ? 0 : predicates.stream().mapToInt(QueryOptimizer::getExpressionCount).sum(); 156 Predicate e = new MultiExpression(Operator.AND, predicates); 157 e.setInfo(new PrefixInfo(prefix, count)); 158 return e; 159 } 160 } 161 162 protected static String getPredicatePrefix(Predicate predicate) { 163 PrefixInfo info = (PrefixInfo) predicate.getInfo(); 164 return info == null ? "" : info.prefix; 165 } 166 167 protected static int getExpressionCount(Expression expr) { 168 PrefixInfo info = (PrefixInfo) expr.getInfo(); 169 return info == null ? 0 : info.count; 170 } 171 172 /** 173 * Reorganizes the grouped expressions in order to have 2-level nesting in case a group is a prefix of another. 174 * 175 * @since 9.3 176 */ 177 public static void reorganizeGroupedExpressions(Map<String, Predicate> groupedExpressions) { 178 if (groupedExpressions.size() > 1) { 179 List<String> keys = new ArrayList<>(groupedExpressions.keySet()); 180 List<String> withPrefix = new ArrayList<>(); 181 String prefix = findPrefix(keys, withPrefix); 182 if (prefix != null) { 183 // first part, the expression corresponding to the prefix 184 Predicate first = groupedExpressions.remove(prefix); 185 186 // second part, all those that had that prefix 187 List<Predicate> exprs = new ArrayList<>(); 188 for (String k : withPrefix) { 189 exprs.add(groupedExpressions.remove(k)); 190 } 191 String secondPrefix; 192 if (withPrefix.size() == 1) { 193 secondPrefix = withPrefix.get(0); 194 } else { 195 throw new QueryParseException("Too complex correlated wildcards in query: " + groupedExpressions); 196 } 197 Predicate second = makeSingleAndPredicate(secondPrefix, exprs); 198 199 // finally bring them all together 200 Predicate expr = makeSingleAndPredicate(prefix, Arrays.asList(first, second)); 201 groupedExpressions.put(prefix, expr); 202 } 203 } 204 } 205 206 /** 207 * Finds a non-empty prefix in the strings. 208 * <p> 209 * If a prefix is found, the other strings having it as a prefix are collected in {@code withPrefix}, and the prefix 210 * and the found strings are moved from the input {@code string}. 211 * <p> 212 * {@code strings} and {@code withPrefix} must both be ArrayLists as they will be mutated and queried by index. 213 * 214 * @param strings the input strings 215 * @param withPrefix (return value) the strings that have the found prefix as a prefix 216 * @return the prefix if found, or null if not 217 * @since 9.3 218 */ 219 public static String findPrefix(List<String> strings, List<String> withPrefix) { 220 // naive algorithm as list size is very small 221 String prefix = null; 222 ALL: // 223 for (int i = 0; i < strings.size(); i++) { 224 String candidate = strings.get(i); 225 if (candidate.isEmpty()) { 226 continue; 227 } 228 for (int j = 0; j < strings.size(); j++) { 229 if (i == j) { 230 continue; 231 } 232 String s = strings.get(j); 233 if (s.isEmpty()) { 234 continue; 235 } 236 if (s.startsWith(candidate + '/')) { 237 prefix = candidate; 238 break ALL; 239 } 240 } 241 } 242 if (prefix != null) { 243 for (Iterator<String> it = strings.iterator(); it.hasNext();) { 244 String s = it.next(); 245 if (s.isEmpty()) { 246 continue; 247 } 248 if (s.equals(prefix)) { 249 it.remove(); // will be returned as method result 250 continue; 251 } 252 if (s.startsWith(prefix + '/')) { 253 it.remove(); // will be returned in withPrefix 254 withPrefix.add(s); 255 } 256 } 257 } 258 return prefix; 259 } 260 261 protected void addFacetFilters(List<Predicate> clauses, FacetFilter facetFilter) { 262 if (facetFilter == null) { 263 return; 264 } 265 for (String mixin : facetFilter.required) { 266 // every facet is required, not just any of them, 267 // so do them one by one 268 Predicate expr = new Predicate(new Reference(NXQL.ECM_MIXINTYPE), Operator.EQ, new StringLiteral(mixin)); 269 clauses.add(expr); 270 } 271 if (!facetFilter.excluded.isEmpty()) { 272 LiteralList list = new LiteralList(); 273 for (String mixin : facetFilter.excluded) { 274 list.add(new StringLiteral(mixin)); 275 } 276 Predicate expr = new Predicate(new Reference(NXQL.ECM_MIXINTYPE), Operator.NOTIN, list); 277 clauses.add(expr); 278 } 279 } 280 281 // methods using SchemaManager easily overrideable for easier testing 282 283 protected Set<String> getDocumentTypeNamesForFacet(String mixin) { 284 Set<String> types = schemaManager.getDocumentTypeNamesForFacet(mixin); 285 if (types == null) { 286 // unknown mixin 287 types = Collections.emptySet(); 288 } 289 return types; 290 } 291 292 protected Set<String> getDocumentTypeNamesExtending(String typeName) { 293 Set<String> types = schemaManager.getDocumentTypeNamesExtending(typeName); 294 if (types == null) { 295 throw new QueryParseException("Unknown type: " + typeName); 296 } 297 return types; 298 } 299 300 protected boolean isTypeRelation(String typeName) { 301 do { 302 if (TYPE_RELATION.equals(typeName)) { 303 return true; 304 } 305 Type t = schemaManager.getDocumentType(typeName); 306 if (t != null) { 307 t = t.getSuperType(); 308 } 309 typeName = t == null ? null : t.getName(); 310 } while (typeName != null); 311 return false; 312 } 313 314 /** 315 * Finds all the types to take into account (all concrete types being a subtype of the passed types) based on the 316 * FROM list. 317 * <p> 318 * Adds them as a ecm:primaryType match in the toplevel operands. 319 */ 320 protected void addTypes(List<Predicate> clauses, FromClause node) { 321 onlyRelations = true; 322 Set<String> fromTypes = new HashSet<>(); 323 FromList elements = node.elements; 324 for (String typeName : elements.values()) { 325 if (TYPE_DOCUMENT.equalsIgnoreCase(typeName)) { 326 typeName = TYPE_DOCUMENT; 327 } 328 fromTypes.addAll(getDocumentTypeNamesExtending(typeName)); 329 boolean isRelation = isTypeRelation(typeName); 330 onlyRelations = onlyRelations && isRelation; 331 } 332 fromTypes.remove(TYPE_ROOT); 333 LiteralList list = new LiteralList(); 334 for (String type : fromTypes) { 335 list.add(new StringLiteral(type)); 336 } 337 clauses.add(new Predicate(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.IN, list)); 338 } 339 340 /** 341 * Adds a flattened version of all toplevel ANDed WHERE clauses. 342 */ 343 protected void addWhere(List<Predicate> clauses, WhereClause where) { 344 if (where != null) { 345 addWhere(clauses, where.predicate); 346 } 347 } 348 349 protected void addWhere(List<Predicate> clauses, Predicate expr) { 350 if (expr.operator == Operator.AND && expr.lvalue instanceof Expression && expr.rvalue instanceof Expression) { 351 addWhere(clauses, (Predicate) expr.lvalue); 352 addWhere(clauses, (Predicate) expr.rvalue); 353 } else if (expr.operator == Operator.AND && expr instanceof MultiExpression) { 354 for (Predicate pred : ((MultiExpression) expr).predicates) { 355 addWhere(clauses, pred); 356 } 357 } else { 358 clauses.add(expr); 359 } 360 } 361 362 /** 363 * Simplify ecm:primaryType positive references, and non-per-instance mixin types. 364 */ 365 protected void simplifyTypes(List<Predicate> clauses) { 366 Set<String> primaryTypes = null; // if defined, required 367 for (Iterator<Predicate> it = clauses.iterator(); it.hasNext();) { 368 // whenever we don't know how to optimize the expression, 369 // we just continue the loop 370 Predicate predicate = it.next(); 371 if (!(predicate.lvalue instanceof Reference)) { 372 continue; 373 } 374 String name = ((Reference) predicate.lvalue).name; 375 Operator op = predicate.operator; 376 Operand rvalue = predicate.rvalue; 377 if (NXQL.ECM_PRIMARYTYPE.equals(name)) { 378 if (op != Operator.EQ && op != Operator.IN) { 379 continue; 380 } 381 Set<String> set; 382 if (op == Operator.EQ) { 383 if (!(rvalue instanceof StringLiteral)) { 384 continue; 385 } 386 String primaryType = ((StringLiteral) rvalue).value; 387 set = new HashSet<>(Collections.singleton(primaryType)); 388 } else { // Operator.IN 389 if (!(rvalue instanceof LiteralList)) { 390 continue; 391 } 392 set = getStringLiterals((LiteralList) rvalue); 393 } 394 if (primaryTypes == null) { 395 primaryTypes = set; 396 } else { 397 primaryTypes.retainAll(set); 398 } 399 it.remove(); // expression simplified into primaryTypes set 400 } else if (NXQL.ECM_MIXINTYPE.equals(name)) { 401 if (op != Operator.EQ && op != Operator.NOTEQ) { 402 continue; 403 } 404 if (!(rvalue instanceof StringLiteral)) { 405 continue; 406 } 407 String mixin = ((StringLiteral) rvalue).value; 408 if (!neverPerInstanceMixins.contains(mixin)) { 409 // mixin per instance -> primary type checks not enough 410 continue; 411 } 412 Set<String> set = getDocumentTypeNamesForFacet(mixin); 413 if (primaryTypes == null) { 414 if (op == Operator.EQ) { 415 primaryTypes = new HashSet<>(set); // copy 416 } else { 417 continue; // unknown positive, no optimization 418 } 419 } else { 420 if (op == Operator.EQ) { 421 primaryTypes.retainAll(set); 422 } else { 423 primaryTypes.removeAll(set); 424 } 425 } 426 it.remove(); // expression simplified into primaryTypes set 427 } 428 } 429 // readd the simplified primary types constraints 430 if (primaryTypes != null) { 431 if (primaryTypes.isEmpty()) { 432 // TODO better removal 433 primaryTypes.add("__NOSUCHTYPE__"); 434 } 435 Predicate predicate; 436 if (primaryTypes.size() == 1) { 437 String pt = primaryTypes.iterator().next(); 438 predicate = new Predicate(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.EQ, new StringLiteral(pt)); 439 } else { // primaryTypes.size() > 1 440 LiteralList list = new LiteralList(); 441 for (String pt : primaryTypes) { 442 list.add(new StringLiteral(pt)); 443 } 444 predicate = new Predicate(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.IN, list); 445 } 446 clauses.add(predicate); 447 } 448 } 449 450 protected static Set<String> getStringLiterals(LiteralList list) { 451 Set<String> set = new HashSet<>(); 452 for (Literal literal : list) { 453 if (!(literal instanceof StringLiteral)) { 454 throw new QueryParseException("requires string literals"); 455 } 456 set.add(((StringLiteral) literal).value); 457 } 458 return set; 459 } 460 461 /** 462 * If we have a query like {@code SELECT dc:subjects/* FROM ...} then we must make sure we don't match documents 463 * that don't have a {@code dc:subjects} at all, as this would make the evaluator return one {@code null} value for 464 * each due to its semantics of doing the equivalent of LEFT JOINs. 465 * <p> 466 * To prevent this, we add a clause {@code ... AND dc:subjects/* IS NOT NULL}. 467 * <p> 468 * For correlated wildcards this is enough, but for uncorrelated wildcards we must avoid adding extra JOINs, so we 469 * must artificially correlated them. This requires rewriting the query with correlated wildcards instead of 470 * uncorrelated ones. 471 */ 472 protected SQLQuery addWildcardNotNullClauses(SQLQuery query) { 473 ProjectionWildcardsFinder finder = new ProjectionWildcardsFinder(); 474 finder.visitQuery(query); 475 476 // find wildcards in the projection 477 Set<String> wildcards = finder.projectionWildcards; 478 Set<String> uncorrelatedWildcards = finder.uncorrelatedProjectionWildcards; 479 if (!uncorrelatedWildcards.isEmpty()) { 480 // rename uncorrelated wildcards to unique correlation names 481 Map<String, String> map = new HashMap<>(uncorrelatedWildcards.size()); 482 for (String name : uncorrelatedWildcards) { 483 String newName = name + (CORR_BASE + correlationCounter++); 484 map.put(name, newName); 485 wildcards.remove(name); 486 wildcards.add(newName); 487 } 488 // rename them in the whole query 489 query = new ProjectionReferenceRenamer(map).transform(query); 490 } 491 492 // add IS NOT NULL clauses for all projection wildcards (now all correlated) 493 if (!wildcards.isEmpty()) { 494 query = addIsNotNullClauses(query, wildcards); 495 } 496 497 return query; 498 } 499 500 protected SQLQuery addIsNotNullClauses(SQLQuery query, Collection<String> names) { 501 List<Predicate> values = names.stream() 502 .map(name -> new Predicate(new Reference(name), Operator.ISNOTNULL, null)) 503 .collect(toList()); 504 Predicate expr = new Predicate(query.where.predicate, Operator.AND, new MultiExpression(Operator.AND, values)); 505 return query.withPredicate(expr); 506 } 507 508 protected static class ProjectionWildcardsFinder extends DefaultQueryVisitor { 509 510 protected final Set<String> projectionWildcards = new HashSet<>(); 511 512 protected final Set<String> uncorrelatedProjectionWildcards = new HashSet<>(); 513 514 protected boolean inProjection; 515 516 protected boolean inOrderBy; 517 518 @Override 519 public void visitSelectClause(SelectClause node) { 520 inProjection = true; 521 super.visitSelectClause(node); 522 inProjection = false; 523 } 524 525 @Override 526 public void visitOrderByClause(OrderByClause node) { 527 inOrderBy = true; 528 super.visitOrderByClause(node); 529 inOrderBy = false; 530 } 531 532 @Override 533 public void visitReference(Reference ref) { 534 if (inProjection) { 535 String name = ref.name; 536 if (name.endsWith("*")) { 537 projectionWildcards.add(name); 538 if (name.endsWith("/*")) { 539 uncorrelatedProjectionWildcards.add(name); 540 } 541 } 542 } 543 } 544 } 545 546 /** 547 * Renames references if they are in the projection part. 548 */ 549 protected static class ProjectionReferenceRenamer extends IdentityQueryTransformer { 550 551 protected final Map<String, String> map; 552 553 protected boolean inProjection; 554 555 public ProjectionReferenceRenamer(Map<String, String> map) { 556 this.map = map; 557 } 558 559 @Override 560 public SelectClause transform(SelectClause node) { 561 inProjection = true; 562 node = super.transform(node); 563 inProjection = false; 564 return node; 565 } 566 567 @Override 568 public Reference transform(Reference node) { 569 if (!inProjection) { 570 return node; 571 } 572 String name = node.name; 573 String newName = map.getOrDefault(name, name); 574 Reference newReference = new Reference(newName, node.cast, node.esHint); 575 if (newReference.originalName == null) { 576 newReference.originalName = name; 577 } 578 newReference.info = node.info; 579 return newReference; 580 } 581 } 582 583 /** 584 * Info about a prefix: the prefix, and how many times it was encountered. 585 * 586 * @since 9.3 587 */ 588 public static class PrefixInfo { 589 590 public static final PrefixInfo EMPTY = new PrefixInfo("", 0); 591 592 public final String prefix; 593 594 public final int count; 595 596 public PrefixInfo(String prefix, int count) { 597 this.prefix = prefix; 598 this.count = count; 599 } 600 } 601 602 /** 603 * Analyzes references to compute common prefix info in order to later factor them in a parent expression. 604 * 605 * @since 9.3 606 */ 607 public class ReferencePrefixAnalyzer extends DefaultQueryVisitor { 608 609 @Override 610 public void visitReference(Reference node) { 611 super.visitReference(node); 612 processReference(node); 613 } 614 615 @Override 616 public void visitMultiExpression(MultiExpression node) { 617 super.visitMultiExpression(node); 618 processExpression(node, node.predicates); 619 } 620 621 @Override 622 public void visitExpression(Expression node) { 623 super.visitExpression(node); 624 processExpression(node, Arrays.asList(node.lvalue, node.rvalue)); 625 } 626 627 protected void processReference(Reference node) { 628 String prefix = getCorrelatedWildcardPrefix(node.name); 629 int count = prefix.isEmpty() ? 0 : 1; 630 node.setInfo(new PrefixInfo(prefix, count)); 631 } 632 633 protected void processExpression(Expression node, List<? extends Operand> operands) { 634 PrefixInfo commonInfo = null; 635 for (Operand operand : operands) { 636 // find longest prefix for the operand 637 PrefixInfo info; 638 if (operand instanceof Reference) { 639 Reference reference = (Reference) operand; 640 info = (PrefixInfo) reference.getInfo(); 641 } else if (operand instanceof Expression) { 642 Expression expression = (Expression) operand; 643 info = (PrefixInfo) expression.getInfo(); 644 } else { 645 info = null; 646 } 647 if (info != null) { 648 if (commonInfo == null) { 649 commonInfo = info; 650 } else if (commonInfo.prefix.equals(info.prefix)) { 651 commonInfo = new PrefixInfo(commonInfo.prefix, commonInfo.count + info.count); 652 } else { 653 commonInfo = PrefixInfo.EMPTY; 654 } 655 } 656 } 657 node.setInfo(commonInfo); 658 } 659 } 660 661 /** 662 * Gets the prefix to use for this reference name (NXQL) if it contains a correlated wildcard. 663 * <p> 664 * The prefix is used to group together sets of expression that all use references with the same prefix. 665 * 666 * @param name the reference name (NXQL) 667 * @return the prefix, or an empty string if there is no correlated wildcard 668 * @since 9.3 669 */ 670 public abstract String getCorrelatedWildcardPrefix(String name); 671 672}