001/* 002 * (C) Copyright 2014-2016 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019package org.nuxeo.ecm.core.storage; 020 021import static java.util.stream.Collectors.groupingBy; 022import static java.util.stream.Collectors.toList; 023 024import java.util.ArrayList; 025import java.util.Arrays; 026import java.util.Collection; 027import java.util.Collections; 028import java.util.HashMap; 029import java.util.HashSet; 030import java.util.Iterator; 031import java.util.LinkedHashMap; 032import java.util.List; 033import java.util.Map; 034import java.util.Map.Entry; 035import java.util.Set; 036import java.util.stream.Collector; 037 038import org.nuxeo.ecm.core.api.impl.FacetFilter; 039import org.nuxeo.ecm.core.query.QueryParseException; 040import org.nuxeo.ecm.core.query.sql.NXQL; 041import org.nuxeo.ecm.core.query.sql.model.DefaultQueryVisitor; 042import org.nuxeo.ecm.core.query.sql.model.Expression; 043import org.nuxeo.ecm.core.query.sql.model.FromClause; 044import org.nuxeo.ecm.core.query.sql.model.FromList; 045import org.nuxeo.ecm.core.query.sql.model.IdentityQueryTransformer; 046import org.nuxeo.ecm.core.query.sql.model.Literal; 047import org.nuxeo.ecm.core.query.sql.model.LiteralList; 048import org.nuxeo.ecm.core.query.sql.model.MultiExpression; 049import org.nuxeo.ecm.core.query.sql.model.Operand; 050import org.nuxeo.ecm.core.query.sql.model.Operator; 051import org.nuxeo.ecm.core.query.sql.model.OrderByClause; 052import org.nuxeo.ecm.core.query.sql.model.Reference; 053import org.nuxeo.ecm.core.query.sql.model.SQLQuery; 054import org.nuxeo.ecm.core.query.sql.model.SelectClause; 055import org.nuxeo.ecm.core.query.sql.model.StringLiteral; 056import org.nuxeo.ecm.core.query.sql.model.WhereClause; 057import org.nuxeo.ecm.core.schema.SchemaManager; 058import org.nuxeo.ecm.core.schema.types.Type; 059import org.nuxeo.runtime.api.Framework; 060 061/** 062 * Generic optimizer for a NXQL query. 063 * 064 * @since 5.9.4 065 */ 066public abstract class QueryOptimizer { 067 068 public static final String TYPE_ROOT = "Root"; 069 070 public static final String TYPE_DOCUMENT = "Document"; 071 072 public static final String TYPE_RELATION = "Relation"; 073 074 protected static final int CORR_BASE = 100_000; 075 076 protected FacetFilter facetFilter; 077 078 protected final SchemaManager schemaManager; 079 080 protected final Set<String> neverPerInstanceMixins; 081 082 protected int correlationCounter; 083 084 /** Do we match only relations? */ 085 protected boolean onlyRelations; 086 087 // group by prefix, keeping order 088 protected static Collector<Expression, ?, Map<String, List<Expression>>> GROUPING_BY_EXPR_PREFIX = groupingBy( 089 QueryOptimizer::getExpressionPrefix, LinkedHashMap::new, toList()); 090 091 public QueryOptimizer() { 092 // schemaManager may be null in unit tests 093 schemaManager = Framework.getService(SchemaManager.class); 094 Set<String> facets = schemaManager == null ? Collections.emptySet() 095 : schemaManager.getNoPerDocumentQueryFacets(); 096 neverPerInstanceMixins = new HashSet<>(facets); 097 } 098 099 public QueryOptimizer withFacetFilter(FacetFilter facetFilter) { 100 this.facetFilter = facetFilter; 101 return this; 102 } 103 104 /** 105 * Optimizes a query to provide a WHERE clause containing facet filters, primary and mixin types. In addition, the 106 * top-level AND clauses are analyzed to provide prefix info. 107 */ 108 public SQLQuery optimize(SQLQuery query) { 109 // rewrite some uncorrelated wildcards and add NOT NULL clauses for projection ones 110 query = addWildcardNotNullClauses(query); 111 112 List<Expression> clauses = new ArrayList<>(); 113 addFacetFilters(clauses, facetFilter); 114 addTypes(clauses, query.from); 115 addWhere(clauses, query.where); 116 simplifyTypes(clauses); 117 MultiExpression multiExpression = MultiExpression.fromExpressionList(Operator.AND, clauses); 118 119 // collect information about common reference prefixes (stored on Reference and Expression) 120 new ReferencePrefixAnalyzer().visitMultiExpression(multiExpression); 121 122 Expression whereExpr; 123 PrefixInfo info = (PrefixInfo) multiExpression.getInfo(); 124 if (!info.prefix.isEmpty()) { 125 // all references have a common prefix 126 whereExpr = multiExpression; 127 } else { 128 // do grouping by common prefix 129 Map<String, List<Expression>> grouped = clauses.stream().collect(GROUPING_BY_EXPR_PREFIX); 130 Map<String, Expression> groupedExpressions = new LinkedHashMap<>(); 131 for (Entry<String, List<Expression>> en : grouped.entrySet()) { 132 String prefix = en.getKey(); 133 List<Expression> list = en.getValue(); 134 groupedExpressions.put(prefix, makeSingleAndExpression(prefix, list)); 135 } 136 137 // potentially reorganize into nested grouping 138 reorganizeGroupedExpressions(groupedExpressions); 139 140 List<Expression> expressions = new ArrayList<>(groupedExpressions.values()); 141 whereExpr = makeSingleAndExpression("", expressions); 142 } 143 144 return query.withWhereExpression(whereExpr); 145 } 146 147 /** 148 * Makes a single AND expression from several expressions known to have a common prefix. 149 */ 150 public static Expression makeSingleAndExpression(String prefix, List<Expression> exprs) { 151 if (exprs.size() == 1) { 152 return exprs.get(0); 153 } else { 154 int count = prefix.isEmpty() ? 0 : exprs.stream().mapToInt(QueryOptimizer::getExpressionCount).sum(); 155 Expression e = MultiExpression.fromExpressionList(Operator.AND, exprs); 156 e.setInfo(new PrefixInfo(prefix, count)); 157 return e; 158 } 159 } 160 161 protected static String getExpressionPrefix(Expression expr) { 162 PrefixInfo info = (PrefixInfo) expr.getInfo(); 163 return info == null ? "" : info.prefix; 164 } 165 166 protected static int getExpressionCount(Expression expr) { 167 PrefixInfo info = (PrefixInfo) expr.getInfo(); 168 return info == null ? 0 : info.count; 169 } 170 171 /** 172 * Reorganizes the grouped expressions in order to have 2-level nesting in case a group is a prefix of another. 173 * 174 * @since 9.3 175 */ 176 public static void reorganizeGroupedExpressions(Map<String, Expression> groupedExpressions) { 177 if (groupedExpressions.size() > 1) { 178 List<String> keys = new ArrayList<>(groupedExpressions.keySet()); 179 List<String> withPrefix = new ArrayList<>(); 180 String prefix = findPrefix(keys, withPrefix); 181 if (prefix != null) { 182 // first part, the expression corresponding to the prefix 183 Expression first = groupedExpressions.remove(prefix); 184 185 // second part, all those that had that prefix 186 List<Expression> exprs = new ArrayList<>(); 187 for (String k : withPrefix) { 188 exprs.add(groupedExpressions.remove(k)); 189 } 190 String secondPrefix; 191 if (withPrefix.size() == 1) { 192 secondPrefix = withPrefix.get(0); 193 } else { 194 throw new QueryParseException("Too complex correlated wildcards in query: " + groupedExpressions); 195 } 196 Expression second = makeSingleAndExpression(secondPrefix, exprs); 197 198 // finally bring them all together 199 Expression expr = makeSingleAndExpression(prefix, Arrays.asList(first, second)); 200 groupedExpressions.put(prefix, expr); 201 } 202 } 203 } 204 205 /** 206 * Finds a non-empty prefix in the strings. 207 * <p> 208 * If a prefix is found, the other strings having it as a prefix are collected in {@code withPrefix}, and the prefix 209 * and the found strings are moved from the input {@code string}. 210 * <p> 211 * {@code strings} and {@code withPrefix} must both be ArrayLists as they will be mutated and queried by index. 212 * 213 * @param strings the input strings 214 * @param withPrefix (return value) the strings that have the found prefix as a prefix 215 * @return the prefix if found, or null if not 216 * @since 9.3 217 */ 218 public static String findPrefix(List<String> strings, List<String> withPrefix) { 219 // naive algorithm as list size is very small 220 String prefix = null; 221 ALL: // 222 for (int i = 0; i < strings.size(); i++) { 223 String candidate = strings.get(i); 224 if (candidate.isEmpty()) { 225 continue; 226 } 227 for (int j = 0; j < strings.size(); j++) { 228 if (i == j) { 229 continue; 230 } 231 String s = strings.get(j); 232 if (s.isEmpty()) { 233 continue; 234 } 235 if (s.startsWith(candidate + '/')) { 236 prefix = candidate; 237 break ALL; 238 } 239 } 240 } 241 if (prefix != null) { 242 for (Iterator<String> it = strings.iterator(); it.hasNext();) { 243 String s = it.next(); 244 if (s.isEmpty()) { 245 continue; 246 } 247 if (s.equals(prefix)) { 248 it.remove(); // will be returned as method result 249 continue; 250 } 251 if (s.startsWith(prefix + '/')) { 252 it.remove(); // will be returned in withPrefix 253 withPrefix.add(s); 254 } 255 } 256 } 257 return prefix; 258 } 259 260 protected void addFacetFilters(List<Expression> clauses, FacetFilter facetFilter) { 261 if (facetFilter == null) { 262 return; 263 } 264 for (String mixin : facetFilter.required) { 265 // every facet is required, not just any of them, 266 // so do them one by one 267 Expression expr = new Expression(new Reference(NXQL.ECM_MIXINTYPE), Operator.EQ, new StringLiteral(mixin)); 268 clauses.add(expr); 269 } 270 if (!facetFilter.excluded.isEmpty()) { 271 LiteralList list = new LiteralList(); 272 for (String mixin : facetFilter.excluded) { 273 list.add(new StringLiteral(mixin)); 274 } 275 Expression expr = new Expression(new Reference(NXQL.ECM_MIXINTYPE), Operator.NOTIN, list); 276 clauses.add(expr); 277 } 278 } 279 280 // methods using SchemaManager easily overrideable for easier testing 281 282 protected Set<String> getDocumentTypeNamesForFacet(String mixin) { 283 Set<String> types = schemaManager.getDocumentTypeNamesForFacet(mixin); 284 if (types == null) { 285 // unknown mixin 286 types = Collections.emptySet(); 287 } 288 return types; 289 } 290 291 protected Set<String> getDocumentTypeNamesExtending(String typeName) { 292 Set<String> types = schemaManager.getDocumentTypeNamesExtending(typeName); 293 if (types == null) { 294 throw new RuntimeException("Unknown type: " + typeName); 295 } 296 return types; 297 } 298 299 protected boolean isTypeRelation(String typeName) { 300 do { 301 if (TYPE_RELATION.equals(typeName)) { 302 return true; 303 } 304 Type t = schemaManager.getDocumentType(typeName); 305 if (t != null) { 306 t = t.getSuperType(); 307 } 308 typeName = t == null ? null : t.getName(); 309 } while (typeName != null); 310 return false; 311 } 312 313 /** 314 * Finds all the types to take into account (all concrete types being a subtype of the passed types) based on the 315 * FROM list. 316 * <p> 317 * Adds them as a ecm:primaryType match in the toplevel operands. 318 */ 319 protected void addTypes(List<Expression> clauses, FromClause node) { 320 onlyRelations = true; 321 Set<String> fromTypes = new HashSet<>(); 322 FromList elements = node.elements; 323 for (String typeName : elements.values()) { 324 if (TYPE_DOCUMENT.equalsIgnoreCase(typeName)) { 325 typeName = TYPE_DOCUMENT; 326 } 327 fromTypes.addAll(getDocumentTypeNamesExtending(typeName)); 328 boolean isRelation = isTypeRelation(typeName); 329 onlyRelations = onlyRelations && isRelation; 330 } 331 fromTypes.remove(TYPE_ROOT); 332 LiteralList list = new LiteralList(); 333 for (String type : fromTypes) { 334 list.add(new StringLiteral(type)); 335 } 336 clauses.add(new Expression(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.IN, list)); 337 } 338 339 /** 340 * Adds a flattened version of all toplevel ANDed WHERE clauses. 341 */ 342 protected void addWhere(List<Expression> clauses, WhereClause where) { 343 if (where != null) { 344 addWhere(clauses, where.predicate); 345 } 346 } 347 348 protected void addWhere(List<Expression> clauses, Expression expr) { 349 if (expr.operator == Operator.AND && expr.lvalue instanceof Expression && expr.rvalue instanceof Expression) { 350 addWhere(clauses, (Expression) expr.lvalue); 351 addWhere(clauses, (Expression) expr.rvalue); 352 } else if (expr.operator == Operator.AND && expr instanceof MultiExpression) { 353 List<Operand> remainingOperands = new ArrayList<>(); 354 for (Operand oper : ((MultiExpression) expr).values) { 355 if (oper instanceof Expression) { 356 addWhere(clauses, (Expression) oper); 357 } else { 358 remainingOperands.add(oper); 359 } 360 } 361 if (!remainingOperands.isEmpty()) { 362 clauses.add(new MultiExpression(Operator.AND, remainingOperands)); 363 } 364 } else { 365 clauses.add(expr); 366 } 367 } 368 369 /** 370 * Simplify ecm:primaryType positive references, and non-per-instance mixin types. 371 */ 372 protected void simplifyTypes(List<Expression> clauses) { 373 Set<String> primaryTypes = null; // if defined, required 374 for (Iterator<Expression> it = clauses.iterator(); it.hasNext();) { 375 // whenever we don't know how to optimize the expression, 376 // we just continue the loop 377 Expression expr = it.next(); 378 if (!(expr.lvalue instanceof Reference)) { 379 continue; 380 } 381 String name = ((Reference) expr.lvalue).name; 382 Operator op = expr.operator; 383 Operand rvalue = expr.rvalue; 384 if (NXQL.ECM_PRIMARYTYPE.equals(name)) { 385 if (op != Operator.EQ && op != Operator.IN) { 386 continue; 387 } 388 Set<String> set; 389 if (op == Operator.EQ) { 390 if (!(rvalue instanceof StringLiteral)) { 391 continue; 392 } 393 String primaryType = ((StringLiteral) rvalue).value; 394 set = new HashSet<>(Collections.singleton(primaryType)); 395 } else { // Operator.IN 396 if (!(rvalue instanceof LiteralList)) { 397 continue; 398 } 399 set = getStringLiterals((LiteralList) rvalue); 400 } 401 if (primaryTypes == null) { 402 primaryTypes = set; 403 } else { 404 primaryTypes.retainAll(set); 405 } 406 it.remove(); // expression simplified into primaryTypes set 407 } else if (NXQL.ECM_MIXINTYPE.equals(name)) { 408 if (op != Operator.EQ && op != Operator.NOTEQ) { 409 continue; 410 } 411 if (!(rvalue instanceof StringLiteral)) { 412 continue; 413 } 414 String mixin = ((StringLiteral) rvalue).value; 415 if (!neverPerInstanceMixins.contains(mixin)) { 416 // mixin per instance -> primary type checks not enough 417 continue; 418 } 419 Set<String> set = getDocumentTypeNamesForFacet(mixin); 420 if (primaryTypes == null) { 421 if (op == Operator.EQ) { 422 primaryTypes = new HashSet<>(set); // copy 423 } else { 424 continue; // unknown positive, no optimization 425 } 426 } else { 427 if (op == Operator.EQ) { 428 primaryTypes.retainAll(set); 429 } else { 430 primaryTypes.removeAll(set); 431 } 432 } 433 it.remove(); // expression simplified into primaryTypes set 434 } 435 } 436 // readd the simplified primary types constraints 437 if (primaryTypes != null) { 438 if (primaryTypes.isEmpty()) { 439 // TODO better removal 440 primaryTypes.add("__NOSUCHTYPE__"); 441 } 442 Expression expr; 443 if (primaryTypes.size() == 1) { 444 String pt = primaryTypes.iterator().next(); 445 expr = new Expression(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.EQ, new StringLiteral(pt)); 446 } else { // primaryTypes.size() > 1 447 LiteralList list = new LiteralList(); 448 for (String pt : primaryTypes) { 449 list.add(new StringLiteral(pt)); 450 } 451 expr = new Expression(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.IN, list); 452 } 453 clauses.add(expr); 454 } 455 } 456 457 protected static Set<String> getStringLiterals(LiteralList list) { 458 Set<String> set = new HashSet<>(); 459 for (Literal literal : list) { 460 if (!(literal instanceof StringLiteral)) { 461 throw new RuntimeException("requires string literals"); 462 } 463 set.add(((StringLiteral) literal).value); 464 } 465 return set; 466 } 467 468 /** 469 * If we have a query like {@code SELECT dc:subjects/* FROM ...} then we must make sure we don't match documents 470 * that don't have a {@code dc:subjects} at all, as this would make the evaluator return one {@code null} value for 471 * each due to its semantics of doing the equivalent of LEFT JOINs. 472 * <p> 473 * To prevent this, we add a clause {@code ... AND dc:subjects/* IS NOT NULL}. 474 * <p> 475 * For correlated wildcards this is enough, but for uncorrelated wildcards we must avoid adding extra JOINs, so we 476 * must artificially correlated them. This requires rewriting the query with correlated wildcards instead of 477 * uncorrelated ones. 478 */ 479 protected SQLQuery addWildcardNotNullClauses(SQLQuery query) { 480 ProjectionWildcardsFinder finder = new ProjectionWildcardsFinder(); 481 finder.visitQuery(query); 482 483 // find wildcards in the projection 484 Set<String> wildcards = finder.projectionWildcards; 485 Set<String> uncorrelatedWildcards = finder.uncorrelatedProjectionWildcards; 486 if (!uncorrelatedWildcards.isEmpty()) { 487 // rename uncorrelated wildcards to unique correlation names 488 Map<String, String> map = new HashMap<>(uncorrelatedWildcards.size()); 489 for (String name : uncorrelatedWildcards) { 490 String newName = name + (CORR_BASE + correlationCounter++); 491 map.put(name, newName); 492 wildcards.remove(name); 493 wildcards.add(newName); 494 } 495 // rename them in the whole query 496 query = new ProjectionReferenceRenamer(map).transform(query); 497 } 498 499 // add IS NOT NULL clauses for all projection wildcards (now all correlated) 500 if (!wildcards.isEmpty()) { 501 query = addIsNotNullClauses(query, wildcards); 502 } 503 504 return query; 505 } 506 507 protected SQLQuery addIsNotNullClauses(SQLQuery query, Collection<String> names) { 508 List<Operand> values = names.stream() 509 .map(name -> new Expression(new Reference(name), Operator.ISNOTNULL, null)) 510 .collect(toList()); 511 Expression expr = new Expression(query.where.predicate, Operator.AND, 512 new MultiExpression(Operator.AND, values)); 513 return query.withWhereExpression(expr); 514 } 515 516 protected static class ProjectionWildcardsFinder extends DefaultQueryVisitor { 517 518 private static final long serialVersionUID = 1L; 519 520 protected final Set<String> projectionWildcards = new HashSet<>(); 521 522 protected final Set<String> uncorrelatedProjectionWildcards = new HashSet<>(); 523 524 protected boolean inProjection; 525 526 protected boolean inOrderBy; 527 528 @Override 529 public void visitSelectClause(SelectClause node) { 530 inProjection = true; 531 super.visitSelectClause(node); 532 inProjection = false; 533 } 534 535 @Override 536 public void visitOrderByClause(OrderByClause node) { 537 inOrderBy = true; 538 super.visitOrderByClause(node); 539 inOrderBy = false; 540 } 541 542 @Override 543 public void visitReference(Reference ref) { 544 if (inProjection) { 545 String name = ref.name; 546 if (name.endsWith("*")) { 547 projectionWildcards.add(name); 548 if (name.endsWith("/*")) { 549 uncorrelatedProjectionWildcards.add(name); 550 } 551 } 552 } 553 } 554 } 555 556 /** 557 * Renames references if they are in the projection part. 558 */ 559 protected static class ProjectionReferenceRenamer extends IdentityQueryTransformer { 560 561 protected final Map<String, String> map; 562 563 protected boolean inProjection; 564 565 public ProjectionReferenceRenamer(Map<String, String> map) { 566 this.map = map; 567 } 568 569 @Override 570 public SelectClause transform(SelectClause node) { 571 inProjection = true; 572 node = super.transform(node); 573 inProjection = false; 574 return node; 575 } 576 577 @Override 578 public Reference transform(Reference node) { 579 if (!inProjection) { 580 return node; 581 } 582 String name = node.name; 583 String newName = map.getOrDefault(name, name); 584 Reference newReference = new Reference(newName, node.cast, node.esHint); 585 if (newReference.originalName == null) { 586 newReference.originalName = name; 587 } 588 newReference.info = node.info; 589 return newReference; 590 } 591 } 592 593 /** 594 * Info about a prefix: the prefix, and how many times it was encountered. 595 * 596 * @since 9.3 597 */ 598 public static class PrefixInfo { 599 600 public static final PrefixInfo EMPTY = new PrefixInfo("", 0); 601 602 public final String prefix; 603 604 public final int count; 605 606 public PrefixInfo(String prefix, int count) { 607 this.prefix = prefix; 608 this.count = count; 609 } 610 } 611 612 /** 613 * Analyzes references to compute common prefix info in order to later factor them in a parent expression. 614 * 615 * @since 9.3 616 */ 617 public class ReferencePrefixAnalyzer extends DefaultQueryVisitor { 618 619 private static final long serialVersionUID = 1L; 620 621 @Override 622 public void visitReference(Reference node) { 623 super.visitReference(node); 624 processReference(node); 625 } 626 627 @Override 628 public void visitMultiExpression(MultiExpression node) { 629 super.visitMultiExpression(node); 630 processExpression(node, node.values); 631 } 632 633 @Override 634 public void visitExpression(Expression node) { 635 super.visitExpression(node); 636 processExpression(node, Arrays.asList(node.lvalue, node.rvalue)); 637 } 638 639 protected void processReference(Reference node) { 640 String prefix = getCorrelatedWildcardPrefix(node.name); 641 int count = prefix.isEmpty() ? 0 : 1; 642 node.setInfo(new PrefixInfo(prefix, count)); 643 } 644 645 protected void processExpression(Expression node, List<Operand> operands) { 646 PrefixInfo commonInfo = null; 647 for (Operand operand : operands) { 648 // find longest prefix for the operand 649 PrefixInfo info; 650 if (operand instanceof Reference) { 651 Reference reference = (Reference) operand; 652 info = (PrefixInfo) reference.getInfo(); 653 } else if (operand instanceof Expression) { 654 Expression expression = (Expression) operand; 655 info = (PrefixInfo) expression.getInfo(); 656 } else { 657 info = null; 658 } 659 if (info != null) { 660 if (commonInfo == null) { 661 commonInfo = info; 662 } else if (commonInfo.prefix.equals(info.prefix)) { 663 commonInfo = new PrefixInfo(commonInfo.prefix, commonInfo.count + info.count); 664 } else { 665 commonInfo = PrefixInfo.EMPTY; 666 } 667 } 668 } 669 node.setInfo(commonInfo); 670 } 671 } 672 673 /** 674 * Gets the prefix to use for this reference name (NXQL) if it contains a correlated wildcard. 675 * <p> 676 * The prefix is used to group together sets of expression that all use references with the same prefix. 677 * 678 * @param name the reference name (NXQL) 679 * @return the prefix, or an empty string if there is no correlated wildcard 680 * @since 9.3 681 */ 682 public abstract String getCorrelatedWildcardPrefix(String name); 683 684}