001/*
002 * (C) Copyright 2014-2016 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage;
020
021import static java.util.stream.Collectors.groupingBy;
022import static java.util.stream.Collectors.toList;
023
024import java.util.ArrayList;
025import java.util.Arrays;
026import java.util.Collection;
027import java.util.Collections;
028import java.util.HashMap;
029import java.util.HashSet;
030import java.util.Iterator;
031import java.util.LinkedHashMap;
032import java.util.List;
033import java.util.Map;
034import java.util.Map.Entry;
035import java.util.Set;
036import java.util.stream.Collector;
037
038import org.nuxeo.ecm.core.api.impl.FacetFilter;
039import org.nuxeo.ecm.core.query.QueryParseException;
040import org.nuxeo.ecm.core.query.sql.NXQL;
041import org.nuxeo.ecm.core.query.sql.model.DefaultQueryVisitor;
042import org.nuxeo.ecm.core.query.sql.model.Expression;
043import org.nuxeo.ecm.core.query.sql.model.FromClause;
044import org.nuxeo.ecm.core.query.sql.model.FromList;
045import org.nuxeo.ecm.core.query.sql.model.IdentityQueryTransformer;
046import org.nuxeo.ecm.core.query.sql.model.Literal;
047import org.nuxeo.ecm.core.query.sql.model.LiteralList;
048import org.nuxeo.ecm.core.query.sql.model.MultiExpression;
049import org.nuxeo.ecm.core.query.sql.model.Operand;
050import org.nuxeo.ecm.core.query.sql.model.Operator;
051import org.nuxeo.ecm.core.query.sql.model.OrderByClause;
052import org.nuxeo.ecm.core.query.sql.model.Reference;
053import org.nuxeo.ecm.core.query.sql.model.SQLQuery;
054import org.nuxeo.ecm.core.query.sql.model.SelectClause;
055import org.nuxeo.ecm.core.query.sql.model.StringLiteral;
056import org.nuxeo.ecm.core.query.sql.model.WhereClause;
057import org.nuxeo.ecm.core.schema.SchemaManager;
058import org.nuxeo.ecm.core.schema.types.Type;
059import org.nuxeo.runtime.api.Framework;
060
061/**
062 * Generic optimizer for a NXQL query.
063 *
064 * @since 5.9.4
065 */
066public abstract class QueryOptimizer {
067
068    public static final String TYPE_ROOT = "Root";
069
070    public static final String TYPE_DOCUMENT = "Document";
071
072    public static final String TYPE_RELATION = "Relation";
073
074    protected static final int CORR_BASE = 100_000;
075
076    protected FacetFilter facetFilter;
077
078    protected final SchemaManager schemaManager;
079
080    protected final Set<String> neverPerInstanceMixins;
081
082    protected int correlationCounter;
083
084    /** Do we match only relations? */
085    protected boolean onlyRelations;
086
087    // group by prefix, keeping order
088    protected static Collector<Expression, ?, Map<String, List<Expression>>> GROUPING_BY_EXPR_PREFIX = groupingBy(
089            QueryOptimizer::getExpressionPrefix, LinkedHashMap::new, toList());
090
091    public QueryOptimizer() {
092        // schemaManager may be null in unit tests
093        schemaManager = Framework.getService(SchemaManager.class);
094        Set<String> facets = schemaManager == null ? Collections.emptySet()
095                : schemaManager.getNoPerDocumentQueryFacets();
096        neverPerInstanceMixins = new HashSet<>(facets);
097    }
098
099    public QueryOptimizer withFacetFilter(FacetFilter facetFilter) {
100        this.facetFilter = facetFilter;
101        return this;
102    }
103
104    /**
105     * Optimizes a query to provide a WHERE clause containing facet filters, primary and mixin types. In addition, the
106     * top-level AND clauses are analyzed to provide prefix info.
107     */
108    public SQLQuery optimize(SQLQuery query) {
109        // rewrite some uncorrelated wildcards and add NOT NULL clauses for projection ones
110        query = addWildcardNotNullClauses(query);
111
112        List<Expression> clauses = new ArrayList<>();
113        addFacetFilters(clauses, facetFilter);
114        addTypes(clauses, query.from);
115        addWhere(clauses, query.where);
116        simplifyTypes(clauses);
117        MultiExpression multiExpression = MultiExpression.fromExpressionList(Operator.AND, clauses);
118
119        // collect information about common reference prefixes (stored on Reference and Expression)
120        new ReferencePrefixAnalyzer().visitMultiExpression(multiExpression);
121
122        Expression whereExpr;
123        PrefixInfo info = (PrefixInfo) multiExpression.getInfo();
124        if (!info.prefix.isEmpty()) {
125            // all references have a common prefix
126            whereExpr = multiExpression;
127        } else {
128            // do grouping by common prefix
129            Map<String, List<Expression>> grouped = clauses.stream().collect(GROUPING_BY_EXPR_PREFIX);
130            Map<String, Expression> groupedExpressions = new LinkedHashMap<>();
131            for (Entry<String, List<Expression>> en : grouped.entrySet()) {
132                String prefix = en.getKey();
133                List<Expression> list = en.getValue();
134                groupedExpressions.put(prefix, makeSingleAndExpression(prefix, list));
135            }
136
137            // potentially reorganize into nested grouping
138            reorganizeGroupedExpressions(groupedExpressions);
139
140            List<Expression> expressions = new ArrayList<>(groupedExpressions.values());
141            whereExpr = makeSingleAndExpression("", expressions);
142        }
143
144        return query.withWhereExpression(whereExpr);
145    }
146
147    /**
148     * Makes a single AND expression from several expressions known to have a common prefix.
149     */
150    public static Expression makeSingleAndExpression(String prefix, List<Expression> exprs) {
151        if (exprs.size() == 1) {
152            return exprs.get(0);
153        } else {
154            int count = prefix.isEmpty() ? 0 : exprs.stream().mapToInt(QueryOptimizer::getExpressionCount).sum();
155            Expression e = MultiExpression.fromExpressionList(Operator.AND, exprs);
156            e.setInfo(new PrefixInfo(prefix, count));
157            return e;
158        }
159    }
160
161    protected static String getExpressionPrefix(Expression expr) {
162        PrefixInfo info = (PrefixInfo) expr.getInfo();
163        return info == null ? "" : info.prefix;
164    }
165
166    protected static int getExpressionCount(Expression expr) {
167        PrefixInfo info = (PrefixInfo) expr.getInfo();
168        return info == null ? 0 : info.count;
169    }
170
171    /**
172     * Reorganizes the grouped expressions in order to have 2-level nesting in case a group is a prefix of another.
173     *
174     * @since 9.3
175     */
176    public static void reorganizeGroupedExpressions(Map<String, Expression> groupedExpressions) {
177        if (groupedExpressions.size() > 1) {
178            List<String> keys = new ArrayList<>(groupedExpressions.keySet());
179            List<String> withPrefix = new ArrayList<>();
180            String prefix = findPrefix(keys, withPrefix);
181            if (prefix != null) {
182                // first part, the expression corresponding to the prefix
183                Expression first = groupedExpressions.remove(prefix);
184
185                // second part, all those that had that prefix
186                List<Expression> exprs = new ArrayList<>();
187                for (String k : withPrefix) {
188                    exprs.add(groupedExpressions.remove(k));
189                }
190                String secondPrefix;
191                if (withPrefix.size() == 1) {
192                    secondPrefix = withPrefix.get(0);
193                } else {
194                    throw new QueryParseException("Too complex correlated wildcards in query: " + groupedExpressions);
195                }
196                Expression second = makeSingleAndExpression(secondPrefix, exprs);
197
198                // finally bring them all together
199                Expression expr = makeSingleAndExpression(prefix, Arrays.asList(first, second));
200                groupedExpressions.put(prefix, expr);
201            }
202        }
203    }
204
205    /**
206     * Finds a non-empty prefix in the strings.
207     * <p>
208     * If a prefix is found, the other strings having it as a prefix are collected in {@code withPrefix}, and the prefix
209     * and the found strings are moved from the input {@code string}.
210     * <p>
211     * {@code strings} and {@code withPrefix} must both be ArrayLists as they will be mutated and queried by index.
212     *
213     * @param strings the input strings
214     * @param withPrefix (return value) the strings that have the found prefix as a prefix
215     * @return the prefix if found, or null if not
216     * @since 9.3
217     */
218    public static String findPrefix(List<String> strings, List<String> withPrefix) {
219        // naive algorithm as list size is very small
220        String prefix = null;
221        ALL: //
222        for (int i = 0; i < strings.size(); i++) {
223            String candidate = strings.get(i);
224            if (candidate.isEmpty()) {
225                continue;
226            }
227            for (int j = 0; j < strings.size(); j++) {
228                if (i == j) {
229                    continue;
230                }
231                String s = strings.get(j);
232                if (s.isEmpty()) {
233                    continue;
234                }
235                if (s.startsWith(candidate + '/')) {
236                    prefix = candidate;
237                    break ALL;
238                }
239            }
240        }
241        if (prefix != null) {
242            for (Iterator<String> it = strings.iterator(); it.hasNext();) {
243                String s = it.next();
244                if (s.isEmpty()) {
245                    continue;
246                }
247                if (s.equals(prefix)) {
248                    it.remove(); // will be returned as method result
249                    continue;
250                }
251                if (s.startsWith(prefix + '/')) {
252                    it.remove(); // will be returned in withPrefix
253                    withPrefix.add(s);
254                }
255            }
256        }
257        return prefix;
258    }
259
260    protected void addFacetFilters(List<Expression> clauses, FacetFilter facetFilter) {
261        if (facetFilter == null) {
262            return;
263        }
264        for (String mixin : facetFilter.required) {
265            // every facet is required, not just any of them,
266            // so do them one by one
267            Expression expr = new Expression(new Reference(NXQL.ECM_MIXINTYPE), Operator.EQ, new StringLiteral(mixin));
268            clauses.add(expr);
269        }
270        if (!facetFilter.excluded.isEmpty()) {
271            LiteralList list = new LiteralList();
272            for (String mixin : facetFilter.excluded) {
273                list.add(new StringLiteral(mixin));
274            }
275            Expression expr = new Expression(new Reference(NXQL.ECM_MIXINTYPE), Operator.NOTIN, list);
276            clauses.add(expr);
277        }
278    }
279
280    // methods using SchemaManager easily overrideable for easier testing
281
282    protected Set<String> getDocumentTypeNamesForFacet(String mixin) {
283        Set<String> types = schemaManager.getDocumentTypeNamesForFacet(mixin);
284        if (types == null) {
285            // unknown mixin
286            types = Collections.emptySet();
287        }
288        return types;
289    }
290
291    protected Set<String> getDocumentTypeNamesExtending(String typeName) {
292        Set<String> types = schemaManager.getDocumentTypeNamesExtending(typeName);
293        if (types == null) {
294            throw new RuntimeException("Unknown type: " + typeName);
295        }
296        return types;
297    }
298
299    protected boolean isTypeRelation(String typeName) {
300        do {
301            if (TYPE_RELATION.equals(typeName)) {
302                return true;
303            }
304            Type t = schemaManager.getDocumentType(typeName);
305            if (t != null) {
306                t = t.getSuperType();
307            }
308            typeName = t == null ? null : t.getName();
309        } while (typeName != null);
310        return false;
311    }
312
313    /**
314     * Finds all the types to take into account (all concrete types being a subtype of the passed types) based on the
315     * FROM list.
316     * <p>
317     * Adds them as a ecm:primaryType match in the toplevel operands.
318     */
319    protected void addTypes(List<Expression> clauses, FromClause node) {
320        onlyRelations = true;
321        Set<String> fromTypes = new HashSet<>();
322        FromList elements = node.elements;
323        for (String typeName : elements.values()) {
324            if (TYPE_DOCUMENT.equalsIgnoreCase(typeName)) {
325                typeName = TYPE_DOCUMENT;
326            }
327            fromTypes.addAll(getDocumentTypeNamesExtending(typeName));
328            boolean isRelation = isTypeRelation(typeName);
329            onlyRelations = onlyRelations && isRelation;
330        }
331        fromTypes.remove(TYPE_ROOT);
332        LiteralList list = new LiteralList();
333        for (String type : fromTypes) {
334            list.add(new StringLiteral(type));
335        }
336        clauses.add(new Expression(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.IN, list));
337    }
338
339    /**
340     * Adds a flattened version of all toplevel ANDed WHERE clauses.
341     */
342    protected void addWhere(List<Expression> clauses, WhereClause where) {
343        if (where != null) {
344            addWhere(clauses, where.predicate);
345        }
346    }
347
348    protected void addWhere(List<Expression> clauses, Expression expr) {
349        if (expr.operator == Operator.AND && expr.lvalue instanceof Expression && expr.rvalue instanceof Expression) {
350            addWhere(clauses, (Expression) expr.lvalue);
351            addWhere(clauses, (Expression) expr.rvalue);
352        } else if (expr.operator == Operator.AND && expr instanceof MultiExpression) {
353            List<Operand> remainingOperands = new ArrayList<>();
354            for (Operand oper : ((MultiExpression) expr).values) {
355                if (oper instanceof Expression) {
356                    addWhere(clauses, (Expression) oper);
357                } else {
358                    remainingOperands.add(oper);
359                }
360            }
361            if (!remainingOperands.isEmpty()) {
362                clauses.add(new MultiExpression(Operator.AND, remainingOperands));
363            }
364        } else {
365            clauses.add(expr);
366        }
367    }
368
369    /**
370     * Simplify ecm:primaryType positive references, and non-per-instance mixin types.
371     */
372    protected void simplifyTypes(List<Expression> clauses) {
373        Set<String> primaryTypes = null; // if defined, required
374        for (Iterator<Expression> it = clauses.iterator(); it.hasNext();) {
375            // whenever we don't know how to optimize the expression,
376            // we just continue the loop
377            Expression expr = it.next();
378            if (!(expr.lvalue instanceof Reference)) {
379                continue;
380            }
381            String name = ((Reference) expr.lvalue).name;
382            Operator op = expr.operator;
383            Operand rvalue = expr.rvalue;
384            if (NXQL.ECM_PRIMARYTYPE.equals(name)) {
385                if (op != Operator.EQ && op != Operator.IN) {
386                    continue;
387                }
388                Set<String> set;
389                if (op == Operator.EQ) {
390                    if (!(rvalue instanceof StringLiteral)) {
391                        continue;
392                    }
393                    String primaryType = ((StringLiteral) rvalue).value;
394                    set = new HashSet<>(Collections.singleton(primaryType));
395                } else { // Operator.IN
396                    if (!(rvalue instanceof LiteralList)) {
397                        continue;
398                    }
399                    set = getStringLiterals((LiteralList) rvalue);
400                }
401                if (primaryTypes == null) {
402                    primaryTypes = set;
403                } else {
404                    primaryTypes.retainAll(set);
405                }
406                it.remove(); // expression simplified into primaryTypes set
407            } else if (NXQL.ECM_MIXINTYPE.equals(name)) {
408                if (op != Operator.EQ && op != Operator.NOTEQ) {
409                    continue;
410                }
411                if (!(rvalue instanceof StringLiteral)) {
412                    continue;
413                }
414                String mixin = ((StringLiteral) rvalue).value;
415                if (!neverPerInstanceMixins.contains(mixin)) {
416                    // mixin per instance -> primary type checks not enough
417                    continue;
418                }
419                Set<String> set = getDocumentTypeNamesForFacet(mixin);
420                if (primaryTypes == null) {
421                    if (op == Operator.EQ) {
422                        primaryTypes = new HashSet<>(set); // copy
423                    } else {
424                        continue; // unknown positive, no optimization
425                    }
426                } else {
427                    if (op == Operator.EQ) {
428                        primaryTypes.retainAll(set);
429                    } else {
430                        primaryTypes.removeAll(set);
431                    }
432                }
433                it.remove(); // expression simplified into primaryTypes set
434            }
435        }
436        // readd the simplified primary types constraints
437        if (primaryTypes != null) {
438            if (primaryTypes.isEmpty()) {
439                // TODO better removal
440                primaryTypes.add("__NOSUCHTYPE__");
441            }
442            Expression expr;
443            if (primaryTypes.size() == 1) {
444                String pt = primaryTypes.iterator().next();
445                expr = new Expression(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.EQ, new StringLiteral(pt));
446            } else { // primaryTypes.size() > 1
447                LiteralList list = new LiteralList();
448                for (String pt : primaryTypes) {
449                    list.add(new StringLiteral(pt));
450                }
451                expr = new Expression(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.IN, list);
452            }
453            clauses.add(expr);
454        }
455    }
456
457    protected static Set<String> getStringLiterals(LiteralList list) {
458        Set<String> set = new HashSet<>();
459        for (Literal literal : list) {
460            if (!(literal instanceof StringLiteral)) {
461                throw new RuntimeException("requires string literals");
462            }
463            set.add(((StringLiteral) literal).value);
464        }
465        return set;
466    }
467
468    /**
469     * If we have a query like {@code SELECT dc:subjects/* FROM ...} then we must make sure we don't match documents
470     * that don't have a {@code dc:subjects} at all, as this would make the evaluator return one {@code null} value for
471     * each due to its semantics of doing the equivalent of LEFT JOINs.
472     * <p>
473     * To prevent this, we add a clause {@code ... AND dc:subjects/* IS NOT NULL}.
474     * <p>
475     * For correlated wildcards this is enough, but for uncorrelated wildcards we must avoid adding extra JOINs, so we
476     * must artificially correlated them. This requires rewriting the query with correlated wildcards instead of
477     * uncorrelated ones.
478     */
479    protected SQLQuery addWildcardNotNullClauses(SQLQuery query) {
480        ProjectionWildcardsFinder finder = new ProjectionWildcardsFinder();
481        finder.visitQuery(query);
482
483        // find wildcards in the projection
484        Set<String> wildcards = finder.projectionWildcards;
485        Set<String> uncorrelatedWildcards = finder.uncorrelatedProjectionWildcards;
486        if (!uncorrelatedWildcards.isEmpty()) {
487            // rename uncorrelated wildcards to unique correlation names
488            Map<String, String> map = new HashMap<>(uncorrelatedWildcards.size());
489            for (String name : uncorrelatedWildcards) {
490                String newName = name + (CORR_BASE + correlationCounter++);
491                map.put(name, newName);
492                wildcards.remove(name);
493                wildcards.add(newName);
494            }
495            // rename them in the whole query
496            query = new ProjectionReferenceRenamer(map).transform(query);
497        }
498
499        // add IS NOT NULL clauses for all projection wildcards (now all correlated)
500        if (!wildcards.isEmpty()) {
501            query = addIsNotNullClauses(query, wildcards);
502        }
503
504        return query;
505    }
506
507    protected SQLQuery addIsNotNullClauses(SQLQuery query, Collection<String> names) {
508        List<Operand> values = names.stream()
509                                    .map(name -> new Expression(new Reference(name), Operator.ISNOTNULL, null))
510                                    .collect(toList());
511        Expression expr = new Expression(query.where.predicate, Operator.AND,
512                new MultiExpression(Operator.AND, values));
513        return query.withWhereExpression(expr);
514    }
515
516    protected static class ProjectionWildcardsFinder extends DefaultQueryVisitor {
517
518        private static final long serialVersionUID = 1L;
519
520        protected final Set<String> projectionWildcards = new HashSet<>();
521
522        protected final Set<String> uncorrelatedProjectionWildcards = new HashSet<>();
523
524        protected boolean inProjection;
525
526        protected boolean inOrderBy;
527
528        @Override
529        public void visitSelectClause(SelectClause node) {
530            inProjection = true;
531            super.visitSelectClause(node);
532            inProjection = false;
533        }
534
535        @Override
536        public void visitOrderByClause(OrderByClause node) {
537            inOrderBy = true;
538            super.visitOrderByClause(node);
539            inOrderBy = false;
540        }
541
542        @Override
543        public void visitReference(Reference ref) {
544            if (inProjection) {
545                String name = ref.name;
546                if (name.endsWith("*")) {
547                    projectionWildcards.add(name);
548                    if (name.endsWith("/*")) {
549                        uncorrelatedProjectionWildcards.add(name);
550                    }
551                }
552            }
553        }
554    }
555
556    /**
557     * Renames references if they are in the projection part.
558     */
559    protected static class ProjectionReferenceRenamer extends IdentityQueryTransformer {
560
561        protected final Map<String, String> map;
562
563        protected boolean inProjection;
564
565        public ProjectionReferenceRenamer(Map<String, String> map) {
566            this.map = map;
567        }
568
569        @Override
570        public SelectClause transform(SelectClause node) {
571            inProjection = true;
572            node = super.transform(node);
573            inProjection = false;
574            return node;
575        }
576
577        @Override
578        public Reference transform(Reference node) {
579            if (!inProjection) {
580                return node;
581            }
582            String name = node.name;
583            String newName = map.getOrDefault(name, name);
584            Reference newReference = new Reference(newName, node.cast, node.esHint);
585            if (newReference.originalName == null) {
586                newReference.originalName = name;
587            }
588            newReference.info = node.info;
589            return newReference;
590        }
591    }
592
593    /**
594     * Info about a prefix: the prefix, and how many times it was encountered.
595     *
596     * @since 9.3
597     */
598    public static class PrefixInfo {
599
600        public static final PrefixInfo EMPTY = new PrefixInfo("", 0);
601
602        public final String prefix;
603
604        public final int count;
605
606        public PrefixInfo(String prefix, int count) {
607            this.prefix = prefix;
608            this.count = count;
609        }
610    }
611
612    /**
613     * Analyzes references to compute common prefix info in order to later factor them in a parent expression.
614     *
615     * @since 9.3
616     */
617    public class ReferencePrefixAnalyzer extends DefaultQueryVisitor {
618
619        private static final long serialVersionUID = 1L;
620
621        @Override
622        public void visitReference(Reference node) {
623            super.visitReference(node);
624            processReference(node);
625        }
626
627        @Override
628        public void visitMultiExpression(MultiExpression node) {
629            super.visitMultiExpression(node);
630            processExpression(node, node.values);
631        }
632
633        @Override
634        public void visitExpression(Expression node) {
635            super.visitExpression(node);
636            processExpression(node, Arrays.asList(node.lvalue, node.rvalue));
637        }
638
639        protected void processReference(Reference node) {
640            String prefix = getCorrelatedWildcardPrefix(node.name);
641            int count = prefix.isEmpty() ? 0 : 1;
642            node.setInfo(new PrefixInfo(prefix, count));
643        }
644
645        protected void processExpression(Expression node, List<Operand> operands) {
646            PrefixInfo commonInfo = null;
647            for (Operand operand : operands) {
648                // find longest prefix for the operand
649                PrefixInfo info;
650                if (operand instanceof Reference) {
651                    Reference reference = (Reference) operand;
652                    info = (PrefixInfo) reference.getInfo();
653                } else if (operand instanceof Expression) {
654                    Expression expression = (Expression) operand;
655                    info = (PrefixInfo) expression.getInfo();
656                } else {
657                    info = null;
658                }
659                if (info != null) {
660                    if (commonInfo == null) {
661                        commonInfo = info;
662                    } else if (commonInfo.prefix.equals(info.prefix)) {
663                        commonInfo = new PrefixInfo(commonInfo.prefix, commonInfo.count + info.count);
664                    } else {
665                        commonInfo = PrefixInfo.EMPTY;
666                    }
667                }
668            }
669            node.setInfo(commonInfo);
670        }
671    }
672
673    /**
674     * Gets the prefix to use for this reference name (NXQL) if it contains a correlated wildcard.
675     * <p>
676     * The prefix is used to group together sets of expression that all use references with the same prefix.
677     *
678     * @param name the reference name (NXQL)
679     * @return the prefix, or an empty string if there is no correlated wildcard
680     * @since 9.3
681     */
682    public abstract String getCorrelatedWildcardPrefix(String name);
683
684}