001/*
002 * (C) Copyright 2014-2016 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage;
020
021import static java.util.stream.Collectors.groupingBy;
022import static java.util.stream.Collectors.toList;
023
024import java.util.ArrayList;
025import java.util.Arrays;
026import java.util.Collection;
027import java.util.Collections;
028import java.util.HashMap;
029import java.util.HashSet;
030import java.util.Iterator;
031import java.util.LinkedHashMap;
032import java.util.List;
033import java.util.Map;
034import java.util.Map.Entry;
035import java.util.Set;
036import java.util.stream.Collector;
037
038import org.nuxeo.ecm.core.api.impl.FacetFilter;
039import org.nuxeo.ecm.core.query.QueryParseException;
040import org.nuxeo.ecm.core.query.sql.NXQL;
041import org.nuxeo.ecm.core.query.sql.model.DefaultQueryVisitor;
042import org.nuxeo.ecm.core.query.sql.model.Expression;
043import org.nuxeo.ecm.core.query.sql.model.FromClause;
044import org.nuxeo.ecm.core.query.sql.model.FromList;
045import org.nuxeo.ecm.core.query.sql.model.IdentityQueryTransformer;
046import org.nuxeo.ecm.core.query.sql.model.Literal;
047import org.nuxeo.ecm.core.query.sql.model.LiteralList;
048import org.nuxeo.ecm.core.query.sql.model.MultiExpression;
049import org.nuxeo.ecm.core.query.sql.model.Operand;
050import org.nuxeo.ecm.core.query.sql.model.Operator;
051import org.nuxeo.ecm.core.query.sql.model.OrderByClause;
052import org.nuxeo.ecm.core.query.sql.model.Predicate;
053import org.nuxeo.ecm.core.query.sql.model.Reference;
054import org.nuxeo.ecm.core.query.sql.model.SQLQuery;
055import org.nuxeo.ecm.core.query.sql.model.SelectClause;
056import org.nuxeo.ecm.core.query.sql.model.StringLiteral;
057import org.nuxeo.ecm.core.query.sql.model.WhereClause;
058import org.nuxeo.ecm.core.schema.SchemaManager;
059import org.nuxeo.ecm.core.schema.types.Type;
060import org.nuxeo.runtime.api.Framework;
061
062/**
063 * Generic optimizer for a NXQL query.
064 *
065 * @since 5.9.4
066 */
067public abstract class QueryOptimizer {
068
069    public static final String TYPE_ROOT = "Root";
070
071    public static final String TYPE_DOCUMENT = "Document";
072
073    public static final String TYPE_RELATION = "Relation";
074
075    protected static final int CORR_BASE = 100_000;
076
077    protected FacetFilter facetFilter;
078
079    protected final SchemaManager schemaManager;
080
081    protected final Set<String> neverPerInstanceMixins;
082
083    protected int correlationCounter;
084
085    /** Do we match only relations? */
086    protected boolean onlyRelations;
087
088    // group by prefix, keeping order
089    protected static Collector<Predicate, ?, Map<String, List<Predicate>>> GROUPING_BY_EXPR_PREFIX = groupingBy(
090            QueryOptimizer::getPredicatePrefix, LinkedHashMap::new, toList());
091
092    public QueryOptimizer() {
093        // schemaManager may be null in unit tests
094        schemaManager = Framework.getService(SchemaManager.class);
095        Set<String> facets = schemaManager == null ? Collections.emptySet()
096                : schemaManager.getNoPerDocumentQueryFacets();
097        neverPerInstanceMixins = new HashSet<>(facets);
098    }
099
100    public QueryOptimizer withFacetFilter(FacetFilter facetFilter) {
101        this.facetFilter = facetFilter;
102        return this;
103    }
104
105    /**
106     * Optimizes a query to provide a WHERE clause containing facet filters, primary and mixin types. In addition, the
107     * top-level AND clauses are analyzed to provide prefix info.
108     */
109    public SQLQuery optimize(SQLQuery query) {
110        // rewrite some uncorrelated wildcards and add NOT NULL clauses for projection ones
111        query = addWildcardNotNullClauses(query);
112
113        List<Predicate> clauses = new ArrayList<>();
114        addFacetFilters(clauses, facetFilter);
115        addTypes(clauses, query.from);
116        addWhere(clauses, query.where);
117        simplifyTypes(clauses);
118        MultiExpression multiExpression = new MultiExpression(Operator.AND, clauses);
119
120        // collect information about common reference prefixes (stored on Reference and Expression)
121        new ReferencePrefixAnalyzer().visitMultiExpression(multiExpression);
122
123        Predicate whereExpr;
124        PrefixInfo info = (PrefixInfo) multiExpression.getInfo();
125        if (!info.prefix.isEmpty()) {
126            // all references have a common prefix
127            whereExpr = multiExpression;
128        } else {
129            // do grouping by common prefix
130            Map<String, List<Predicate>> grouped = clauses.stream().collect(GROUPING_BY_EXPR_PREFIX);
131            Map<String, Predicate> groupedExpressions = new LinkedHashMap<>();
132            for (Entry<String, List<Predicate>> en : grouped.entrySet()) {
133                String prefix = en.getKey();
134                List<Predicate> list = en.getValue();
135                groupedExpressions.put(prefix, makeSingleAndPredicate(prefix, list));
136            }
137
138            // potentially reorganize into nested grouping
139            reorganizeGroupedExpressions(groupedExpressions);
140
141            List<Predicate> expressions = new ArrayList<>(groupedExpressions.values());
142            whereExpr = makeSingleAndPredicate("", expressions);
143        }
144
145        return query.withPredicate(whereExpr);
146    }
147
148    /**
149     * Makes a single AND predicate from several expressions known to have a common prefix.
150     */
151    public static Predicate makeSingleAndPredicate(String prefix, List<Predicate> predicates) {
152        if (predicates.size() == 1) {
153            return predicates.get(0);
154        } else {
155            int count = prefix.isEmpty() ? 0 : predicates.stream().mapToInt(QueryOptimizer::getExpressionCount).sum();
156            Predicate e = new MultiExpression(Operator.AND, predicates);
157            e.setInfo(new PrefixInfo(prefix, count));
158            return e;
159        }
160    }
161
162    protected static String getPredicatePrefix(Predicate predicate) {
163        PrefixInfo info = (PrefixInfo) predicate.getInfo();
164        return info == null ? "" : info.prefix;
165    }
166
167    protected static int getExpressionCount(Expression expr) {
168        PrefixInfo info = (PrefixInfo) expr.getInfo();
169        return info == null ? 0 : info.count;
170    }
171
172    /**
173     * Reorganizes the grouped expressions in order to have 2-level nesting in case a group is a prefix of another.
174     *
175     * @since 9.3
176     */
177    public static void reorganizeGroupedExpressions(Map<String, Predicate> groupedExpressions) {
178        if (groupedExpressions.size() > 1) {
179            List<String> keys = new ArrayList<>(groupedExpressions.keySet());
180            List<String> withPrefix = new ArrayList<>();
181            String prefix = findPrefix(keys, withPrefix);
182            if (prefix != null) {
183                // first part, the expression corresponding to the prefix
184                Predicate first = groupedExpressions.remove(prefix);
185
186                // second part, all those that had that prefix
187                List<Predicate> exprs = new ArrayList<>();
188                for (String k : withPrefix) {
189                    exprs.add(groupedExpressions.remove(k));
190                }
191                String secondPrefix;
192                if (withPrefix.size() == 1) {
193                    secondPrefix = withPrefix.get(0);
194                } else {
195                    throw new QueryParseException("Too complex correlated wildcards in query: " + groupedExpressions);
196                }
197                Predicate second = makeSingleAndPredicate(secondPrefix, exprs);
198
199                // finally bring them all together
200                Predicate expr = makeSingleAndPredicate(prefix, Arrays.asList(first, second));
201                groupedExpressions.put(prefix, expr);
202            }
203        }
204    }
205
206    /**
207     * Finds a non-empty prefix in the strings.
208     * <p>
209     * If a prefix is found, the other strings having it as a prefix are collected in {@code withPrefix}, and the prefix
210     * and the found strings are moved from the input {@code string}.
211     * <p>
212     * {@code strings} and {@code withPrefix} must both be ArrayLists as they will be mutated and queried by index.
213     *
214     * @param strings the input strings
215     * @param withPrefix (return value) the strings that have the found prefix as a prefix
216     * @return the prefix if found, or null if not
217     * @since 9.3
218     */
219    public static String findPrefix(List<String> strings, List<String> withPrefix) {
220        // naive algorithm as list size is very small
221        String prefix = null;
222        ALL: //
223        for (int i = 0; i < strings.size(); i++) {
224            String candidate = strings.get(i);
225            if (candidate.isEmpty()) {
226                continue;
227            }
228            for (int j = 0; j < strings.size(); j++) {
229                if (i == j) {
230                    continue;
231                }
232                String s = strings.get(j);
233                if (s.isEmpty()) {
234                    continue;
235                }
236                if (s.startsWith(candidate + '/')) {
237                    prefix = candidate;
238                    break ALL;
239                }
240            }
241        }
242        if (prefix != null) {
243            for (Iterator<String> it = strings.iterator(); it.hasNext();) {
244                String s = it.next();
245                if (s.isEmpty()) {
246                    continue;
247                }
248                if (s.equals(prefix)) {
249                    it.remove(); // will be returned as method result
250                    continue;
251                }
252                if (s.startsWith(prefix + '/')) {
253                    it.remove(); // will be returned in withPrefix
254                    withPrefix.add(s);
255                }
256            }
257        }
258        return prefix;
259    }
260
261    protected void addFacetFilters(List<Predicate> clauses, FacetFilter facetFilter) {
262        if (facetFilter == null) {
263            return;
264        }
265        for (String mixin : facetFilter.required) {
266            // every facet is required, not just any of them,
267            // so do them one by one
268            Predicate expr = new Predicate(new Reference(NXQL.ECM_MIXINTYPE), Operator.EQ, new StringLiteral(mixin));
269            clauses.add(expr);
270        }
271        if (!facetFilter.excluded.isEmpty()) {
272            LiteralList list = new LiteralList();
273            for (String mixin : facetFilter.excluded) {
274                list.add(new StringLiteral(mixin));
275            }
276            Predicate expr = new Predicate(new Reference(NXQL.ECM_MIXINTYPE), Operator.NOTIN, list);
277            clauses.add(expr);
278        }
279    }
280
281    // methods using SchemaManager easily overrideable for easier testing
282
283    protected Set<String> getDocumentTypeNamesForFacet(String mixin) {
284        Set<String> types = schemaManager.getDocumentTypeNamesForFacet(mixin);
285        if (types == null) {
286            // unknown mixin
287            types = Collections.emptySet();
288        }
289        return types;
290    }
291
292    protected Set<String> getDocumentTypeNamesExtending(String typeName) {
293        Set<String> types = schemaManager.getDocumentTypeNamesExtending(typeName);
294        if (types == null) {
295            throw new RuntimeException("Unknown type: " + typeName);
296        }
297        return types;
298    }
299
300    protected boolean isTypeRelation(String typeName) {
301        do {
302            if (TYPE_RELATION.equals(typeName)) {
303                return true;
304            }
305            Type t = schemaManager.getDocumentType(typeName);
306            if (t != null) {
307                t = t.getSuperType();
308            }
309            typeName = t == null ? null : t.getName();
310        } while (typeName != null);
311        return false;
312    }
313
314    /**
315     * Finds all the types to take into account (all concrete types being a subtype of the passed types) based on the
316     * FROM list.
317     * <p>
318     * Adds them as a ecm:primaryType match in the toplevel operands.
319     */
320    protected void addTypes(List<Predicate> clauses, FromClause node) {
321        onlyRelations = true;
322        Set<String> fromTypes = new HashSet<>();
323        FromList elements = node.elements;
324        for (String typeName : elements.values()) {
325            if (TYPE_DOCUMENT.equalsIgnoreCase(typeName)) {
326                typeName = TYPE_DOCUMENT;
327            }
328            fromTypes.addAll(getDocumentTypeNamesExtending(typeName));
329            boolean isRelation = isTypeRelation(typeName);
330            onlyRelations = onlyRelations && isRelation;
331        }
332        fromTypes.remove(TYPE_ROOT);
333        LiteralList list = new LiteralList();
334        for (String type : fromTypes) {
335            list.add(new StringLiteral(type));
336        }
337        clauses.add(new Predicate(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.IN, list));
338    }
339
340    /**
341     * Adds a flattened version of all toplevel ANDed WHERE clauses.
342     */
343    protected void addWhere(List<Predicate> clauses, WhereClause where) {
344        if (where != null) {
345            addWhere(clauses, where.predicate);
346        }
347    }
348
349    protected void addWhere(List<Predicate> clauses, Predicate expr) {
350        if (expr.operator == Operator.AND && expr.lvalue instanceof Expression && expr.rvalue instanceof Expression) {
351            addWhere(clauses, (Predicate) expr.lvalue);
352            addWhere(clauses, (Predicate) expr.rvalue);
353        } else if (expr.operator == Operator.AND && expr instanceof MultiExpression) {
354            for (Predicate pred : ((MultiExpression) expr).predicates) {
355                addWhere(clauses, pred);
356            }
357        } else {
358            clauses.add(expr);
359        }
360    }
361
362    /**
363     * Simplify ecm:primaryType positive references, and non-per-instance mixin types.
364     */
365    protected void simplifyTypes(List<Predicate> clauses) {
366        Set<String> primaryTypes = null; // if defined, required
367        for (Iterator<Predicate> it = clauses.iterator(); it.hasNext();) {
368            // whenever we don't know how to optimize the expression,
369            // we just continue the loop
370            Predicate predicate = it.next();
371            if (!(predicate.lvalue instanceof Reference)) {
372                continue;
373            }
374            String name = ((Reference) predicate.lvalue).name;
375            Operator op = predicate.operator;
376            Operand rvalue = predicate.rvalue;
377            if (NXQL.ECM_PRIMARYTYPE.equals(name)) {
378                if (op != Operator.EQ && op != Operator.IN) {
379                    continue;
380                }
381                Set<String> set;
382                if (op == Operator.EQ) {
383                    if (!(rvalue instanceof StringLiteral)) {
384                        continue;
385                    }
386                    String primaryType = ((StringLiteral) rvalue).value;
387                    set = new HashSet<>(Collections.singleton(primaryType));
388                } else { // Operator.IN
389                    if (!(rvalue instanceof LiteralList)) {
390                        continue;
391                    }
392                    set = getStringLiterals((LiteralList) rvalue);
393                }
394                if (primaryTypes == null) {
395                    primaryTypes = set;
396                } else {
397                    primaryTypes.retainAll(set);
398                }
399                it.remove(); // expression simplified into primaryTypes set
400            } else if (NXQL.ECM_MIXINTYPE.equals(name)) {
401                if (op != Operator.EQ && op != Operator.NOTEQ) {
402                    continue;
403                }
404                if (!(rvalue instanceof StringLiteral)) {
405                    continue;
406                }
407                String mixin = ((StringLiteral) rvalue).value;
408                if (!neverPerInstanceMixins.contains(mixin)) {
409                    // mixin per instance -> primary type checks not enough
410                    continue;
411                }
412                Set<String> set = getDocumentTypeNamesForFacet(mixin);
413                if (primaryTypes == null) {
414                    if (op == Operator.EQ) {
415                        primaryTypes = new HashSet<>(set); // copy
416                    } else {
417                        continue; // unknown positive, no optimization
418                    }
419                } else {
420                    if (op == Operator.EQ) {
421                        primaryTypes.retainAll(set);
422                    } else {
423                        primaryTypes.removeAll(set);
424                    }
425                }
426                it.remove(); // expression simplified into primaryTypes set
427            }
428        }
429        // readd the simplified primary types constraints
430        if (primaryTypes != null) {
431            if (primaryTypes.isEmpty()) {
432                // TODO better removal
433                primaryTypes.add("__NOSUCHTYPE__");
434            }
435            Predicate predicate;
436            if (primaryTypes.size() == 1) {
437                String pt = primaryTypes.iterator().next();
438                predicate = new Predicate(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.EQ, new StringLiteral(pt));
439            } else { // primaryTypes.size() > 1
440                LiteralList list = new LiteralList();
441                for (String pt : primaryTypes) {
442                    list.add(new StringLiteral(pt));
443                }
444                predicate = new Predicate(new Reference(NXQL.ECM_PRIMARYTYPE), Operator.IN, list);
445            }
446            clauses.add(predicate);
447        }
448    }
449
450    protected static Set<String> getStringLiterals(LiteralList list) {
451        Set<String> set = new HashSet<>();
452        for (Literal literal : list) {
453            if (!(literal instanceof StringLiteral)) {
454                throw new RuntimeException("requires string literals");
455            }
456            set.add(((StringLiteral) literal).value);
457        }
458        return set;
459    }
460
461    /**
462     * If we have a query like {@code SELECT dc:subjects/* FROM ...} then we must make sure we don't match documents
463     * that don't have a {@code dc:subjects} at all, as this would make the evaluator return one {@code null} value for
464     * each due to its semantics of doing the equivalent of LEFT JOINs.
465     * <p>
466     * To prevent this, we add a clause {@code ... AND dc:subjects/* IS NOT NULL}.
467     * <p>
468     * For correlated wildcards this is enough, but for uncorrelated wildcards we must avoid adding extra JOINs, so we
469     * must artificially correlated them. This requires rewriting the query with correlated wildcards instead of
470     * uncorrelated ones.
471     */
472    protected SQLQuery addWildcardNotNullClauses(SQLQuery query) {
473        ProjectionWildcardsFinder finder = new ProjectionWildcardsFinder();
474        finder.visitQuery(query);
475
476        // find wildcards in the projection
477        Set<String> wildcards = finder.projectionWildcards;
478        Set<String> uncorrelatedWildcards = finder.uncorrelatedProjectionWildcards;
479        if (!uncorrelatedWildcards.isEmpty()) {
480            // rename uncorrelated wildcards to unique correlation names
481            Map<String, String> map = new HashMap<>(uncorrelatedWildcards.size());
482            for (String name : uncorrelatedWildcards) {
483                String newName = name + (CORR_BASE + correlationCounter++);
484                map.put(name, newName);
485                wildcards.remove(name);
486                wildcards.add(newName);
487            }
488            // rename them in the whole query
489            query = new ProjectionReferenceRenamer(map).transform(query);
490        }
491
492        // add IS NOT NULL clauses for all projection wildcards (now all correlated)
493        if (!wildcards.isEmpty()) {
494            query = addIsNotNullClauses(query, wildcards);
495        }
496
497        return query;
498    }
499
500    protected SQLQuery addIsNotNullClauses(SQLQuery query, Collection<String> names) {
501        List<Predicate> values = names.stream()
502                                      .map(name -> new Predicate(new Reference(name), Operator.ISNOTNULL, null))
503                                      .collect(toList());
504        Predicate expr = new Predicate(query.where.predicate, Operator.AND, new MultiExpression(Operator.AND, values));
505        return query.withPredicate(expr);
506    }
507
508    protected static class ProjectionWildcardsFinder extends DefaultQueryVisitor {
509
510        protected final Set<String> projectionWildcards = new HashSet<>();
511
512        protected final Set<String> uncorrelatedProjectionWildcards = new HashSet<>();
513
514        protected boolean inProjection;
515
516        protected boolean inOrderBy;
517
518        @Override
519        public void visitSelectClause(SelectClause node) {
520            inProjection = true;
521            super.visitSelectClause(node);
522            inProjection = false;
523        }
524
525        @Override
526        public void visitOrderByClause(OrderByClause node) {
527            inOrderBy = true;
528            super.visitOrderByClause(node);
529            inOrderBy = false;
530        }
531
532        @Override
533        public void visitReference(Reference ref) {
534            if (inProjection) {
535                String name = ref.name;
536                if (name.endsWith("*")) {
537                    projectionWildcards.add(name);
538                    if (name.endsWith("/*")) {
539                        uncorrelatedProjectionWildcards.add(name);
540                    }
541                }
542            }
543        }
544    }
545
546    /**
547     * Renames references if they are in the projection part.
548     */
549    protected static class ProjectionReferenceRenamer extends IdentityQueryTransformer {
550
551        protected final Map<String, String> map;
552
553        protected boolean inProjection;
554
555        public ProjectionReferenceRenamer(Map<String, String> map) {
556            this.map = map;
557        }
558
559        @Override
560        public SelectClause transform(SelectClause node) {
561            inProjection = true;
562            node = super.transform(node);
563            inProjection = false;
564            return node;
565        }
566
567        @Override
568        public Reference transform(Reference node) {
569            if (!inProjection) {
570                return node;
571            }
572            String name = node.name;
573            String newName = map.getOrDefault(name, name);
574            Reference newReference = new Reference(newName, node.cast, node.esHint);
575            if (newReference.originalName == null) {
576                newReference.originalName = name;
577            }
578            newReference.info = node.info;
579            return newReference;
580        }
581    }
582
583    /**
584     * Info about a prefix: the prefix, and how many times it was encountered.
585     *
586     * @since 9.3
587     */
588    public static class PrefixInfo {
589
590        public static final PrefixInfo EMPTY = new PrefixInfo("", 0);
591
592        public final String prefix;
593
594        public final int count;
595
596        public PrefixInfo(String prefix, int count) {
597            this.prefix = prefix;
598            this.count = count;
599        }
600    }
601
602    /**
603     * Analyzes references to compute common prefix info in order to later factor them in a parent expression.
604     *
605     * @since 9.3
606     */
607    public class ReferencePrefixAnalyzer extends DefaultQueryVisitor {
608
609        @Override
610        public void visitReference(Reference node) {
611            super.visitReference(node);
612            processReference(node);
613        }
614
615        @Override
616        public void visitMultiExpression(MultiExpression node) {
617            super.visitMultiExpression(node);
618            processExpression(node, node.predicates);
619        }
620
621        @Override
622        public void visitExpression(Expression node) {
623            super.visitExpression(node);
624            processExpression(node, Arrays.asList(node.lvalue, node.rvalue));
625        }
626
627        protected void processReference(Reference node) {
628            String prefix = getCorrelatedWildcardPrefix(node.name);
629            int count = prefix.isEmpty() ? 0 : 1;
630            node.setInfo(new PrefixInfo(prefix, count));
631        }
632
633        protected void processExpression(Expression node, List<? extends Operand> operands) {
634            PrefixInfo commonInfo = null;
635            for (Operand operand : operands) {
636                // find longest prefix for the operand
637                PrefixInfo info;
638                if (operand instanceof Reference) {
639                    Reference reference = (Reference) operand;
640                    info = (PrefixInfo) reference.getInfo();
641                } else if (operand instanceof Expression) {
642                    Expression expression = (Expression) operand;
643                    info = (PrefixInfo) expression.getInfo();
644                } else {
645                    info = null;
646                }
647                if (info != null) {
648                    if (commonInfo == null) {
649                        commonInfo = info;
650                    } else if (commonInfo.prefix.equals(info.prefix)) {
651                        commonInfo = new PrefixInfo(commonInfo.prefix, commonInfo.count + info.count);
652                    } else {
653                        commonInfo = PrefixInfo.EMPTY;
654                    }
655                }
656            }
657            node.setInfo(commonInfo);
658        }
659    }
660
661    /**
662     * Gets the prefix to use for this reference name (NXQL) if it contains a correlated wildcard.
663     * <p>
664     * The prefix is used to group together sets of expression that all use references with the same prefix.
665     *
666     * @param name the reference name (NXQL)
667     * @return the prefix, or an empty string if there is no correlated wildcard
668     * @since 9.3
669     */
670    public abstract String getCorrelatedWildcardPrefix(String name);
671
672}