001/*
002 * (C) Copyright 2014-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage.mongodb;
020
021import static java.lang.Boolean.FALSE;
022import static java.lang.Boolean.TRUE;
023import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.FACETED_TAG;
024import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.FACETED_TAG_LABEL;
025import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL;
026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL_NAME;
027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP;
028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_SCORE;
029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.PROP_MAJOR_VERSION;
030import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.PROP_MINOR_VERSION;
031import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.PROP_UID_MAJOR_VERSION;
032import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.PROP_UID_MINOR_VERSION;
033import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_ID;
034import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_META;
035import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_TEXT_SCORE;
036import static org.nuxeo.ecm.core.trash.TrashService.Feature.TRASHED_STATE_IS_DEDICATED_PROPERTY;
037
038import java.util.ArrayList;
039import java.util.Arrays;
040import java.util.Collections;
041import java.util.Date;
042import java.util.HashMap;
043import java.util.HashSet;
044import java.util.LinkedList;
045import java.util.List;
046import java.util.Map;
047import java.util.Set;
048import java.util.concurrent.atomic.AtomicInteger;
049import java.util.regex.Pattern;
050
051import org.apache.commons.lang3.StringUtils;
052import org.apache.commons.lang3.math.NumberUtils;
053import org.bson.Document;
054import org.nuxeo.ecm.core.api.LifeCycleConstants;
055import org.nuxeo.ecm.core.query.QueryParseException;
056import org.nuxeo.ecm.core.query.sql.NXQL;
057import org.nuxeo.ecm.core.query.sql.model.BooleanLiteral;
058import org.nuxeo.ecm.core.query.sql.model.DateLiteral;
059import org.nuxeo.ecm.core.query.sql.model.DoubleLiteral;
060import org.nuxeo.ecm.core.query.sql.model.Expression;
061import org.nuxeo.ecm.core.query.sql.model.Function;
062import org.nuxeo.ecm.core.query.sql.model.IntegerLiteral;
063import org.nuxeo.ecm.core.query.sql.model.Literal;
064import org.nuxeo.ecm.core.query.sql.model.LiteralList;
065import org.nuxeo.ecm.core.query.sql.model.MultiExpression;
066import org.nuxeo.ecm.core.query.sql.model.Operand;
067import org.nuxeo.ecm.core.query.sql.model.Operator;
068import org.nuxeo.ecm.core.query.sql.model.OrderByClause;
069import org.nuxeo.ecm.core.query.sql.model.OrderByExpr;
070import org.nuxeo.ecm.core.query.sql.model.Reference;
071import org.nuxeo.ecm.core.query.sql.model.SelectClause;
072import org.nuxeo.ecm.core.query.sql.model.StringLiteral;
073import org.nuxeo.ecm.core.schema.DocumentType;
074import org.nuxeo.ecm.core.schema.SchemaManager;
075import org.nuxeo.ecm.core.schema.types.ComplexType;
076import org.nuxeo.ecm.core.schema.types.Field;
077import org.nuxeo.ecm.core.schema.types.ListType;
078import org.nuxeo.ecm.core.schema.types.Schema;
079import org.nuxeo.ecm.core.schema.types.Type;
080import org.nuxeo.ecm.core.schema.types.primitives.BooleanType;
081import org.nuxeo.ecm.core.schema.types.primitives.DateType;
082import org.nuxeo.ecm.core.schema.types.primitives.StringType;
083import org.nuxeo.ecm.core.storage.ExpressionEvaluator;
084import org.nuxeo.ecm.core.storage.ExpressionEvaluator.PathResolver;
085import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer;
086import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer.FulltextQuery;
087import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer.Op;
088import org.nuxeo.ecm.core.storage.QueryOptimizer.PrefixInfo;
089import org.nuxeo.ecm.core.storage.dbs.DBSDocument;
090import org.nuxeo.ecm.core.storage.dbs.DBSSession;
091import org.nuxeo.ecm.core.trash.TrashService;
092import org.nuxeo.runtime.api.Framework;
093
094import com.mongodb.QueryOperators;
095
096/**
097 * Query builder for a MongoDB query from an {@link Expression}.
098 *
099 * @since 5.9.4
100 */
101public class MongoDBQueryBuilder {
102
    /** Boxed {@code 0}; an integer literal equal to it is converted by {@link #checkBoolean} for boolean fields. */
    public static final Long LONG_ZERO = Long.valueOf(0);

    /** Boxed {@code 1}; an integer literal equal to it is converted to {@code TRUE} for boolean fields. */
    public static final Long LONG_ONE = Long.valueOf(1);

    /** Value stored for an ascending sort key, and for every projected field. */
    public static final Double ONE = Double.valueOf(1);

    /** Value stored for a descending sort key. */
    public static final Double MINUS_ONE = Double.valueOf(-1);

    /** Reference cast name that triggers the date literal check done in {@link #walkExpression}. */
    protected static final String DATE_CAST = "DATE";

    // NOTE(review): not used in the part of the class visible here; purpose to be confirmed
    protected final AtomicInteger counter = new AtomicInteger();

    /** Schema manager service, looked up through {@link Framework} in the constructor. */
    protected final SchemaManager schemaManager;

    /** Converter taken from the repository passed to the constructor. */
    protected final MongoDBConverter converter;

    /** Key of the document id field, taken from the repository; used when matching on a resolved path. */
    protected final String idKey;

    // NOTE(review): never assigned or read in the part of the class visible here; purpose to be confirmed
    protected List<String> documentTypes;

    /** The NXQL expression translated by {@link #walk()} into {@link #query}. */
    protected final Expression expression;

    /** The select clause translated by {@link #walkProjection()}. */
    protected final SelectClause selectClause;

    /** The order by clause translated by {@link #walkOrderBy()}, may be {@code null}. */
    protected final OrderByClause orderByClause;

    /** Resolver used to turn a path literal into a document id. */
    protected final PathResolver pathResolver;

    /** Set to {@code true} when a standard fulltext clause has been translated. */
    public boolean hasFulltext;

    /** Set by {@link #walkOrderBy()} when the sort is done on the fulltext score. */
    public boolean sortOnFulltextScore;

    /** The MongoDB filter, available after {@link #walk()}. */
    protected Document query;

    /** The MongoDB sort specification, {@code null} when there is no order by clause. */
    protected Document orderBy;

    /** The MongoDB projection, available after {@link #walk()}. */
    protected Document projection;

    /** Maps a projection field to its property name, for the projected fields where the two differ. */
    protected Map<String, String> propertyKeys;

    /** Set by {@link #walkProjection()} when a projected property contains a {@code *} wildcard. */
    boolean projectionHasWildcard;

    /** When {@code true}, any fulltext clause is rejected with a {@link QueryParseException}. */
    private boolean fulltextSearchDisabled;

    /**
     * Prefix to remove for $elemMatch (including final dot), or {@code null} if there's no current prefix to remove.
     */
    protected String elemMatchPrefix;
151
152    public MongoDBQueryBuilder(MongoDBRepository repository, Expression expression, SelectClause selectClause,
153            OrderByClause orderByClause, PathResolver pathResolver, boolean fulltextSearchDisabled) {
154        schemaManager = Framework.getService(SchemaManager.class);
155        converter = repository.converter;
156        idKey = repository.idKey;
157        this.expression = expression;
158        this.selectClause = selectClause;
159        this.orderByClause = orderByClause;
160        this.pathResolver = pathResolver;
161        this.fulltextSearchDisabled = fulltextSearchDisabled;
162        this.propertyKeys = new HashMap<>();
163    }
164
165    public void walk() {
166        query = walkExpression(expression); // computes hasFulltext
167        walkOrderBy(); // computes sortOnFulltextScore
168        walkProjection(); // needs hasFulltext and sortOnFulltextScore
169    }
170
171    public Document getQuery() {
172        return query;
173    }
174
175    public Document getOrderBy() {
176        return orderBy;
177    }
178
179    public Document getProjection() {
180        return projection;
181    }
182
183    public boolean hasProjectionWildcard() {
184        return projectionHasWildcard;
185    }
186
187    protected void walkOrderBy() {
188        sortOnFulltextScore = false;
189        if (orderByClause == null) {
190            orderBy = null;
191        } else {
192            orderBy = new Document();
193            for (OrderByExpr ob : orderByClause.elements) {
194                Reference ref = ob.reference;
195                boolean desc = ob.isDescending;
196                String field = walkReference(ref).queryField;
197                if (!orderBy.containsKey(field)) {
198                    Object value;
199                    if (KEY_FULLTEXT_SCORE.equals(field)) {
200                        if (!desc) {
201                            throw new QueryParseException("Cannot sort by " + NXQL.ECM_FULLTEXT_SCORE + " ascending");
202                        }
203                        sortOnFulltextScore = true;
204                        value = new Document(MONGODB_META, MONGODB_TEXT_SCORE);
205                    } else {
206                        value = desc ? MINUS_ONE : ONE;
207                    }
208                    orderBy.put(field, value);
209                }
210            }
211            if (sortOnFulltextScore && orderBy.size() > 1) {
212                throw new QueryParseException("Cannot sort by " + NXQL.ECM_FULLTEXT_SCORE + " and other criteria");
213            }
214        }
215    }
216
217    protected void walkProjection() {
218        projection = new Document();
219        boolean projectionOnFulltextScore = false;
220        for (Operand op : selectClause.getSelectList().values()) {
221            if (!(op instanceof Reference)) {
222                throw new QueryParseException("Projection not supported: " + op);
223            }
224            FieldInfo fieldInfo = walkReference((Reference) op);
225            String propertyField = fieldInfo.prop;
226            if (!propertyField.equals(NXQL.ECM_UUID) //
227                    && !propertyField.equals(fieldInfo.projectionField) //
228                    && !propertyField.contains("/")) {
229                propertyKeys.put(fieldInfo.projectionField, propertyField);
230            }
231            projection.put(fieldInfo.projectionField, ONE);
232            if (propertyField.contains("*")) {
233                projectionHasWildcard = true;
234            }
235            if (fieldInfo.projectionField.equals(KEY_FULLTEXT_SCORE)) {
236                projectionOnFulltextScore = true;
237            }
238        }
239        if (projectionOnFulltextScore || sortOnFulltextScore) {
240            if (!hasFulltext) {
241                throw new QueryParseException(NXQL.ECM_FULLTEXT_SCORE + " cannot be used without " + NXQL.ECM_FULLTEXT);
242            }
243            projection.put(KEY_FULLTEXT_SCORE, new Document(MONGODB_META, MONGODB_TEXT_SCORE));
244        }
245    }
246
247    public Document walkExpression(Expression expr) {
248        Operator op = expr.operator;
249        Operand lvalue = expr.lvalue;
250        Operand rvalue = expr.rvalue;
251        Reference ref = lvalue instanceof Reference ? (Reference) lvalue : null;
252        String name = ref != null ? ref.name : null;
253        String cast = ref != null ? ref.cast : null;
254        if (DATE_CAST.equals(cast)) {
255            checkDateLiteralForCast(op, rvalue, name);
256        }
257        if (op == Operator.STARTSWITH) {
258            return walkStartsWith(lvalue, rvalue);
259        } else if (NXQL.ECM_PATH.equals(name)) {
260            return walkEcmPath(op, rvalue);
261        } else if (NXQL.ECM_ANCESTORID.equals(name)) {
262            return walkAncestorId(op, rvalue);
263        } else if (NXQL.ECM_ISTRASHED.equals(name)) {
264            return walkIsTrashed(op, rvalue);
265        } else if (name != null && name.startsWith(NXQL.ECM_FULLTEXT) && !NXQL.ECM_FULLTEXT_JOBID.equals(name)) {
266            return walkEcmFulltext(name, op, rvalue);
267        } else if (op == Operator.SUM) {
268            throw new UnsupportedOperationException("SUM");
269        } else if (op == Operator.SUB) {
270            throw new UnsupportedOperationException("SUB");
271        } else if (op == Operator.MUL) {
272            throw new UnsupportedOperationException("MUL");
273        } else if (op == Operator.DIV) {
274            throw new UnsupportedOperationException("DIV");
275        } else if (op == Operator.LT) {
276            return walkLt(lvalue, rvalue);
277        } else if (op == Operator.GT) {
278            return walkGt(lvalue, rvalue);
279        } else if (op == Operator.EQ) {
280            return walkEq(lvalue, rvalue);
281        } else if (op == Operator.NOTEQ) {
282            return walkNotEq(lvalue, rvalue);
283        } else if (op == Operator.LTEQ) {
284            return walkLtEq(lvalue, rvalue);
285        } else if (op == Operator.GTEQ) {
286            return walkGtEq(lvalue, rvalue);
287        } else if (op == Operator.AND) {
288            if (expr instanceof MultiExpression) {
289                return walkAndMultiExpression((MultiExpression) expr);
290            } else {
291                return walkAnd(expr);
292            }
293        } else if (op == Operator.NOT) {
294            return walkNot(lvalue);
295        } else if (op == Operator.OR) {
296            return walkOr(lvalue, rvalue);
297        } else if (op == Operator.LIKE) {
298            return walkLike(lvalue, rvalue, true, false);
299        } else if (op == Operator.ILIKE) {
300            return walkLike(lvalue, rvalue, true, true);
301        } else if (op == Operator.NOTLIKE) {
302            return walkLike(lvalue, rvalue, false, false);
303        } else if (op == Operator.NOTILIKE) {
304            return walkLike(lvalue, rvalue, false, true);
305        } else if (op == Operator.IN) {
306            return walkIn(lvalue, rvalue, true);
307        } else if (op == Operator.NOTIN) {
308            return walkIn(lvalue, rvalue, false);
309        } else if (op == Operator.ISNULL) {
310            return walkIsNull(lvalue);
311        } else if (op == Operator.ISNOTNULL) {
312            return walkIsNotNull(lvalue);
313        } else if (op == Operator.BETWEEN) {
314            return walkBetween(lvalue, rvalue, true);
315        } else if (op == Operator.NOTBETWEEN) {
316            return walkBetween(lvalue, rvalue, false);
317        } else {
318            throw new QueryParseException("Unknown operator: " + op);
319        }
320    }
321
322    protected void checkDateLiteralForCast(Operator op, Operand value, String name) {
323        if (op == Operator.BETWEEN || op == Operator.NOTBETWEEN) {
324            LiteralList l = (LiteralList) value;
325            checkDateLiteralForCast(l.get(0), name);
326            checkDateLiteralForCast(l.get(1), name);
327        } else {
328            checkDateLiteralForCast(value, name);
329        }
330    }
331
332    protected void checkDateLiteralForCast(Operand value, String name) {
333        if (value instanceof DateLiteral && !((DateLiteral) value).onlyDate) {
334            throw new QueryParseException("DATE() cast must be used with DATE literal, not TIMESTAMP: " + name);
335        }
336    }
337
338    protected Document walkEcmPath(Operator op, Operand rvalue) {
339        if (op != Operator.EQ && op != Operator.NOTEQ) {
340            throw new QueryParseException(NXQL.ECM_PATH + " requires = or <> operator");
341        }
342        if (!(rvalue instanceof StringLiteral)) {
343            throw new QueryParseException(NXQL.ECM_PATH + " requires literal path as right argument");
344        }
345        String path = ((StringLiteral) rvalue).value;
346        if (path.length() > 1 && path.endsWith("/")) {
347            path = path.substring(0, path.length() - 1);
348        }
349        String id = pathResolver.getIdForPath(path);
350        if (id == null) {
351            // no such path
352            // TODO XXX do better
353            return new Document(MONGODB_ID, "__nosuchid__");
354        }
355        if (op == Operator.EQ) {
356            return new Document(idKey, id);
357        } else {
358            return new Document(idKey, new Document(QueryOperators.NE, id));
359        }
360    }
361
362    protected Document walkAncestorId(Operator op, Operand rvalue) {
363        if (op != Operator.EQ && op != Operator.NOTEQ) {
364            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires = or <> operator");
365        }
366        if (!(rvalue instanceof StringLiteral)) {
367            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires literal id as right argument");
368        }
369        String ancestorId = ((StringLiteral) rvalue).value;
370        if (op == Operator.EQ) {
371            return new Document(DBSDocument.KEY_ANCESTOR_IDS, ancestorId);
372        } else {
373            return new Document(DBSDocument.KEY_ANCESTOR_IDS, new Document(QueryOperators.NE, ancestorId));
374        }
375    }
376
377    protected Document walkEcmFulltext(String name, Operator op, Operand rvalue) {
378        if (op != Operator.EQ && op != Operator.LIKE) {
379            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires = or LIKE operator");
380        }
381        if (!(rvalue instanceof StringLiteral)) {
382            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires literal string as right argument");
383        }
384        if (fulltextSearchDisabled) {
385            throw new QueryParseException("Fulltext search disabled by configuration");
386        }
387        String fulltextQuery = ((StringLiteral) rvalue).value;
388        if (name.equals(NXQL.ECM_FULLTEXT)) {
389            // standard fulltext query
390            hasFulltext = true;
391            String ft = getMongoDBFulltextQuery(fulltextQuery);
392            if (ft == null) {
393                // empty query, matches nothing
394                return new Document(MONGODB_ID, "__nosuchid__");
395            }
396            Document textSearch = new Document();
397            textSearch.put(QueryOperators.SEARCH, ft);
398            // TODO language?
399            return new Document(QueryOperators.TEXT, textSearch);
400        } else {
401            // secondary index match with explicit field
402            // do a regexp on the field
403            if (name.charAt(NXQL.ECM_FULLTEXT.length()) != '.') {
404                throw new QueryParseException(name + " has incorrect syntax" + " for a secondary fulltext index");
405            }
406            String prop = name.substring(NXQL.ECM_FULLTEXT.length() + 1);
407            String ft = fulltextQuery.replace(" ", "%");
408            rvalue = new StringLiteral(ft);
409            return walkLike(new Reference(prop), rvalue, true, true);
410        }
411    }
412
413    protected Document walkIsTrashed(Operator op, Operand rvalue) {
414        if (op != Operator.EQ && op != Operator.NOTEQ) {
415            throw new QueryParseException(NXQL.ECM_ISTRASHED + " requires = or <> operator");
416        }
417        long v;
418        if (!(rvalue instanceof IntegerLiteral)
419                || ((v = ((IntegerLiteral) rvalue).value) != 0 && v != 1)) {
420            throw new QueryParseException(NXQL.ECM_ISTRASHED + " requires literal 0 or 1 as right argument");
421        }
422        Reference ref;
423        Literal val;
424        TrashService trashService = Framework.getService(TrashService.class);
425        if (trashService.hasFeature(TRASHED_STATE_IS_DEDICATED_PROPERTY)) {
426            ref = new Reference(NXQL.ECM_ISTRASHED);
427            val = new BooleanLiteral(true); // give true to match equalsDeleted mechanism
428        } else {
429            ref = new Reference(NXQL.ECM_LIFECYCLESTATE);
430            val = new StringLiteral(LifeCycleConstants.DELETED_STATE);
431        }
432        boolean equalsDeleted = op == Operator.EQ ^ v == 0;
433        if (equalsDeleted) {
434            return walkEq(ref, val);
435        } else {
436            return walkNotEq(ref, val);
437        }
438    }
439
440    // public static for tests
441    public static String getMongoDBFulltextQuery(String query) {
442        FulltextQuery ft = FulltextQueryAnalyzer.analyzeFulltextQuery(query);
443        if (ft == null) {
444            return null;
445        }
446        // translate into MongoDB syntax
447        return translateFulltext(ft, false);
448    }
449
450    /**
451     * Transforms the NXQL fulltext syntax into MongoDB syntax.
452     * <p>
453     * The MongoDB fulltext query syntax is badly documented, but is actually the following:
454     * <ul>
455     * <li>a term is a word,
456     * <li>a phrase is a set of spaced-separated words enclosed in double quotes,
457     * <li>negation is done by prepending a -,
458     * <li>the query is a space-separated set of terms, negated terms, phrases, or negated phrases.
459     * <li>all the words of non-negated phrases are also added to the terms.
460     * </ul>
461     * <p>
462     * The matching algorithm is (excluding stemming and stop words):
463     * <ul>
464     * <li>filter out documents with the negative terms, the negative phrases, or missing the phrases,
465     * <li>then if any term is present in the document then it's a match.
466     * </ul>
467     */
468    protected static String translateFulltext(FulltextQuery ft, boolean and) {
469        List<String> buf = new ArrayList<>();
470        translateFulltext(ft, buf, and);
471        return StringUtils.join(buf, ' ');
472    }
473
474    protected static void translateFulltext(FulltextQuery ft, List<String> buf, boolean and) {
475        if (ft.op == Op.OR) {
476            for (FulltextQuery term : ft.terms) {
477                // don't quote words for OR
478                translateFulltext(term, buf, false);
479            }
480        } else if (ft.op == Op.AND) {
481            for (FulltextQuery term : ft.terms) {
482                // quote words for AND
483                translateFulltext(term, buf, true);
484            }
485        } else {
486            String neg;
487            if (ft.op == Op.NOTWORD) {
488                neg = "-";
489            } else { // Op.WORD
490                neg = "";
491            }
492            String word = ft.word.toLowerCase();
493            if (ft.isPhrase() || and) {
494                buf.add(neg + '"' + word + '"');
495            } else {
496                buf.add(neg + word);
497            }
498        }
499    }
500
501    public Document walkNot(Operand value) {
502        Object val = walkOperand(value);
503        Object not = pushDownNot(val);
504        if (!(not instanceof Document)) {
505            throw new QueryParseException("Cannot do NOT on: " + val);
506        }
507        return (Document) not;
508    }
509
510    protected Object pushDownNot(Object object) {
511        if (!(object instanceof Document)) {
512            throw new QueryParseException("Cannot do NOT on: " + object);
513        }
514        Document ob = (Document) object;
515        Set<String> keySet = ob.keySet();
516        if (keySet.size() != 1) {
517            throw new QueryParseException("Cannot do NOT on: " + ob);
518        }
519        String key = keySet.iterator().next();
520        Object value = ob.get(key);
521        if (!key.startsWith("$")) {
522            if (value instanceof Document) {
523                // push down inside dbobject
524                return new Document(key, pushDownNot(value));
525            } else {
526                // k = v -> k != v
527                return new Document(key, new Document(QueryOperators.NE, value));
528            }
529        }
530        if (QueryOperators.NE.equals(key)) {
531            // NOT k != v -> k = v
532            return value;
533        }
534        if (QueryOperators.NOT.equals(key)) {
535            // NOT NOT v -> v
536            return value;
537        }
538        if (QueryOperators.AND.equals(key) || QueryOperators.OR.equals(key)) {
539            // boolean algebra
540            // NOT (v1 AND v2) -> NOT v1 OR NOT v2
541            // NOT (v1 OR v2) -> NOT v1 AND NOT v2
542            String op = QueryOperators.AND.equals(key) ? QueryOperators.OR : QueryOperators.AND;
543            List<Object> list = (List<Object>) value;
544            for (int i = 0; i < list.size(); i++) {
545                list.set(i, pushDownNot(list.get(i)));
546            }
547            return new Document(op, list);
548        }
549        if (QueryOperators.IN.equals(key) || QueryOperators.NIN.equals(key)) {
550            // boolean algebra
551            // IN <-> NIN
552            String op = QueryOperators.IN.equals(key) ? QueryOperators.NIN : QueryOperators.IN;
553            return new Document(op, value);
554        }
555        if (QueryOperators.LT.equals(key) || QueryOperators.GT.equals(key) || QueryOperators.LTE.equals(key)
556                || QueryOperators.GTE.equals(key)) {
557            // TODO use inverse operators?
558            return new Document(QueryOperators.NOT, ob);
559        }
560        throw new QueryParseException("Unknown operator for NOT: " + key);
561    }
562
563    protected Document newDocumentWithField(FieldInfo fieldInfo, Object value) {
564        return new Document(fieldInfo.queryField, value);
565    }
566
567    public Document walkIsNull(Operand value) {
568        FieldInfo fieldInfo = walkReference(value);
569        return newDocumentWithField(fieldInfo, null);
570    }
571
572    public Document walkIsNotNull(Operand value) {
573        FieldInfo fieldInfo = walkReference(value);
574        return newDocumentWithField(fieldInfo, new Document(QueryOperators.NE, null));
575    }
576
577    public Document walkAndMultiExpression(MultiExpression expr) {
578        return walkAnd(expr, expr.values);
579    }
580
581    public Document walkAnd(Expression expr) {
582        return walkAnd(expr, Arrays.asList(expr.lvalue, expr.rvalue));
583    }
584
    /**
     * Matches a slash followed by a star and at least one digit, with an optional trailing slash; used to turn a DBS
     * prefix containing such segments into a dotted MongoDB prefix.
     */
    protected static final Pattern SLASH_WILDCARD_SLASH = Pattern.compile("/\\*\\d+(/)?");
586
587    protected Document walkAnd(Expression expr, List<Operand> values) {
588        if (values.size() == 1) {
589            return (Document) walkOperand(values.get(0));
590        }
591        // PrefixInfo was computed by the QueryOptimizer
592        PrefixInfo info = (PrefixInfo) expr.getInfo();
593        if (info == null || info.count < 2) {
594            List<Object> list = walkOperandList(values);
595            return new Document(QueryOperators.AND, list);
596        }
597
598        // we have a common prefix for all underlying references, extract it into an $elemMatch node
599
600        // info.prefix is the DBS common prefix, ex: foo/bar/*1; ecm:acp/*1/acl/*1
601        // compute MongoDB prefix: foo.bar.; ecm:acp.acl.
602        String prefix = SLASH_WILDCARD_SLASH.matcher(info.prefix).replaceAll(".");
603        // remove current prefix and trailing . for actual field match
604        String fieldBase = stripElemMatchPrefix(prefix.substring(0, prefix.length() - 1));
605
606        String previousElemMatchPrefix = elemMatchPrefix;
607        elemMatchPrefix = prefix;
608        List<Object> list = walkOperandList(values);
609        elemMatchPrefix = previousElemMatchPrefix;
610
611        return new Document(fieldBase, new Document(QueryOperators.ELEM_MATCH, new Document(QueryOperators.AND, list)));
612    }
613
614    protected String stripElemMatchPrefix(String field) {
615        if (elemMatchPrefix != null && field.startsWith(elemMatchPrefix)) {
616            field = field.substring(elemMatchPrefix.length());
617        }
618        return field;
619    }
620
621    public Document walkOr(Operand lvalue, Operand rvalue) {
622        Object left = walkOperand(lvalue);
623        Object right = walkOperand(rvalue);
624        List<Object> list = new ArrayList<>(Arrays.asList(left, right));
625        return new Document(QueryOperators.OR, list);
626    }
627
628    protected Object checkBoolean(FieldInfo fieldInfo, Object right) {
629        if (fieldInfo.isBoolean()) {
630            // convert 0 / 1 to actual booleans
631            if (right instanceof Long) {
632                if (LONG_ZERO.equals(right)) {
633                    right = fieldInfo.isTrueOrNullBoolean ? null : FALSE;
634                } else if (LONG_ONE.equals(right)) {
635                    right = TRUE;
636                } else {
637                    throw new QueryParseException("Invalid boolean: " + right);
638                }
639            }
640        }
641        return right;
642    }
643
644    public Document walkEq(Operand lvalue, Operand rvalue) {
645        FieldInfo fieldInfo = walkReference(lvalue);
646        Object right = walkOperand(rvalue);
647        if (isMixinTypes(fieldInfo)) {
648            if (!(right instanceof String)) {
649                throw new QueryParseException("Invalid EQ rhs: " + rvalue);
650            }
651            return walkMixinTypes(Collections.singletonList((String) right), true);
652        }
653        right = checkBoolean(fieldInfo, right);
654        // TODO check list fields
655        return newDocumentWithField(fieldInfo, right);
656    }
657
658    public Document walkNotEq(Operand lvalue, Operand rvalue) {
659        FieldInfo fieldInfo = walkReference(lvalue);
660        Object right = walkOperand(rvalue);
661        if (isMixinTypes(fieldInfo)) {
662            if (!(right instanceof String)) {
663                throw new QueryParseException("Invalid NE rhs: " + rvalue);
664            }
665            return walkMixinTypes(Collections.singletonList((String) right), false);
666        }
667        right = checkBoolean(fieldInfo, right);
668        // TODO check list fields
669        return newDocumentWithField(fieldInfo, new Document(QueryOperators.NE, right));
670    }
671
672    public Document walkLt(Operand lvalue, Operand rvalue) {
673        FieldInfo fieldInfo = walkReference(lvalue);
674        Object right = walkOperand(rvalue);
675        return newDocumentWithField(fieldInfo, new Document(QueryOperators.LT, right));
676    }
677
678    public Document walkGt(Operand lvalue, Operand rvalue) {
679        FieldInfo fieldInfo = walkReference(lvalue);
680        Object right = walkOperand(rvalue);
681        return newDocumentWithField(fieldInfo, new Document(QueryOperators.GT, right));
682    }
683
684    public Document walkLtEq(Operand lvalue, Operand rvalue) {
685        FieldInfo fieldInfo = walkReference(lvalue);
686        Object right = walkOperand(rvalue);
687        return newDocumentWithField(fieldInfo, new Document(QueryOperators.LTE, right));
688    }
689
690    public Document walkGtEq(Operand lvalue, Operand rvalue) {
691        FieldInfo fieldInfo = walkReference(lvalue);
692        Object right = walkOperand(rvalue);
693        return newDocumentWithField(fieldInfo, new Document(QueryOperators.GTE, right));
694    }
695
696    public Document walkBetween(Operand lvalue, Operand rvalue, boolean positive) {
697        LiteralList l = (LiteralList) rvalue;
698        FieldInfo fieldInfo = walkReference(lvalue);
699        Object left = walkOperand(l.get(0));
700        Object right = walkOperand(l.get(1));
701        if (positive) {
702            Document range = new Document();
703            range.put(QueryOperators.GTE, left);
704            range.put(QueryOperators.LTE, right);
705            return newDocumentWithField(fieldInfo, range);
706        } else {
707            Document a = newDocumentWithField(fieldInfo, new Document(QueryOperators.LT, left));
708            Document b = newDocumentWithField(fieldInfo, new Document(QueryOperators.GT, right));
709            return new Document(QueryOperators.OR, Arrays.asList(a, b));
710        }
711    }
712
713    public Document walkIn(Operand lvalue, Operand rvalue, boolean positive) {
714        FieldInfo fieldInfo = walkReference(lvalue);
715        Object right = walkOperand(rvalue);
716        if (!(right instanceof List)) {
717            throw new QueryParseException("Invalid IN, right hand side must be a list: " + rvalue);
718        }
719        if (isMixinTypes(fieldInfo)) {
720            return walkMixinTypes((List<String>) right, positive);
721        }
722        // TODO check list fields
723        List<Object> list = (List<Object>) right;
724        return newDocumentWithField(fieldInfo, new Document(positive ? QueryOperators.IN : QueryOperators.NIN, list));
725    }
726
727    public Document walkLike(Operand lvalue, Operand rvalue, boolean positive, boolean caseInsensitive) {
728        FieldInfo fieldInfo = walkReference(lvalue);
729        if (!(rvalue instanceof StringLiteral)) {
730            throw new QueryParseException("Invalid LIKE/ILIKE, right hand side must be a string: " + rvalue);
731        }
732        // TODO check list fields
733        String like = walkStringLiteral((StringLiteral) rvalue);
734        String regex = ExpressionEvaluator.likeToRegex(like);
735
736        int flags = caseInsensitive ? Pattern.CASE_INSENSITIVE : 0;
737        Pattern pattern = Pattern.compile(regex, flags);
738        Object value;
739        if (positive) {
740            value = pattern;
741        } else {
742            value = new Document(QueryOperators.NOT, pattern);
743        }
744        return newDocumentWithField(fieldInfo, value);
745    }
746
747    public Object walkOperand(Operand op) {
748        if (op instanceof Literal) {
749            return walkLiteral((Literal) op);
750        } else if (op instanceof LiteralList) {
751            return walkLiteralList((LiteralList) op);
752        } else if (op instanceof Function) {
753            return walkFunction((Function) op);
754        } else if (op instanceof Expression) {
755            return walkExpression((Expression) op);
756        } else if (op instanceof Reference) {
757            return walkReference((Reference) op);
758        } else {
759            throw new QueryParseException("Unknown operand: " + op);
760        }
761    }
762
763    public Object walkLiteral(Literal lit) {
764        if (lit instanceof BooleanLiteral) {
765            return walkBooleanLiteral((BooleanLiteral) lit);
766        } else if (lit instanceof DateLiteral) {
767            return walkDateLiteral((DateLiteral) lit);
768        } else if (lit instanceof DoubleLiteral) {
769            return walkDoubleLiteral((DoubleLiteral) lit);
770        } else if (lit instanceof IntegerLiteral) {
771            return walkIntegerLiteral((IntegerLiteral) lit);
772        } else if (lit instanceof StringLiteral) {
773            return walkStringLiteral((StringLiteral) lit);
774        } else {
775            throw new QueryParseException("Unknown literal: " + lit);
776        }
777    }
778
779    public Object walkBooleanLiteral(BooleanLiteral lit) {
780        return Boolean.valueOf(lit.value);
781    }
782
783    public Date walkDateLiteral(DateLiteral lit) {
784        return lit.value.toDate(); // TODO onlyDate
785    }
786
787    public Double walkDoubleLiteral(DoubleLiteral lit) {
788        return Double.valueOf(lit.value);
789    }
790
791    public Long walkIntegerLiteral(IntegerLiteral lit) {
792        return Long.valueOf(lit.value);
793    }
794
795    public String walkStringLiteral(StringLiteral lit) {
796        return lit.value;
797    }
798
799    public List<Object> walkLiteralList(LiteralList litList) {
800        List<Object> list = new ArrayList<>(litList.size());
801        for (Literal lit : litList) {
802            list.add(walkLiteral(lit));
803        }
804        return list;
805    }
806
807    protected List<Object> walkOperandList(List<Operand> values) {
808        List<Object> list = new LinkedList<>();
809        for (Operand value : values) {
810            list.add(walkOperand(value));
811        }
812        return list;
813    }
814
815    public Object walkFunction(Function func) {
816        throw new UnsupportedOperationException(func.name);
817    }
818
819    public Document walkStartsWith(Operand lvalue, Operand rvalue) {
820        if (!(lvalue instanceof Reference)) {
821            throw new QueryParseException("Invalid STARTSWITH query, left hand side must be a property: " + lvalue);
822        }
823        String name = ((Reference) lvalue).name;
824        if (!(rvalue instanceof StringLiteral)) {
825            throw new QueryParseException(
826                    "Invalid STARTSWITH query, right hand side must be a literal path: " + rvalue);
827        }
828        String path = ((StringLiteral) rvalue).value;
829        if (path.length() > 1 && path.endsWith("/")) {
830            path = path.substring(0, path.length() - 1);
831        }
832
833        if (NXQL.ECM_PATH.equals(name)) {
834            return walkStartsWithPath(path);
835        } else {
836            return walkStartsWithNonPath(lvalue, path);
837        }
838    }
839
840    protected Document walkStartsWithPath(String path) {
841        // resolve path
842        String ancestorId = pathResolver.getIdForPath(path);
843        if (ancestorId == null) {
844            // no such path
845            // TODO XXX do better
846            return new Document(MONGODB_ID, "__nosuchid__");
847        }
848        return new Document(DBSDocument.KEY_ANCESTOR_IDS, ancestorId);
849    }
850
851    protected Document walkStartsWithNonPath(Operand lvalue, String path) {
852        FieldInfo fieldInfo = walkReference(lvalue);
853        Document eq = newDocumentWithField(fieldInfo, path);
854        // escape except alphanumeric and others not needing escaping
855        String regex = path.replaceAll("([^a-zA-Z0-9 /])", "\\\\$1");
856        Pattern pattern = Pattern.compile(regex + "/.*");
857        Document like = newDocumentWithField(fieldInfo, pattern);
858        return new Document(QueryOperators.OR, Arrays.asList(eq, like));
859    }
860
861    protected FieldInfo walkReference(Operand value) {
862        if (!(value instanceof Reference)) {
863            throw new QueryParseException("Invalid query, left hand side must be a property: " + value);
864        }
865        return walkReference((Reference) value);
866    }
867
868    // non-canonical index syntax, for replaceAll
869    protected final static Pattern NON_CANON_INDEX = Pattern.compile("[^/\\[\\]]+" // name
870            + "\\[(\\d+|\\*|\\*\\d+)\\]" // index in brackets
871    );
872
873    /**
874     * Canonicalizes a Nuxeo-xpath.
875     * <p>
876     * Replaces {@code a/foo[123]/b} with {@code a/123/b}
877     * <p>
878     * A star or a star followed by digits can be used instead of just the digits as well.
879     *
880     * @param xpath the xpath
881     * @return the canonicalized xpath.
882     */
883    public static String canonicalXPath(String xpath) {
884        while (xpath.length() > 0 && xpath.charAt(0) == '/') {
885            xpath = xpath.substring(1);
886        }
887        if (xpath.indexOf('[') == -1) {
888            return xpath;
889        } else {
890            return NON_CANON_INDEX.matcher(xpath).replaceAll("$1");
891        }
892    }
893
894    protected static class FieldInfo {
895
896        /** NXQL property. */
897        protected final String prop;
898
899        /** MongoDB field for query. foo/0/bar -> foo.0.bar; foo / * / bar -> foo.bar */
900        protected final String queryField;
901
902        /** MongoDB field for projection. */
903        protected final String projectionField;
904
905        protected final Type type;
906
907        /**
908         * Boolean system properties only use TRUE or NULL, not FALSE, so queries must be updated accordingly.
909         */
910        protected final boolean isTrueOrNullBoolean;
911
912        protected FieldInfo(String prop, String queryField, String projectionField, Type type,
913                boolean isTrueOrNullBoolean) {
914            this.prop = prop;
915            this.queryField = queryField;
916            this.projectionField = projectionField;
917            this.type = type;
918            this.isTrueOrNullBoolean = isTrueOrNullBoolean;
919        }
920
921        protected boolean isBoolean() {
922            return type instanceof BooleanType;
923        }
924    }
925
926    /**
927     * Returns the MongoDB field for this reference.
928     */
929    public FieldInfo walkReference(Reference ref) {
930        FieldInfo fieldInfo = walkReference(ref.name);
931        if (DATE_CAST.equals(ref.cast)) {
932            Type type = fieldInfo.type;
933            if (!(type instanceof DateType
934                    || (type instanceof ListType && ((ListType) type).getFieldType() instanceof DateType))) {
935                throw new QueryParseException("Cannot cast to " + ref.cast + ": " + ref.name);
936            }
937            // fieldInfo.isDateCast = true;
938        }
939        return fieldInfo;
940    }
941
942    protected FieldInfo walkReference(String name) {
943        String prop = canonicalXPath(name);
944        String[] parts = prop.split("/");
945        if (prop.startsWith(NXQL.ECM_PREFIX)) {
946            if (prop.startsWith(NXQL.ECM_ACL + "/")) {
947                return parseACP(prop, parts);
948            }
949            if (prop.startsWith(NXQL.ECM_TAG)) {
950                String queryField = FACETED_TAG + "." + FACETED_TAG_LABEL;
951                queryField = stripElemMatchPrefix(queryField);
952                return new FieldInfo(prop, queryField, queryField, StringType.INSTANCE, true);
953            }
954            // simple field
955            String field = DBSSession.convToInternal(prop);
956            Type type = DBSSession.getType(field);
957            String queryField = converter.keyToBson(field);
958            queryField = stripElemMatchPrefix(queryField);
959            return new FieldInfo(prop, queryField, field, type, true);
960        } else {
961            String first = parts[0];
962            Field field = schemaManager.getField(first);
963            if (field == null) {
964                if (first.indexOf(':') > -1) {
965                    throw new QueryParseException("No such property: " + name);
966                }
967                // check without prefix
968                // TODO precompute this in SchemaManagerImpl
969                for (Schema schema : schemaManager.getSchemas()) {
970                    if (!StringUtils.isBlank(schema.getNamespace().prefix)) {
971                        // schema with prefix, do not consider as candidate
972                        continue;
973                    }
974                    field = schema.getField(first);
975                    if (field != null) {
976                        break;
977                    }
978                }
979                if (field == null) {
980                    throw new QueryParseException("No such property: " + name);
981                }
982            }
983            Type type = field.getType();
984            if (PROP_UID_MAJOR_VERSION.equals(prop) || PROP_UID_MINOR_VERSION.equals(prop)
985                    || PROP_MAJOR_VERSION.equals(prop) || PROP_MINOR_VERSION.equals(prop)) {
986                String fieldName = DBSSession.convToInternal(prop);
987                return new FieldInfo(prop, fieldName, fieldName, type, true);
988            }
989
990            // canonical name
991            parts[0] = field.getName().getPrefixedName();
992            // are there wildcards or list indexes?
993            List<String> queryFieldParts = new LinkedList<>(); // field for query
994            List<String> projectionFieldParts = new LinkedList<>(); // field for projection
995            boolean firstPart = true;
996            for (String part : parts) {
997                if (NumberUtils.isDigits(part)) {
998                    // explicit list index
999                    queryFieldParts.add(part);
1000                    type = ((ListType) type).getFieldType();
1001                } else if (!part.startsWith("*")) {
1002                    // complex sub-property
1003                    queryFieldParts.add(part);
1004                    projectionFieldParts.add(part);
1005                    if (!firstPart) {
1006                        // we already computed the type of the first part
1007                        field = ((ComplexType) type).getField(part);
1008                        if (field == null) {
1009                            throw new QueryParseException("No such property: " + name);
1010                        }
1011                        type = field.getType();
1012                    }
1013                } else {
1014                    // wildcard
1015                    type = ((ListType) type).getFieldType();
1016                }
1017                firstPart = false;
1018            }
1019            String queryField = StringUtils.join(queryFieldParts, '.');
1020            String projectionField = StringUtils.join(projectionFieldParts, '.');
1021            queryField = stripElemMatchPrefix(queryField);
1022            return new FieldInfo(prop, queryField, projectionField, type, false);
1023        }
1024    }
1025
1026    protected FieldInfo parseACP(String prop, String[] parts) {
1027        if (parts.length != 3) {
1028            throw new QueryParseException("No such property: " + prop);
1029        }
1030        String wildcard = parts[1];
1031        if (NumberUtils.isDigits(wildcard)) {
1032            throw new QueryParseException("Cannot use explicit index in ACLs: " + prop);
1033        }
1034        String last = parts[2];
1035        String queryField;
1036        if (NXQL.ECM_ACL_NAME.equals(last)) {
1037            queryField = KEY_ACP + "." + KEY_ACL_NAME;
1038        } else {
1039            String fieldLast = DBSSession.convToInternalAce(last);
1040            if (fieldLast == null) {
1041                throw new QueryParseException("No such property: " + prop);
1042            }
1043            queryField = KEY_ACP + "." + KEY_ACL + "." + fieldLast;
1044        }
1045        Type type = DBSSession.getType(last);
1046        queryField = stripElemMatchPrefix(queryField);
1047        return new FieldInfo(prop, queryField, queryField, type, false);
1048    }
1049
1050    protected boolean isMixinTypes(FieldInfo fieldInfo) {
1051        return fieldInfo.queryField.equals(DBSDocument.KEY_MIXIN_TYPES);
1052    }
1053
1054    protected Set<String> getMixinDocumentTypes(String mixin) {
1055        Set<String> types = schemaManager.getDocumentTypeNamesForFacet(mixin);
1056        return types == null ? Collections.emptySet() : types;
1057    }
1058
1059    protected List<String> getDocumentTypes() {
1060        // TODO precompute in SchemaManager
1061        if (documentTypes == null) {
1062            documentTypes = new ArrayList<>();
1063            for (DocumentType docType : schemaManager.getDocumentTypes()) {
1064                documentTypes.add(docType.getName());
1065            }
1066        }
1067        return documentTypes;
1068    }
1069
1070    protected boolean isNeverPerInstanceMixin(String mixin) {
1071        return schemaManager.getNoPerDocumentQueryFacets().contains(mixin);
1072    }
1073
1074    /**
1075     * Matches the mixin types against a list of values.
1076     * <p>
1077     * Used for:
1078     * <ul>
1079     * <li>ecm:mixinTypes = 'Foo'
1080     * <li>ecm:mixinTypes != 'Foo'
1081     * <li>ecm:mixinTypes IN ('Foo', 'Bar')
1082     * <li>ecm:mixinTypes NOT IN ('Foo', 'Bar')
1083     * </ul>
1084     * <p>
1085     * ecm:mixinTypes IN ('Foo', 'Bar')
1086     *
1087     * <pre>
1088     * { "$or" : [ { "ecm:primaryType" : { "$in" : [ ... types with Foo or Bar ...]}} ,
1089     *             { "ecm:mixinTypes" : { "$in" : [ "Foo" , "Bar]}}]}
1090     * </pre>
1091     *
1092     * ecm:mixinTypes NOT IN ('Foo', 'Bar')
1093     * <p>
1094     *
1095     * <pre>
1096     * { "$and" : [ { "ecm:primaryType" : { "$in" : [ ... types without Foo nor Bar ...]}} ,
1097     *              { "ecm:mixinTypes" : { "$nin" : [ "Foo" , "Bar]}}]}
1098     * </pre>
1099     */
1100    public Document walkMixinTypes(List<String> mixins, boolean include) {
1101        /*
1102         * Primary types that match.
1103         */
1104        Set<String> matchPrimaryTypes;
1105        if (include) {
1106            matchPrimaryTypes = new HashSet<>();
1107            for (String mixin : mixins) {
1108                matchPrimaryTypes.addAll(getMixinDocumentTypes(mixin));
1109            }
1110        } else {
1111            matchPrimaryTypes = new HashSet<>(getDocumentTypes());
1112            for (String mixin : mixins) {
1113                matchPrimaryTypes.removeAll(getMixinDocumentTypes(mixin));
1114            }
1115        }
1116        /*
1117         * Instance mixins that match.
1118         */
1119        Set<String> matchMixinTypes = new HashSet<>();
1120        for (String mixin : mixins) {
1121            if (!isNeverPerInstanceMixin(mixin)) {
1122                matchMixinTypes.add(mixin);
1123            }
1124        }
1125        /*
1126         * MongoDB query generation.
1127         */
1128        // match on primary type
1129        Document p = new Document(DBSDocument.KEY_PRIMARY_TYPE, new Document(QueryOperators.IN, matchPrimaryTypes));
1130        // match on mixin types
1131        // $in/$nin with an array matches if any/no element of the array matches
1132        String innin = include ? QueryOperators.IN : QueryOperators.NIN;
1133        Document m = new Document(DBSDocument.KEY_MIXIN_TYPES, new Document(innin, matchMixinTypes));
1134        // and/or between those
1135        String op = include ? QueryOperators.OR : QueryOperators.AND;
1136        return new Document(op, Arrays.asList(p, m));
1137    }
1138
1139}