001/*
002 * (C) Copyright 2014-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage.mongodb;
020
021import static java.lang.Boolean.FALSE;
022import static java.lang.Boolean.TRUE;
023import static org.nuxeo.ecm.core.api.trash.TrashService.Feature.TRASHED_STATE_IN_MIGRATION;
024import static org.nuxeo.ecm.core.api.trash.TrashService.Feature.TRASHED_STATE_IS_DEDICATED_PROPERTY;
025import static org.nuxeo.ecm.core.api.trash.TrashService.Feature.TRASHED_STATE_IS_DEDUCED_FROM_LIFECYCLE;
026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.FACETED_TAG;
027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.FACETED_TAG_LABEL;
028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL;
029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL_NAME;
030import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP;
031import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_SCORE;
032import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.PROP_MAJOR_VERSION;
033import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.PROP_MINOR_VERSION;
034import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.PROP_UID_MAJOR_VERSION;
035import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.PROP_UID_MINOR_VERSION;
036import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_ID;
037import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_META;
038import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_TEXT_SCORE;
039
040import java.util.ArrayList;
041import java.util.Arrays;
042import java.util.Collections;
043import java.util.Date;
044import java.util.HashMap;
045import java.util.HashSet;
046import java.util.LinkedList;
047import java.util.List;
048import java.util.Map;
049import java.util.Set;
050import java.util.concurrent.atomic.AtomicInteger;
051import java.util.regex.Pattern;
052
053import org.apache.commons.lang3.StringUtils;
054import org.apache.commons.lang3.math.NumberUtils;
055import org.bson.Document;
056import org.nuxeo.ecm.core.api.LifeCycleConstants;
057import org.nuxeo.ecm.core.query.QueryParseException;
058import org.nuxeo.ecm.core.query.sql.NXQL;
059import org.nuxeo.ecm.core.query.sql.model.BooleanLiteral;
060import org.nuxeo.ecm.core.query.sql.model.DateLiteral;
061import org.nuxeo.ecm.core.query.sql.model.DoubleLiteral;
062import org.nuxeo.ecm.core.query.sql.model.Expression;
063import org.nuxeo.ecm.core.query.sql.model.Function;
064import org.nuxeo.ecm.core.query.sql.model.IntegerLiteral;
065import org.nuxeo.ecm.core.query.sql.model.Literal;
066import org.nuxeo.ecm.core.query.sql.model.LiteralList;
067import org.nuxeo.ecm.core.query.sql.model.MultiExpression;
068import org.nuxeo.ecm.core.query.sql.model.Operand;
069import org.nuxeo.ecm.core.query.sql.model.Operator;
070import org.nuxeo.ecm.core.query.sql.model.OrderByClause;
071import org.nuxeo.ecm.core.query.sql.model.OrderByExpr;
072import org.nuxeo.ecm.core.query.sql.model.Reference;
073import org.nuxeo.ecm.core.query.sql.model.SelectClause;
074import org.nuxeo.ecm.core.query.sql.model.StringLiteral;
075import org.nuxeo.ecm.core.schema.DocumentType;
076import org.nuxeo.ecm.core.schema.SchemaManager;
077import org.nuxeo.ecm.core.schema.types.ComplexType;
078import org.nuxeo.ecm.core.schema.types.Field;
079import org.nuxeo.ecm.core.schema.types.ListType;
080import org.nuxeo.ecm.core.schema.types.Schema;
081import org.nuxeo.ecm.core.schema.types.Type;
082import org.nuxeo.ecm.core.schema.types.primitives.BooleanType;
083import org.nuxeo.ecm.core.schema.types.primitives.DateType;
084import org.nuxeo.ecm.core.schema.types.primitives.StringType;
085import org.nuxeo.ecm.core.storage.ExpressionEvaluator;
086import org.nuxeo.ecm.core.storage.ExpressionEvaluator.PathResolver;
087import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer;
088import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer.FulltextQuery;
089import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer.Op;
090import org.nuxeo.ecm.core.storage.QueryOptimizer.PrefixInfo;
091import org.nuxeo.ecm.core.storage.dbs.DBSDocument;
092import org.nuxeo.ecm.core.storage.dbs.DBSSession;
093import org.nuxeo.ecm.core.trash.TrashService;
094import org.nuxeo.runtime.api.Framework;
095
096import com.mongodb.QueryOperators;
097
098/**
099 * Query builder for a MongoDB query from an {@link Expression}.
100 *
101 * @since 5.9.4
102 */
103public class MongoDBQueryBuilder {
104
105    public static final Long LONG_ZERO = Long.valueOf(0);
106
107    public static final Long LONG_ONE = Long.valueOf(1);
108
109    public static final Double ONE = Double.valueOf(1);
110
111    public static final Double MINUS_ONE = Double.valueOf(-1);
112
113    protected static final String DATE_CAST = "DATE";
114
115    protected final AtomicInteger counter = new AtomicInteger();
116
117    protected final SchemaManager schemaManager;
118
119    protected final MongoDBConverter converter;
120
121    protected final String idKey;
122
123    protected List<String> documentTypes;
124
125    protected final Expression expression;
126
127    protected final SelectClause selectClause;
128
129    protected final OrderByClause orderByClause;
130
131    protected final PathResolver pathResolver;
132
133    public boolean hasFulltext;
134
135    public boolean sortOnFulltextScore;
136
137    protected Document query;
138
139    protected Document orderBy;
140
141    protected Document projection;
142
143    protected Map<String, String> propertyKeys;
144
145    boolean projectionHasWildcard;
146
147    private boolean fulltextSearchDisabled;
148
149    /**
150     * Prefix to remove for $elemMatch (including final dot), or {@code null} if there's no current prefix to remove.
151     */
152    protected String elemMatchPrefix;
153
154    public MongoDBQueryBuilder(MongoDBRepository repository, Expression expression, SelectClause selectClause,
155            OrderByClause orderByClause, PathResolver pathResolver, boolean fulltextSearchDisabled) {
156        schemaManager = Framework.getService(SchemaManager.class);
157        converter = repository.converter;
158        idKey = repository.idKey;
159        this.expression = expression;
160        this.selectClause = selectClause;
161        this.orderByClause = orderByClause;
162        this.pathResolver = pathResolver;
163        this.fulltextSearchDisabled = fulltextSearchDisabled;
164        this.propertyKeys = new HashMap<>();
165    }
166
167    public void walk() {
168        query = walkExpression(expression); // computes hasFulltext
169        walkOrderBy(); // computes sortOnFulltextScore
170        walkProjection(); // needs hasFulltext and sortOnFulltextScore
171    }
172
173    public Document getQuery() {
174        return query;
175    }
176
177    public Document getOrderBy() {
178        return orderBy;
179    }
180
181    public Document getProjection() {
182        return projection;
183    }
184
185    public boolean hasProjectionWildcard() {
186        return projectionHasWildcard;
187    }
188
189    protected void walkOrderBy() {
190        sortOnFulltextScore = false;
191        if (orderByClause == null) {
192            orderBy = null;
193        } else {
194            orderBy = new Document();
195            for (OrderByExpr ob : orderByClause.elements) {
196                Reference ref = ob.reference;
197                boolean desc = ob.isDescending;
198                String field = walkReference(ref).queryField;
199                if (!orderBy.containsKey(field)) {
200                    Object value;
201                    if (KEY_FULLTEXT_SCORE.equals(field)) {
202                        if (!desc) {
203                            throw new QueryParseException("Cannot sort by " + NXQL.ECM_FULLTEXT_SCORE + " ascending");
204                        }
205                        sortOnFulltextScore = true;
206                        value = new Document(MONGODB_META, MONGODB_TEXT_SCORE);
207                    } else {
208                        value = desc ? MINUS_ONE : ONE;
209                    }
210                    orderBy.put(field, value);
211                }
212            }
213            if (sortOnFulltextScore && orderBy.size() > 1) {
214                throw new QueryParseException("Cannot sort by " + NXQL.ECM_FULLTEXT_SCORE + " and other criteria");
215            }
216        }
217    }
218
219    protected void walkProjection() {
220        projection = new Document();
221        boolean projectionOnFulltextScore = false;
222        for (Operand op : selectClause.getSelectList().values()) {
223            if (!(op instanceof Reference)) {
224                throw new QueryParseException("Projection not supported: " + op);
225            }
226            FieldInfo fieldInfo = walkReference((Reference) op);
227            String propertyField = fieldInfo.prop;
228            if (!propertyField.equals(NXQL.ECM_UUID) //
229                    && !propertyField.equals(fieldInfo.projectionField) //
230                    && !propertyField.contains("/")) {
231                propertyKeys.put(fieldInfo.projectionField, propertyField);
232            }
233            projection.put(fieldInfo.projectionField, ONE);
234            if (propertyField.contains("*")) {
235                projectionHasWildcard = true;
236            }
237            if (fieldInfo.projectionField.equals(KEY_FULLTEXT_SCORE)) {
238                projectionOnFulltextScore = true;
239            }
240        }
241        if (projectionOnFulltextScore || sortOnFulltextScore) {
242            if (!hasFulltext) {
243                throw new QueryParseException(NXQL.ECM_FULLTEXT_SCORE + " cannot be used without " + NXQL.ECM_FULLTEXT);
244            }
245            projection.put(KEY_FULLTEXT_SCORE, new Document(MONGODB_META, MONGODB_TEXT_SCORE));
246        }
247    }
248
249    public Document walkExpression(Expression expr) {
250        Operator op = expr.operator;
251        Operand lvalue = expr.lvalue;
252        Operand rvalue = expr.rvalue;
253        Reference ref = lvalue instanceof Reference ? (Reference) lvalue : null;
254        String name = ref != null ? ref.name : null;
255        String cast = ref != null ? ref.cast : null;
256        if (DATE_CAST.equals(cast)) {
257            checkDateLiteralForCast(op, rvalue, name);
258        }
259        if (op == Operator.STARTSWITH) {
260            return walkStartsWith(lvalue, rvalue);
261        } else if (NXQL.ECM_PATH.equals(name)) {
262            return walkEcmPath(op, rvalue);
263        } else if (NXQL.ECM_ANCESTORID.equals(name)) {
264            return walkAncestorId(op, rvalue);
265        } else if (NXQL.ECM_ISTRASHED.equals(name)) {
266            return walkIsTrashed(op, rvalue);
267        } else if (name != null && name.startsWith(NXQL.ECM_FULLTEXT) && !NXQL.ECM_FULLTEXT_JOBID.equals(name)) {
268            return walkEcmFulltext(name, op, rvalue);
269        } else if (op == Operator.SUM) {
270            throw new UnsupportedOperationException("SUM");
271        } else if (op == Operator.SUB) {
272            throw new UnsupportedOperationException("SUB");
273        } else if (op == Operator.MUL) {
274            throw new UnsupportedOperationException("MUL");
275        } else if (op == Operator.DIV) {
276            throw new UnsupportedOperationException("DIV");
277        } else if (op == Operator.LT) {
278            return walkLt(lvalue, rvalue);
279        } else if (op == Operator.GT) {
280            return walkGt(lvalue, rvalue);
281        } else if (op == Operator.EQ) {
282            return walkEq(lvalue, rvalue);
283        } else if (op == Operator.NOTEQ) {
284            return walkNotEq(lvalue, rvalue);
285        } else if (op == Operator.LTEQ) {
286            return walkLtEq(lvalue, rvalue);
287        } else if (op == Operator.GTEQ) {
288            return walkGtEq(lvalue, rvalue);
289        } else if (op == Operator.AND) {
290            if (expr instanceof MultiExpression) {
291                return walkAndMultiExpression((MultiExpression) expr);
292            } else {
293                return walkAnd(expr);
294            }
295        } else if (op == Operator.NOT) {
296            return walkNot(lvalue);
297        } else if (op == Operator.OR) {
298            return walkOr(lvalue, rvalue);
299        } else if (op == Operator.LIKE) {
300            return walkLike(lvalue, rvalue, true, false);
301        } else if (op == Operator.ILIKE) {
302            return walkLike(lvalue, rvalue, true, true);
303        } else if (op == Operator.NOTLIKE) {
304            return walkLike(lvalue, rvalue, false, false);
305        } else if (op == Operator.NOTILIKE) {
306            return walkLike(lvalue, rvalue, false, true);
307        } else if (op == Operator.IN) {
308            return walkIn(lvalue, rvalue, true);
309        } else if (op == Operator.NOTIN) {
310            return walkIn(lvalue, rvalue, false);
311        } else if (op == Operator.ISNULL) {
312            return walkIsNull(lvalue);
313        } else if (op == Operator.ISNOTNULL) {
314            return walkIsNotNull(lvalue);
315        } else if (op == Operator.BETWEEN) {
316            return walkBetween(lvalue, rvalue, true);
317        } else if (op == Operator.NOTBETWEEN) {
318            return walkBetween(lvalue, rvalue, false);
319        } else {
320            throw new QueryParseException("Unknown operator: " + op);
321        }
322    }
323
324    protected void checkDateLiteralForCast(Operator op, Operand value, String name) {
325        if (op == Operator.BETWEEN || op == Operator.NOTBETWEEN) {
326            LiteralList l = (LiteralList) value;
327            checkDateLiteralForCast(l.get(0), name);
328            checkDateLiteralForCast(l.get(1), name);
329        } else {
330            checkDateLiteralForCast(value, name);
331        }
332    }
333
334    protected void checkDateLiteralForCast(Operand value, String name) {
335        if (value instanceof DateLiteral && !((DateLiteral) value).onlyDate) {
336            throw new QueryParseException("DATE() cast must be used with DATE literal, not TIMESTAMP: " + name);
337        }
338    }
339
340    protected Document walkEcmPath(Operator op, Operand rvalue) {
341        if (op != Operator.EQ && op != Operator.NOTEQ) {
342            throw new QueryParseException(NXQL.ECM_PATH + " requires = or <> operator");
343        }
344        if (!(rvalue instanceof StringLiteral)) {
345            throw new QueryParseException(NXQL.ECM_PATH + " requires literal path as right argument");
346        }
347        String path = ((StringLiteral) rvalue).value;
348        if (path.length() > 1 && path.endsWith("/")) {
349            path = path.substring(0, path.length() - 1);
350        }
351        String id = pathResolver.getIdForPath(path);
352        if (id == null) {
353            // no such path
354            // TODO XXX do better
355            return new Document(MONGODB_ID, "__nosuchid__");
356        }
357        if (op == Operator.EQ) {
358            return new Document(idKey, id);
359        } else {
360            return new Document(idKey, new Document(QueryOperators.NE, id));
361        }
362    }
363
364    protected Document walkAncestorId(Operator op, Operand rvalue) {
365        if (op != Operator.EQ && op != Operator.NOTEQ) {
366            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires = or <> operator");
367        }
368        if (!(rvalue instanceof StringLiteral)) {
369            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires literal id as right argument");
370        }
371        String ancestorId = ((StringLiteral) rvalue).value;
372        if (op == Operator.EQ) {
373            return new Document(DBSDocument.KEY_ANCESTOR_IDS, ancestorId);
374        } else {
375            return new Document(DBSDocument.KEY_ANCESTOR_IDS, new Document(QueryOperators.NE, ancestorId));
376        }
377    }
378
379    protected Document walkEcmFulltext(String name, Operator op, Operand rvalue) {
380        if (op != Operator.EQ && op != Operator.LIKE) {
381            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires = or LIKE operator");
382        }
383        if (!(rvalue instanceof StringLiteral)) {
384            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires literal string as right argument");
385        }
386        if (fulltextSearchDisabled) {
387            throw new QueryParseException("Fulltext search disabled by configuration");
388        }
389        String fulltextQuery = ((StringLiteral) rvalue).value;
390        if (name.equals(NXQL.ECM_FULLTEXT)) {
391            // standard fulltext query
392            hasFulltext = true;
393            String ft = getMongoDBFulltextQuery(fulltextQuery);
394            if (ft == null) {
395                // empty query, matches nothing
396                return new Document(MONGODB_ID, "__nosuchid__");
397            }
398            Document textSearch = new Document();
399            textSearch.put(QueryOperators.SEARCH, ft);
400            // TODO language?
401            return new Document(QueryOperators.TEXT, textSearch);
402        } else {
403            // secondary index match with explicit field
404            // do a regexp on the field
405            if (name.charAt(NXQL.ECM_FULLTEXT.length()) != '.') {
406                throw new QueryParseException(name + " has incorrect syntax" + " for a secondary fulltext index");
407            }
408            String prop = name.substring(NXQL.ECM_FULLTEXT.length() + 1);
409            String ft = fulltextQuery.replace(" ", "%");
410            rvalue = new StringLiteral(ft);
411            return walkLike(new Reference(prop), rvalue, true, true);
412        }
413    }
414
415    protected Document walkIsTrashed(Operator op, Operand rvalue) {
416        if (op != Operator.EQ && op != Operator.NOTEQ) {
417            throw new QueryParseException(NXQL.ECM_ISTRASHED + " requires = or <> operator");
418        }
419        TrashService trashService = Framework.getService(TrashService.class);
420        if (trashService.hasFeature(TRASHED_STATE_IS_DEDUCED_FROM_LIFECYCLE)) {
421            return walkIsTrashed(new Reference(NXQL.ECM_LIFECYCLESTATE), op, rvalue,
422                    new StringLiteral(LifeCycleConstants.DELETED_STATE));
423        } else if (trashService.hasFeature(TRASHED_STATE_IN_MIGRATION)) {
424            Document lifeCycleTrashed = walkIsTrashed(new Reference(NXQL.ECM_LIFECYCLESTATE), op, rvalue,
425                    new StringLiteral(LifeCycleConstants.DELETED_STATE));
426            Document propertyTrashed = walkIsTrashed(new Reference(NXQL.ECM_ISTRASHED), op, rvalue,
427                    new BooleanLiteral(true));
428            return new Document(QueryOperators.OR, new ArrayList<>(Arrays.asList(lifeCycleTrashed, propertyTrashed)));
429        } else if (trashService.hasFeature(TRASHED_STATE_IS_DEDICATED_PROPERTY)) {
430            return walkIsTrashed(new Reference(NXQL.ECM_ISTRASHED), op, rvalue, new BooleanLiteral(true));
431        } else {
432            throw new UnsupportedOperationException("TrashService is in an unknown state");
433        }
434    }
435
436    protected Document walkIsTrashed(Reference ref, Operator op, Operand initialRvalue, Literal deletedRvalue) {
437        long v;
438        if (!(initialRvalue instanceof IntegerLiteral)
439                || ((v = ((IntegerLiteral) initialRvalue).value) != 0 && v != 1)) {
440            throw new QueryParseException(NXQL.ECM_ISTRASHED + " requires literal 0 or 1 as right argument");
441        }
442        boolean equalsDeleted = op == Operator.EQ ^ v == 0;
443        if (equalsDeleted) {
444            return walkEq(ref, deletedRvalue);
445        } else {
446            return walkNotEq(ref, deletedRvalue);
447        }
448    }
449
450    // public static for tests
451    public static String getMongoDBFulltextQuery(String query) {
452        FulltextQuery ft = FulltextQueryAnalyzer.analyzeFulltextQuery(query);
453        if (ft == null) {
454            return null;
455        }
456        // translate into MongoDB syntax
457        return translateFulltext(ft, false);
458    }
459
460    /**
461     * Transforms the NXQL fulltext syntax into MongoDB syntax.
462     * <p>
463     * The MongoDB fulltext query syntax is badly documented, but is actually the following:
464     * <ul>
465     * <li>a term is a word,
466     * <li>a phrase is a set of spaced-separated words enclosed in double quotes,
467     * <li>negation is done by prepending a -,
468     * <li>the query is a space-separated set of terms, negated terms, phrases, or negated phrases.
469     * <li>all the words of non-negated phrases are also added to the terms.
470     * </ul>
471     * <p>
472     * The matching algorithm is (excluding stemming and stop words):
473     * <ul>
474     * <li>filter out documents with the negative terms, the negative phrases, or missing the phrases,
475     * <li>then if any term is present in the document then it's a match.
476     * </ul>
477     */
478    protected static String translateFulltext(FulltextQuery ft, boolean and) {
479        List<String> buf = new ArrayList<>();
480        translateFulltext(ft, buf, and);
481        return StringUtils.join(buf, ' ');
482    }
483
484    protected static void translateFulltext(FulltextQuery ft, List<String> buf, boolean and) {
485        if (ft.op == Op.OR) {
486            for (FulltextQuery term : ft.terms) {
487                // don't quote words for OR
488                translateFulltext(term, buf, false);
489            }
490        } else if (ft.op == Op.AND) {
491            for (FulltextQuery term : ft.terms) {
492                // quote words for AND
493                translateFulltext(term, buf, true);
494            }
495        } else {
496            String neg;
497            if (ft.op == Op.NOTWORD) {
498                neg = "-";
499            } else { // Op.WORD
500                neg = "";
501            }
502            String word = ft.word.toLowerCase();
503            if (ft.isPhrase() || and) {
504                buf.add(neg + '"' + word + '"');
505            } else {
506                buf.add(neg + word);
507            }
508        }
509    }
510
511    public Document walkNot(Operand value) {
512        Object val = walkOperand(value);
513        Object not = pushDownNot(val);
514        if (!(not instanceof Document)) {
515            throw new QueryParseException("Cannot do NOT on: " + val);
516        }
517        return (Document) not;
518    }
519
520    protected Object pushDownNot(Object object) {
521        if (!(object instanceof Document)) {
522            throw new QueryParseException("Cannot do NOT on: " + object);
523        }
524        Document ob = (Document) object;
525        Set<String> keySet = ob.keySet();
526        if (keySet.size() != 1) {
527            throw new QueryParseException("Cannot do NOT on: " + ob);
528        }
529        String key = keySet.iterator().next();
530        Object value = ob.get(key);
531        if (!key.startsWith("$")) {
532            if (value instanceof Document) {
533                // push down inside dbobject
534                return new Document(key, pushDownNot(value));
535            } else {
536                // k = v -> k != v
537                return new Document(key, new Document(QueryOperators.NE, value));
538            }
539        }
540        if (QueryOperators.NE.equals(key)) {
541            // NOT k != v -> k = v
542            return value;
543        }
544        if (QueryOperators.NOT.equals(key)) {
545            // NOT NOT v -> v
546            return value;
547        }
548        if (QueryOperators.AND.equals(key) || QueryOperators.OR.equals(key)) {
549            // boolean algebra
550            // NOT (v1 AND v2) -> NOT v1 OR NOT v2
551            // NOT (v1 OR v2) -> NOT v1 AND NOT v2
552            String op = QueryOperators.AND.equals(key) ? QueryOperators.OR : QueryOperators.AND;
553            List<Object> list = (List<Object>) value;
554            for (int i = 0; i < list.size(); i++) {
555                list.set(i, pushDownNot(list.get(i)));
556            }
557            return new Document(op, list);
558        }
559        if (QueryOperators.IN.equals(key) || QueryOperators.NIN.equals(key)) {
560            // boolean algebra
561            // IN <-> NIN
562            String op = QueryOperators.IN.equals(key) ? QueryOperators.NIN : QueryOperators.IN;
563            return new Document(op, value);
564        }
565        if (QueryOperators.LT.equals(key) || QueryOperators.GT.equals(key) || QueryOperators.LTE.equals(key)
566                || QueryOperators.GTE.equals(key)) {
567            // TODO use inverse operators?
568            return new Document(QueryOperators.NOT, ob);
569        }
570        throw new QueryParseException("Unknown operator for NOT: " + key);
571    }
572
573    protected Document newDocumentWithField(FieldInfo fieldInfo, Object value) {
574        return new Document(fieldInfo.queryField, value);
575    }
576
577    public Document walkIsNull(Operand value) {
578        FieldInfo fieldInfo = walkReference(value);
579        return newDocumentWithField(fieldInfo, null);
580    }
581
582    public Document walkIsNotNull(Operand value) {
583        FieldInfo fieldInfo = walkReference(value);
584        return newDocumentWithField(fieldInfo, new Document(QueryOperators.NE, null));
585    }
586
587    public Document walkAndMultiExpression(MultiExpression expr) {
588        return walkAnd(expr, expr.values);
589    }
590
591    public Document walkAnd(Expression expr) {
592        return walkAnd(expr, Arrays.asList(expr.lvalue, expr.rvalue));
593    }
594
595    protected static final Pattern SLASH_WILDCARD_SLASH = Pattern.compile("/\\*\\d+(/)?");
596
597    protected Document walkAnd(Expression expr, List<Operand> values) {
598        if (values.size() == 1) {
599            return (Document) walkOperand(values.get(0));
600        }
601        // PrefixInfo was computed by the QueryOptimizer
602        PrefixInfo info = (PrefixInfo) expr.getInfo();
603        if (info == null || info.count < 2) {
604            List<Object> list = walkOperandList(values);
605            return new Document(QueryOperators.AND, list);
606        }
607
608        // we have a common prefix for all underlying references, extract it into an $elemMatch node
609
610        // info.prefix is the DBS common prefix, ex: foo/bar/*1; ecm:acp/*1/acl/*1
611        // compute MongoDB prefix: foo.bar.; ecm:acp.acl.
612        String prefix = SLASH_WILDCARD_SLASH.matcher(info.prefix).replaceAll(".");
613        // remove current prefix and trailing . for actual field match
614        String fieldBase = stripElemMatchPrefix(prefix.substring(0, prefix.length() - 1));
615
616        String previousElemMatchPrefix = elemMatchPrefix;
617        elemMatchPrefix = prefix;
618        List<Object> list = walkOperandList(values);
619        elemMatchPrefix = previousElemMatchPrefix;
620
621        return new Document(fieldBase, new Document(QueryOperators.ELEM_MATCH, new Document(QueryOperators.AND, list)));
622    }
623
624    protected String stripElemMatchPrefix(String field) {
625        if (elemMatchPrefix != null && field.startsWith(elemMatchPrefix)) {
626            field = field.substring(elemMatchPrefix.length());
627        }
628        return field;
629    }
630
631    public Document walkOr(Operand lvalue, Operand rvalue) {
632        Object left = walkOperand(lvalue);
633        Object right = walkOperand(rvalue);
634        List<Object> list = new ArrayList<>(Arrays.asList(left, right));
635        return new Document(QueryOperators.OR, list);
636    }
637
638    protected Object checkBoolean(FieldInfo fieldInfo, Object right) {
639        if (fieldInfo.isBoolean()) {
640            // convert 0 / 1 to actual booleans
641            if (right instanceof Long) {
642                if (LONG_ZERO.equals(right)) {
643                    right = fieldInfo.isTrueOrNullBoolean ? null : FALSE;
644                } else if (LONG_ONE.equals(right)) {
645                    right = TRUE;
646                } else {
647                    throw new QueryParseException("Invalid boolean: " + right);
648                }
649            }
650        }
651        return right;
652    }
653
654    public Document walkEq(Operand lvalue, Operand rvalue) {
655        FieldInfo fieldInfo = walkReference(lvalue);
656        Object right = walkOperand(rvalue);
657        if (isMixinTypes(fieldInfo)) {
658            if (!(right instanceof String)) {
659                throw new QueryParseException("Invalid EQ rhs: " + rvalue);
660            }
661            return walkMixinTypes(Collections.singletonList((String) right), true);
662        }
663        right = checkBoolean(fieldInfo, right);
664        // TODO check list fields
665        return newDocumentWithField(fieldInfo, right);
666    }
667
668    public Document walkNotEq(Operand lvalue, Operand rvalue) {
669        FieldInfo fieldInfo = walkReference(lvalue);
670        Object right = walkOperand(rvalue);
671        if (isMixinTypes(fieldInfo)) {
672            if (!(right instanceof String)) {
673                throw new QueryParseException("Invalid NE rhs: " + rvalue);
674            }
675            return walkMixinTypes(Collections.singletonList((String) right), false);
676        }
677        right = checkBoolean(fieldInfo, right);
678        // TODO check list fields
679        return newDocumentWithField(fieldInfo, new Document(QueryOperators.NE, right));
680    }
681
682    public Document walkLt(Operand lvalue, Operand rvalue) {
683        FieldInfo fieldInfo = walkReference(lvalue);
684        Object right = walkOperand(rvalue);
685        return newDocumentWithField(fieldInfo, new Document(QueryOperators.LT, right));
686    }
687
688    public Document walkGt(Operand lvalue, Operand rvalue) {
689        FieldInfo fieldInfo = walkReference(lvalue);
690        Object right = walkOperand(rvalue);
691        return newDocumentWithField(fieldInfo, new Document(QueryOperators.GT, right));
692    }
693
694    public Document walkLtEq(Operand lvalue, Operand rvalue) {
695        FieldInfo fieldInfo = walkReference(lvalue);
696        Object right = walkOperand(rvalue);
697        return newDocumentWithField(fieldInfo, new Document(QueryOperators.LTE, right));
698    }
699
700    public Document walkGtEq(Operand lvalue, Operand rvalue) {
701        FieldInfo fieldInfo = walkReference(lvalue);
702        Object right = walkOperand(rvalue);
703        return newDocumentWithField(fieldInfo, new Document(QueryOperators.GTE, right));
704    }
705
706    public Document walkBetween(Operand lvalue, Operand rvalue, boolean positive) {
707        LiteralList l = (LiteralList) rvalue;
708        FieldInfo fieldInfo = walkReference(lvalue);
709        Object left = walkOperand(l.get(0));
710        Object right = walkOperand(l.get(1));
711        if (positive) {
712            Document range = new Document();
713            range.put(QueryOperators.GTE, left);
714            range.put(QueryOperators.LTE, right);
715            return newDocumentWithField(fieldInfo, range);
716        } else {
717            Document a = newDocumentWithField(fieldInfo, new Document(QueryOperators.LT, left));
718            Document b = newDocumentWithField(fieldInfo, new Document(QueryOperators.GT, right));
719            return new Document(QueryOperators.OR, Arrays.asList(a, b));
720        }
721    }
722
723    public Document walkIn(Operand lvalue, Operand rvalue, boolean positive) {
724        FieldInfo fieldInfo = walkReference(lvalue);
725        Object right = walkOperand(rvalue);
726        if (!(right instanceof List)) {
727            throw new QueryParseException("Invalid IN, right hand side must be a list: " + rvalue);
728        }
729        if (isMixinTypes(fieldInfo)) {
730            return walkMixinTypes((List<String>) right, positive);
731        }
732        // TODO check list fields
733        List<Object> list = (List<Object>) right;
734        return newDocumentWithField(fieldInfo, new Document(positive ? QueryOperators.IN : QueryOperators.NIN, list));
735    }
736
737    public Document walkLike(Operand lvalue, Operand rvalue, boolean positive, boolean caseInsensitive) {
738        FieldInfo fieldInfo = walkReference(lvalue);
739        if (!(rvalue instanceof StringLiteral)) {
740            throw new QueryParseException("Invalid LIKE/ILIKE, right hand side must be a string: " + rvalue);
741        }
742        // TODO check list fields
743        String like = walkStringLiteral((StringLiteral) rvalue);
744        String regex = ExpressionEvaluator.likeToRegex(like);
745
746        int flags = caseInsensitive ? Pattern.CASE_INSENSITIVE : 0;
747        Pattern pattern = Pattern.compile(regex, flags);
748        Object value;
749        if (positive) {
750            value = pattern;
751        } else {
752            value = new Document(QueryOperators.NOT, pattern);
753        }
754        return newDocumentWithField(fieldInfo, value);
755    }
756
757    public Object walkOperand(Operand op) {
758        if (op instanceof Literal) {
759            return walkLiteral((Literal) op);
760        } else if (op instanceof LiteralList) {
761            return walkLiteralList((LiteralList) op);
762        } else if (op instanceof Function) {
763            return walkFunction((Function) op);
764        } else if (op instanceof Expression) {
765            return walkExpression((Expression) op);
766        } else if (op instanceof Reference) {
767            return walkReference((Reference) op);
768        } else {
769            throw new QueryParseException("Unknown operand: " + op);
770        }
771    }
772
773    public Object walkLiteral(Literal lit) {
774        if (lit instanceof BooleanLiteral) {
775            return walkBooleanLiteral((BooleanLiteral) lit);
776        } else if (lit instanceof DateLiteral) {
777            return walkDateLiteral((DateLiteral) lit);
778        } else if (lit instanceof DoubleLiteral) {
779            return walkDoubleLiteral((DoubleLiteral) lit);
780        } else if (lit instanceof IntegerLiteral) {
781            return walkIntegerLiteral((IntegerLiteral) lit);
782        } else if (lit instanceof StringLiteral) {
783            return walkStringLiteral((StringLiteral) lit);
784        } else {
785            throw new QueryParseException("Unknown literal: " + lit);
786        }
787    }
788
789    public Object walkBooleanLiteral(BooleanLiteral lit) {
790        return Boolean.valueOf(lit.value);
791    }
792
793    public Date walkDateLiteral(DateLiteral lit) {
794        return lit.value.toDate(); // TODO onlyDate
795    }
796
797    public Double walkDoubleLiteral(DoubleLiteral lit) {
798        return Double.valueOf(lit.value);
799    }
800
801    public Long walkIntegerLiteral(IntegerLiteral lit) {
802        return Long.valueOf(lit.value);
803    }
804
805    public String walkStringLiteral(StringLiteral lit) {
806        return lit.value;
807    }
808
809    public List<Object> walkLiteralList(LiteralList litList) {
810        List<Object> list = new ArrayList<>(litList.size());
811        for (Literal lit : litList) {
812            list.add(walkLiteral(lit));
813        }
814        return list;
815    }
816
817    protected List<Object> walkOperandList(List<Operand> values) {
818        List<Object> list = new LinkedList<>();
819        for (Operand value : values) {
820            list.add(walkOperand(value));
821        }
822        return list;
823    }
824
825    public Object walkFunction(Function func) {
826        throw new UnsupportedOperationException(func.name);
827    }
828
829    public Document walkStartsWith(Operand lvalue, Operand rvalue) {
830        if (!(lvalue instanceof Reference)) {
831            throw new QueryParseException("Invalid STARTSWITH query, left hand side must be a property: " + lvalue);
832        }
833        String name = ((Reference) lvalue).name;
834        if (!(rvalue instanceof StringLiteral)) {
835            throw new QueryParseException(
836                    "Invalid STARTSWITH query, right hand side must be a literal path: " + rvalue);
837        }
838        String path = ((StringLiteral) rvalue).value;
839        if (path.length() > 1 && path.endsWith("/")) {
840            path = path.substring(0, path.length() - 1);
841        }
842
843        if (NXQL.ECM_PATH.equals(name)) {
844            return walkStartsWithPath(path);
845        } else {
846            return walkStartsWithNonPath(lvalue, path);
847        }
848    }
849
850    protected Document walkStartsWithPath(String path) {
851        // resolve path
852        String ancestorId = pathResolver.getIdForPath(path);
853        if (ancestorId == null) {
854            // no such path
855            // TODO XXX do better
856            return new Document(MONGODB_ID, "__nosuchid__");
857        }
858        return new Document(DBSDocument.KEY_ANCESTOR_IDS, ancestorId);
859    }
860
861    protected Document walkStartsWithNonPath(Operand lvalue, String path) {
862        FieldInfo fieldInfo = walkReference(lvalue);
863        Document eq = newDocumentWithField(fieldInfo, path);
864        // escape except alphanumeric and others not needing escaping
865        String regex = path.replaceAll("([^a-zA-Z0-9 /])", "\\\\$1");
866        Pattern pattern = Pattern.compile(regex + "/.*");
867        Document like = newDocumentWithField(fieldInfo, pattern);
868        return new Document(QueryOperators.OR, Arrays.asList(eq, like));
869    }
870
871    protected FieldInfo walkReference(Operand value) {
872        if (!(value instanceof Reference)) {
873            throw new QueryParseException("Invalid query, left hand side must be a property: " + value);
874        }
875        return walkReference((Reference) value);
876    }
877
878    // non-canonical index syntax, for replaceAll
879    protected final static Pattern NON_CANON_INDEX = Pattern.compile("[^/\\[\\]]+" // name
880            + "\\[(\\d+|\\*|\\*\\d+)\\]" // index in brackets
881    );
882
883    /**
884     * Canonicalizes a Nuxeo-xpath.
885     * <p>
886     * Replaces {@code a/foo[123]/b} with {@code a/123/b}
887     * <p>
888     * A star or a star followed by digits can be used instead of just the digits as well.
889     *
890     * @param xpath the xpath
891     * @return the canonicalized xpath.
892     */
893    public static String canonicalXPath(String xpath) {
894        while (xpath.length() > 0 && xpath.charAt(0) == '/') {
895            xpath = xpath.substring(1);
896        }
897        if (xpath.indexOf('[') == -1) {
898            return xpath;
899        } else {
900            return NON_CANON_INDEX.matcher(xpath).replaceAll("$1");
901        }
902    }
903
904    protected static class FieldInfo {
905
906        /** NXQL property. */
907        protected final String prop;
908
909        /** MongoDB field for query. foo/0/bar -> foo.0.bar; foo / * / bar -> foo.bar */
910        protected final String queryField;
911
912        /** MongoDB field for projection. */
913        protected final String projectionField;
914
915        protected final Type type;
916
917        /**
918         * Boolean system properties only use TRUE or NULL, not FALSE, so queries must be updated accordingly.
919         */
920        protected final boolean isTrueOrNullBoolean;
921
922        protected FieldInfo(String prop, String queryField, String projectionField, Type type,
923                boolean isTrueOrNullBoolean) {
924            this.prop = prop;
925            this.queryField = queryField;
926            this.projectionField = projectionField;
927            this.type = type;
928            this.isTrueOrNullBoolean = isTrueOrNullBoolean;
929        }
930
931        protected boolean isBoolean() {
932            return type instanceof BooleanType;
933        }
934    }
935
936    /**
937     * Returns the MongoDB field for this reference.
938     */
939    public FieldInfo walkReference(Reference ref) {
940        FieldInfo fieldInfo = walkReference(ref.name);
941        if (DATE_CAST.equals(ref.cast)) {
942            Type type = fieldInfo.type;
943            if (!(type instanceof DateType
944                    || (type instanceof ListType && ((ListType) type).getFieldType() instanceof DateType))) {
945                throw new QueryParseException("Cannot cast to " + ref.cast + ": " + ref.name);
946            }
947            // fieldInfo.isDateCast = true;
948        }
949        return fieldInfo;
950    }
951
952    protected FieldInfo walkReference(String name) {
953        String prop = canonicalXPath(name);
954        String[] parts = prop.split("/");
955        if (prop.startsWith(NXQL.ECM_PREFIX)) {
956            if (prop.startsWith(NXQL.ECM_ACL + "/")) {
957                return parseACP(prop, parts);
958            }
959            if (prop.startsWith(NXQL.ECM_TAG)) {
960                String queryField = FACETED_TAG + "." + FACETED_TAG_LABEL;
961                queryField = stripElemMatchPrefix(queryField);
962                return new FieldInfo(prop, queryField, queryField, StringType.INSTANCE, true);
963            }
964            // simple field
965            String field = DBSSession.convToInternal(prop);
966            Type type = DBSSession.getType(field);
967            String queryField = converter.keyToBson(field);
968            queryField = stripElemMatchPrefix(queryField);
969            return new FieldInfo(prop, queryField, field, type, true);
970        } else {
971            String first = parts[0];
972            Field field = schemaManager.getField(first);
973            if (field == null) {
974                if (first.indexOf(':') > -1) {
975                    throw new QueryParseException("No such property: " + name);
976                }
977                // check without prefix
978                // TODO precompute this in SchemaManagerImpl
979                for (Schema schema : schemaManager.getSchemas()) {
980                    if (!StringUtils.isBlank(schema.getNamespace().prefix)) {
981                        // schema with prefix, do not consider as candidate
982                        continue;
983                    }
984                    field = schema.getField(first);
985                    if (field != null) {
986                        break;
987                    }
988                }
989                if (field == null) {
990                    throw new QueryParseException("No such property: " + name);
991                }
992            }
993            Type type = field.getType();
994            if (PROP_UID_MAJOR_VERSION.equals(prop) || PROP_UID_MINOR_VERSION.equals(prop)
995                    || PROP_MAJOR_VERSION.equals(prop) || PROP_MINOR_VERSION.equals(prop)) {
996                String fieldName = DBSSession.convToInternal(prop);
997                return new FieldInfo(prop, fieldName, fieldName, type, true);
998            }
999
1000            // canonical name
1001            parts[0] = field.getName().getPrefixedName();
1002            // are there wildcards or list indexes?
1003            List<String> queryFieldParts = new LinkedList<>(); // field for query
1004            List<String> projectionFieldParts = new LinkedList<>(); // field for projection
1005            boolean firstPart = true;
1006            for (String part : parts) {
1007                if (NumberUtils.isDigits(part)) {
1008                    // explicit list index
1009                    queryFieldParts.add(part);
1010                    type = ((ListType) type).getFieldType();
1011                } else if (!part.startsWith("*")) {
1012                    // complex sub-property
1013                    queryFieldParts.add(part);
1014                    projectionFieldParts.add(part);
1015                    if (!firstPart) {
1016                        // we already computed the type of the first part
1017                        field = ((ComplexType) type).getField(part);
1018                        if (field == null) {
1019                            throw new QueryParseException("No such property: " + name);
1020                        }
1021                        type = field.getType();
1022                    }
1023                } else {
1024                    // wildcard
1025                    type = ((ListType) type).getFieldType();
1026                }
1027                firstPart = false;
1028            }
1029            String queryField = StringUtils.join(queryFieldParts, '.');
1030            String projectionField = StringUtils.join(projectionFieldParts, '.');
1031            queryField = stripElemMatchPrefix(queryField);
1032            return new FieldInfo(prop, queryField, projectionField, type, false);
1033        }
1034    }
1035
1036    protected FieldInfo parseACP(String prop, String[] parts) {
1037        if (parts.length != 3) {
1038            throw new QueryParseException("No such property: " + prop);
1039        }
1040        String wildcard = parts[1];
1041        if (NumberUtils.isDigits(wildcard)) {
1042            throw new QueryParseException("Cannot use explicit index in ACLs: " + prop);
1043        }
1044        String last = parts[2];
1045        String queryField;
1046        if (NXQL.ECM_ACL_NAME.equals(last)) {
1047            queryField = KEY_ACP + "." + KEY_ACL_NAME;
1048        } else {
1049            String fieldLast = DBSSession.convToInternalAce(last);
1050            if (fieldLast == null) {
1051                throw new QueryParseException("No such property: " + prop);
1052            }
1053            queryField = KEY_ACP + "." + KEY_ACL + "." + fieldLast;
1054        }
1055        Type type = DBSSession.getType(last);
1056        queryField = stripElemMatchPrefix(queryField);
1057        return new FieldInfo(prop, queryField, queryField, type, false);
1058    }
1059
1060    protected boolean isMixinTypes(FieldInfo fieldInfo) {
1061        return fieldInfo.queryField.equals(DBSDocument.KEY_MIXIN_TYPES);
1062    }
1063
1064    protected Set<String> getMixinDocumentTypes(String mixin) {
1065        Set<String> types = schemaManager.getDocumentTypeNamesForFacet(mixin);
1066        return types == null ? Collections.emptySet() : types;
1067    }
1068
1069    protected List<String> getDocumentTypes() {
1070        // TODO precompute in SchemaManager
1071        if (documentTypes == null) {
1072            documentTypes = new ArrayList<>();
1073            for (DocumentType docType : schemaManager.getDocumentTypes()) {
1074                documentTypes.add(docType.getName());
1075            }
1076        }
1077        return documentTypes;
1078    }
1079
1080    protected boolean isNeverPerInstanceMixin(String mixin) {
1081        return schemaManager.getNoPerDocumentQueryFacets().contains(mixin);
1082    }
1083
1084    /**
1085     * Matches the mixin types against a list of values.
1086     * <p>
1087     * Used for:
1088     * <ul>
1089     * <li>ecm:mixinTypes = 'Foo'
1090     * <li>ecm:mixinTypes != 'Foo'
1091     * <li>ecm:mixinTypes IN ('Foo', 'Bar')
1092     * <li>ecm:mixinTypes NOT IN ('Foo', 'Bar')
1093     * </ul>
1094     * <p>
1095     * ecm:mixinTypes IN ('Foo', 'Bar')
1096     *
1097     * <pre>
1098     * { "$or" : [ { "ecm:primaryType" : { "$in" : [ ... types with Foo or Bar ...]}} ,
1099     *             { "ecm:mixinTypes" : { "$in" : [ "Foo" , "Bar]}}]}
1100     * </pre>
1101     *
1102     * ecm:mixinTypes NOT IN ('Foo', 'Bar')
1103     * <p>
1104     *
1105     * <pre>
1106     * { "$and" : [ { "ecm:primaryType" : { "$in" : [ ... types without Foo nor Bar ...]}} ,
1107     *              { "ecm:mixinTypes" : { "$nin" : [ "Foo" , "Bar]}}]}
1108     * </pre>
1109     */
1110    public Document walkMixinTypes(List<String> mixins, boolean include) {
1111        /*
1112         * Primary types that match.
1113         */
1114        Set<String> matchPrimaryTypes;
1115        if (include) {
1116            matchPrimaryTypes = new HashSet<>();
1117            for (String mixin : mixins) {
1118                matchPrimaryTypes.addAll(getMixinDocumentTypes(mixin));
1119            }
1120        } else {
1121            matchPrimaryTypes = new HashSet<>(getDocumentTypes());
1122            for (String mixin : mixins) {
1123                matchPrimaryTypes.removeAll(getMixinDocumentTypes(mixin));
1124            }
1125        }
1126        /*
1127         * Instance mixins that match.
1128         */
1129        Set<String> matchMixinTypes = new HashSet<>();
1130        for (String mixin : mixins) {
1131            if (!isNeverPerInstanceMixin(mixin)) {
1132                matchMixinTypes.add(mixin);
1133            }
1134        }
1135        /*
1136         * MongoDB query generation.
1137         */
1138        // match on primary type
1139        Document p = new Document(DBSDocument.KEY_PRIMARY_TYPE, new Document(QueryOperators.IN, matchPrimaryTypes));
1140        // match on mixin types
1141        // $in/$nin with an array matches if any/no element of the array matches
1142        String innin = include ? QueryOperators.IN : QueryOperators.NIN;
1143        Document m = new Document(DBSDocument.KEY_MIXIN_TYPES, new Document(innin, matchMixinTypes));
1144        // and/or between those
1145        String op = include ? QueryOperators.OR : QueryOperators.AND;
1146        return new Document(op, Arrays.asList(p, m));
1147    }
1148
1149}