001/*
002 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage.mongodb;
020
021import static java.lang.Boolean.FALSE;
022import static java.lang.Boolean.TRUE;
023import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_GRANT;
024import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL;
025import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL_NAME;
026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP;
027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_SCORE;
028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ID;
029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_NAME;
030import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PARENT_ID;
031import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_ID;
032import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_META;
033import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_TEXT_SCORE;
034
035import java.util.ArrayList;
036import java.util.Arrays;
037import java.util.Collections;
038import java.util.Date;
039import java.util.HashMap;
040import java.util.HashSet;
041import java.util.Iterator;
042import java.util.LinkedHashMap;
043import java.util.LinkedList;
044import java.util.List;
045import java.util.Map;
046import java.util.Map.Entry;
047import java.util.Set;
048import java.util.concurrent.atomic.AtomicInteger;
049import java.util.regex.Matcher;
050import java.util.regex.Pattern;
051
052import org.apache.commons.lang.StringUtils;
053import org.apache.commons.lang.math.NumberUtils;
054import org.nuxeo.ecm.core.query.QueryParseException;
055import org.nuxeo.ecm.core.query.sql.NXQL;
056import org.nuxeo.ecm.core.query.sql.model.BooleanLiteral;
057import org.nuxeo.ecm.core.query.sql.model.DateLiteral;
058import org.nuxeo.ecm.core.query.sql.model.DoubleLiteral;
059import org.nuxeo.ecm.core.query.sql.model.Expression;
060import org.nuxeo.ecm.core.query.sql.model.Function;
061import org.nuxeo.ecm.core.query.sql.model.IntegerLiteral;
062import org.nuxeo.ecm.core.query.sql.model.Literal;
063import org.nuxeo.ecm.core.query.sql.model.LiteralList;
064import org.nuxeo.ecm.core.query.sql.model.MultiExpression;
065import org.nuxeo.ecm.core.query.sql.model.Operand;
066import org.nuxeo.ecm.core.query.sql.model.Operator;
067import org.nuxeo.ecm.core.query.sql.model.OrderByClause;
068import org.nuxeo.ecm.core.query.sql.model.OrderByExpr;
069import org.nuxeo.ecm.core.query.sql.model.Reference;
070import org.nuxeo.ecm.core.query.sql.model.SelectClause;
071import org.nuxeo.ecm.core.query.sql.model.SelectList;
072import org.nuxeo.ecm.core.query.sql.model.StringLiteral;
073import org.nuxeo.ecm.core.schema.DocumentType;
074import org.nuxeo.ecm.core.schema.SchemaManager;
075import org.nuxeo.ecm.core.schema.types.ComplexType;
076import org.nuxeo.ecm.core.schema.types.Field;
077import org.nuxeo.ecm.core.schema.types.ListType;
078import org.nuxeo.ecm.core.schema.types.Schema;
079import org.nuxeo.ecm.core.schema.types.Type;
080import org.nuxeo.ecm.core.schema.types.primitives.BooleanType;
081import org.nuxeo.ecm.core.schema.types.primitives.DateType;
082import org.nuxeo.ecm.core.storage.ExpressionEvaluator;
083import org.nuxeo.ecm.core.storage.ExpressionEvaluator.PathResolver;
084import org.nuxeo.ecm.core.storage.dbs.DBSDocument;
085import org.nuxeo.ecm.core.storage.dbs.DBSSession;
086import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer;
087import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer.FulltextQuery;
088import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer.Op;
089import org.nuxeo.runtime.api.Framework;
090
091import com.mongodb.BasicDBObject;
092import com.mongodb.DBObject;
093import com.mongodb.QueryOperators;
094
095/**
096 * Query builder for a MongoDB query from an {@link Expression}.
097 *
098 * @since 5.9.4
099 */
100public class MongoDBQueryBuilder {
101
102    private static final Long ZERO = Long.valueOf(0);
103
104    private static final Long ONE = Long.valueOf(1);
105
106    private static final Long MINUS_ONE = Long.valueOf(-1);
107
108    protected static final String DATE_CAST = "DATE";
109
110    protected final AtomicInteger counter = new AtomicInteger();
111
112    protected final SchemaManager schemaManager;
113
114    protected List<String> documentTypes;
115
116    protected final Expression expression;
117
118    protected final SelectClause selectClause;
119
120    protected final OrderByClause orderByClause;
121
122    protected final PathResolver pathResolver;
123
124    public boolean hasFulltext;
125
126    public boolean sortOnFulltextScore;
127
128    protected DBObject query;
129
130    protected DBObject orderBy;
131
132    protected DBObject projection;
133
134    boolean projectionHasWildcard;
135
136    private boolean fulltextSearchDisabled;
137
138    public MongoDBQueryBuilder(Expression expression, SelectClause selectClause, OrderByClause orderByClause,
139            PathResolver pathResolver, boolean fulltextSearchDisabled) {
140        schemaManager = Framework.getLocalService(SchemaManager.class);
141        this.expression = expression;
142        this.selectClause = selectClause;
143        this.orderByClause = orderByClause;
144        this.pathResolver = pathResolver;
145        this.fulltextSearchDisabled = fulltextSearchDisabled;
146    }
147
148    public void walk() {
149        query = walkExpression(expression); // computes hasFulltext
150        walkOrderBy(); // computes sortOnFulltextScore
151        walkProjection(); // needs hasFulltext and sortOnFulltextScore
152    }
153
154    public DBObject getQuery() {
155        return query;
156    }
157
158    public DBObject getOrderBy() {
159        return orderBy;
160    }
161
162    public DBObject getProjection() {
163        return projection;
164    }
165
166    public boolean hasProjectionWildcard() {
167        return projectionHasWildcard;
168    }
169
170    protected void walkOrderBy() {
171        sortOnFulltextScore = false;
172        if (orderByClause == null) {
173            orderBy = null;
174        } else {
175            orderBy = new BasicDBObject();
176            for (OrderByExpr ob : orderByClause.elements) {
177                Reference ref = ob.reference;
178                boolean desc = ob.isDescending;
179                String field = walkReference(ref).queryField;
180                if (!orderBy.containsField(field)) {
181                    Object value;
182                    if (KEY_FULLTEXT_SCORE.equals(field)) {
183                        if (!desc) {
184                            throw new QueryParseException("Cannot sort by " + NXQL.ECM_FULLTEXT_SCORE + " ascending");
185                        }
186                        sortOnFulltextScore = true;
187                        value = new BasicDBObject(MONGODB_META, MONGODB_TEXT_SCORE);
188                    } else {
189                        value = desc ? MINUS_ONE : ONE;
190                    }
191                    orderBy.put(field, value);
192                }
193            }
194            if (sortOnFulltextScore && ((BasicDBObject) orderBy).size() > 1) {
195                throw new QueryParseException("Cannot sort by " + NXQL.ECM_FULLTEXT_SCORE + " and other criteria");
196            }
197        }
198    }
199
200    protected void walkProjection() {
201        projection = new BasicDBObject();
202        projection.put(KEY_ID, ONE); // always useful
203        projection.put(KEY_NAME, ONE); // used in ORDER BY ecm:path
204        projection.put(KEY_PARENT_ID, ONE); // used in ORDER BY ecm:path
205        boolean projectionOnFulltextScore = false;
206        for (int i = 0; i < selectClause.elements.size(); i++) {
207            Operand op = selectClause.elements.get(i);
208            if (!(op instanceof Reference)) {
209                throw new QueryParseException("Projection not supported: " + op);
210            }
211            FieldInfo fieldInfo = walkReference((Reference) op);
212            projection.put(fieldInfo.projectionField, ONE);
213            if (fieldInfo.hasWildcard) {
214                projectionHasWildcard = true;
215            }
216            if (fieldInfo.projectionField.equals(KEY_FULLTEXT_SCORE)) {
217                projectionOnFulltextScore = true;
218            }
219        }
220        if (projectionOnFulltextScore || sortOnFulltextScore) {
221            if (!hasFulltext) {
222                throw new QueryParseException(NXQL.ECM_FULLTEXT_SCORE + " cannot be used without " + NXQL.ECM_FULLTEXT);
223            }
224            projection.put(KEY_FULLTEXT_SCORE, new BasicDBObject(MONGODB_META, MONGODB_TEXT_SCORE));
225        }
226    }
227
228    public DBObject walkExpression(Expression expr) {
229        Operator op = expr.operator;
230        Operand lvalue = expr.lvalue;
231        Operand rvalue = expr.rvalue;
232        Reference ref = lvalue instanceof Reference ? (Reference) lvalue : null;
233        String name = ref != null ? ref.name : null;
234        String cast = ref != null ? ref.cast : null;
235        if (DATE_CAST.equals(cast)) {
236            checkDateLiteralForCast(op, rvalue, name);
237        }
238        if (op == Operator.STARTSWITH) {
239            return walkStartsWith(lvalue, rvalue);
240        } else if (NXQL.ECM_PATH.equals(name)) {
241            return walkEcmPath(op, rvalue);
242        } else if (NXQL.ECM_ANCESTORID.equals(name)) {
243            return walkAncestorId(op, rvalue);
244        } else if (name != null && name.startsWith(NXQL.ECM_FULLTEXT) && !NXQL.ECM_FULLTEXT_JOBID.equals(name)) {
245            return walkEcmFulltext(name, op, rvalue);
246        } else if (op == Operator.SUM) {
247            throw new UnsupportedOperationException("SUM");
248        } else if (op == Operator.SUB) {
249            throw new UnsupportedOperationException("SUB");
250        } else if (op == Operator.MUL) {
251            throw new UnsupportedOperationException("MUL");
252        } else if (op == Operator.DIV) {
253            throw new UnsupportedOperationException("DIV");
254        } else if (op == Operator.LT) {
255            return walkLt(lvalue, rvalue);
256        } else if (op == Operator.GT) {
257            return walkGt(lvalue, rvalue);
258        } else if (op == Operator.EQ) {
259            return walkEq(lvalue, rvalue);
260        } else if (op == Operator.NOTEQ) {
261            return walkNotEq(lvalue, rvalue);
262        } else if (op == Operator.LTEQ) {
263            return walkLtEq(lvalue, rvalue);
264        } else if (op == Operator.GTEQ) {
265            return walkGtEq(lvalue, rvalue);
266        } else if (op == Operator.AND) {
267            if (expr instanceof MultiExpression) {
268                return walkMultiExpression((MultiExpression) expr);
269            } else {
270                return walkAnd(lvalue, rvalue);
271            }
272        } else if (op == Operator.NOT) {
273            return walkNot(lvalue);
274        } else if (op == Operator.OR) {
275            return walkOr(lvalue, rvalue);
276        } else if (op == Operator.LIKE) {
277            return walkLike(lvalue, rvalue, true, false);
278        } else if (op == Operator.ILIKE) {
279            return walkLike(lvalue, rvalue, true, true);
280        } else if (op == Operator.NOTLIKE) {
281            return walkLike(lvalue, rvalue, false, false);
282        } else if (op == Operator.NOTILIKE) {
283            return walkLike(lvalue, rvalue, false, true);
284        } else if (op == Operator.IN) {
285            return walkIn(lvalue, rvalue, true);
286        } else if (op == Operator.NOTIN) {
287            return walkIn(lvalue, rvalue, false);
288        } else if (op == Operator.ISNULL) {
289            return walkIsNull(lvalue);
290        } else if (op == Operator.ISNOTNULL) {
291            return walkIsNotNull(lvalue);
292        } else if (op == Operator.BETWEEN) {
293            return walkBetween(lvalue, rvalue, true);
294        } else if (op == Operator.NOTBETWEEN) {
295            return walkBetween(lvalue, rvalue, false);
296        } else {
297            throw new QueryParseException("Unknown operator: " + op);
298        }
299    }
300
301    protected void checkDateLiteralForCast(Operator op, Operand value, String name) {
302        if (op == Operator.BETWEEN || op == Operator.NOTBETWEEN) {
303            LiteralList l = (LiteralList) value;
304            checkDateLiteralForCast(l.get(0), name);
305            checkDateLiteralForCast(l.get(1), name);
306        } else {
307            checkDateLiteralForCast(value, name);
308        }
309    }
310
311    protected void checkDateLiteralForCast(Operand value, String name) {
312        if (value instanceof DateLiteral && !((DateLiteral) value).onlyDate) {
313            throw new QueryParseException("DATE() cast must be used with DATE literal, not TIMESTAMP: " + name);
314        }
315    }
316
317    protected DBObject walkEcmPath(Operator op, Operand rvalue) {
318        if (op != Operator.EQ && op != Operator.NOTEQ) {
319            throw new QueryParseException(NXQL.ECM_PATH + " requires = or <> operator");
320        }
321        if (!(rvalue instanceof StringLiteral)) {
322            throw new QueryParseException(NXQL.ECM_PATH + " requires literal path as right argument");
323        }
324        String path = ((StringLiteral) rvalue).value;
325        if (path.length() > 1 && path.endsWith("/")) {
326            path = path.substring(0, path.length() - 1);
327        }
328        String id = pathResolver.getIdForPath(path);
329        if (id == null) {
330            // no such path
331            // TODO XXX do better
332            return new BasicDBObject(MONGODB_ID, "__nosuchid__");
333        }
334        if (op == Operator.EQ) {
335            return new BasicDBObject(DBSDocument.KEY_ID, id);
336        } else {
337            return new BasicDBObject(DBSDocument.KEY_ID, new BasicDBObject(QueryOperators.NE, id));
338        }
339    }
340
341    protected DBObject walkAncestorId(Operator op, Operand rvalue) {
342        if (op != Operator.EQ && op != Operator.NOTEQ) {
343            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires = or <> operator");
344        }
345        if (!(rvalue instanceof StringLiteral)) {
346            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires literal id as right argument");
347        }
348        String ancestorId = ((StringLiteral) rvalue).value;
349        if (op == Operator.EQ) {
350            return new BasicDBObject(DBSDocument.KEY_ANCESTOR_IDS, ancestorId);
351        } else {
352            return new BasicDBObject(DBSDocument.KEY_ANCESTOR_IDS, new BasicDBObject(QueryOperators.NE, ancestorId));
353        }
354    }
355
356    protected DBObject walkEcmFulltext(String name, Operator op, Operand rvalue) {
357        if (op != Operator.EQ && op != Operator.LIKE) {
358            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires = or LIKE operator");
359        }
360        if (!(rvalue instanceof StringLiteral)) {
361            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires literal string as right argument");
362        }
363        if (fulltextSearchDisabled) {
364            throw new QueryParseException("Fulltext search disabled by configuration");
365        }
366        String fulltextQuery = ((StringLiteral) rvalue).value;
367        if (name.equals(NXQL.ECM_FULLTEXT)) {
368            // standard fulltext query
369            hasFulltext = true;
370            String ft = getMongoDBFulltextQuery(fulltextQuery);
371            if (ft == null) {
372                // empty query, matches nothing
373                return new BasicDBObject(MONGODB_ID, "__nosuchid__");
374            }
375            DBObject textSearch = new BasicDBObject();
376            textSearch.put(QueryOperators.SEARCH, ft);
377            // TODO language?
378            return new BasicDBObject(QueryOperators.TEXT, textSearch);
379        } else {
380            // secondary index match with explicit field
381            // do a regexp on the field
382            if (name.charAt(NXQL.ECM_FULLTEXT.length()) != '.') {
383                throw new QueryParseException(name + " has incorrect syntax" + " for a secondary fulltext index");
384            }
385            String prop = name.substring(NXQL.ECM_FULLTEXT.length() + 1);
386            String ft = fulltextQuery.replace(" ", "%");
387            rvalue = new StringLiteral(ft);
388            return walkLike(new Reference(prop), rvalue, true, true);
389        }
390    }
391
392    // public static for tests
393    public static String getMongoDBFulltextQuery(String query) {
394        FulltextQuery ft = FulltextQueryAnalyzer.analyzeFulltextQuery(query);
395        if (ft == null) {
396            return null;
397        }
398        // translate into MongoDB syntax
399        return translateFulltext(ft, false);
400    }
401
402    /**
403     * Transforms the NXQL fulltext syntax into MongoDB syntax.
404     * <p>
405     * The MongoDB fulltext query syntax is badly documented, but is actually the following:
406     * <ul>
407     * <li>a term is a word,
408     * <li>a phrase is a set of spaced-separated words enclosed in double quotes,
409     * <li>negation is done by prepending a -,
410     * <li>the query is a space-separated set of terms, negated terms, phrases, or negated phrases.
411     * <li>all the words of non-negated phrases are also added to the terms.
412     * </ul>
413     * <p>
414     * The matching algorithm is (excluding stemming and stop words):
415     * <ul>
416     * <li>filter out documents with the negative terms, the negative phrases, or missing the phrases,
417     * <li>then if any term is present in the document then it's a match.
418     * </ul>
419     */
420    protected static String translateFulltext(FulltextQuery ft, boolean and) {
421        List<String> buf = new ArrayList<>();
422        translateFulltext(ft, buf, and);
423        return StringUtils.join(buf, ' ');
424    }
425
426    protected static void translateFulltext(FulltextQuery ft, List<String> buf, boolean and) {
427        if (ft.op == Op.OR) {
428            for (FulltextQuery term : ft.terms) {
429                // don't quote words for OR
430                translateFulltext(term, buf, false);
431            }
432        } else if (ft.op == Op.AND) {
433            for (FulltextQuery term : ft.terms) {
434                // quote words for AND
435                translateFulltext(term, buf, true);
436            }
437        } else {
438            String neg;
439            if (ft.op == Op.NOTWORD) {
440                neg = "-";
441            } else { // Op.WORD
442                neg = "";
443            }
444            String word = ft.word.toLowerCase();
445            if (ft.isPhrase() || and) {
446                buf.add(neg + '"' + word + '"');
447            } else {
448                buf.add(neg + word);
449            }
450        }
451    }
452
453    public DBObject walkNot(Operand value) {
454        Object val = walkOperand(value);
455        Object not = pushDownNot(val);
456        if (!(not instanceof DBObject)) {
457            throw new QueryParseException("Cannot do NOT on: " + val);
458        }
459        return (DBObject) not;
460    }
461
462    protected Object pushDownNot(Object object) {
463        if (!(object instanceof DBObject)) {
464            throw new QueryParseException("Cannot do NOT on: " + object);
465        }
466        DBObject ob = (DBObject) object;
467        Set<String> keySet = ob.keySet();
468        if (keySet.size() != 1) {
469            throw new QueryParseException("Cannot do NOT on: " + ob);
470        }
471        String key = keySet.iterator().next();
472        Object value = ob.get(key);
473        if (!key.startsWith("$")) {
474            if (value instanceof DBObject) {
475                // push down inside dbobject
476                return new BasicDBObject(key, pushDownNot(value));
477            } else {
478                // k = v -> k != v
479                return new BasicDBObject(key, new BasicDBObject(QueryOperators.NE, value));
480            }
481        }
482        if (QueryOperators.NE.equals(key)) {
483            // NOT k != v -> k = v
484            return value;
485        }
486        if (QueryOperators.NOT.equals(key)) {
487            // NOT NOT v -> v
488            return value;
489        }
490        if (QueryOperators.AND.equals(key) || QueryOperators.OR.equals(key)) {
491            // boolean algebra
492            // NOT (v1 AND v2) -> NOT v1 OR NOT v2
493            // NOT (v1 OR v2) -> NOT v1 AND NOT v2
494            String op = QueryOperators.AND.equals(key) ? QueryOperators.OR : QueryOperators.AND;
495            List<Object> list = (List<Object>) value;
496            for (int i = 0; i < list.size(); i++) {
497                list.set(i, pushDownNot(list.get(i)));
498            }
499            return new BasicDBObject(op, list);
500        }
501        if (QueryOperators.IN.equals(key) || QueryOperators.NIN.equals(key)) {
502            // boolean algebra
503            // IN <-> NIN
504            String op = QueryOperators.IN.equals(key) ? QueryOperators.NIN : QueryOperators.IN;
505            return new BasicDBObject(op, value);
506        }
507        if (QueryOperators.LT.equals(key) || QueryOperators.GT.equals(key) || QueryOperators.LTE.equals(key)
508                || QueryOperators.GTE.equals(key)) {
509            // TODO use inverse operators?
510            return new BasicDBObject(QueryOperators.NOT, ob);
511        }
512        throw new QueryParseException("Unknown operator for NOT: " + key);
513    }
514
515    public DBObject walkIsNull(Operand value) {
516        FieldInfo fieldInfo = walkReference(value);
517        return new FieldInfoDBObject(fieldInfo, null);
518    }
519
520    public DBObject walkIsNotNull(Operand value) {
521        FieldInfo fieldInfo = walkReference(value);
522        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.NE, null));
523    }
524
525    public DBObject walkMultiExpression(MultiExpression expr) {
526        return walkAnd(expr.values);
527    }
528
529    public DBObject walkAnd(Operand lvalue, Operand rvalue) {
530        return walkAnd(Arrays.asList(lvalue, rvalue));
531    }
532
533    protected DBObject walkAnd(List<Operand> values) {
534        List<Object> list = walkOperandList(values);
535        // check wildcards in the operands, extract common prefixes to use $elemMatch
536        Map<String, List<FieldInfoDBObject>> propBaseKeyToDBOs = new LinkedHashMap<>();
537        Map<String, String> propBaseKeyToFieldBase = new HashMap<>();
538        for (Iterator<Object> it = list.iterator(); it.hasNext();) {
539            Object ob = it.next();
540            if (ob instanceof FieldInfoDBObject) {
541                FieldInfoDBObject fidbo = (FieldInfoDBObject) ob;
542                FieldInfo fieldInfo = fidbo.fieldInfo;
543                if (fieldInfo.hasWildcard) {
544                    if (fieldInfo.fieldSuffix != null && fieldInfo.fieldSuffix.contains("*")) {
545                        // a double wildcard of the form foo/*/bar/* is not a problem if bar is an array
546                        // TODO prevent deep complex multiple wildcards
547                        // throw new QueryParseException("Cannot use two wildcards: " + fieldInfo.prop);
548                    }
549                    // generate a key unique per correlation for this element match
550                    String wildcardNumber = fieldInfo.fieldWildcard;
551                    if (wildcardNumber.isEmpty()) {
552                        // negative to not collide with regular correlated wildcards
553                        wildcardNumber = String.valueOf(-counter.incrementAndGet());
554                    }
555                    String propBaseKey = fieldInfo.fieldPrefix + "/*" + wildcardNumber;
556                    // store object for this key
557                    List<FieldInfoDBObject> dbos = propBaseKeyToDBOs.get(propBaseKey);
558                    if (dbos == null) {
559                        propBaseKeyToDBOs.put(propBaseKey, dbos = new LinkedList<>());
560                    }
561                    dbos.add(fidbo);
562                    // remember for which field base this is
563                    String fieldBase = fieldInfo.fieldPrefix.replace("/", ".");
564                    propBaseKeyToFieldBase.put(propBaseKey, fieldBase);
565                    // remove from list, will be re-added later through propBaseKeyToDBOs
566                    it.remove();
567                }
568            }
569        }
570        // generate $elemMatch items for correlated queries
571        for (Entry<String, List<FieldInfoDBObject>> es : propBaseKeyToDBOs.entrySet()) {
572            String propBaseKey = es.getKey();
573            List<FieldInfoDBObject> fidbos = es.getValue();
574            if (fidbos.size() == 1) {
575                // regular uncorrelated match
576                list.addAll(fidbos);
577            } else {
578                DBObject elemMatch = new BasicDBObject();
579                for (FieldInfoDBObject fidbo : fidbos) {
580                    // truncate field name to just the suffix
581                    FieldInfo fieldInfo = fidbo.fieldInfo;
582                    Object value = fidbo.get(fieldInfo.queryField);
583                    String fieldSuffix = fieldInfo.fieldSuffix.replace("/", ".");
584                    if (elemMatch.containsField(fieldSuffix)) {
585                        // ecm:acl/*1/principal = 'bob' AND ecm:acl/*1/principal = 'steve'
586                        // cannot match
587                        // TODO do better
588                        value = "__NOSUCHVALUE__";
589                    }
590                    elemMatch.put(fieldSuffix, value);
591                }
592                String fieldBase = propBaseKeyToFieldBase.get(propBaseKey);
593                BasicDBObject dbo = new BasicDBObject(fieldBase,
594                        new BasicDBObject(QueryOperators.ELEM_MATCH, elemMatch));
595                list.add(dbo);
596            }
597        }
598        if (list.size() == 1) {
599            return (DBObject) list.get(0);
600        } else {
601            return new BasicDBObject(QueryOperators.AND, list);
602        }
603    }
604
605    public DBObject walkOr(Operand lvalue, Operand rvalue) {
606        Object left = walkOperand(lvalue);
607        Object right = walkOperand(rvalue);
608        List<Object> list = new ArrayList<>(Arrays.asList(left, right));
609        return new BasicDBObject(QueryOperators.OR, list);
610    }
611
612    protected Object checkBoolean(FieldInfo fieldInfo, Object right) {
613        if (fieldInfo.isBoolean()) {
614            // convert 0 / 1 to actual booleans
615            if (right instanceof Long) {
616                if (ZERO.equals(right)) {
617                    right = fieldInfo.isTrueOrNullBoolean ? null : FALSE;
618                } else if (ONE.equals(right)) {
619                    right = TRUE;
620                } else {
621                    throw new QueryParseException("Invalid boolean: " + right);
622                }
623            }
624        }
625        return right;
626    }
627
628    public DBObject walkEq(Operand lvalue, Operand rvalue) {
629        FieldInfo fieldInfo = walkReference(lvalue);
630        Object right = walkOperand(rvalue);
631        if (isMixinTypes(fieldInfo)) {
632            if (!(right instanceof String)) {
633                throw new QueryParseException("Invalid EQ rhs: " + rvalue);
634            }
635            return walkMixinTypes(Collections.singletonList((String) right), true);
636        }
637        right = checkBoolean(fieldInfo, right);
638        // TODO check list fields
639        return new FieldInfoDBObject(fieldInfo, right);
640    }
641
642    public DBObject walkNotEq(Operand lvalue, Operand rvalue) {
643        FieldInfo fieldInfo = walkReference(lvalue);
644        Object right = walkOperand(rvalue);
645        if (isMixinTypes(fieldInfo)) {
646            if (!(right instanceof String)) {
647                throw new QueryParseException("Invalid NE rhs: " + rvalue);
648            }
649            return walkMixinTypes(Collections.singletonList((String) right), false);
650        }
651        right = checkBoolean(fieldInfo, right);
652        // TODO check list fields
653        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.NE, right));
654    }
655
656    public DBObject walkLt(Operand lvalue, Operand rvalue) {
657        FieldInfo fieldInfo = walkReference(lvalue);
658        Object right = walkOperand(rvalue);
659        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.LT, right));
660    }
661
662    public DBObject walkGt(Operand lvalue, Operand rvalue) {
663        FieldInfo fieldInfo = walkReference(lvalue);
664        Object right = walkOperand(rvalue);
665        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.GT, right));
666    }
667
668    public DBObject walkLtEq(Operand lvalue, Operand rvalue) {
669        FieldInfo fieldInfo = walkReference(lvalue);
670        Object right = walkOperand(rvalue);
671        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.LTE, right));
672    }
673
674    public DBObject walkGtEq(Operand lvalue, Operand rvalue) {
675        FieldInfo fieldInfo = walkReference(lvalue);
676        Object right = walkOperand(rvalue);
677        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.GTE, right));
678    }
679
680    public DBObject walkBetween(Operand lvalue, Operand rvalue, boolean positive) {
681        LiteralList l = (LiteralList) rvalue;
682        FieldInfo fieldInfo = walkReference(lvalue);
683        Object left = walkOperand(l.get(0));
684        Object right = walkOperand(l.get(1));
685        if (positive) {
686            DBObject range = new BasicDBObject();
687            range.put(QueryOperators.GTE, left);
688            range.put(QueryOperators.LTE, right);
689            return new FieldInfoDBObject(fieldInfo, range);
690        } else {
691            DBObject a = new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.LT, left));
692            DBObject b = new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.GT, right));
693            return new BasicDBObject(QueryOperators.OR, Arrays.asList(a, b));
694        }
695    }
696
697    public DBObject walkIn(Operand lvalue, Operand rvalue, boolean positive) {
698        FieldInfo fieldInfo = walkReference(lvalue);
699        Object right = walkOperand(rvalue);
700        if (!(right instanceof List)) {
701            throw new QueryParseException("Invalid IN, right hand side must be a list: " + rvalue);
702        }
703        if (isMixinTypes(fieldInfo)) {
704            return walkMixinTypes((List<String>) right, positive);
705        }
706        // TODO check list fields
707        List<Object> list = (List<Object>) right;
708        return new FieldInfoDBObject(fieldInfo,
709                new BasicDBObject(positive ? QueryOperators.IN : QueryOperators.NIN, list));
710    }
711
712    public DBObject walkLike(Operand lvalue, Operand rvalue, boolean positive, boolean caseInsensitive) {
713        FieldInfo fieldInfo = walkReference(lvalue);
714        if (!(rvalue instanceof StringLiteral)) {
715            throw new QueryParseException("Invalid LIKE/ILIKE, right hand side must be a string: " + rvalue);
716        }
717        // TODO check list fields
718        String like = walkStringLiteral((StringLiteral) rvalue);
719        String regex = ExpressionEvaluator.likeToRegex(like);
720
721        int flags = caseInsensitive ? Pattern.CASE_INSENSITIVE : 0;
722        Pattern pattern = Pattern.compile(regex, flags);
723        Object value;
724        if (positive) {
725            value = pattern;
726        } else {
727            value = new BasicDBObject(QueryOperators.NOT, pattern);
728        }
729        return new FieldInfoDBObject(fieldInfo, value);
730    }
731
732    public Object walkOperand(Operand op) {
733        if (op instanceof Literal) {
734            return walkLiteral((Literal) op);
735        } else if (op instanceof LiteralList) {
736            return walkLiteralList((LiteralList) op);
737        } else if (op instanceof Function) {
738            return walkFunction((Function) op);
739        } else if (op instanceof Expression) {
740            return walkExpression((Expression) op);
741        } else if (op instanceof Reference) {
742            return walkReference((Reference) op);
743        } else {
744            throw new QueryParseException("Unknown operand: " + op);
745        }
746    }
747
748    public Object walkLiteral(Literal lit) {
749        if (lit instanceof BooleanLiteral) {
750            return walkBooleanLiteral((BooleanLiteral) lit);
751        } else if (lit instanceof DateLiteral) {
752            return walkDateLiteral((DateLiteral) lit);
753        } else if (lit instanceof DoubleLiteral) {
754            return walkDoubleLiteral((DoubleLiteral) lit);
755        } else if (lit instanceof IntegerLiteral) {
756            return walkIntegerLiteral((IntegerLiteral) lit);
757        } else if (lit instanceof StringLiteral) {
758            return walkStringLiteral((StringLiteral) lit);
759        } else {
760            throw new QueryParseException("Unknown literal: " + lit);
761        }
762    }
763
    /**
     * Converts a boolean literal to a {@link Boolean} using {@code Boolean.valueOf} on the literal's value.
     *
     * @param lit the boolean literal
     * @return the boxed boolean value
     */
    public Object walkBooleanLiteral(BooleanLiteral lit) {
        return Boolean.valueOf(lit.value);
    }
767
    /**
     * Converts a date literal to a {@link Date} by calling {@code toDate()} on the literal's value.
     * <p>
     * The literal is always converted the same way; date-only literals are not treated specially yet (see TODO).
     *
     * @param lit the date literal
     * @return the date
     */
    public Date walkDateLiteral(DateLiteral lit) {
        return lit.value.toDate(); // TODO onlyDate
    }
771
    /**
     * Converts a double literal to a {@link Double} using {@code Double.valueOf} on the literal's value.
     *
     * @param lit the double literal
     * @return the boxed double value
     */
    public Double walkDoubleLiteral(DoubleLiteral lit) {
        return Double.valueOf(lit.value);
    }
775
    /**
     * Converts an integer literal to a {@link Long} using {@code Long.valueOf} on the literal's value.
     *
     * @param lit the integer literal
     * @return the boxed long value
     */
    public Long walkIntegerLiteral(IntegerLiteral lit) {
        return Long.valueOf(lit.value);
    }
779
    /**
     * Returns the value of a string literal, unchanged.
     *
     * @param lit the string literal
     * @return the literal's value
     */
    public String walkStringLiteral(StringLiteral lit) {
        return lit.value;
    }
783
784    public List<Object> walkLiteralList(LiteralList litList) {
785        List<Object> list = new ArrayList<Object>(litList.size());
786        for (Literal lit : litList) {
787            list.add(walkLiteral(lit));
788        }
789        return list;
790    }
791
792    protected List<Object> walkOperandList(List<Operand> values) {
793        List<Object> list = new LinkedList<>();
794        for (Operand value : values) {
795            list.add(walkOperand(value));
796        }
797        return list;
798    }
799
    /**
     * Functions are not supported by this builder.
     *
     * @param func the function
     * @return never returns normally
     * @throws UnsupportedOperationException always, with the function name as message
     */
    public Object walkFunction(Function func) {
        throw new UnsupportedOperationException(func.name);
    }
803
804    public DBObject walkStartsWith(Operand lvalue, Operand rvalue) {
805        if (!(lvalue instanceof Reference)) {
806            throw new QueryParseException("Invalid STARTSWITH query, left hand side must be a property: " + lvalue);
807        }
808        String name = ((Reference) lvalue).name;
809        if (!(rvalue instanceof StringLiteral)) {
810            throw new QueryParseException(
811                    "Invalid STARTSWITH query, right hand side must be a literal path: " + rvalue);
812        }
813        String path = ((StringLiteral) rvalue).value;
814        if (path.length() > 1 && path.endsWith("/")) {
815            path = path.substring(0, path.length() - 1);
816        }
817
818        if (NXQL.ECM_PATH.equals(name)) {
819            return walkStartsWithPath(path);
820        } else {
821            return walkStartsWithNonPath(lvalue, path);
822        }
823    }
824
825    protected DBObject walkStartsWithPath(String path) {
826        // resolve path
827        String ancestorId = pathResolver.getIdForPath(path);
828        if (ancestorId == null) {
829            // no such path
830            // TODO XXX do better
831            return new BasicDBObject(MONGODB_ID, "__nosuchid__");
832        }
833        return new BasicDBObject(DBSDocument.KEY_ANCESTOR_IDS, ancestorId);
834    }
835
836    protected DBObject walkStartsWithNonPath(Operand lvalue, String path) {
837        FieldInfo fieldInfo = walkReference(lvalue);
838        DBObject eq = new FieldInfoDBObject(fieldInfo, path);
839        // escape except alphanumeric and others not needing escaping
840        String regex = path.replaceAll("([^a-zA-Z0-9 /])", "\\\\$1");
841        Pattern pattern = Pattern.compile(regex + "/.*");
842        DBObject like = new FieldInfoDBObject(fieldInfo, pattern);
843        return new BasicDBObject(QueryOperators.OR, Arrays.asList(eq, like));
844    }
845
846    protected FieldInfo walkReference(Operand value) {
847        if (!(value instanceof Reference)) {
848            throw new QueryParseException("Invalid query, left hand side must be a property: " + value);
849        }
850        return walkReference((Reference) value);
851    }
852
853    // non-canonical index syntax, for replaceAll
854    protected final static Pattern NON_CANON_INDEX = Pattern.compile("[^/\\[\\]]+" // name
855            + "\\[(\\d+|\\*|\\*\\d+)\\]" // index in brackets
856    );
857
858    /**
859     * Canonicalizes a Nuxeo-xpath.
860     * <p>
861     * Replaces {@code a/foo[123]/b} with {@code a/123/b}
862     * <p>
863     * A star or a star followed by digits can be used instead of just the digits as well.
864     *
865     * @param xpath the xpath
866     * @return the canonicalized xpath.
867     */
868    public static String canonicalXPath(String xpath) {
869        while (xpath.length() > 0 && xpath.charAt(0) == '/') {
870            xpath = xpath.substring(1);
871        }
872        if (xpath.indexOf('[') == -1) {
873            return xpath;
874        } else {
875            return NON_CANON_INDEX.matcher(xpath).replaceAll("$1");
876        }
877    }
878
879    /** Splits foo.*.bar into foo, *, bar and split foo.*1.bar into foo, *1, bar with the last bar part optional */
880    protected final static Pattern WILDCARD_SPLIT = Pattern.compile("([^*]*)\\.\\*(\\d*)(?:\\.(.*))?");
881
882    protected static class FieldInfo {
883
884        /** NXQL property. */
885        protected final String prop;
886
887        /** MongoDB field including wildcards (not used as-is). */
888        protected final String fullField;
889
890        /** MongoDB field for query. foo/0/bar -> foo.0.bar; foo / * / bar -> foo.bar */
891        protected final String queryField;
892
893        /** MongoDB field for projection. */
894        protected final String projectionField;
895
896        protected final Type type;
897
898        /**
899         * Boolean system properties only use TRUE or NULL, not FALSE, so queries must be updated accordingly.
900         */
901        protected final boolean isTrueOrNullBoolean;
902
903        protected final boolean hasWildcard;
904
905        /** Prefix before the wildcard. */
906        protected final String fieldPrefix;
907
908        /** Wildcard part after * */
909        protected final String fieldWildcard;
910
911        /** Part after wildcard, may be null. */
912        protected final String fieldSuffix;
913
914        protected FieldInfo(String prop, String fullField, String queryField, String projectionField, Type type,
915                boolean isTrueOrNullBoolean) {
916            this.prop = prop;
917            this.fullField = fullField;
918            this.queryField = queryField;
919            this.projectionField = projectionField;
920            this.type = type;
921            this.isTrueOrNullBoolean = isTrueOrNullBoolean;
922            Matcher m = WILDCARD_SPLIT.matcher(fullField);
923            if (m.matches()) {
924                hasWildcard = true;
925                fieldPrefix = m.group(1);
926                fieldWildcard = m.group(2);
927                fieldSuffix = m.group(3);
928            } else {
929                hasWildcard = false;
930                fieldPrefix = fieldWildcard = fieldSuffix = null;
931            }
932        }
933
934        protected boolean isBoolean() {
935            return type instanceof BooleanType;
936        }
937    }
938
    /**
     * A {@link BasicDBObject} built for a single field, which also keeps the {@link FieldInfo} it was built
     * from.
     */
    protected static class FieldInfoDBObject extends BasicDBObject {

        private static final long serialVersionUID = 1L;

        /** The field information this object was built from. */
        protected FieldInfo fieldInfo;

        /**
         * Creates an object whose key is {@code fieldInfo.queryField} and whose value is {@code value}.
         *
         * @param fieldInfo the field information, must not be null
         * @param value the value or condition for the field
         */
        public FieldInfoDBObject(FieldInfo fieldInfo, Object value) {
            super(fieldInfo.queryField, value);
            this.fieldInfo = fieldInfo;
        }
    }
950
951    /**
952     * Returns the MongoDB field for this reference.
953     */
954    public FieldInfo walkReference(Reference ref) {
955        FieldInfo fieldInfo = walkReference(ref.name);
956        if (DATE_CAST.equals(ref.cast)) {
957            Type type = fieldInfo.type;
958            if (!(type instanceof DateType
959                    || (type instanceof ListType && ((ListType) type).getFieldType() instanceof DateType))) {
960                throw new QueryParseException("Cannot cast to " + ref.cast + ": " + ref.name);
961            }
962            // fieldInfo.isDateCast = true;
963        }
964        return fieldInfo;
965    }
966
967    protected FieldInfo walkReference(String name) {
968        String prop = canonicalXPath(name);
969        String[] parts = prop.split("/");
970        if (prop.startsWith(NXQL.ECM_PREFIX)) {
971            if (prop.startsWith(NXQL.ECM_ACL + "/")) {
972                return parseACP(prop, parts);
973            }
974            // simple field
975            String field = DBSSession.convToInternal(prop);
976            Type type = DBSSession.getType(field);
977            return new FieldInfo(prop, field, field, field, type, true);
978        } else {
979            String first = parts[0];
980            Field field = schemaManager.getField(first);
981            if (field == null) {
982                if (first.indexOf(':') > -1) {
983                    throw new QueryParseException("No such property: " + name);
984                }
985                // check without prefix
986                // TODO precompute this in SchemaManagerImpl
987                for (Schema schema : schemaManager.getSchemas()) {
988                    if (!StringUtils.isBlank(schema.getNamespace().prefix)) {
989                        // schema with prefix, do not consider as candidate
990                        continue;
991                    }
992                    if (schema != null) {
993                        field = schema.getField(first);
994                        if (field != null) {
995                            break;
996                        }
997                    }
998                }
999                if (field == null) {
1000                    throw new QueryParseException("No such property: " + name);
1001                }
1002            }
1003            Type type = field.getType();
1004            // canonical name
1005            parts[0] = field.getName().getPrefixedName();
1006            // are there wildcards or list indexes?
1007            List<String> queryFieldParts = new LinkedList<>(); // field for query
1008            List<String> projectionFieldParts = new LinkedList<>(); // field for projection
1009            boolean firstPart = true;
1010            for (String part : parts) {
1011                if (NumberUtils.isDigits(part)) {
1012                    // explicit list index
1013                    queryFieldParts.add(part);
1014                    type = ((ListType) type).getFieldType();
1015                } else if (!part.startsWith("*")) {
1016                    // complex sub-property
1017                    queryFieldParts.add(part);
1018                    projectionFieldParts.add(part);
1019                    if (!firstPart) {
1020                        // we already computed the type of the first part
1021                        field = ((ComplexType) type).getField(part);
1022                        if (field == null) {
1023                            throw new QueryParseException("No such property: " + name);
1024                        }
1025                        type = field.getType();
1026                    }
1027                } else {
1028                    // wildcard
1029                    type = ((ListType) type).getFieldType();
1030                }
1031                firstPart = false;
1032            }
1033            String fullField = StringUtils.join(parts, '.');
1034            String queryField = StringUtils.join(queryFieldParts, '.');
1035            String projectionField = StringUtils.join(projectionFieldParts, '.');
1036            return new FieldInfo(prop, fullField, queryField, projectionField, type, false);
1037        }
1038    }
1039
1040    protected FieldInfo parseACP(String prop, String[] parts) {
1041        if (parts.length != 3) {
1042            throw new QueryParseException("No such property: " + prop);
1043        }
1044        String wildcard = parts[1];
1045        if (NumberUtils.isDigits(wildcard)) {
1046            throw new QueryParseException("Cannot use explicit index in ACLs: " + prop);
1047        }
1048        String last = parts[2];
1049        String fullField;
1050        String queryField;
1051        String projectionField;
1052        if (NXQL.ECM_ACL_NAME.equals(last)) {
1053            fullField = KEY_ACP + "." + KEY_ACL_NAME;
1054            queryField = KEY_ACP + "." + KEY_ACL_NAME;
1055            // TODO remember wildcard correlation
1056        } else {
1057            String fieldLast = DBSSession.convToInternalAce(last);
1058            if (fieldLast == null) {
1059                throw new QueryParseException("No such property: " + prop);
1060            }
1061            fullField = KEY_ACP + "." + KEY_ACL + "." + wildcard + "." + fieldLast;
1062            queryField = KEY_ACP + "." + KEY_ACL + "." + fieldLast;
1063        }
1064        Type type = DBSSession.getType(last);
1065        projectionField = queryField;
1066        return new FieldInfo(prop, fullField, queryField, projectionField, type, false);
1067    }
1068
    /**
     * Checks whether the field's query field is the mixin types field ({@code DBSDocument.KEY_MIXIN_TYPES}).
     *
     * @param fieldInfo the field information
     * @return {@code true} if the field is the mixin types field
     */
    protected boolean isMixinTypes(FieldInfo fieldInfo) {
        return fieldInfo.queryField.equals(DBSDocument.KEY_MIXIN_TYPES);
    }
1072
1073    protected Set<String> getMixinDocumentTypes(String mixin) {
1074        Set<String> types = schemaManager.getDocumentTypeNamesForFacet(mixin);
1075        return types == null ? Collections.emptySet() : types;
1076    }
1077
1078    protected List<String> getDocumentTypes() {
1079        // TODO precompute in SchemaManager
1080        if (documentTypes == null) {
1081            documentTypes = new ArrayList<>();
1082            for (DocumentType docType : schemaManager.getDocumentTypes()) {
1083                documentTypes.add(docType.getName());
1084            }
1085        }
1086        return documentTypes;
1087    }
1088
    /**
     * Checks whether a facet is listed by the schema manager among the "no per-document query" facets.
     * <p>
     * Such facets are left out of the instance mixin condition built by {@code walkMixinTypes}.
     *
     * @param mixin the facet name
     * @return {@code true} if the facet is in {@code schemaManager.getNoPerDocumentQueryFacets()}
     */
    protected boolean isNeverPerInstanceMixin(String mixin) {
        return schemaManager.getNoPerDocumentQueryFacets().contains(mixin);
    }
1092
1093    /**
1094     * Matches the mixin types against a list of values.
1095     * <p>
1096     * Used for:
1097     * <ul>
1098     * <li>ecm:mixinTypes = 'Foo'
1099     * <li>ecm:mixinTypes != 'Foo'
1100     * <li>ecm:mixinTypes IN ('Foo', 'Bar')
1101     * <li>ecm:mixinTypes NOT IN ('Foo', 'Bar')
1102     * </ul>
1103     * <p>
1104     * ecm:mixinTypes IN ('Foo', 'Bar')
1105     *
1106     * <pre>
1107     * { "$or" : [ { "ecm:primaryType" : { "$in" : [ ... types with Foo or Bar ...]}} ,
1108     *             { "ecm:mixinTypes" : { "$in" : [ "Foo" , "Bar]}}]}
1109     * </pre>
1110     *
1111     * ecm:mixinTypes NOT IN ('Foo', 'Bar')
1112     * <p>
1113     *
1114     * <pre>
1115     * { "$and" : [ { "ecm:primaryType" : { "$in" : [ ... types without Foo nor Bar ...]}} ,
1116     *              { "ecm:mixinTypes" : { "$nin" : [ "Foo" , "Bar]}}]}
1117     * </pre>
1118     */
1119    public DBObject walkMixinTypes(List<String> mixins, boolean include) {
1120        /*
1121         * Primary types that match.
1122         */
1123        Set<String> matchPrimaryTypes;
1124        if (include) {
1125            matchPrimaryTypes = new HashSet<String>();
1126            for (String mixin : mixins) {
1127                matchPrimaryTypes.addAll(getMixinDocumentTypes(mixin));
1128            }
1129        } else {
1130            matchPrimaryTypes = new HashSet<String>(getDocumentTypes());
1131            for (String mixin : mixins) {
1132                matchPrimaryTypes.removeAll(getMixinDocumentTypes(mixin));
1133            }
1134        }
1135        /*
1136         * Instance mixins that match.
1137         */
1138        Set<String> matchMixinTypes = new HashSet<String>();
1139        for (String mixin : mixins) {
1140            if (!isNeverPerInstanceMixin(mixin)) {
1141                matchMixinTypes.add(mixin);
1142            }
1143        }
1144        /*
1145         * MongoDB query generation.
1146         */
1147        // match on primary type
1148        DBObject p = new BasicDBObject(DBSDocument.KEY_PRIMARY_TYPE,
1149                new BasicDBObject(QueryOperators.IN, matchPrimaryTypes));
1150        // match on mixin types
1151        // $in/$nin with an array matches if any/no element of the array matches
1152        String innin = include ? QueryOperators.IN : QueryOperators.NIN;
1153        DBObject m = new BasicDBObject(DBSDocument.KEY_MIXIN_TYPES, new BasicDBObject(innin, matchMixinTypes));
1154        // and/or between those
1155        String op = include ? QueryOperators.OR : QueryOperators.AND;
1156        return new BasicDBObject(op, Arrays.asList(p, m));
1157    }
1158
1159}