001/*
002 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage.mongodb;
020
021import static java.lang.Boolean.FALSE;
022import static java.lang.Boolean.TRUE;
023import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_GRANT;
024import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL;
025import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL_NAME;
026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP;
027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_SCORE;
028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_NAME;
029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PARENT_ID;
030import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_ID;
031import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_META;
032import static org.nuxeo.ecm.core.storage.mongodb.MongoDBRepository.MONGODB_TEXT_SCORE;
033
034import java.util.ArrayList;
035import java.util.Arrays;
036import java.util.Collections;
037import java.util.Date;
038import java.util.HashMap;
039import java.util.HashSet;
040import java.util.Iterator;
041import java.util.LinkedHashMap;
042import java.util.LinkedList;
043import java.util.List;
044import java.util.Map;
045import java.util.Map.Entry;
046import java.util.Set;
047import java.util.concurrent.atomic.AtomicInteger;
048import java.util.regex.Matcher;
049import java.util.regex.Pattern;
050
051import org.apache.commons.lang.StringUtils;
052import org.apache.commons.lang.math.NumberUtils;
053import org.nuxeo.ecm.core.query.QueryParseException;
054import org.nuxeo.ecm.core.query.sql.NXQL;
055import org.nuxeo.ecm.core.query.sql.model.BooleanLiteral;
056import org.nuxeo.ecm.core.query.sql.model.DateLiteral;
057import org.nuxeo.ecm.core.query.sql.model.DoubleLiteral;
058import org.nuxeo.ecm.core.query.sql.model.Expression;
059import org.nuxeo.ecm.core.query.sql.model.Function;
060import org.nuxeo.ecm.core.query.sql.model.IntegerLiteral;
061import org.nuxeo.ecm.core.query.sql.model.Literal;
062import org.nuxeo.ecm.core.query.sql.model.LiteralList;
063import org.nuxeo.ecm.core.query.sql.model.MultiExpression;
064import org.nuxeo.ecm.core.query.sql.model.Operand;
065import org.nuxeo.ecm.core.query.sql.model.Operator;
066import org.nuxeo.ecm.core.query.sql.model.OrderByClause;
067import org.nuxeo.ecm.core.query.sql.model.OrderByExpr;
068import org.nuxeo.ecm.core.query.sql.model.Reference;
069import org.nuxeo.ecm.core.query.sql.model.SelectClause;
070import org.nuxeo.ecm.core.query.sql.model.SelectList;
071import org.nuxeo.ecm.core.query.sql.model.StringLiteral;
072import org.nuxeo.ecm.core.schema.DocumentType;
073import org.nuxeo.ecm.core.schema.SchemaManager;
074import org.nuxeo.ecm.core.schema.types.ComplexType;
075import org.nuxeo.ecm.core.schema.types.Field;
076import org.nuxeo.ecm.core.schema.types.ListType;
077import org.nuxeo.ecm.core.schema.types.Schema;
078import org.nuxeo.ecm.core.schema.types.Type;
079import org.nuxeo.ecm.core.schema.types.primitives.BooleanType;
080import org.nuxeo.ecm.core.schema.types.primitives.DateType;
081import org.nuxeo.ecm.core.storage.ExpressionEvaluator;
082import org.nuxeo.ecm.core.storage.ExpressionEvaluator.PathResolver;
083import org.nuxeo.ecm.core.storage.dbs.DBSDocument;
084import org.nuxeo.ecm.core.storage.dbs.DBSSession;
085import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer;
086import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer.FulltextQuery;
087import org.nuxeo.ecm.core.storage.FulltextQueryAnalyzer.Op;
088import org.nuxeo.runtime.api.Framework;
089
090import com.mongodb.BasicDBObject;
091import com.mongodb.DBObject;
092import com.mongodb.QueryOperators;
093
094/**
095 * Query builder for a MongoDB query from an {@link Expression}.
096 *
097 * @since 5.9.4
098 */
099public class MongoDBQueryBuilder {
100
101    private static final Long ZERO = Long.valueOf(0);
102
103    private static final Long ONE = Long.valueOf(1);
104
105    private static final Long MINUS_ONE = Long.valueOf(-1);
106
107    protected static final String DATE_CAST = "DATE";
108
109    protected final AtomicInteger counter = new AtomicInteger();
110
111    protected final SchemaManager schemaManager;
112
113    protected final MongoDBRepository repository;
114
115    protected final String idKey;
116
117    protected List<String> documentTypes;
118
119    protected final Expression expression;
120
121    protected final SelectClause selectClause;
122
123    protected final OrderByClause orderByClause;
124
125    protected final PathResolver pathResolver;
126
127    public boolean hasFulltext;
128
129    public boolean sortOnFulltextScore;
130
131    protected DBObject query;
132
133    protected DBObject orderBy;
134
135    protected DBObject projection;
136
137    boolean projectionHasWildcard;
138
139    private boolean fulltextSearchDisabled;
140
141    public MongoDBQueryBuilder(MongoDBRepository repository, Expression expression, SelectClause selectClause,
142            OrderByClause orderByClause, PathResolver pathResolver, boolean fulltextSearchDisabled) {
143        schemaManager = Framework.getLocalService(SchemaManager.class);
144        this.repository = repository;
145        idKey = repository.idKey;
146        this.expression = expression;
147        this.selectClause = selectClause;
148        this.orderByClause = orderByClause;
149        this.pathResolver = pathResolver;
150        this.fulltextSearchDisabled = fulltextSearchDisabled;
151    }
152
153    public void walk() {
154        query = walkExpression(expression); // computes hasFulltext
155        walkOrderBy(); // computes sortOnFulltextScore
156        walkProjection(); // needs hasFulltext and sortOnFulltextScore
157    }
158
159    public DBObject getQuery() {
160        return query;
161    }
162
163    public DBObject getOrderBy() {
164        return orderBy;
165    }
166
167    public DBObject getProjection() {
168        return projection;
169    }
170
171    public boolean hasProjectionWildcard() {
172        return projectionHasWildcard;
173    }
174
175    protected void walkOrderBy() {
176        sortOnFulltextScore = false;
177        if (orderByClause == null) {
178            orderBy = null;
179        } else {
180            orderBy = new BasicDBObject();
181            for (OrderByExpr ob : orderByClause.elements) {
182                Reference ref = ob.reference;
183                boolean desc = ob.isDescending;
184                String field = walkReference(ref).queryField;
185                if (!orderBy.containsField(field)) {
186                    Object value;
187                    if (KEY_FULLTEXT_SCORE.equals(field)) {
188                        if (!desc) {
189                            throw new QueryParseException("Cannot sort by " + NXQL.ECM_FULLTEXT_SCORE + " ascending");
190                        }
191                        sortOnFulltextScore = true;
192                        value = new BasicDBObject(MONGODB_META, MONGODB_TEXT_SCORE);
193                    } else {
194                        value = desc ? MINUS_ONE : ONE;
195                    }
196                    orderBy.put(field, value);
197                }
198            }
199            if (sortOnFulltextScore && ((BasicDBObject) orderBy).size() > 1) {
200                throw new QueryParseException("Cannot sort by " + NXQL.ECM_FULLTEXT_SCORE + " and other criteria");
201            }
202        }
203    }
204
205    protected void walkProjection() {
206        projection = new BasicDBObject();
207        projection.put(idKey, ONE); // always useful
208        projection.put(KEY_NAME, ONE); // used in ORDER BY ecm:path
209        projection.put(KEY_PARENT_ID, ONE); // used in ORDER BY ecm:path
210        boolean projectionOnFulltextScore = false;
211        for (int i = 0; i < selectClause.elements.size(); i++) {
212            Operand op = selectClause.elements.get(i);
213            if (!(op instanceof Reference)) {
214                throw new QueryParseException("Projection not supported: " + op);
215            }
216            FieldInfo fieldInfo = walkReference((Reference) op);
217            projection.put(fieldInfo.projectionField, ONE);
218            if (fieldInfo.hasWildcard) {
219                projectionHasWildcard = true;
220            }
221            if (fieldInfo.projectionField.equals(KEY_FULLTEXT_SCORE)) {
222                projectionOnFulltextScore = true;
223            }
224        }
225        if (projectionOnFulltextScore || sortOnFulltextScore) {
226            if (!hasFulltext) {
227                throw new QueryParseException(NXQL.ECM_FULLTEXT_SCORE + " cannot be used without " + NXQL.ECM_FULLTEXT);
228            }
229            projection.put(KEY_FULLTEXT_SCORE, new BasicDBObject(MONGODB_META, MONGODB_TEXT_SCORE));
230        }
231    }
232
233    public DBObject walkExpression(Expression expr) {
234        Operator op = expr.operator;
235        Operand lvalue = expr.lvalue;
236        Operand rvalue = expr.rvalue;
237        Reference ref = lvalue instanceof Reference ? (Reference) lvalue : null;
238        String name = ref != null ? ref.name : null;
239        String cast = ref != null ? ref.cast : null;
240        if (DATE_CAST.equals(cast)) {
241            checkDateLiteralForCast(op, rvalue, name);
242        }
243        if (op == Operator.STARTSWITH) {
244            return walkStartsWith(lvalue, rvalue);
245        } else if (NXQL.ECM_PATH.equals(name)) {
246            return walkEcmPath(op, rvalue);
247        } else if (NXQL.ECM_ANCESTORID.equals(name)) {
248            return walkAncestorId(op, rvalue);
249        } else if (name != null && name.startsWith(NXQL.ECM_FULLTEXT) && !NXQL.ECM_FULLTEXT_JOBID.equals(name)) {
250            return walkEcmFulltext(name, op, rvalue);
251        } else if (op == Operator.SUM) {
252            throw new UnsupportedOperationException("SUM");
253        } else if (op == Operator.SUB) {
254            throw new UnsupportedOperationException("SUB");
255        } else if (op == Operator.MUL) {
256            throw new UnsupportedOperationException("MUL");
257        } else if (op == Operator.DIV) {
258            throw new UnsupportedOperationException("DIV");
259        } else if (op == Operator.LT) {
260            return walkLt(lvalue, rvalue);
261        } else if (op == Operator.GT) {
262            return walkGt(lvalue, rvalue);
263        } else if (op == Operator.EQ) {
264            return walkEq(lvalue, rvalue);
265        } else if (op == Operator.NOTEQ) {
266            return walkNotEq(lvalue, rvalue);
267        } else if (op == Operator.LTEQ) {
268            return walkLtEq(lvalue, rvalue);
269        } else if (op == Operator.GTEQ) {
270            return walkGtEq(lvalue, rvalue);
271        } else if (op == Operator.AND) {
272            if (expr instanceof MultiExpression) {
273                return walkMultiExpression((MultiExpression) expr);
274            } else {
275                return walkAnd(lvalue, rvalue);
276            }
277        } else if (op == Operator.NOT) {
278            return walkNot(lvalue);
279        } else if (op == Operator.OR) {
280            return walkOr(lvalue, rvalue);
281        } else if (op == Operator.LIKE) {
282            return walkLike(lvalue, rvalue, true, false);
283        } else if (op == Operator.ILIKE) {
284            return walkLike(lvalue, rvalue, true, true);
285        } else if (op == Operator.NOTLIKE) {
286            return walkLike(lvalue, rvalue, false, false);
287        } else if (op == Operator.NOTILIKE) {
288            return walkLike(lvalue, rvalue, false, true);
289        } else if (op == Operator.IN) {
290            return walkIn(lvalue, rvalue, true);
291        } else if (op == Operator.NOTIN) {
292            return walkIn(lvalue, rvalue, false);
293        } else if (op == Operator.ISNULL) {
294            return walkIsNull(lvalue);
295        } else if (op == Operator.ISNOTNULL) {
296            return walkIsNotNull(lvalue);
297        } else if (op == Operator.BETWEEN) {
298            return walkBetween(lvalue, rvalue, true);
299        } else if (op == Operator.NOTBETWEEN) {
300            return walkBetween(lvalue, rvalue, false);
301        } else {
302            throw new QueryParseException("Unknown operator: " + op);
303        }
304    }
305
306    protected void checkDateLiteralForCast(Operator op, Operand value, String name) {
307        if (op == Operator.BETWEEN || op == Operator.NOTBETWEEN) {
308            LiteralList l = (LiteralList) value;
309            checkDateLiteralForCast(l.get(0), name);
310            checkDateLiteralForCast(l.get(1), name);
311        } else {
312            checkDateLiteralForCast(value, name);
313        }
314    }
315
316    protected void checkDateLiteralForCast(Operand value, String name) {
317        if (value instanceof DateLiteral && !((DateLiteral) value).onlyDate) {
318            throw new QueryParseException("DATE() cast must be used with DATE literal, not TIMESTAMP: " + name);
319        }
320    }
321
322    protected DBObject walkEcmPath(Operator op, Operand rvalue) {
323        if (op != Operator.EQ && op != Operator.NOTEQ) {
324            throw new QueryParseException(NXQL.ECM_PATH + " requires = or <> operator");
325        }
326        if (!(rvalue instanceof StringLiteral)) {
327            throw new QueryParseException(NXQL.ECM_PATH + " requires literal path as right argument");
328        }
329        String path = ((StringLiteral) rvalue).value;
330        if (path.length() > 1 && path.endsWith("/")) {
331            path = path.substring(0, path.length() - 1);
332        }
333        String id = pathResolver.getIdForPath(path);
334        if (id == null) {
335            // no such path
336            // TODO XXX do better
337            return new BasicDBObject(MONGODB_ID, "__nosuchid__");
338        }
339        if (op == Operator.EQ) {
340            return new BasicDBObject(idKey, id);
341        } else {
342            return new BasicDBObject(idKey, new BasicDBObject(QueryOperators.NE, id));
343        }
344    }
345
346    protected DBObject walkAncestorId(Operator op, Operand rvalue) {
347        if (op != Operator.EQ && op != Operator.NOTEQ) {
348            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires = or <> operator");
349        }
350        if (!(rvalue instanceof StringLiteral)) {
351            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires literal id as right argument");
352        }
353        String ancestorId = ((StringLiteral) rvalue).value;
354        if (op == Operator.EQ) {
355            return new BasicDBObject(DBSDocument.KEY_ANCESTOR_IDS, ancestorId);
356        } else {
357            return new BasicDBObject(DBSDocument.KEY_ANCESTOR_IDS, new BasicDBObject(QueryOperators.NE, ancestorId));
358        }
359    }
360
361    protected DBObject walkEcmFulltext(String name, Operator op, Operand rvalue) {
362        if (op != Operator.EQ && op != Operator.LIKE) {
363            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires = or LIKE operator");
364        }
365        if (!(rvalue instanceof StringLiteral)) {
366            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires literal string as right argument");
367        }
368        if (fulltextSearchDisabled) {
369            throw new QueryParseException("Fulltext search disabled by configuration");
370        }
371        String fulltextQuery = ((StringLiteral) rvalue).value;
372        if (name.equals(NXQL.ECM_FULLTEXT)) {
373            // standard fulltext query
374            hasFulltext = true;
375            String ft = getMongoDBFulltextQuery(fulltextQuery);
376            if (ft == null) {
377                // empty query, matches nothing
378                return new BasicDBObject(MONGODB_ID, "__nosuchid__");
379            }
380            DBObject textSearch = new BasicDBObject();
381            textSearch.put(QueryOperators.SEARCH, ft);
382            // TODO language?
383            return new BasicDBObject(QueryOperators.TEXT, textSearch);
384        } else {
385            // secondary index match with explicit field
386            // do a regexp on the field
387            if (name.charAt(NXQL.ECM_FULLTEXT.length()) != '.') {
388                throw new QueryParseException(name + " has incorrect syntax" + " for a secondary fulltext index");
389            }
390            String prop = name.substring(NXQL.ECM_FULLTEXT.length() + 1);
391            String ft = fulltextQuery.replace(" ", "%");
392            rvalue = new StringLiteral(ft);
393            return walkLike(new Reference(prop), rvalue, true, true);
394        }
395    }
396
397    // public static for tests
398    public static String getMongoDBFulltextQuery(String query) {
399        FulltextQuery ft = FulltextQueryAnalyzer.analyzeFulltextQuery(query);
400        if (ft == null) {
401            return null;
402        }
403        // translate into MongoDB syntax
404        return translateFulltext(ft, false);
405    }
406
407    /**
408     * Transforms the NXQL fulltext syntax into MongoDB syntax.
409     * <p>
410     * The MongoDB fulltext query syntax is badly documented, but is actually the following:
411     * <ul>
412     * <li>a term is a word,
413     * <li>a phrase is a set of spaced-separated words enclosed in double quotes,
414     * <li>negation is done by prepending a -,
415     * <li>the query is a space-separated set of terms, negated terms, phrases, or negated phrases.
416     * <li>all the words of non-negated phrases are also added to the terms.
417     * </ul>
418     * <p>
419     * The matching algorithm is (excluding stemming and stop words):
420     * <ul>
421     * <li>filter out documents with the negative terms, the negative phrases, or missing the phrases,
422     * <li>then if any term is present in the document then it's a match.
423     * </ul>
424     */
425    protected static String translateFulltext(FulltextQuery ft, boolean and) {
426        List<String> buf = new ArrayList<>();
427        translateFulltext(ft, buf, and);
428        return StringUtils.join(buf, ' ');
429    }
430
431    protected static void translateFulltext(FulltextQuery ft, List<String> buf, boolean and) {
432        if (ft.op == Op.OR) {
433            for (FulltextQuery term : ft.terms) {
434                // don't quote words for OR
435                translateFulltext(term, buf, false);
436            }
437        } else if (ft.op == Op.AND) {
438            for (FulltextQuery term : ft.terms) {
439                // quote words for AND
440                translateFulltext(term, buf, true);
441            }
442        } else {
443            String neg;
444            if (ft.op == Op.NOTWORD) {
445                neg = "-";
446            } else { // Op.WORD
447                neg = "";
448            }
449            String word = ft.word.toLowerCase();
450            if (ft.isPhrase() || and) {
451                buf.add(neg + '"' + word + '"');
452            } else {
453                buf.add(neg + word);
454            }
455        }
456    }
457
458    public DBObject walkNot(Operand value) {
459        Object val = walkOperand(value);
460        Object not = pushDownNot(val);
461        if (!(not instanceof DBObject)) {
462            throw new QueryParseException("Cannot do NOT on: " + val);
463        }
464        return (DBObject) not;
465    }
466
467    protected Object pushDownNot(Object object) {
468        if (!(object instanceof DBObject)) {
469            throw new QueryParseException("Cannot do NOT on: " + object);
470        }
471        DBObject ob = (DBObject) object;
472        Set<String> keySet = ob.keySet();
473        if (keySet.size() != 1) {
474            throw new QueryParseException("Cannot do NOT on: " + ob);
475        }
476        String key = keySet.iterator().next();
477        Object value = ob.get(key);
478        if (!key.startsWith("$")) {
479            if (value instanceof DBObject) {
480                // push down inside dbobject
481                return new BasicDBObject(key, pushDownNot(value));
482            } else {
483                // k = v -> k != v
484                return new BasicDBObject(key, new BasicDBObject(QueryOperators.NE, value));
485            }
486        }
487        if (QueryOperators.NE.equals(key)) {
488            // NOT k != v -> k = v
489            return value;
490        }
491        if (QueryOperators.NOT.equals(key)) {
492            // NOT NOT v -> v
493            return value;
494        }
495        if (QueryOperators.AND.equals(key) || QueryOperators.OR.equals(key)) {
496            // boolean algebra
497            // NOT (v1 AND v2) -> NOT v1 OR NOT v2
498            // NOT (v1 OR v2) -> NOT v1 AND NOT v2
499            String op = QueryOperators.AND.equals(key) ? QueryOperators.OR : QueryOperators.AND;
500            List<Object> list = (List<Object>) value;
501            for (int i = 0; i < list.size(); i++) {
502                list.set(i, pushDownNot(list.get(i)));
503            }
504            return new BasicDBObject(op, list);
505        }
506        if (QueryOperators.IN.equals(key) || QueryOperators.NIN.equals(key)) {
507            // boolean algebra
508            // IN <-> NIN
509            String op = QueryOperators.IN.equals(key) ? QueryOperators.NIN : QueryOperators.IN;
510            return new BasicDBObject(op, value);
511        }
512        if (QueryOperators.LT.equals(key) || QueryOperators.GT.equals(key) || QueryOperators.LTE.equals(key)
513                || QueryOperators.GTE.equals(key)) {
514            // TODO use inverse operators?
515            return new BasicDBObject(QueryOperators.NOT, ob);
516        }
517        throw new QueryParseException("Unknown operator for NOT: " + key);
518    }
519
520    public DBObject walkIsNull(Operand value) {
521        FieldInfo fieldInfo = walkReference(value);
522        return new FieldInfoDBObject(fieldInfo, null);
523    }
524
525    public DBObject walkIsNotNull(Operand value) {
526        FieldInfo fieldInfo = walkReference(value);
527        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.NE, null));
528    }
529
530    public DBObject walkMultiExpression(MultiExpression expr) {
531        return walkAnd(expr.values);
532    }
533
534    public DBObject walkAnd(Operand lvalue, Operand rvalue) {
535        return walkAnd(Arrays.asList(lvalue, rvalue));
536    }
537
538    protected DBObject walkAnd(List<Operand> values) {
539        List<Object> list = walkOperandList(values);
540        // check wildcards in the operands, extract common prefixes to use $elemMatch
541        Map<String, List<FieldInfoDBObject>> propBaseKeyToDBOs = new LinkedHashMap<>();
542        Map<String, String> propBaseKeyToFieldBase = new HashMap<>();
543        for (Iterator<Object> it = list.iterator(); it.hasNext();) {
544            Object ob = it.next();
545            if (ob instanceof FieldInfoDBObject) {
546                FieldInfoDBObject fidbo = (FieldInfoDBObject) ob;
547                FieldInfo fieldInfo = fidbo.fieldInfo;
548                if (fieldInfo.hasWildcard) {
549                    if (fieldInfo.fieldSuffix != null && fieldInfo.fieldSuffix.contains("*")) {
550                        // a double wildcard of the form foo/*/bar/* is not a problem if bar is an array
551                        // TODO prevent deep complex multiple wildcards
552                        // throw new QueryParseException("Cannot use two wildcards: " + fieldInfo.prop);
553                    }
554                    // generate a key unique per correlation for this element match
555                    String wildcardNumber = fieldInfo.fieldWildcard;
556                    if (wildcardNumber.isEmpty()) {
557                        // negative to not collide with regular correlated wildcards
558                        wildcardNumber = String.valueOf(-counter.incrementAndGet());
559                    }
560                    String propBaseKey = fieldInfo.fieldPrefix + "/*" + wildcardNumber;
561                    // store object for this key
562                    List<FieldInfoDBObject> dbos = propBaseKeyToDBOs.get(propBaseKey);
563                    if (dbos == null) {
564                        propBaseKeyToDBOs.put(propBaseKey, dbos = new LinkedList<>());
565                    }
566                    dbos.add(fidbo);
567                    // remember for which field base this is
568                    String fieldBase = fieldInfo.fieldPrefix.replace("/", ".");
569                    propBaseKeyToFieldBase.put(propBaseKey, fieldBase);
570                    // remove from list, will be re-added later through propBaseKeyToDBOs
571                    it.remove();
572                }
573            }
574        }
575        // generate $elemMatch items for correlated queries
576        for (Entry<String, List<FieldInfoDBObject>> es : propBaseKeyToDBOs.entrySet()) {
577            String propBaseKey = es.getKey();
578            List<FieldInfoDBObject> fidbos = es.getValue();
579            if (fidbos.size() == 1) {
580                // regular uncorrelated match
581                list.addAll(fidbos);
582            } else {
583                DBObject elemMatch = new BasicDBObject();
584                for (FieldInfoDBObject fidbo : fidbos) {
585                    // truncate field name to just the suffix
586                    FieldInfo fieldInfo = fidbo.fieldInfo;
587                    Object value = fidbo.get(fieldInfo.queryField);
588                    String fieldSuffix = fieldInfo.fieldSuffix.replace("/", ".");
589                    if (elemMatch.containsField(fieldSuffix)) {
590                        // ecm:acl/*1/principal = 'bob' AND ecm:acl/*1/principal = 'steve'
591                        // cannot match
592                        // TODO do better
593                        value = "__NOSUCHVALUE__";
594                    }
595                    elemMatch.put(fieldSuffix, value);
596                }
597                String fieldBase = propBaseKeyToFieldBase.get(propBaseKey);
598                BasicDBObject dbo = new BasicDBObject(fieldBase,
599                        new BasicDBObject(QueryOperators.ELEM_MATCH, elemMatch));
600                list.add(dbo);
601            }
602        }
603        if (list.size() == 1) {
604            return (DBObject) list.get(0);
605        } else {
606            return new BasicDBObject(QueryOperators.AND, list);
607        }
608    }
609
610    public DBObject walkOr(Operand lvalue, Operand rvalue) {
611        Object left = walkOperand(lvalue);
612        Object right = walkOperand(rvalue);
613        List<Object> list = new ArrayList<>(Arrays.asList(left, right));
614        return new BasicDBObject(QueryOperators.OR, list);
615    }
616
617    protected Object checkBoolean(FieldInfo fieldInfo, Object right) {
618        if (fieldInfo.isBoolean()) {
619            // convert 0 / 1 to actual booleans
620            if (right instanceof Long) {
621                if (ZERO.equals(right)) {
622                    right = fieldInfo.isTrueOrNullBoolean ? null : FALSE;
623                } else if (ONE.equals(right)) {
624                    right = TRUE;
625                } else {
626                    throw new QueryParseException("Invalid boolean: " + right);
627                }
628            }
629        }
630        return right;
631    }
632
633    public DBObject walkEq(Operand lvalue, Operand rvalue) {
634        FieldInfo fieldInfo = walkReference(lvalue);
635        Object right = walkOperand(rvalue);
636        if (isMixinTypes(fieldInfo)) {
637            if (!(right instanceof String)) {
638                throw new QueryParseException("Invalid EQ rhs: " + rvalue);
639            }
640            return walkMixinTypes(Collections.singletonList((String) right), true);
641        }
642        right = checkBoolean(fieldInfo, right);
643        // TODO check list fields
644        return new FieldInfoDBObject(fieldInfo, right);
645    }
646
647    public DBObject walkNotEq(Operand lvalue, Operand rvalue) {
648        FieldInfo fieldInfo = walkReference(lvalue);
649        Object right = walkOperand(rvalue);
650        if (isMixinTypes(fieldInfo)) {
651            if (!(right instanceof String)) {
652                throw new QueryParseException("Invalid NE rhs: " + rvalue);
653            }
654            return walkMixinTypes(Collections.singletonList((String) right), false);
655        }
656        right = checkBoolean(fieldInfo, right);
657        // TODO check list fields
658        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.NE, right));
659    }
660
661    public DBObject walkLt(Operand lvalue, Operand rvalue) {
662        FieldInfo fieldInfo = walkReference(lvalue);
663        Object right = walkOperand(rvalue);
664        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.LT, right));
665    }
666
667    public DBObject walkGt(Operand lvalue, Operand rvalue) {
668        FieldInfo fieldInfo = walkReference(lvalue);
669        Object right = walkOperand(rvalue);
670        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.GT, right));
671    }
672
673    public DBObject walkLtEq(Operand lvalue, Operand rvalue) {
674        FieldInfo fieldInfo = walkReference(lvalue);
675        Object right = walkOperand(rvalue);
676        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.LTE, right));
677    }
678
679    public DBObject walkGtEq(Operand lvalue, Operand rvalue) {
680        FieldInfo fieldInfo = walkReference(lvalue);
681        Object right = walkOperand(rvalue);
682        return new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.GTE, right));
683    }
684
685    public DBObject walkBetween(Operand lvalue, Operand rvalue, boolean positive) {
686        LiteralList l = (LiteralList) rvalue;
687        FieldInfo fieldInfo = walkReference(lvalue);
688        Object left = walkOperand(l.get(0));
689        Object right = walkOperand(l.get(1));
690        if (positive) {
691            DBObject range = new BasicDBObject();
692            range.put(QueryOperators.GTE, left);
693            range.put(QueryOperators.LTE, right);
694            return new FieldInfoDBObject(fieldInfo, range);
695        } else {
696            DBObject a = new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.LT, left));
697            DBObject b = new FieldInfoDBObject(fieldInfo, new BasicDBObject(QueryOperators.GT, right));
698            return new BasicDBObject(QueryOperators.OR, Arrays.asList(a, b));
699        }
700    }
701
702    public DBObject walkIn(Operand lvalue, Operand rvalue, boolean positive) {
703        FieldInfo fieldInfo = walkReference(lvalue);
704        Object right = walkOperand(rvalue);
705        if (!(right instanceof List)) {
706            throw new QueryParseException("Invalid IN, right hand side must be a list: " + rvalue);
707        }
708        if (isMixinTypes(fieldInfo)) {
709            return walkMixinTypes((List<String>) right, positive);
710        }
711        // TODO check list fields
712        List<Object> list = (List<Object>) right;
713        return new FieldInfoDBObject(fieldInfo,
714                new BasicDBObject(positive ? QueryOperators.IN : QueryOperators.NIN, list));
715    }
716
717    public DBObject walkLike(Operand lvalue, Operand rvalue, boolean positive, boolean caseInsensitive) {
718        FieldInfo fieldInfo = walkReference(lvalue);
719        if (!(rvalue instanceof StringLiteral)) {
720            throw new QueryParseException("Invalid LIKE/ILIKE, right hand side must be a string: " + rvalue);
721        }
722        // TODO check list fields
723        String like = walkStringLiteral((StringLiteral) rvalue);
724        String regex = ExpressionEvaluator.likeToRegex(like);
725
726        int flags = caseInsensitive ? Pattern.CASE_INSENSITIVE : 0;
727        Pattern pattern = Pattern.compile(regex, flags);
728        Object value;
729        if (positive) {
730            value = pattern;
731        } else {
732            value = new BasicDBObject(QueryOperators.NOT, pattern);
733        }
734        return new FieldInfoDBObject(fieldInfo, value);
735    }
736
737    public Object walkOperand(Operand op) {
738        if (op instanceof Literal) {
739            return walkLiteral((Literal) op);
740        } else if (op instanceof LiteralList) {
741            return walkLiteralList((LiteralList) op);
742        } else if (op instanceof Function) {
743            return walkFunction((Function) op);
744        } else if (op instanceof Expression) {
745            return walkExpression((Expression) op);
746        } else if (op instanceof Reference) {
747            return walkReference((Reference) op);
748        } else {
749            throw new QueryParseException("Unknown operand: " + op);
750        }
751    }
752
753    public Object walkLiteral(Literal lit) {
754        if (lit instanceof BooleanLiteral) {
755            return walkBooleanLiteral((BooleanLiteral) lit);
756        } else if (lit instanceof DateLiteral) {
757            return walkDateLiteral((DateLiteral) lit);
758        } else if (lit instanceof DoubleLiteral) {
759            return walkDoubleLiteral((DoubleLiteral) lit);
760        } else if (lit instanceof IntegerLiteral) {
761            return walkIntegerLiteral((IntegerLiteral) lit);
762        } else if (lit instanceof StringLiteral) {
763            return walkStringLiteral((StringLiteral) lit);
764        } else {
765            throw new QueryParseException("Unknown literal: " + lit);
766        }
767    }
768
769    public Object walkBooleanLiteral(BooleanLiteral lit) {
770        return Boolean.valueOf(lit.value);
771    }
772
773    public Date walkDateLiteral(DateLiteral lit) {
774        return lit.value.toDate(); // TODO onlyDate
775    }
776
777    public Double walkDoubleLiteral(DoubleLiteral lit) {
778        return Double.valueOf(lit.value);
779    }
780
781    public Long walkIntegerLiteral(IntegerLiteral lit) {
782        return Long.valueOf(lit.value);
783    }
784
785    public String walkStringLiteral(StringLiteral lit) {
786        return lit.value;
787    }
788
789    public List<Object> walkLiteralList(LiteralList litList) {
790        List<Object> list = new ArrayList<Object>(litList.size());
791        for (Literal lit : litList) {
792            list.add(walkLiteral(lit));
793        }
794        return list;
795    }
796
797    protected List<Object> walkOperandList(List<Operand> values) {
798        List<Object> list = new LinkedList<>();
799        for (Operand value : values) {
800            list.add(walkOperand(value));
801        }
802        return list;
803    }
804
805    public Object walkFunction(Function func) {
806        throw new UnsupportedOperationException(func.name);
807    }
808
809    public DBObject walkStartsWith(Operand lvalue, Operand rvalue) {
810        if (!(lvalue instanceof Reference)) {
811            throw new QueryParseException("Invalid STARTSWITH query, left hand side must be a property: " + lvalue);
812        }
813        String name = ((Reference) lvalue).name;
814        if (!(rvalue instanceof StringLiteral)) {
815            throw new QueryParseException(
816                    "Invalid STARTSWITH query, right hand side must be a literal path: " + rvalue);
817        }
818        String path = ((StringLiteral) rvalue).value;
819        if (path.length() > 1 && path.endsWith("/")) {
820            path = path.substring(0, path.length() - 1);
821        }
822
823        if (NXQL.ECM_PATH.equals(name)) {
824            return walkStartsWithPath(path);
825        } else {
826            return walkStartsWithNonPath(lvalue, path);
827        }
828    }
829
830    protected DBObject walkStartsWithPath(String path) {
831        // resolve path
832        String ancestorId = pathResolver.getIdForPath(path);
833        if (ancestorId == null) {
834            // no such path
835            // TODO XXX do better
836            return new BasicDBObject(MONGODB_ID, "__nosuchid__");
837        }
838        return new BasicDBObject(DBSDocument.KEY_ANCESTOR_IDS, ancestorId);
839    }
840
841    protected DBObject walkStartsWithNonPath(Operand lvalue, String path) {
842        FieldInfo fieldInfo = walkReference(lvalue);
843        DBObject eq = new FieldInfoDBObject(fieldInfo, path);
844        // escape except alphanumeric and others not needing escaping
845        String regex = path.replaceAll("([^a-zA-Z0-9 /])", "\\\\$1");
846        Pattern pattern = Pattern.compile(regex + "/.*");
847        DBObject like = new FieldInfoDBObject(fieldInfo, pattern);
848        return new BasicDBObject(QueryOperators.OR, Arrays.asList(eq, like));
849    }
850
851    protected FieldInfo walkReference(Operand value) {
852        if (!(value instanceof Reference)) {
853            throw new QueryParseException("Invalid query, left hand side must be a property: " + value);
854        }
855        return walkReference((Reference) value);
856    }
857
858    // non-canonical index syntax, for replaceAll
859    protected final static Pattern NON_CANON_INDEX = Pattern.compile("[^/\\[\\]]+" // name
860            + "\\[(\\d+|\\*|\\*\\d+)\\]" // index in brackets
861    );
862
863    /**
864     * Canonicalizes a Nuxeo-xpath.
865     * <p>
866     * Replaces {@code a/foo[123]/b} with {@code a/123/b}
867     * <p>
868     * A star or a star followed by digits can be used instead of just the digits as well.
869     *
870     * @param xpath the xpath
871     * @return the canonicalized xpath.
872     */
873    public static String canonicalXPath(String xpath) {
874        while (xpath.length() > 0 && xpath.charAt(0) == '/') {
875            xpath = xpath.substring(1);
876        }
877        if (xpath.indexOf('[') == -1) {
878            return xpath;
879        } else {
880            return NON_CANON_INDEX.matcher(xpath).replaceAll("$1");
881        }
882    }
883
884    /** Splits foo.*.bar into foo, *, bar and split foo.*1.bar into foo, *1, bar with the last bar part optional */
885    protected final static Pattern WILDCARD_SPLIT = Pattern.compile("([^*]*)\\.\\*(\\d*)(?:\\.(.*))?");
886
887    protected static class FieldInfo {
888
889        /** NXQL property. */
890        protected final String prop;
891
892        /** MongoDB field including wildcards (not used as-is). */
893        protected final String fullField;
894
895        /** MongoDB field for query. foo/0/bar -> foo.0.bar; foo / * / bar -> foo.bar */
896        protected final String queryField;
897
898        /** MongoDB field for projection. */
899        protected final String projectionField;
900
901        protected final Type type;
902
903        /**
904         * Boolean system properties only use TRUE or NULL, not FALSE, so queries must be updated accordingly.
905         */
906        protected final boolean isTrueOrNullBoolean;
907
908        protected final boolean hasWildcard;
909
910        /** Prefix before the wildcard. */
911        protected final String fieldPrefix;
912
913        /** Wildcard part after * */
914        protected final String fieldWildcard;
915
916        /** Part after wildcard, may be null. */
917        protected final String fieldSuffix;
918
919        protected FieldInfo(String prop, String fullField, String queryField, String projectionField, Type type,
920                boolean isTrueOrNullBoolean) {
921            this.prop = prop;
922            this.fullField = fullField;
923            this.queryField = queryField;
924            this.projectionField = projectionField;
925            this.type = type;
926            this.isTrueOrNullBoolean = isTrueOrNullBoolean;
927            Matcher m = WILDCARD_SPLIT.matcher(fullField);
928            if (m.matches()) {
929                hasWildcard = true;
930                fieldPrefix = m.group(1);
931                fieldWildcard = m.group(2);
932                fieldSuffix = m.group(3);
933            } else {
934                hasWildcard = false;
935                fieldPrefix = fieldWildcard = fieldSuffix = null;
936            }
937        }
938
939        protected boolean isBoolean() {
940            return type instanceof BooleanType;
941        }
942    }
943
944    protected static class FieldInfoDBObject extends BasicDBObject {
945
946        private static final long serialVersionUID = 1L;
947
948        protected FieldInfo fieldInfo;
949
950        public FieldInfoDBObject(FieldInfo fieldInfo, Object value) {
951            super(fieldInfo.queryField, value);
952            this.fieldInfo = fieldInfo;
953        }
954    }
955
956    /**
957     * Returns the MongoDB field for this reference.
958     */
959    public FieldInfo walkReference(Reference ref) {
960        FieldInfo fieldInfo = walkReference(ref.name);
961        if (DATE_CAST.equals(ref.cast)) {
962            Type type = fieldInfo.type;
963            if (!(type instanceof DateType
964                    || (type instanceof ListType && ((ListType) type).getFieldType() instanceof DateType))) {
965                throw new QueryParseException("Cannot cast to " + ref.cast + ": " + ref.name);
966            }
967            // fieldInfo.isDateCast = true;
968        }
969        return fieldInfo;
970    }
971
972    protected FieldInfo walkReference(String name) {
973        String prop = canonicalXPath(name);
974        String[] parts = prop.split("/");
975        if (prop.startsWith(NXQL.ECM_PREFIX)) {
976            if (prop.startsWith(NXQL.ECM_ACL + "/")) {
977                return parseACP(prop, parts);
978            }
979            // simple field
980            String field = DBSSession.convToInternal(prop);
981            Type type = DBSSession.getType(field);
982            String queryField = repository.keyToBson(field);
983            return new FieldInfo(prop, field, queryField, field, type, true);
984        } else {
985            String first = parts[0];
986            Field field = schemaManager.getField(first);
987            if (field == null) {
988                if (first.indexOf(':') > -1) {
989                    throw new QueryParseException("No such property: " + name);
990                }
991                // check without prefix
992                // TODO precompute this in SchemaManagerImpl
993                for (Schema schema : schemaManager.getSchemas()) {
994                    if (!StringUtils.isBlank(schema.getNamespace().prefix)) {
995                        // schema with prefix, do not consider as candidate
996                        continue;
997                    }
998                    if (schema != null) {
999                        field = schema.getField(first);
1000                        if (field != null) {
1001                            break;
1002                        }
1003                    }
1004                }
1005                if (field == null) {
1006                    throw new QueryParseException("No such property: " + name);
1007                }
1008            }
1009            Type type = field.getType();
1010            // canonical name
1011            parts[0] = field.getName().getPrefixedName();
1012            // are there wildcards or list indexes?
1013            List<String> queryFieldParts = new LinkedList<>(); // field for query
1014            List<String> projectionFieldParts = new LinkedList<>(); // field for projection
1015            boolean firstPart = true;
1016            for (String part : parts) {
1017                if (NumberUtils.isDigits(part)) {
1018                    // explicit list index
1019                    queryFieldParts.add(part);
1020                    type = ((ListType) type).getFieldType();
1021                } else if (!part.startsWith("*")) {
1022                    // complex sub-property
1023                    queryFieldParts.add(part);
1024                    projectionFieldParts.add(part);
1025                    if (!firstPart) {
1026                        // we already computed the type of the first part
1027                        field = ((ComplexType) type).getField(part);
1028                        if (field == null) {
1029                            throw new QueryParseException("No such property: " + name);
1030                        }
1031                        type = field.getType();
1032                    }
1033                } else {
1034                    // wildcard
1035                    type = ((ListType) type).getFieldType();
1036                }
1037                firstPart = false;
1038            }
1039            String fullField = StringUtils.join(parts, '.');
1040            String queryField = StringUtils.join(queryFieldParts, '.');
1041            String projectionField = StringUtils.join(projectionFieldParts, '.');
1042            return new FieldInfo(prop, fullField, queryField, projectionField, type, false);
1043        }
1044    }
1045
1046    protected FieldInfo parseACP(String prop, String[] parts) {
1047        if (parts.length != 3) {
1048            throw new QueryParseException("No such property: " + prop);
1049        }
1050        String wildcard = parts[1];
1051        if (NumberUtils.isDigits(wildcard)) {
1052            throw new QueryParseException("Cannot use explicit index in ACLs: " + prop);
1053        }
1054        String last = parts[2];
1055        String fullField;
1056        String queryField;
1057        String projectionField;
1058        if (NXQL.ECM_ACL_NAME.equals(last)) {
1059            fullField = KEY_ACP + "." + KEY_ACL_NAME;
1060            queryField = KEY_ACP + "." + KEY_ACL_NAME;
1061            // TODO remember wildcard correlation
1062        } else {
1063            String fieldLast = DBSSession.convToInternalAce(last);
1064            if (fieldLast == null) {
1065                throw new QueryParseException("No such property: " + prop);
1066            }
1067            fullField = KEY_ACP + "." + KEY_ACL + "." + wildcard + "." + fieldLast;
1068            queryField = KEY_ACP + "." + KEY_ACL + "." + fieldLast;
1069        }
1070        Type type = DBSSession.getType(last);
1071        projectionField = queryField;
1072        return new FieldInfo(prop, fullField, queryField, projectionField, type, false);
1073    }
1074
1075    protected boolean isMixinTypes(FieldInfo fieldInfo) {
1076        return fieldInfo.queryField.equals(DBSDocument.KEY_MIXIN_TYPES);
1077    }
1078
1079    protected Set<String> getMixinDocumentTypes(String mixin) {
1080        Set<String> types = schemaManager.getDocumentTypeNamesForFacet(mixin);
1081        return types == null ? Collections.emptySet() : types;
1082    }
1083
1084    protected List<String> getDocumentTypes() {
1085        // TODO precompute in SchemaManager
1086        if (documentTypes == null) {
1087            documentTypes = new ArrayList<>();
1088            for (DocumentType docType : schemaManager.getDocumentTypes()) {
1089                documentTypes.add(docType.getName());
1090            }
1091        }
1092        return documentTypes;
1093    }
1094
1095    protected boolean isNeverPerInstanceMixin(String mixin) {
1096        return schemaManager.getNoPerDocumentQueryFacets().contains(mixin);
1097    }
1098
1099    /**
1100     * Matches the mixin types against a list of values.
1101     * <p>
1102     * Used for:
1103     * <ul>
1104     * <li>ecm:mixinTypes = 'Foo'
1105     * <li>ecm:mixinTypes != 'Foo'
1106     * <li>ecm:mixinTypes IN ('Foo', 'Bar')
1107     * <li>ecm:mixinTypes NOT IN ('Foo', 'Bar')
1108     * </ul>
1109     * <p>
1110     * ecm:mixinTypes IN ('Foo', 'Bar')
1111     *
1112     * <pre>
1113     * { "$or" : [ { "ecm:primaryType" : { "$in" : [ ... types with Foo or Bar ...]}} ,
1114     *             { "ecm:mixinTypes" : { "$in" : [ "Foo" , "Bar]}}]}
1115     * </pre>
1116     *
1117     * ecm:mixinTypes NOT IN ('Foo', 'Bar')
1118     * <p>
1119     *
1120     * <pre>
1121     * { "$and" : [ { "ecm:primaryType" : { "$in" : [ ... types without Foo nor Bar ...]}} ,
1122     *              { "ecm:mixinTypes" : { "$nin" : [ "Foo" , "Bar]}}]}
1123     * </pre>
1124     */
1125    public DBObject walkMixinTypes(List<String> mixins, boolean include) {
1126        /*
1127         * Primary types that match.
1128         */
1129        Set<String> matchPrimaryTypes;
1130        if (include) {
1131            matchPrimaryTypes = new HashSet<String>();
1132            for (String mixin : mixins) {
1133                matchPrimaryTypes.addAll(getMixinDocumentTypes(mixin));
1134            }
1135        } else {
1136            matchPrimaryTypes = new HashSet<String>(getDocumentTypes());
1137            for (String mixin : mixins) {
1138                matchPrimaryTypes.removeAll(getMixinDocumentTypes(mixin));
1139            }
1140        }
1141        /*
1142         * Instance mixins that match.
1143         */
1144        Set<String> matchMixinTypes = new HashSet<String>();
1145        for (String mixin : mixins) {
1146            if (!isNeverPerInstanceMixin(mixin)) {
1147                matchMixinTypes.add(mixin);
1148            }
1149        }
1150        /*
1151         * MongoDB query generation.
1152         */
1153        // match on primary type
1154        DBObject p = new BasicDBObject(DBSDocument.KEY_PRIMARY_TYPE,
1155                new BasicDBObject(QueryOperators.IN, matchPrimaryTypes));
1156        // match on mixin types
1157        // $in/$nin with an array matches if any/no element of the array matches
1158        String innin = include ? QueryOperators.IN : QueryOperators.NIN;
1159        DBObject m = new BasicDBObject(DBSDocument.KEY_MIXIN_TYPES, new BasicDBObject(innin, matchMixinTypes));
1160        // and/or between those
1161        String op = include ? QueryOperators.OR : QueryOperators.AND;
1162        return new BasicDBObject(op, Arrays.asList(p, m));
1163    }
1164
1165}