001/* 002 * (C) Copyright 2014-2018 Nuxeo (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019package org.nuxeo.ecm.core.storage; 020 021import static java.lang.Boolean.FALSE; 022import static java.lang.Boolean.TRUE; 023import static org.nuxeo.ecm.core.api.trash.TrashService.Feature.TRASHED_STATE_IN_MIGRATION; 024import static org.nuxeo.ecm.core.api.trash.TrashService.Feature.TRASHED_STATE_IS_DEDICATED_PROPERTY; 025import static org.nuxeo.ecm.core.api.trash.TrashService.Feature.TRASHED_STATE_IS_DEDUCED_FROM_LIFECYCLE; 026 027import java.util.ArrayList; 028import java.util.Arrays; 029import java.util.Calendar; 030import java.util.Collections; 031import java.util.Deque; 032import java.util.HashSet; 033import java.util.LinkedList; 034import java.util.List; 035import java.util.Set; 036import java.util.regex.Pattern; 037 038import org.apache.commons.lang3.CharUtils; 039import org.apache.commons.lang3.StringUtils; 040import org.nuxeo.ecm.core.api.LifeCycleConstants; 041import org.nuxeo.ecm.core.query.QueryParseException; 042import org.nuxeo.ecm.core.query.sql.NXQL; 043import org.nuxeo.ecm.core.query.sql.model.BooleanLiteral; 044import org.nuxeo.ecm.core.query.sql.model.DateLiteral; 045import org.nuxeo.ecm.core.query.sql.model.DoubleLiteral; 046import org.nuxeo.ecm.core.query.sql.model.Expression; 047import org.nuxeo.ecm.core.query.sql.model.Function; 048import org.nuxeo.ecm.core.query.sql.model.IntegerLiteral; 049import org.nuxeo.ecm.core.query.sql.model.Literal; 050import org.nuxeo.ecm.core.query.sql.model.LiteralList; 051import org.nuxeo.ecm.core.query.sql.model.MultiExpression; 052import org.nuxeo.ecm.core.query.sql.model.Operand; 053import org.nuxeo.ecm.core.query.sql.model.Operator; 054import org.nuxeo.ecm.core.query.sql.model.Predicate; 055import org.nuxeo.ecm.core.query.sql.model.Reference; 056import org.nuxeo.ecm.core.query.sql.model.StringLiteral; 057import org.nuxeo.ecm.core.trash.TrashService; 058import org.nuxeo.runtime.api.Framework; 059 060import com.google.common.collect.Iterators; 061import com.google.common.collect.PeekingIterator; 062 063/** 064 * Evaluator for an {@link Expression}. 065 * 066 * @since 5.9.4 067 */ 068public abstract class ExpressionEvaluator { 069 070 /** pseudo NXQL to resolve ancestor ids. */ 071 public static final String NXQL_ECM_ANCESTOR_IDS = "ecm:__ancestorIds"; 072 073 /** pseudo NXQL to resolve internal path. */ 074 public static final String NXQL_ECM_PATH = "ecm:__path"; 075 076 /** pseudo NXQL to resolve read acls. */ 077 public static final String NXQL_ECM_READ_ACL = "ecm:__read_acl"; 078 079 public static final String NXQL_ECM_FULLTEXT_SIMPLE = "ecm:__fulltextSimple"; 080 081 public static final String NXQL_ECM_FULLTEXT_BINARY = "ecm:__fulltextBinary"; 082 083 protected static final String DATE_CAST = "DATE"; 084 085 protected static final String PHRASE_QUOTE = "\""; 086 087 protected static final String NEG_PHRASE_QUOTE = "-\""; 088 089 protected static final String OR = "or"; 090 091 /** 092 * Interface for a class that knows how to resolve a path into an id. 093 */ 094 public interface PathResolver { 095 /** 096 * Returns the id for a given path. 097 * 098 * @param path the path 099 * @return the id, or {@code null} if not found 100 */ 101 String getIdForPath(String path); 102 } 103 104 public final PathResolver pathResolver; 105 106 public final Set<String> principals; 107 108 public final boolean fulltextSearchDisabled; 109 110 public boolean hasFulltext; 111 112 public ExpressionEvaluator(PathResolver pathResolver, String[] principals, boolean fulltextSearchDisabled) { 113 this.pathResolver = pathResolver; 114 this.principals = principals == null ? null : new HashSet<>(Arrays.asList(principals)); 115 this.fulltextSearchDisabled = fulltextSearchDisabled; 116 } 117 118 public Object walkExpression(Expression expr) { 119 Operator op = expr.operator; 120 Operand lvalue = expr.lvalue; 121 Operand rvalue = expr.rvalue; 122 Reference ref = lvalue instanceof Reference ? (Reference) lvalue : null; 123 String name = ref != null ? ref.name : null; 124 String cast = ref != null ? ref.cast : null; 125 if (DATE_CAST.equals(cast)) { 126 checkDateLiteralForCast(rvalue, name); 127 } 128 if (op == Operator.STARTSWITH) { 129 return walkStartsWith(lvalue, rvalue); 130 } else if (NXQL.ECM_PATH.equals(name)) { 131 return walkEcmPath(op, rvalue); 132 } else if (NXQL.ECM_ANCESTORID.equals(name)) { 133 return walkAncestorId(op, rvalue); 134 } else if (NXQL.ECM_ISTRASHED.equals(name)) { 135 return walkIsTrashed(op, rvalue); 136 } else if (name != null && name.startsWith(NXQL.ECM_FULLTEXT) && !NXQL.ECM_FULLTEXT_JOBID.equals(name)) { 137 return walkEcmFulltext(name, op, rvalue); 138 } else if (op == Operator.SUM) { 139 throw new UnsupportedOperationException("SUM"); 140 } else if (op == Operator.SUB) { 141 throw new UnsupportedOperationException("SUB"); 142 } else if (op == Operator.MUL) { 143 throw new UnsupportedOperationException("MUL"); 144 } else if (op == Operator.DIV) { 145 throw new UnsupportedOperationException("DIV"); 146 } else if (op == Operator.LT) { 147 return walkLt(lvalue, rvalue); 148 } else if (op == Operator.GT) { 149 return walkGt(lvalue, rvalue); 150 } else if (op == Operator.EQ) { 151 return walkEq(lvalue, rvalue); 152 } else if (op == Operator.NOTEQ) { 153 return walkNotEq(lvalue, rvalue); 154 } else if (op == Operator.LTEQ) { 155 return walkLtEq(lvalue, rvalue); 156 } else if (op == Operator.GTEQ) { 157 return walkGtEq(lvalue, rvalue); 158 } else if (op == Operator.AND) { 159 if (expr instanceof MultiExpression) { 160 return walkMultiExpression((MultiExpression) expr); 161 } else { 162 return walkAnd(lvalue, rvalue); 163 } 164 } else if (op == Operator.NOT) { 165 return walkNot(lvalue); 166 } else if (op == Operator.OR) { 167 return walkOr(lvalue, rvalue); 168 } else if (op == Operator.LIKE) { 169 return walkLike(lvalue, rvalue, true, false); 170 } else if (op == Operator.ILIKE) { 171 return walkLike(lvalue, rvalue, true, true); 172 } else if (op == Operator.NOTLIKE) { 173 return walkLike(lvalue, rvalue, false, false); 174 } else if (op == Operator.NOTILIKE) { 175 return walkLike(lvalue, rvalue, false, true); 176 } else if (op == Operator.IN) { 177 return walkIn(lvalue, rvalue, true); 178 } else if (op == Operator.NOTIN) { 179 return walkIn(lvalue, rvalue, false); 180 } else if (op == Operator.ISNULL) { 181 return walkIsNull(lvalue); 182 } else if (op == Operator.ISNOTNULL) { 183 return walkIsNotNull(lvalue); 184 } else if (op == Operator.BETWEEN) { 185 return walkBetween(lvalue, rvalue, true); 186 } else if (op == Operator.NOTBETWEEN) { 187 return walkBetween(lvalue, rvalue, false); 188 } else { 189 throw new QueryParseException("Unknown operator: " + op); 190 } 191 } 192 193 protected void checkDateLiteralForCast(Operand value, String name) { 194 if (value instanceof DateLiteral && !((DateLiteral) value).onlyDate) { 195 throw new QueryParseException("DATE() cast must be used with DATE literal, not TIMESTAMP: " + name); 196 } 197 } 198 199 protected Boolean walkEcmPath(Operator op, Operand rvalue) { 200 if (op != Operator.EQ && op != Operator.NOTEQ) { 201 throw new QueryParseException(NXQL.ECM_PATH + " requires = or <> operator"); 202 } 203 if (!(rvalue instanceof StringLiteral)) { 204 throw new QueryParseException(NXQL.ECM_PATH + " requires literal path as right argument"); 205 } 206 String path = ((StringLiteral) rvalue).value; 207 if (path.length() > 1 && path.endsWith("/")) { 208 path = path.substring(0, path.length() - 1); 209 } 210 String id = pathResolver.getIdForPath(path); 211 Object right = walkReference(new Reference(NXQL.ECM_UUID)); 212 if (id == null) { 213 return FALSE; 214 } 215 Boolean eq = eq(id, right); 216 return op == Operator.EQ ? eq : not(eq); 217 } 218 219 protected Boolean walkAncestorId(Operator op, Operand rvalue) { 220 if (op != Operator.EQ && op != Operator.NOTEQ) { 221 throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires = or <> operator"); 222 } 223 if (!(rvalue instanceof StringLiteral)) { 224 throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires literal id as right argument"); 225 } 226 String ancestorId = ((StringLiteral) rvalue).value; 227 Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS)); 228 boolean eq = op == Operator.EQ ? true : false; 229 if (ancestorIds == null) { 230 // placeless 231 return eq ? FALSE : TRUE; 232 } 233 for (Object id : ancestorIds) { 234 if (ancestorId.equals(id)) { 235 return eq ? TRUE : FALSE; 236 } 237 } 238 return eq ? FALSE : TRUE; 239 } 240 241 protected Boolean walkEcmFulltext(String name, Operator op, Operand rvalue) { 242 if (op != Operator.EQ && op != Operator.LIKE) { 243 throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires = or LIKE operator"); 244 } 245 if (!(rvalue instanceof StringLiteral)) { 246 throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires literal string as right argument"); 247 } 248 if (fulltextSearchDisabled) { 249 throw new QueryParseException("Fulltext search disabled by configuration"); 250 } 251 String query = ((StringLiteral) rvalue).value; 252 if (name.equals(NXQL.ECM_FULLTEXT)) { 253 // standard fulltext query 254 hasFulltext = true; 255 String simple = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_SIMPLE)); 256 String binary = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_BINARY)); 257 return fulltext(simple, binary, query); 258 } else { 259 // secondary index match with explicit field 260 // do a regexp on the field 261 if (name.charAt(NXQL.ECM_FULLTEXT.length()) != '.') { 262 throw new QueryParseException(name + " has incorrect syntax for a secondary fulltext index"); 263 } 264 String prop = name.substring(NXQL.ECM_FULLTEXT.length() + 1); 265 String ft = query.replace(" ", "%"); 266 rvalue = new StringLiteral(ft); 267 return walkLike(new Reference(prop), rvalue, true, true); 268 } 269 } 270 271 protected Boolean walkIsTrashed(Operator op, Operand rvalue) { 272 if (op != Operator.EQ && op != Operator.NOTEQ) { 273 throw new QueryParseException(NXQL.ECM_ISTRASHED + " requires = or <> operator"); 274 } 275 TrashService trashService = Framework.getService(TrashService.class); 276 if (trashService.hasFeature(TRASHED_STATE_IS_DEDUCED_FROM_LIFECYCLE)) { 277 return walkIsTrashed(new Reference(NXQL.ECM_LIFECYCLESTATE), op, rvalue, 278 new StringLiteral(LifeCycleConstants.DELETED_STATE)); 279 } else if (trashService.hasFeature(TRASHED_STATE_IN_MIGRATION)) { 280 Boolean lifeCycleTrashed = walkIsTrashed(new Reference(NXQL.ECM_LIFECYCLESTATE), op, rvalue, 281 new StringLiteral(LifeCycleConstants.DELETED_STATE)); 282 Boolean propertyTrashed = walkIsTrashed(new Reference(NXQL.ECM_ISTRASHED), op, rvalue, 283 new IntegerLiteral(1L)); 284 return or(lifeCycleTrashed, propertyTrashed); 285 } else if (trashService.hasFeature(TRASHED_STATE_IS_DEDICATED_PROPERTY)) { 286 return walkIsTrashed(new Reference(NXQL.ECM_ISTRASHED), op, rvalue, new IntegerLiteral(1L)); 287 } else { 288 throw new UnsupportedOperationException("TrashService is in an unknown state"); 289 } 290 } 291 292 protected Boolean walkIsTrashed(Reference ref, Operator op, Operand initialRvalue, Literal deletedRvalue) { 293 long v; 294 if (!(initialRvalue instanceof IntegerLiteral) 295 || ((v = ((IntegerLiteral) initialRvalue).value) != 0 && v != 1)) { 296 throw new QueryParseException(NXQL.ECM_ISTRASHED + " requires literal 0 or 1 as right argument"); 297 } 298 boolean equalsDeleted = op == Operator.EQ ^ v == 0; 299 if (equalsDeleted) { 300 return walkEq(ref, deletedRvalue); 301 } else { 302 return walkNotEq(ref, deletedRvalue); 303 } 304 } 305 306 public Boolean walkNot(Operand value) { 307 return not(bool(walkOperand(value))); 308 } 309 310 public Boolean walkIsNull(Operand value) { 311 return Boolean.valueOf(walkOperand(value) == null); 312 } 313 314 public Boolean walkIsNotNull(Operand value) { 315 return Boolean.valueOf(walkOperand(value) != null); 316 } 317 318 // ternary logic 319 public Boolean walkMultiExpression(MultiExpression expr) { 320 Boolean res = TRUE; 321 for (Operand value : expr.values) { 322 Boolean bool = bool(walkOperand(value)); 323 // don't short-circuit on null, we want to walk all references deterministically 324 res = and(res, bool); 325 } 326 return res; 327 } 328 329 public Boolean walkAnd(Operand lvalue, Operand rvalue) { 330 Boolean left = bool(walkOperand(lvalue)); 331 Boolean right = bool(walkOperand(rvalue)); 332 return and(left, right); 333 } 334 335 public Boolean walkOr(Operand lvalue, Operand rvalue) { 336 Boolean left = bool(walkOperand(lvalue)); 337 Boolean right = bool(walkOperand(rvalue)); 338 return or(left, right); 339 } 340 341 public Boolean walkEq(Operand lvalue, Operand rvalue) { 342 Object right = walkOperand(rvalue); 343 if (isMixinTypes(lvalue)) { 344 if (!(right instanceof String)) { 345 throw new QueryParseException("Invalid EQ rhs: " + rvalue); 346 } 347 return walkMixinTypes(Collections.singletonList((String) right), true); 348 } 349 Object left = walkOperand(lvalue); 350 return eqMaybeList(left, right); 351 } 352 353 public Boolean walkNotEq(Operand lvalue, Operand rvalue) { 354 if (isMixinTypes(lvalue)) { 355 Object right = walkOperand(rvalue); 356 if (!(right instanceof String)) { 357 throw new QueryParseException("Invalid NE rhs: " + rvalue); 358 } 359 return walkMixinTypes(Collections.singletonList((String) right), false); 360 } 361 return not(walkEq(lvalue, rvalue)); 362 } 363 364 public Boolean walkLt(Operand lvalue, Operand rvalue) { 365 Integer cmp = cmp(lvalue, rvalue); 366 return cmp == null ? null : cmp < 0; 367 } 368 369 public Boolean walkGt(Operand lvalue, Operand rvalue) { 370 Integer cmp = cmp(lvalue, rvalue); 371 return cmp == null ? null : cmp > 0; 372 } 373 374 public Boolean walkLtEq(Operand lvalue, Operand rvalue) { 375 Integer cmp = cmp(lvalue, rvalue); 376 return cmp == null ? null : cmp <= 0; 377 } 378 379 public Boolean walkGtEq(Operand lvalue, Operand rvalue) { 380 Integer cmp = cmp(lvalue, rvalue); 381 return cmp == null ? null : cmp >= 0; 382 } 383 384 public Object walkBetween(Operand lvalue, Operand rvalue, boolean positive) { 385 LiteralList l = (LiteralList) rvalue; 386 Predicate va = new Predicate(lvalue, Operator.GTEQ, l.get(0)); 387 Predicate vb = new Predicate(lvalue, Operator.LTEQ, l.get(1)); 388 Predicate pred = new Predicate(va, Operator.AND, vb); 389 if (!positive) { 390 pred = new Predicate(pred, Operator.NOT, null); 391 } 392 return walkExpression(pred); 393 } 394 395 public Boolean walkIn(Operand lvalue, Operand rvalue, boolean positive) { 396 Object right = walkOperand(rvalue); 397 if (!(right instanceof List)) { 398 throw new QueryParseException("Invalid IN rhs: " + rvalue); 399 } 400 if (isMixinTypes(lvalue)) { 401 return walkMixinTypes((List<String>) right, positive); 402 } 403 Object left = walkOperand(lvalue); 404 Boolean in = inMaybeList(left, (List<Object>) right); 405 return positive ? in : not(in); 406 } 407 408 public Object walkOperand(Operand op) { 409 if (op instanceof Literal) { 410 return walkLiteral((Literal) op); 411 } else if (op instanceof LiteralList) { 412 return walkLiteralList((LiteralList) op); 413 } else if (op instanceof Function) { 414 return walkFunction((Function) op); 415 } else if (op instanceof Expression) { 416 return walkExpression((Expression) op); 417 } else if (op instanceof Reference) { 418 return walkReference((Reference) op); 419 } else { 420 throw new QueryParseException("Unknown operand: " + op); 421 } 422 } 423 424 public Object walkLiteral(Literal lit) { 425 if (lit instanceof BooleanLiteral) { 426 return walkBooleanLiteral((BooleanLiteral) lit); 427 } else if (lit instanceof DateLiteral) { 428 return walkDateLiteral((DateLiteral) lit); 429 } else if (lit instanceof DoubleLiteral) { 430 return walkDoubleLiteral((DoubleLiteral) lit); 431 } else if (lit instanceof IntegerLiteral) { 432 return walkIntegerLiteral((IntegerLiteral) lit); 433 } else if (lit instanceof StringLiteral) { 434 return walkStringLiteral((StringLiteral) lit); 435 } else { 436 throw new QueryParseException("Unknown literal: " + lit); 437 } 438 } 439 440 public Boolean walkBooleanLiteral(BooleanLiteral lit) { 441 return Boolean.valueOf(lit.value); 442 } 443 444 public Calendar walkDateLiteral(DateLiteral lit) { 445 if (lit.onlyDate) { 446 Calendar date = lit.toCalendar(); 447 if (date != null) { 448 date.set(Calendar.HOUR_OF_DAY, 0); 449 date.set(Calendar.MINUTE, 0); 450 date.set(Calendar.SECOND, 0); 451 date.set(Calendar.MILLISECOND, 0); 452 } 453 return date; 454 } else { 455 return lit.toCalendar(); 456 } 457 } 458 459 public Double walkDoubleLiteral(DoubleLiteral lit) { 460 return Double.valueOf(lit.value); 461 } 462 463 public Long walkIntegerLiteral(IntegerLiteral lit) { 464 return Long.valueOf(lit.value); 465 } 466 467 public String walkStringLiteral(StringLiteral lit) { 468 return lit.value; 469 } 470 471 public List<Object> walkLiteralList(LiteralList litList) { 472 List<Object> list = new ArrayList<>(litList.size()); 473 for (Literal lit : litList) { 474 list.add(walkLiteral(lit)); 475 } 476 return list; 477 } 478 479 public Boolean walkLike(Operand lvalue, Operand rvalue, boolean positive, boolean caseInsensitive) { 480 Object left = walkOperand(lvalue); 481 Object right = walkOperand(rvalue); 482 if (!(right instanceof String)) { 483 throw new QueryParseException("Invalid LIKE rhs: " + rvalue); 484 } 485 return likeMaybeList(left, (String) right, positive, caseInsensitive); 486 } 487 488 public Object walkFunction(Function func) { 489 throw new UnsupportedOperationException("Function"); 490 } 491 492 public Boolean walkStartsWith(Operand lvalue, Operand rvalue) { 493 if (!(lvalue instanceof Reference)) { 494 throw new QueryParseException("Invalid STARTSWITH query, left hand side must be a property: " + lvalue); 495 } 496 String name = ((Reference) lvalue).name; 497 if (!(rvalue instanceof StringLiteral)) { 498 throw new QueryParseException( 499 "Invalid STARTSWITH query, right hand side must be a literal path: " + rvalue); 500 } 501 String path = ((StringLiteral) rvalue).value; 502 if (path.length() > 1 && path.endsWith("/")) { 503 path = path.substring(0, path.length() - 1); 504 } 505 506 if (NXQL.ECM_PATH.equals(name)) { 507 return walkStartsWithPath(path); 508 } else { 509 return walkStartsWithNonPath(lvalue, path); 510 } 511 } 512 513 protected Boolean walkStartsWithPath(String path) { 514 // resolve path 515 String ancestorId = pathResolver.getIdForPath(path); 516 // don't return early on null ancestorId, we want to walk all references deterministically 517 Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS)); 518 if (ancestorId == null) { 519 // no such path 520 return FALSE; 521 } 522 if (ancestorIds == null) { 523 // placeless 524 return FALSE; 525 } 526 for (Object id : ancestorIds) { 527 if (ancestorId.equals(id)) { 528 return TRUE; 529 } 530 } 531 return FALSE; 532 } 533 534 protected Boolean walkStartsWithNonPath(Operand lvalue, String path) { 535 Object left = walkReference((Reference) lvalue); 536 // exact match 537 Boolean bool = eqMaybeList(left, path); 538 if (TRUE.equals(bool)) { 539 return TRUE; 540 } 541 // prefix match TODO escape % chars 542 String pattern = path + "/%"; 543 return likeMaybeList(left, pattern, true, false); 544 } 545 546 /** 547 * Evaluates a reference over the context state. 548 * 549 * @param ref the reference 550 */ 551 public abstract Object walkReference(Reference ref); 552 553 protected boolean isMixinTypes(Operand op) { 554 if (!(op instanceof Reference)) { 555 return false; 556 } 557 return ((Reference) op).name.equals(NXQL.ECM_MIXINTYPE); 558 } 559 560 protected Boolean bool(Object value) { 561 if (value == null) { 562 return null; 563 } 564 if (!(value instanceof Boolean)) { 565 throw new QueryParseException("Not a boolean: " + value); 566 } 567 return (Boolean) value; 568 } 569 570 // ternary logic 571 protected Boolean not(Boolean value) { 572 if (value == null) { 573 return null; 574 } 575 return !value; 576 } 577 578 // ternary logic 579 protected Boolean and(Boolean left, Boolean right) { 580 if (TRUE.equals(left)) { 581 return right; 582 } else { 583 return left; 584 } 585 } 586 587 // ternary logic 588 protected Boolean or(Boolean left, Boolean right) { 589 if (TRUE.equals(left)) { 590 return left; 591 } else { 592 return right; 593 } 594 } 595 596 // ternary logic 597 protected Boolean eq(Object left, Object right) { 598 if (left == null || right == null) { 599 return null; 600 } 601 if (left instanceof Calendar && right instanceof Calendar) { 602 // avoid timezone issues (NXP-20260) 603 return ((Calendar) left).getTimeInMillis() == ((Calendar) right).getTimeInMillis(); 604 } 605 return left.equals(right); 606 } 607 608 // ternary logic 609 protected Boolean in(Object left, List<Object> right) { 610 if (left == null) { 611 return null; 612 } 613 boolean hasNull = false; 614 for (Object r : right) { 615 if (r == null) { 616 hasNull = true; 617 } else if (left.equals(r)) { 618 return TRUE; 619 } 620 } 621 return hasNull ? null : FALSE; 622 } 623 624 protected Integer cmp(Operand lvalue, Operand rvalue) { 625 Object left = walkOperand(lvalue); 626 Object right = walkOperand(rvalue); 627 return cmp(left, right); 628 } 629 630 // ternary logic 631 protected Integer cmp(Object left, Object right) { 632 if (left == null || right == null) { 633 return null; 634 } 635 if (!(left instanceof Comparable)) { 636 throw new QueryParseException("Not a comparable: " + left); 637 } 638 return ((Comparable<Object>) left).compareTo(right); 639 } 640 641 // ternary logic 642 protected Boolean like(Object left, String right, boolean caseInsensitive) { 643 if (left == null || right == null) { 644 return null; 645 } 646 if (!(left instanceof String)) { 647 throw new QueryParseException("Invalid LIKE lhs: " + left); 648 } 649 String value = (String) left; 650 if (caseInsensitive) { 651 value = value.toLowerCase(); 652 right = right.toLowerCase(); 653 } 654 String regex = likeToRegex(right); 655 boolean match = Pattern.matches(regex.toString(), value); 656 return match; 657 } 658 659 /** 660 * Turns a NXQL LIKE pattern into a regex. 661 * <p> 662 * % and _ are standard wildcards, and \ escapes them. 663 * 664 * @since 7.4 665 */ 666 public static String likeToRegex(String like) { 667 StringBuilder regex = new StringBuilder(); 668 char[] chars = like.toCharArray(); 669 boolean escape = false; 670 for (int i = 0; i < chars.length; i++) { 671 char c = chars[i]; 672 boolean escapeNext = false; 673 switch (c) { 674 case '%': 675 if (escape) { 676 regex.append(c); 677 } else { 678 regex.append(".*"); 679 } 680 break; 681 case '_': 682 if (escape) { 683 regex.append(c); 684 } else { 685 regex.append("."); 686 } 687 break; 688 case '\\': 689 if (escape) { 690 regex.append("\\\\"); // backslash escaped for regexp 691 } else { 692 escapeNext = true; 693 } 694 break; 695 default: 696 // escape mostly everything just in case 697 if (!CharUtils.isAsciiAlphanumeric(c)) { 698 regex.append("\\"); 699 } 700 regex.append(c); 701 break; 702 } 703 escape = escapeNext; 704 } 705 if (escape) { 706 // invalid string terminated by escape character, ignore 707 } 708 return regex.toString(); 709 } 710 711 // if list, use EXIST (SELECT 1 FROM left WHERE left.item = right) 712 protected Boolean eqMaybeList(Object left, Object right) { 713 if (left instanceof Object[]) { 714 for (Object l : ((Object[]) left)) { 715 Boolean eq = eq(l, right); 716 if (TRUE.equals(eq)) { 717 return TRUE; 718 } 719 } 720 return FALSE; 721 } else { 722 return eq(left, right); 723 } 724 } 725 726 // if list, use EXIST (SELECT 1 FROM left WHERE left.item IN right) 727 protected Boolean inMaybeList(Object left, List<Object> right) { 728 if (left instanceof Object[]) { 729 for (Object l : ((Object[]) left)) { 730 Boolean in = in(l, right); 731 if (TRUE.equals(in)) { 732 return TRUE; 733 } 734 } 735 return FALSE; 736 } else { 737 return in(left, right); 738 } 739 } 740 741 protected Boolean likeMaybeList(Object left, String right, boolean positive, boolean caseInsensitive) { 742 if (left instanceof Object[]) { 743 for (Object l : ((Object[]) left)) { 744 Boolean like = like(l, right, caseInsensitive); 745 if (TRUE.equals(like)) { 746 return Boolean.valueOf(positive); 747 } 748 } 749 return Boolean.valueOf(!positive); 750 } else { 751 Boolean like = like(left, right, caseInsensitive); 752 return positive ? like : not(like); 753 } 754 } 755 756 /** 757 * Matches the mixin types against a list of values. 758 * <p> 759 * Used for: 760 * <ul> 761 * <li>ecm:mixinTypes = 'foo' 762 * <li>ecm:mixinTypes != 'foo' 763 * <li>ecm:mixinTypes IN ('foo', 'bar') 764 * <li>ecm:mixinTypes NOT IN ('foo', 'bar') 765 * </ul> 766 * 767 * @param mixins the mixin(s) to match 768 * @param include {@code true} for = and IN 769 * @since 7.4 770 */ 771 public abstract Boolean walkMixinTypes(List<String> mixins, boolean include); 772 773 /* 774 * ----- simple parsing, don't try to be exhaustive ----- 775 */ 776 777 private static final Pattern WORD_PATTERN = Pattern.compile("[\\s\\p{Punct}]+"); 778 779 private static final String UNACCENTED = "aaaaaaaceeeeiiii\u00f0nooooo\u00f7ouuuuy\u00fey"; 780 781 private static final String STOP_WORDS_STR = "a an are and as at be by for from how " // 782 + "i in is it of on or that the this to was what when where who will with " // 783 + "car donc est il ils je la le les mais ni nous or ou pour tu un une vous " // 784 + "www com net org"; 785 786 private static final Set<String> STOP_WORDS = new HashSet<>(Arrays.asList(StringUtils.split(STOP_WORDS_STR, ' '))); 787 788 /** 789 * Checks if the fulltext combination of string1 and string2 matches the query expression. 790 */ 791 protected static Boolean fulltext(String string1, String string2, String queryString) { 792 if (queryString == null || (string1 == null && string2 == null)) { 793 return null; 794 } 795 // query 796 List<String> query = new ArrayList<>(); 797 String phrase = null; 798 int phraseWordCount = 1; 799 int maxPhraseWordCount = 1; // maximum number of words in a phrase 800 for (String word : StringUtils.split(queryString.toLowerCase(), ' ')) { 801 if (WORD_PATTERN.matcher(word).matches()) { 802 continue; 803 } 804 if (phrase != null) { 805 if (word.endsWith(PHRASE_QUOTE)) { 806 phrase += " " + word.substring(0, word.length() - 1); 807 query.add(phrase); 808 phraseWordCount++; 809 if (maxPhraseWordCount < phraseWordCount) { 810 maxPhraseWordCount = phraseWordCount; 811 } 812 phrase = null; 813 phraseWordCount = 1; 814 } else { 815 phrase += " " + word; 816 phraseWordCount++; 817 } 818 } else { 819 if (word.startsWith(PHRASE_QUOTE)) { 820 phrase = word.substring(1); 821 } else if (word.startsWith(NEG_PHRASE_QUOTE)) { 822 phrase = "-" + word.substring(2); 823 } else { 824 if (word.startsWith("+")) { 825 word = word.substring(1); 826 } 827 query.add(word); 828 } 829 } 830 } 831 if (query.isEmpty()) { 832 return FALSE; 833 } 834 // fulltext 835 Set<String> fulltext = new HashSet<>(); 836 fulltext.addAll(parseFullText(string1, maxPhraseWordCount)); 837 fulltext.addAll(parseFullText(string2, maxPhraseWordCount)); 838 839 return Boolean.valueOf(fulltext(fulltext, query)); 840 } 841 842 private static Set<String> parseFullText(String string, int phraseSize) { 843 if (string == null) { 844 return Collections.emptySet(); 845 } 846 Set<String> set = new HashSet<>(); 847 Deque<String> phraseWords = new LinkedList<>(); 848 for (String word : WORD_PATTERN.split(string)) { 849 word = parseWord(word); 850 if (word != null) { 851 word = word.toLowerCase(); 852 set.add(word); 853 if (phraseSize > 1) { 854 phraseWords.addLast(word); 855 if (phraseWords.size() > 1) { 856 if (phraseWords.size() > phraseSize) { 857 phraseWords.removeFirst(); 858 } 859 addPhraseWords(set, phraseWords); 860 } 861 } 862 } 863 } 864 while (phraseWords.size() > 2) { 865 phraseWords.removeFirst(); 866 addPhraseWords(set, phraseWords); 867 } 868 return set; 869 } 870 871 /** 872 * Adds to the set all the sub-phrases from the start of the phraseWords. 873 */ 874 private static void addPhraseWords(Set<String> set, Deque<String> phraseWords) { 875 String[] array = phraseWords.toArray(new String[0]); 876 for (int len = 2; len <= array.length; len++) { 877 String phrase = StringUtils.join(array, ' ', 0, len); 878 set.add(phrase); 879 } 880 } 881 882 private static String parseWord(String string) { 883 int len = string.length(); 884 if (len < 3) { 885 return null; 886 } 887 StringBuilder buf = new StringBuilder(len); 888 for (int i = 0; i < len; i++) { 889 char c = Character.toLowerCase(string.charAt(i)); 890 if (c == '\u00e6') { 891 buf.append("ae"); 892 } else if (c >= '\u00e0' && c <= '\u00ff') { 893 buf.append(UNACCENTED.charAt((c) - 0xe0)); 894 } else if (c == '\u0153') { 895 buf.append("oe"); 896 } else { 897 buf.append(c); 898 } 899 } 900 // simple heuristic to remove plurals 901 int l = buf.length(); 902 if (l > 3 && buf.charAt(l - 1) == 's') { 903 buf.setLength(l - 1); 904 } 905 String word = buf.toString(); 906 if (STOP_WORDS.contains(word)) { 907 return null; 908 } 909 return word; 910 } 911 912 // matches "foo OR bar baz" as "foo OR (bar AND baz)" 913 protected static boolean fulltext(Set<String> fulltext, List<String> query) { 914 boolean andMatch = true; 915 for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext();) { 916 String word = it.next(); 917 boolean match; 918 if (word.endsWith("*") || word.endsWith("%")) { 919 // prefix match 920 match = false; 921 String prefix = word.substring(0, word.length() - 2); 922 for (String candidate : fulltext) { 923 if (candidate.startsWith(prefix)) { 924 match = true; 925 break; 926 } 927 } 928 } else { 929 if (word.startsWith("-")) { 930 word = word.substring(1);// 931 match = !fulltext.contains(word); 932 } else { 933 match = fulltext.contains(word); 934 } 935 } 936 if (!match) { 937 andMatch = false; 938 } 939 if (it.hasNext() && it.peek().equals(OR)) { 940 // end of AND group 941 // swallow OR 942 it.next(); 943 // return if the previous AND group matched 944 if (andMatch) { 945 return true; 946 } 947 // else start next AND group 948 andMatch = true; 949 } 950 } 951 return andMatch; 952 } 953 954 // matches "foo OR bar baz" as "(foo OR bar) AND baz" 955 protected static boolean fulltext1(Set<String> fulltext, List<String> query) { 956 boolean inOr = false; // if we're in a OR group 957 boolean orMatch = false; // value of the OR group 958 for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext();) { 959 String word = it.next(); 960 if (it.hasNext() && it.peek().equals(OR)) { 961 inOr = true; 962 orMatch = false; 963 } 964 boolean match; 965 if (word.endsWith("*") || word.endsWith("%")) { 966 // prefix match 967 match = false; 968 String prefix = word.substring(0, word.length() - 2); 969 for (String candidate : fulltext) { 970 if (candidate.startsWith(prefix)) { 971 match = true; 972 break; 973 } 974 } 975 } else { 976 if (word.startsWith("-")) { 977 word = word.substring(1);// 978 match = !fulltext.contains(word); 979 } else { 980 match = fulltext.contains(word); 981 } 982 } 983 if (inOr) { 984 if (match) { 985 orMatch = true; 986 } 987 if (it.hasNext() && it.peek().equals(OR)) { 988 // swallow OR and keep going in OR group 989 it.next(); 990 continue; 991 } 992 // finish OR group 993 match = orMatch; 994 inOr = false; 995 } 996 if (!match) { 997 return false; 998 } 999 } 1000 if (inOr) { 1001 // trailing OR, ignore and finish previous group 1002 if (!orMatch) { 1003 return false; 1004 } 1005 } 1006 return true; 1007 } 1008 1009}