/*
 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.storage;

import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Deque;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.lang.CharUtils;
import org.apache.commons.lang.StringUtils;
import org.nuxeo.ecm.core.query.QueryParseException;
import org.nuxeo.ecm.core.query.sql.NXQL;
import org.nuxeo.ecm.core.query.sql.model.BooleanLiteral;
import org.nuxeo.ecm.core.query.sql.model.DateLiteral;
import org.nuxeo.ecm.core.query.sql.model.DoubleLiteral;
import org.nuxeo.ecm.core.query.sql.model.Expression;
import org.nuxeo.ecm.core.query.sql.model.Function;
import org.nuxeo.ecm.core.query.sql.model.IntegerLiteral;
import org.nuxeo.ecm.core.query.sql.model.Literal;
import org.nuxeo.ecm.core.query.sql.model.LiteralList;
import org.nuxeo.ecm.core.query.sql.model.MultiExpression;
import org.nuxeo.ecm.core.query.sql.model.Operand;
import org.nuxeo.ecm.core.query.sql.model.Operator;
import org.nuxeo.ecm.core.query.sql.model.Predicate;
import org.nuxeo.ecm.core.query.sql.model.Reference;
import org.nuxeo.ecm.core.query.sql.model.StringLiteral;

import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;

/**
 * Evaluator for an {@link Expression}.
 * <p>
 * Boolean results follow three-valued logic: {@code TRUE}, {@code FALSE}, or {@code null} for "unknown" (typically
 * when an operand evaluates to {@code null}).
 * <p>
 * Subclasses provide the actual data through {@link #walkReference} and {@link #walkMixinTypes}.
 *
 * @since 5.9.4
 */
public abstract class ExpressionEvaluator {

    /** pseudo NXQL to resolve ancestor ids. */
    public static final String NXQL_ECM_ANCESTOR_IDS = "ecm:__ancestorIds";

    /** pseudo NXQL to resolve internal path. */
    public static final String NXQL_ECM_PATH = "ecm:__path";

    /** pseudo NXQL to resolve read acls. */
    public static final String NXQL_ECM_READ_ACL = "ecm:__read_acl";

    /** pseudo NXQL to resolve the simple text part of the fulltext. */
    public static final String NXQL_ECM_FULLTEXT_SIMPLE = "ecm:__fulltextSimple";

    /** pseudo NXQL to resolve the binary text part of the fulltext. */
    public static final String NXQL_ECM_FULLTEXT_BINARY = "ecm:__fulltextBinary";

    /** Name of the cast that restricts a reference to date-only comparison. */
    protected static final String DATE_CAST = "DATE";

    /** Delimiter of a phrase in a fulltext query. */
    protected static final String PHRASE_QUOTE = "\"";

    /** Opening delimiter of a negated phrase in a fulltext query. */
    protected static final String NEG_PHRASE_QUOTE = "-\"";

    /** Fulltext query keyword separating alternatives (queries are lower-cased before parsing). */
    protected static final String OR = "or";

    /**
     * Interface for a class that knows how to resolve a path into an id.
     */
    public interface PathResolver {
        /**
         * Returns the id for a given path.
         *
         * @param path the path
         * @return the id, or {@code null} if not found
         */
        String getIdForPath(String path);
    }

    public final PathResolver pathResolver;

    /** The principals passed at construction, or {@code null} if none were passed. */
    public final Set<String> principals;

    public final boolean fulltextSearchDisabled;

    /** Set to {@code true} once a standard {@code ecm:fulltext} predicate has been evaluated. */
    public boolean hasFulltext;

    /**
     * Creates an evaluator.
     *
     * @param pathResolver the resolver used to turn paths into ids
     * @param principals the principals, may be {@code null}
     * @param fulltextSearchDisabled {@code true} if fulltext predicates must be rejected
     */
    public ExpressionEvaluator(PathResolver pathResolver, String[] principals, boolean fulltextSearchDisabled) {
        this.pathResolver = pathResolver;
        this.principals = principals == null ? null : new HashSet<>(Arrays.asList(principals));
        this.fulltextSearchDisabled = fulltextSearchDisabled;
    }

    /**
     * Evaluates an expression by dispatching on its operator and, for a few special properties, on the name of its
     * left-hand reference.
     *
     * @param expr the expression
     * @return the value of the expression
     * @throws QueryParseException if the operator is unknown or the expression is malformed
     * @throws UnsupportedOperationException for arithmetic operators
     */
    public Object walkExpression(Expression expr) {
        Operator op = expr.operator;
        Operand lvalue = expr.lvalue;
        Operand rvalue = expr.rvalue;
        Reference ref = lvalue instanceof Reference ? (Reference) lvalue : null;
        String name = ref == null ? null : ref.name;
        String cast = ref == null ? null : ref.cast;
        if (DATE_CAST.equals(cast)) {
            checkDateLiteralForCast(rvalue, name);
        }

        // STARTSWITH and the special properties take precedence over the generic operators
        if (op == Operator.STARTSWITH) {
            return walkStartsWith(lvalue, rvalue);
        }
        if (NXQL.ECM_PATH.equals(name)) {
            return walkEcmPath(op, rvalue);
        }
        if (NXQL.ECM_ANCESTORID.equals(name)) {
            return walkAncestorId(op, rvalue);
        }
        if (name != null && name.startsWith(NXQL.ECM_FULLTEXT) && !NXQL.ECM_FULLTEXT_JOBID.equals(name)) {
            return walkEcmFulltext(name, op, rvalue);
        }

        // arithmetic is not supported
        if (op == Operator.SUM) {
            throw new UnsupportedOperationException("SUM");
        }
        if (op == Operator.SUB) {
            throw new UnsupportedOperationException("SUB");
        }
        if (op == Operator.MUL) {
            throw new UnsupportedOperationException("MUL");
        }
        if (op == Operator.DIV) {
            throw new UnsupportedOperationException("DIV");
        }

        // comparisons
        if (op == Operator.LT) {
            return walkLt(lvalue, rvalue);
        }
        if (op == Operator.GT) {
            return walkGt(lvalue, rvalue);
        }
        if (op == Operator.EQ) {
            return walkEq(lvalue, rvalue);
        }
        if (op == Operator.NOTEQ) {
            return walkNotEq(lvalue, rvalue);
        }
        if (op == Operator.LTEQ) {
            return walkLtEq(lvalue, rvalue);
        }
        if (op == Operator.GTEQ) {
            return walkGtEq(lvalue, rvalue);
        }

        // boolean connectors
        if (op == Operator.AND) {
            if (expr instanceof MultiExpression) {
                return walkMultiExpression((MultiExpression) expr);
            }
            return walkAnd(lvalue, rvalue);
        }
        if (op == Operator.NOT) {
            return walkNot(lvalue);
        }
        if (op == Operator.OR) {
            return walkOr(lvalue, rvalue);
        }

        // pattern matching: (positive, caseInsensitive)
        if (op == Operator.LIKE) {
            return walkLike(lvalue, rvalue, true, false);
        }
        if (op == Operator.ILIKE) {
            return walkLike(lvalue, rvalue, true, true);
        }
        if (op == Operator.NOTLIKE) {
            return walkLike(lvalue, rvalue, false, false);
        }
        if (op == Operator.NOTILIKE) {
            return walkLike(lvalue, rvalue, false, true);
        }

        // sets, nulls and ranges
        if (op == Operator.IN) {
            return walkIn(lvalue, rvalue, true);
        }
        if (op == Operator.NOTIN) {
            return walkIn(lvalue, rvalue, false);
        }
        if (op == Operator.ISNULL) {
            return walkIsNull(lvalue);
        }
        if (op == Operator.ISNOTNULL) {
            return walkIsNotNull(lvalue);
        }
        if (op == Operator.BETWEEN) {
            return walkBetween(lvalue, rvalue, true);
        }
        if (op == Operator.NOTBETWEEN) {
            return walkBetween(lvalue, rvalue, false);
        }
        throw new QueryParseException("Unknown operator: " + op);
    }

    /**
     * Checks that a value compared to a DATE-cast reference is not a TIMESTAMP literal.
     *
     * @param value the right-hand operand
     * @param name the name of the cast reference, used in the error message
     * @throws QueryParseException if the value is a date literal carrying a time
     */
    protected void checkDateLiteralForCast(Operand value, String name) {
        if (value instanceof DateLiteral && !((DateLiteral) value).onlyDate) {
            throw new QueryParseException("DATE() cast must be used with DATE literal, not TIMESTAMP: " + name);
        }
    }

    /**
     * Evaluates {@code ecm:path = '...'} or {@code ecm:path <> '...'} by resolving the path to an id and comparing
     * it to the document's id.
     *
     * @return {@code FALSE} if the path does not resolve, otherwise the (possibly negated) id comparison
     * @throws QueryParseException if the operator or the right-hand operand is invalid
     */
    protected Boolean walkEcmPath(Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.NOTEQ) {
            throw new QueryParseException(NXQL.ECM_PATH + " requires = or <> operator");
        }
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(NXQL.ECM_PATH + " requires literal path as right argument");
        }
        String path = stripTrailingSlash(((StringLiteral) rvalue).value);
        String id = pathResolver.getIdForPath(path);
        // walk the reference even if the path is unknown, so that references are walked deterministically
        Object right = walkReference(new Reference(NXQL.ECM_UUID));
        if (id == null) {
            return FALSE;
        }
        Boolean eq = eq(id, right);
        return op == Operator.EQ ? eq : not(eq);
    }

    /**
     * Evaluates {@code ecm:ancestorId = '...'} or {@code ecm:ancestorId <> '...'} against the document's ancestor
     * ids. A document without ancestor ids (placeless) has no ancestor at all.
     *
     * @throws QueryParseException if the operator or the right-hand operand is invalid
     */
    protected Boolean walkAncestorId(Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.NOTEQ) {
            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires = or <> operator");
        }
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires literal id as right argument");
        }
        String ancestorId = ((StringLiteral) rvalue).value;
        Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS));
        boolean positive = op == Operator.EQ;
        boolean found = containsId(ancestorIds, ancestorId);
        return Boolean.valueOf(found == positive);
    }

    /**
     * Evaluates a fulltext predicate, either on the standard {@code ecm:fulltext} index or on a secondary
     * {@code ecm:fulltext.prop} index (done as a case-insensitive LIKE on the property).
     *
     * @throws QueryParseException if the operator, the operand or the name is invalid, or if fulltext search is
     *             disabled
     */
    protected Boolean walkEcmFulltext(String name, Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.LIKE) {
            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires = or LIKE operator");
        }
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires literal string as right argument");
        }
        if (fulltextSearchDisabled) {
            throw new QueryParseException("Fulltext search disabled by configuration");
        }
        String query = ((StringLiteral) rvalue).value;
        if (name.equals(NXQL.ECM_FULLTEXT)) {
            // standard fulltext query
            hasFulltext = true;
            String simple = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_SIMPLE));
            String binary = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_BINARY));
            return fulltext(simple, binary, query);
        }
        // secondary index match with explicit field: do a LIKE on the field
        // (the caller guarantees that name starts with ecm:fulltext and is longer than it)
        if (name.charAt(NXQL.ECM_FULLTEXT.length()) != '.') {
            throw new QueryParseException(name + " has incorrect syntax for a secondary fulltext index");
        }
        String prop = name.substring(NXQL.ECM_FULLTEXT.length() + 1);
        String ft = query.replace(" ", "%");
        return walkLike(new Reference(prop), new StringLiteral(ft), true, true);
    }

    /** Evaluates the negation of a boolean operand (ternary logic). */
    public Boolean walkNot(Operand value) {
        return not(bool(walkOperand(value)));
    }

    /** Returns whether the operand evaluates to {@code null}. */
    public Boolean walkIsNull(Operand value) {
        return Boolean.valueOf(walkOperand(value) == null);
    }

    /** Returns whether the operand evaluates to a non-{@code null} value. */
    public Boolean walkIsNotNull(Operand value) {
        return Boolean.valueOf(walkOperand(value) != null);
    }

    /**
     * Evaluates the conjunction of all the values of a multi-expression (ternary logic).
     */
    public Boolean walkMultiExpression(MultiExpression expr) {
        Boolean res = TRUE;
        for (Operand value : expr.values) {
            // don't short-circuit, we want to walk all references deterministically
            res = and(res, bool(walkOperand(value)));
        }
        return res;
    }

    /** Evaluates {@code lvalue AND rvalue} (ternary logic); both sides are always walked. */
    public Boolean walkAnd(Operand lvalue, Operand rvalue) {
        Boolean left = bool(walkOperand(lvalue));
        Boolean right = bool(walkOperand(rvalue));
        return and(left, right);
    }

    /** Evaluates {@code lvalue OR rvalue} (ternary logic); both sides are always walked. */
    public Boolean walkOr(Operand lvalue, Operand rvalue) {
        Boolean left = bool(walkOperand(lvalue));
        Boolean right = bool(walkOperand(rvalue));
        return or(left, right);
    }

    /**
     * Evaluates {@code lvalue = rvalue}. Mixin types are delegated to {@link #walkMixinTypes}; array-valued left
     * sides match if any of their elements matches.
     *
     * @throws QueryParseException if mixin types are compared to a non-string
     */
    public Boolean walkEq(Operand lvalue, Operand rvalue) {
        Object right = walkOperand(rvalue);
        if (isMixinTypes(lvalue)) {
            if (!(right instanceof String)) {
                throw new QueryParseException("Invalid EQ rhs: " + rvalue);
            }
            return walkMixinTypes(Collections.singletonList((String) right), true);
        }
        Object left = walkOperand(lvalue);
        return eqMaybeList(left, right);
    }

    /**
     * Evaluates {@code lvalue <> rvalue}, as the negation of {@link #walkEq} except for mixin types which are
     * delegated to {@link #walkMixinTypes}.
     *
     * @throws QueryParseException if mixin types are compared to a non-string
     */
    public Boolean walkNotEq(Operand lvalue, Operand rvalue) {
        if (isMixinTypes(lvalue)) {
            Object right = walkOperand(rvalue);
            if (!(right instanceof String)) {
                throw new QueryParseException("Invalid NE rhs: " + rvalue);
            }
            return walkMixinTypes(Collections.singletonList((String) right), false);
        }
        return not(walkEq(lvalue, rvalue));
    }

    /** Evaluates {@code lvalue < rvalue}; {@code null} if either side is {@code null}. */
    public Boolean walkLt(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : Boolean.valueOf(cmp.intValue() < 0);
    }

    /** Evaluates {@code lvalue > rvalue}; {@code null} if either side is {@code null}. */
    public Boolean walkGt(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : Boolean.valueOf(cmp.intValue() > 0);
    }

    /** Evaluates {@code lvalue <= rvalue}; {@code null} if either side is {@code null}. */
    public Boolean walkLtEq(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : Boolean.valueOf(cmp.intValue() <= 0);
    }

    /** Evaluates {@code lvalue >= rvalue}; {@code null} if either side is {@code null}. */
    public Boolean walkGtEq(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : Boolean.valueOf(cmp.intValue() >= 0);
    }

    /**
     * Evaluates {@code lvalue [NOT] BETWEEN a AND b} by rewriting it as {@code [NOT] (lvalue >= a AND lvalue <= b)}.
     *
     * @param rvalue the two bounds, as a {@link LiteralList}
     * @param positive {@code true} for BETWEEN, {@code false} for NOT BETWEEN
     * @throws QueryParseException if the right-hand side is not a list of exactly two literals
     */
    public Object walkBetween(Operand lvalue, Operand rvalue, boolean positive) {
        if (!(rvalue instanceof LiteralList) || ((LiteralList) rvalue).size() != 2) {
            throw new QueryParseException("Invalid BETWEEN rhs: " + rvalue);
        }
        LiteralList bounds = (LiteralList) rvalue;
        Predicate lower = new Predicate(lvalue, Operator.GTEQ, bounds.get(0));
        Predicate upper = new Predicate(lvalue, Operator.LTEQ, bounds.get(1));
        Predicate pred = new Predicate(lower, Operator.AND, upper);
        if (!positive) {
            pred = new Predicate(pred, Operator.NOT, null);
        }
        return walkExpression(pred);
    }

    /**
     * Evaluates {@code lvalue [NOT] IN (...)}. Mixin types are delegated to {@link #walkMixinTypes}.
     *
     * @param positive {@code true} for IN, {@code false} for NOT IN
     * @throws QueryParseException if the right-hand side does not evaluate to a list
     */
    @SuppressWarnings("unchecked") // the instanceof check guarantees a List, element types are trusted
    public Boolean walkIn(Operand lvalue, Operand rvalue, boolean positive) {
        Object right = walkOperand(rvalue);
        if (!(right instanceof List)) {
            throw new QueryParseException("Invalid IN rhs: " + rvalue);
        }
        if (isMixinTypes(lvalue)) {
            return walkMixinTypes((List<String>) right, positive);
        }
        Object left = walkOperand(lvalue);
        Boolean in = inMaybeList(left, (List<Object>) right);
        return positive ? in : not(in);
    }

    /**
     * Evaluates an operand by dispatching on its type.
     *
     * @throws QueryParseException if the operand type is unknown
     */
    public Object walkOperand(Operand op) {
        if (op instanceof Literal) {
            return walkLiteral((Literal) op);
        }
        if (op instanceof LiteralList) {
            return walkLiteralList((LiteralList) op);
        }
        if (op instanceof Function) {
            return walkFunction((Function) op);
        }
        if (op instanceof Expression) {
            return walkExpression((Expression) op);
        }
        if (op instanceof Reference) {
            return walkReference((Reference) op);
        }
        throw new QueryParseException("Unknown operand: " + op);
    }

    /**
     * Evaluates a literal by dispatching on its type.
     *
     * @throws QueryParseException if the literal type is unknown
     */
    public Object walkLiteral(Literal lit) {
        if (lit instanceof BooleanLiteral) {
            return walkBooleanLiteral((BooleanLiteral) lit);
        }
        if (lit instanceof DateLiteral) {
            return walkDateLiteral((DateLiteral) lit);
        }
        if (lit instanceof DoubleLiteral) {
            return walkDoubleLiteral((DoubleLiteral) lit);
        }
        if (lit instanceof IntegerLiteral) {
            return walkIntegerLiteral((IntegerLiteral) lit);
        }
        if (lit instanceof StringLiteral) {
            return walkStringLiteral((StringLiteral) lit);
        }
        throw new QueryParseException("Unknown literal: " + lit);
    }

    public Boolean walkBooleanLiteral(BooleanLiteral lit) {
        return Boolean.valueOf(lit.value);
    }

    /**
     * Converts a date literal to a calendar. For a date-only literal the time fields are reset to midnight.
     */
    public Calendar walkDateLiteral(DateLiteral lit) {
        Calendar date = lit.toCalendar();
        if (lit.onlyDate && date != null) {
            date.set(Calendar.HOUR_OF_DAY, 0);
            date.set(Calendar.MINUTE, 0);
            date.set(Calendar.SECOND, 0);
            date.set(Calendar.MILLISECOND, 0);
        }
        return date;
    }

    public Double walkDoubleLiteral(DoubleLiteral lit) {
        return Double.valueOf(lit.value);
    }

    public Long walkIntegerLiteral(IntegerLiteral lit) {
        return Long.valueOf(lit.value);
    }

    public String walkStringLiteral(StringLiteral lit) {
        return lit.value;
    }

    /** Evaluates each literal of the list, in order. */
    public List<Object> walkLiteralList(LiteralList litList) {
        List<Object> list = new ArrayList<>(litList.size());
        for (Literal lit : litList) {
            list.add(walkLiteral(lit));
        }
        return list;
    }

    /**
     * Evaluates {@code lvalue [NOT] [I]LIKE rvalue}.
     *
     * @param positive {@code false} for the NOT variants
     * @param caseInsensitive {@code true} for the ILIKE variants
     * @throws QueryParseException if the right-hand side does not evaluate to a string
     */
    public Boolean walkLike(Operand lvalue, Operand rvalue, boolean positive, boolean caseInsensitive) {
        Object left = walkOperand(lvalue);
        Object right = walkOperand(rvalue);
        if (!(right instanceof String)) {
            throw new QueryParseException("Invalid LIKE rhs: " + rvalue);
        }
        return likeMaybeList(left, (String) right, positive, caseInsensitive);
    }

    /**
     * Functions are not supported by this evaluator.
     *
     * @throws UnsupportedOperationException always
     */
    public Object walkFunction(Function func) {
        throw new UnsupportedOperationException("Function");
    }

    /**
     * Evaluates {@code lvalue STARTSWITH 'path'}, either on {@code ecm:path} (ancestor match) or on a regular
     * property (exact match or {@code path/} prefix match).
     *
     * @throws QueryParseException if the left side is not a reference or the right side is not a string literal
     */
    public Boolean walkStartsWith(Operand lvalue, Operand rvalue) {
        if (!(lvalue instanceof Reference)) {
            throw new QueryParseException("Invalid STARTSWITH query, left hand side must be a property: " + lvalue);
        }
        String name = ((Reference) lvalue).name;
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(
                    "Invalid STARTSWITH query, right hand side must be a literal path: " + rvalue);
        }
        String path = stripTrailingSlash(((StringLiteral) rvalue).value);
        if (NXQL.ECM_PATH.equals(name)) {
            return walkStartsWithPath(path);
        }
        return walkStartsWithNonPath(lvalue, path);
    }

    /**
     * Checks whether the document has the document at the given path among its ancestors.
     *
     * @return {@code FALSE} if the path does not resolve or if the document is placeless
     */
    protected Boolean walkStartsWithPath(String path) {
        // resolve path
        String ancestorId = pathResolver.getIdForPath(path);
        // don't return early on null ancestorId, we want to walk all references deterministically
        Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS));
        if (ancestorId == null) {
            // no such path
            return FALSE;
        }
        return Boolean.valueOf(containsId(ancestorIds, ancestorId));
    }

    /**
     * Checks whether a regular property is equal to the path or starts with the path followed by a slash.
     * <p>
     * LIKE wildcards occurring in the path are escaped so that they only match themselves.
     */
    protected Boolean walkStartsWithNonPath(Operand lvalue, String path) {
        Object left = walkReference((Reference) lvalue);
        // exact match
        Boolean bool = eqMaybeList(left, path);
        if (TRUE.equals(bool)) {
            return TRUE;
        }
        // prefix match
        String pattern = escapeLike(path) + "/%";
        return likeMaybeList(left, pattern, true, false);
    }

    /**
     * Evaluates a reference over the context state.
     *
     * @param ref the reference
     */
    public abstract Object walkReference(Reference ref);

    /** Returns whether the operand is a reference to {@code ecm:mixinType}. */
    protected boolean isMixinTypes(Operand op) {
        return op instanceof Reference && ((Reference) op).name.equals(NXQL.ECM_MIXINTYPE);
    }

    /**
     * Casts a value to a boolean, keeping {@code null} as is.
     *
     * @throws QueryParseException if the value is neither {@code null} nor a boolean
     */
    protected Boolean bool(Object value) {
        if (value == null) {
            return null;
        }
        if (!(value instanceof Boolean)) {
            throw new QueryParseException("Not a boolean: " + value);
        }
        return (Boolean) value;
    }

    /** Ternary logic NOT: {@code null} stays {@code null}. */
    protected Boolean not(Boolean value) {
        if (value == null) {
            return null;
        }
        return Boolean.valueOf(!value.booleanValue());
    }

    /**
     * Ternary logic AND: {@code FALSE} if either side is {@code FALSE}, otherwise {@code null} if either side is
     * {@code null}, otherwise {@code TRUE}.
     */
    protected Boolean and(Boolean left, Boolean right) {
        if (FALSE.equals(left) || FALSE.equals(right)) {
            // a known FALSE decides the result whatever the other side is
            return FALSE;
        }
        if (left == null || right == null) {
            return null;
        }
        return TRUE;
    }

    /**
     * Ternary logic OR: {@code TRUE} if either side is {@code TRUE}, otherwise {@code null} if either side is
     * {@code null}, otherwise {@code FALSE}.
     */
    protected Boolean or(Boolean left, Boolean right) {
        if (TRUE.equals(left) || TRUE.equals(right)) {
            // a known TRUE decides the result whatever the other side is
            return TRUE;
        }
        if (left == null || right == null) {
            return null;
        }
        return FALSE;
    }

    /** Ternary logic equality: {@code null} if either side is {@code null}. */
    protected Boolean eq(Object left, Object right) {
        if (left == null || right == null) {
            return null;
        }
        return Boolean.valueOf(left.equals(right));
    }

    /**
     * Ternary logic IN: {@code null} if the left side is {@code null}, or if there is no match but the list
     * contains a {@code null}.
     */
    protected Boolean in(Object left, List<Object> right) {
        if (left == null) {
            return null;
        }
        boolean hasNull = false;
        for (Object r : right) {
            if (r == null) {
                hasNull = true;
            } else if (left.equals(r)) {
                return TRUE;
            }
        }
        return hasNull ? null : FALSE;
    }

    /** Evaluates both operands (left first) and compares them. */
    protected Integer cmp(Operand lvalue, Operand rvalue) {
        Object left = walkOperand(lvalue);
        Object right = walkOperand(rvalue);
        return cmp(left, right);
    }

    /**
     * Ternary logic comparison.
     *
     * @return the result of {@code left.compareTo(right)}, or {@code null} if either side is {@code null}
     * @throws QueryParseException if the left side is not a {@link Comparable}
     */
    @SuppressWarnings("unchecked") // the instanceof check guarantees a Comparable
    protected Integer cmp(Object left, Object right) {
        if (left == null || right == null) {
            return null;
        }
        if (!(left instanceof Comparable)) {
            throw new QueryParseException("Not a comparable: " + left);
        }
        return Integer.valueOf(((Comparable<Object>) left).compareTo(right));
    }

    /**
     * Ternary logic LIKE.
     *
     * @param left the value to match, a string or {@code null}
     * @param right the LIKE pattern
     * @param caseInsensitive {@code true} to lower-case both sides before matching
     * @return {@code null} if either side is {@code null}, otherwise whether the whole value matches the pattern
     * @throws QueryParseException if the left side is not a string
     */
    protected Boolean like(Object left, String right, boolean caseInsensitive) {
        if (left == null || right == null) {
            return null;
        }
        if (!(left instanceof String)) {
            throw new QueryParseException("Invalid LIKE lhs: " + left);
        }
        String value = (String) left;
        String pattern = right;
        if (caseInsensitive) {
            value = value.toLowerCase();
            pattern = pattern.toLowerCase();
        }
        return Boolean.valueOf(Pattern.matches(likeToRegex(pattern), value));
    }

    /**
     * Turns a NXQL LIKE pattern into a regex.
     * <p>
     * % and _ are standard wildcards, and \ escapes them.
     *
     * @since 7.4
     */
    public static String likeToRegex(String like) {
        StringBuilder regex = new StringBuilder();
        boolean escape = false; // whether the previous char was an unescaped backslash
        for (int i = 0; i < like.length(); i++) {
            char c = like.charAt(i);
            boolean escapeNext = false;
            switch (c) {
            case '%':
                if (escape) {
                    regex.append(c);
                } else {
                    regex.append(".*");
                }
                break;
            case '_':
                if (escape) {
                    regex.append(c);
                } else {
                    regex.append(".");
                }
                break;
            case '\\':
                if (escape) {
                    regex.append("\\\\"); // backslash escaped for regexp
                } else {
                    escapeNext = true;
                }
                break;
            default:
                // escape mostly everything just in case
                if (!CharUtils.isAsciiAlphanumeric(c)) {
                    regex.append("\\");
                }
                regex.append(c);
                break;
            }
            escape = escapeNext;
        }
        // a pattern terminated by a dangling escape character is invalid, the escape is ignored
        return regex.toString();
    }

    /**
     * Equality where the left side may be an array, in which case any matching element is enough.
     * <p>
     * If list, use EXIST (SELECT 1 FROM left WHERE left.item = right)
     */
    protected Boolean eqMaybeList(Object left, Object right) {
        if (!(left instanceof Object[])) {
            return eq(left, right);
        }
        for (Object l : (Object[]) left) {
            if (TRUE.equals(eq(l, right))) {
                return TRUE;
            }
        }
        return FALSE;
    }

    /**
     * IN where the left side may be an array, in which case any matching element is enough.
     * <p>
     * If list, use EXIST (SELECT 1 FROM left WHERE left.item IN right)
     */
    protected Boolean inMaybeList(Object left, List<Object> right) {
        if (!(left instanceof Object[])) {
            return in(left, right);
        }
        for (Object l : (Object[]) left) {
            if (TRUE.equals(in(l, right))) {
                return TRUE;
            }
        }
        return FALSE;
    }

    /**
     * LIKE where the left side may be an array, in which case any matching element is enough.
     *
     * @param positive {@code false} to negate the result
     */
    protected Boolean likeMaybeList(Object left, String right, boolean positive, boolean caseInsensitive) {
        if (!(left instanceof Object[])) {
            Boolean like = like(left, right, caseInsensitive);
            return positive ? like : not(like);
        }
        for (Object l : (Object[]) left) {
            if (TRUE.equals(like(l, right, caseInsensitive))) {
                return Boolean.valueOf(positive);
            }
        }
        return Boolean.valueOf(!positive);
    }

    /**
     * Matches the mixin types against a list of values.
     * <p>
     * Used for:
     * <ul>
     * <li>ecm:mixinTypes = 'foo'
     * <li>ecm:mixinTypes != 'foo'
     * <li>ecm:mixinTypes IN ('foo', 'bar')
     * <li>ecm:mixinTypes NOT IN ('foo', 'bar')
     * </ul>
     *
     * @param mixins the mixin(s) to match
     * @param include {@code true} for = and IN
     * @since 7.4
     */
    public abstract Boolean walkMixinTypes(List<String> mixins, boolean include);

    /** Removes the trailing slash of a path, except for the root path. */
    private static String stripTrailingSlash(String path) {
        if (path.length() > 1 && path.endsWith("/")) {
            return path.substring(0, path.length() - 1);
        }
        return path;
    }

    /** Returns whether the array is non-null and contains an element equal to the id. */
    private static boolean containsId(Object[] ids, String id) {
        if (ids == null) {
            // placeless
            return false;
        }
        for (Object candidate : ids) {
            if (id.equals(candidate)) {
                return true;
            }
        }
        return false;
    }

    /** Escapes the LIKE special characters (%, _ and \) of a string so that they match literally. */
    private static String escapeLike(String string) {
        StringBuilder buf = new StringBuilder(string.length() + 2);
        for (int i = 0; i < string.length(); i++) {
            char c = string.charAt(i);
            if (c == '\\' || c == '%' || c == '_') {
                buf.append('\\');
            }
            buf.append(c);
        }
        return buf.toString();
    }

    /*
     * ----- simple parsing, don't try to be exhaustive -----
     */

    /** Matches a run of whitespace and punctuation; used both as a word separator and to skip non-words. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[\\s\\p{Punct}]+");

    /** Replacement for each char from U+00E0 to U+00FF, indexed by the char's offset from U+00E0. */
    private static final String UNACCENTED = "aaaaaaaceeeeiiii\u00f0nooooo\u00f7ouuuuy\u00fey";

    private static final String STOP_WORDS_STR = "a an are and as at be by for from how " //
            + "i in is it of on or that the this to was what when where who will with " //
            + "car donc est il ils je la le les mais ni nous or ou pour tu un une vous " //
            + "www com net org";

    private static final Set<String> STOP_WORDS = new HashSet<>(Arrays.asList(StringUtils.split(STOP_WORDS_STR, ' ')));

    /**
     * Checks if the fulltext combination of string1 and string2 matches the query expression.
     * <p>
     * The query is lower-cased and split on spaces. Words made only of whitespace/punctuation are skipped, a
     * leading {@code +} is dropped, and words between double quotes (optionally preceded by {@code -}) are grouped
     * into a phrase. A phrase left open at the end of the query is closed implicitly.
     *
     * @return {@code null} if the query is {@code null} or if both strings are {@code null}, {@code FALSE} if the
     *         query contains no usable term, otherwise whether the fulltext matches
     */
    protected static Boolean fulltext(String string1, String string2, String queryString) {
        if (queryString == null || (string1 == null && string2 == null)) {
            return null;
        }
        // query
        List<String> query = new ArrayList<>();
        StringBuilder phrase = null; // non-null while inside a quoted phrase
        int phraseWordCount = 0;
        int maxPhraseWordCount = 1; // maximum number of words in a phrase
        for (String word : StringUtils.split(queryString.toLowerCase(), ' ')) {
            if (WORD_PATTERN.matcher(word).matches()) {
                continue;
            }
            if (phrase == null) {
                if (word.startsWith(PHRASE_QUOTE)) {
                    word = word.substring(1);
                } else if (word.startsWith(NEG_PHRASE_QUOTE)) {
                    word = "-" + word.substring(2);
                } else {
                    query.add(word.startsWith("+") ? word.substring(1) : word);
                    continue;
                }
                // start of a phrase
                phrase = new StringBuilder();
                phraseWordCount = 0;
            } else {
                phrase.append(' ');
            }
            phraseWordCount++;
            if (word.endsWith(PHRASE_QUOTE)) {
                // end of the phrase, possibly on the very word that started it
                phrase.append(word, 0, word.length() - 1);
                query.add(phrase.toString());
                maxPhraseWordCount = Math.max(maxPhraseWordCount, phraseWordCount);
                phrase = null;
            } else {
                phrase.append(word);
            }
        }
        if (phrase != null) {
            // unterminated phrase, keep its words instead of silently dropping them
            query.add(phrase.toString());
            maxPhraseWordCount = Math.max(maxPhraseWordCount, phraseWordCount);
        }
        if (query.isEmpty()) {
            return FALSE;
        }
        // fulltext
        Set<String> fulltext = new HashSet<>();
        fulltext.addAll(parseFullText(string1, maxPhraseWordCount));
        fulltext.addAll(parseFullText(string2, maxPhraseWordCount));

        return Boolean.valueOf(fulltext(fulltext, query));
    }

    /**
     * Extracts from a text the set of its normalized words and, if phraseSize is greater than 1, of the phrases
     * made of consecutive normalized words.
     *
     * @param string the text, may be {@code null}
     * @param phraseSize the maximum size of the sliding window of words used to build phrases
     */
    private static Set<String> parseFullText(String string, int phraseSize) {
        if (string == null) {
            return Collections.emptySet();
        }
        Set<String> set = new HashSet<>();
        Deque<String> phraseWords = new LinkedList<>();
        for (String rawWord : WORD_PATTERN.split(string)) {
            String word = parseWord(rawWord);
            if (word == null) {
                continue;
            }
            word = word.toLowerCase();
            set.add(word);
            if (phraseSize <= 1) {
                continue;
            }
            // slide the window of the last phraseSize words
            phraseWords.addLast(word);
            if (phraseWords.size() > 1) {
                if (phraseWords.size() > phraseSize) {
                    phraseWords.removeFirst();
                }
                addPhraseWords(set, phraseWords);
            }
        }
        // add the shorter phrases found at the end of the text
        while (phraseWords.size() > 2) {
            phraseWords.removeFirst();
            addPhraseWords(set, phraseWords);
        }
        return set;
    }

    /**
     * Adds to the set all the sub-phrases from the start of the phraseWords.
     */
    private static void addPhraseWords(Set<String> set, Deque<String> phraseWords) {
        String[] array = phraseWords.toArray(new String[0]);
        for (int len = 2; len <= array.length; len++) {
            set.add(StringUtils.join(array, ' ', 0, len));
        }
    }

    /**
     * Normalizes a word of the text: lower-cases it, replaces the characters from U+00E0 to U+00FF and the oe
     * ligature using simple tables, and removes a final {@code s}.
     *
     * @return the normalized word, or {@code null} if the word is shorter than 3 characters or is a stop word
     */
    private static String parseWord(String string) {
        int len = string.length();
        if (len < 3) {
            return null;
        }
        StringBuilder buf = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            char c = Character.toLowerCase(string.charAt(i));
            if (c == '\u00e6') {
                buf.append("ae");
            } else if (c >= '\u00e0' && c <= '\u00ff') {
                buf.append(UNACCENTED.charAt(c - 0xe0));
            } else if (c == '\u0153') {
                buf.append("oe");
            } else {
                buf.append(c);
            }
        }
        // simple heuristic to remove plurals
        int l = buf.length();
        if (l > 3 && buf.charAt(l - 1) == 's') {
            buf.setLength(l - 1);
        }
        String word = buf.toString();
        if (STOP_WORDS.contains(word)) {
            return null;
        }
        return word;
    }

    /**
     * Checks whether a single query term matches the fulltext.
     * <p>
     * A term ending with {@code *} or {@code %} is a prefix match, a term starting with {@code -} matches if the
     * rest of the term is absent, any other term must be present.
     */
    private static boolean matchesTerm(Set<String> fulltext, String term) {
        if (term.endsWith("*") || term.endsWith("%")) {
            // prefix match: only the trailing wildcard is removed
            String prefix = term.substring(0, term.length() - 1);
            for (String candidate : fulltext) {
                if (candidate.startsWith(prefix)) {
                    return true;
                }
            }
            return false;
        }
        if (term.startsWith("-")) {
            return !fulltext.contains(term.substring(1));
        }
        return fulltext.contains(term);
    }

    /**
     * Checks whether the fulltext matches the query terms.
     * <p>
     * Matches "foo OR bar baz" as "foo OR (bar AND baz)". A trailing OR is ignored.
     *
     * @param fulltext the words and phrases of the text
     * @param query the query terms, in order, including the {@code or} keywords
     */
    protected static boolean fulltext(Set<String> fulltext, List<String> query) {
        boolean andMatch = true;
        for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext();) {
            if (!matchesTerm(fulltext, it.next())) {
                andMatch = false;
            }
            if (it.hasNext() && it.peek().equals(OR)) {
                // end of AND group
                // swallow OR
                it.next();
                // return if the previous AND group matched
                if (andMatch) {
                    return true;
                }
                if (!it.hasNext()) {
                    // trailing OR, there is no other group that could match
                    return false;
                }
                // else start next AND group
                andMatch = true;
            }
        }
        return andMatch;
    }

    /**
     * Checks whether the fulltext matches the query terms.
     * <p>
     * Matches "foo OR bar baz" as "(foo OR bar) AND baz". A trailing OR is ignored.
     *
     * @param fulltext the words and phrases of the text
     * @param query the query terms, in order, including the {@code or} keywords
     */
    protected static boolean fulltext1(Set<String> fulltext, List<String> query) {
        boolean inOr = false; // if we're in a OR group
        boolean orMatch = false; // value of the OR group
        for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext();) {
            String word = it.next();
            boolean followedByOr = it.hasNext() && it.peek().equals(OR);
            if (followedByOr && !inOr) {
                // start a new OR group; an already started group must keep its value
                inOr = true;
                orMatch = false;
            }
            boolean match = matchesTerm(fulltext, word);
            if (inOr) {
                if (match) {
                    orMatch = true;
                }
                if (followedByOr) {
                    // swallow OR and keep going in OR group
                    it.next();
                    continue;
                }
                // finish OR group
                match = orMatch;
                inOr = false;
            }
            if (!match) {
                return false;
            }
        }
        // on a trailing OR, the previous group is finished here
        return !inOr || orMatch;
    }

}