001/* 002 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019package org.nuxeo.ecm.core.storage; 020 021import static java.lang.Boolean.FALSE; 022import static java.lang.Boolean.TRUE; 023 024import java.util.ArrayList; 025import java.util.Arrays; 026import java.util.Calendar; 027import java.util.Collections; 028import java.util.HashSet; 029import java.util.List; 030import java.util.Set; 031import java.util.regex.Pattern; 032 033import org.apache.commons.lang.CharUtils; 034import org.apache.commons.lang.StringUtils; 035import org.nuxeo.ecm.core.query.QueryParseException; 036import org.nuxeo.ecm.core.query.sql.NXQL; 037import org.nuxeo.ecm.core.query.sql.model.BooleanLiteral; 038import org.nuxeo.ecm.core.query.sql.model.DateLiteral; 039import org.nuxeo.ecm.core.query.sql.model.DoubleLiteral; 040import org.nuxeo.ecm.core.query.sql.model.Expression; 041import org.nuxeo.ecm.core.query.sql.model.Function; 042import org.nuxeo.ecm.core.query.sql.model.IntegerLiteral; 043import org.nuxeo.ecm.core.query.sql.model.Literal; 044import org.nuxeo.ecm.core.query.sql.model.LiteralList; 045import org.nuxeo.ecm.core.query.sql.model.MultiExpression; 046import org.nuxeo.ecm.core.query.sql.model.Operand; 047import org.nuxeo.ecm.core.query.sql.model.Operator; 048import org.nuxeo.ecm.core.query.sql.model.Predicate; 049import org.nuxeo.ecm.core.query.sql.model.Reference; 050import org.nuxeo.ecm.core.query.sql.model.StringLiteral; 051 052import com.google.common.collect.Iterators; 053import com.google.common.collect.PeekingIterator; 054 055/** 056 * Evaluator for an {@link Expression}. 057 * 058 * @since 5.9.4 059 */ 060public abstract class ExpressionEvaluator { 061 062 /** pseudo NXQL to resolve ancestor ids. */ 063 public static final String NXQL_ECM_ANCESTOR_IDS = "ecm:__ancestorIds"; 064 065 /** pseudo NXQL to resolve internal path. */ 066 public static final String NXQL_ECM_PATH = "ecm:__path"; 067 068 /** pseudo NXQL to resolve read acls. */ 069 public static final String NXQL_ECM_READ_ACL = "ecm:__read_acl"; 070 071 public static final String NXQL_ECM_FULLTEXT_SIMPLE = "ecm:__fulltextSimple"; 072 073 public static final String NXQL_ECM_FULLTEXT_BINARY = "ecm:__fulltextBinary"; 074 075 protected static final String DATE_CAST = "DATE"; 076 077 protected static final String PHRASE_QUOTE = "\""; 078 079 protected static final String OR = "or"; 080 081 /** 082 * Interface for a class that knows how to resolve a path into an id. 083 */ 084 public interface PathResolver { 085 /** 086 * Returns the id for a given path. 087 * 088 * @param path the path 089 * @return the id, or {@code null} if not found 090 */ 091 String getIdForPath(String path); 092 } 093 094 public final PathResolver pathResolver; 095 096 public final Set<String> principals; 097 098 public final boolean fulltextDisabled; 099 100 public ExpressionEvaluator(PathResolver pathResolver, String[] principals, boolean fulltextDisabled) { 101 this.pathResolver = pathResolver; 102 this.principals = principals == null ? null : new HashSet<String>(Arrays.asList(principals)); 103 this.fulltextDisabled = fulltextDisabled; 104 } 105 106 public Object walkExpression(Expression expr) { 107 Operator op = expr.operator; 108 Operand lvalue = expr.lvalue; 109 Operand rvalue = expr.rvalue; 110 Reference ref = lvalue instanceof Reference ? (Reference) lvalue : null; 111 String name = ref != null ? ref.name : null; 112 String cast = ref != null ? ref.cast : null; 113 if (DATE_CAST.equals(cast)) { 114 checkDateLiteralForCast(rvalue, name); 115 } 116 if (op == Operator.STARTSWITH) { 117 return walkStartsWith(lvalue, rvalue); 118 } else if (NXQL.ECM_PATH.equals(name)) { 119 return walkEcmPath(op, rvalue); 120 } else if (NXQL.ECM_ANCESTORID.equals(name)) { 121 return walkAncestorId(op, rvalue); 122 } else if (name != null && name.startsWith(NXQL.ECM_FULLTEXT) && !NXQL.ECM_FULLTEXT_JOBID.equals(name)) { 123 return walkEcmFulltext(name, op, rvalue); 124 } else if (op == Operator.SUM) { 125 throw new UnsupportedOperationException("SUM"); 126 } else if (op == Operator.SUB) { 127 throw new UnsupportedOperationException("SUB"); 128 } else if (op == Operator.MUL) { 129 throw new UnsupportedOperationException("MUL"); 130 } else if (op == Operator.DIV) { 131 throw new UnsupportedOperationException("DIV"); 132 } else if (op == Operator.LT) { 133 return walkLt(lvalue, rvalue); 134 } else if (op == Operator.GT) { 135 return walkGt(lvalue, rvalue); 136 } else if (op == Operator.EQ) { 137 return walkEq(lvalue, rvalue); 138 } else if (op == Operator.NOTEQ) { 139 return walkNotEq(lvalue, rvalue); 140 } else if (op == Operator.LTEQ) { 141 return walkLtEq(lvalue, rvalue); 142 } else if (op == Operator.GTEQ) { 143 return walkGtEq(lvalue, rvalue); 144 } else if (op == Operator.AND) { 145 if (expr instanceof MultiExpression) { 146 return walkMultiExpression((MultiExpression) expr); 147 } else { 148 return walkAnd(lvalue, rvalue); 149 } 150 } else if (op == Operator.NOT) { 151 return walkNot(lvalue); 152 } else if (op == Operator.OR) { 153 return walkOr(lvalue, rvalue); 154 } else if (op == Operator.LIKE) { 155 return walkLike(lvalue, rvalue, true, false); 156 } else if (op == Operator.ILIKE) { 157 return walkLike(lvalue, rvalue, true, true); 158 } else if (op == Operator.NOTLIKE) { 159 return walkLike(lvalue, rvalue, false, false); 160 } else if (op == Operator.NOTILIKE) { 161 return walkLike(lvalue, rvalue, false, true); 162 } else if (op == Operator.IN) { 163 return walkIn(lvalue, rvalue, true); 164 } else if (op == Operator.NOTIN) { 165 return walkIn(lvalue, rvalue, false); 166 } else if (op == Operator.ISNULL) { 167 return walkIsNull(lvalue); 168 } else if (op == Operator.ISNOTNULL) { 169 return walkIsNotNull(lvalue); 170 } else if (op == Operator.BETWEEN) { 171 return walkBetween(lvalue, rvalue, true); 172 } else if (op == Operator.NOTBETWEEN) { 173 return walkBetween(lvalue, rvalue, false); 174 } else { 175 throw new QueryParseException("Unknown operator: " + op); 176 } 177 } 178 179 protected void checkDateLiteralForCast(Operand value, String name) { 180 if (value instanceof DateLiteral && !((DateLiteral) value).onlyDate) { 181 throw new QueryParseException("DATE() cast must be used with DATE literal, not TIMESTAMP: " + name); 182 } 183 } 184 185 protected Boolean walkEcmPath(Operator op, Operand rvalue) { 186 if (op != Operator.EQ && op != Operator.NOTEQ) { 187 throw new QueryParseException(NXQL.ECM_PATH + " requires = or <> operator"); 188 } 189 if (!(rvalue instanceof StringLiteral)) { 190 throw new QueryParseException(NXQL.ECM_PATH + " requires literal path as right argument"); 191 } 192 String path = ((StringLiteral) rvalue).value; 193 if (path.length() > 1 && path.endsWith("/")) { 194 path = path.substring(0, path.length() - 1); 195 } 196 String id = pathResolver.getIdForPath(path); 197 Object right = walkReference(new Reference(NXQL.ECM_UUID)); 198 if (id == null) { 199 return FALSE; 200 } 201 Boolean eq = eq(id, right); 202 return op == Operator.EQ ? eq : not(eq); 203 } 204 205 protected Boolean walkAncestorId(Operator op, Operand rvalue) { 206 if (op != Operator.EQ && op != Operator.NOTEQ) { 207 throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires = or <> operator"); 208 } 209 if (!(rvalue instanceof StringLiteral)) { 210 throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires literal id as right argument"); 211 } 212 String ancestorId = ((StringLiteral) rvalue).value; 213 Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS)); 214 boolean eq = op == Operator.EQ ? true : false; 215 if (ancestorIds == null) { 216 // placeless 217 return eq ? FALSE : TRUE; 218 } 219 for (Object id : ancestorIds) { 220 if (ancestorId.equals(id)) { 221 return eq ? TRUE : FALSE; 222 } 223 } 224 return eq ? FALSE : TRUE; 225 } 226 227 protected Boolean walkEcmFulltext(String name, Operator op, Operand rvalue) { 228 if (op != Operator.EQ && op != Operator.LIKE) { 229 throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires = or LIKE operator"); 230 } 231 if (!(rvalue instanceof StringLiteral)) { 232 throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires literal string as right argument"); 233 } 234 if (fulltextDisabled) { 235 throw new QueryParseException("Fulltext search disabled by configuration"); 236 } 237 String query = ((StringLiteral) rvalue).value; 238 if (name.equals(NXQL.ECM_FULLTEXT)) { 239 // standard fulltext query 240 String simple = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_SIMPLE)); 241 String binary = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_BINARY)); 242 return fulltext(simple, binary, query); 243 } else { 244 // secondary index match with explicit field 245 // do a regexp on the field 246 if (name.charAt(NXQL.ECM_FULLTEXT.length()) != '.') { 247 throw new QueryParseException(name + " has incorrect syntax for a secondary fulltext index"); 248 } 249 String prop = name.substring(NXQL.ECM_FULLTEXT.length() + 1); 250 String ft = query.replace(" ", "%"); 251 rvalue = new StringLiteral(ft); 252 return walkLike(new Reference(prop), rvalue, true, true); 253 } 254 } 255 256 public Boolean walkNot(Operand value) { 257 return not(bool(walkOperand(value))); 258 } 259 260 public Boolean walkIsNull(Operand value) { 261 return Boolean.valueOf(walkOperand(value) == null); 262 } 263 264 public Boolean walkIsNotNull(Operand value) { 265 return Boolean.valueOf(walkOperand(value) != null); 266 } 267 268 // ternary logic 269 public Boolean walkMultiExpression(MultiExpression expr) { 270 Boolean res = TRUE; 271 for (Operand value : expr.values) { 272 Boolean bool = bool(walkOperand(value)); 273 // don't short-circuit on null, we want to walk all references deterministically 274 res = and(res, bool); 275 } 276 return res; 277 } 278 279 public Boolean walkAnd(Operand lvalue, Operand rvalue) { 280 Boolean left = bool(walkOperand(lvalue)); 281 Boolean right = bool(walkOperand(rvalue)); 282 return and(left, right); 283 } 284 285 public Boolean walkOr(Operand lvalue, Operand rvalue) { 286 Boolean left = bool(walkOperand(lvalue)); 287 Boolean right = bool(walkOperand(rvalue)); 288 return or(left, right); 289 } 290 291 public Boolean walkEq(Operand lvalue, Operand rvalue) { 292 Object right = walkOperand(rvalue); 293 if (isMixinTypes(lvalue)) { 294 if (!(right instanceof String)) { 295 throw new QueryParseException("Invalid EQ rhs: " + rvalue); 296 } 297 return walkMixinTypes(Collections.singletonList((String) right), true); 298 } 299 Object left = walkOperand(lvalue); 300 return eqMaybeList(left, right); 301 } 302 303 public Boolean walkNotEq(Operand lvalue, Operand rvalue) { 304 if (isMixinTypes(lvalue)) { 305 Object right = walkOperand(rvalue); 306 if (!(right instanceof String)) { 307 throw new QueryParseException("Invalid NE rhs: " + rvalue); 308 } 309 return walkMixinTypes(Collections.singletonList((String) right), false); 310 } 311 return not(walkEq(lvalue, rvalue)); 312 } 313 314 public Boolean walkLt(Operand lvalue, Operand rvalue) { 315 Integer cmp = cmp(lvalue, rvalue); 316 return cmp == null ? null : cmp < 0; 317 } 318 319 public Boolean walkGt(Operand lvalue, Operand rvalue) { 320 Integer cmp = cmp(lvalue, rvalue); 321 return cmp == null ? null : cmp > 0; 322 } 323 324 public Boolean walkLtEq(Operand lvalue, Operand rvalue) { 325 Integer cmp = cmp(lvalue, rvalue); 326 return cmp == null ? null : cmp <= 0; 327 } 328 329 public Boolean walkGtEq(Operand lvalue, Operand rvalue) { 330 Integer cmp = cmp(lvalue, rvalue); 331 return cmp == null ? null : cmp >= 0; 332 } 333 334 public Object walkBetween(Operand lvalue, Operand rvalue, boolean positive) { 335 LiteralList l = (LiteralList) rvalue; 336 Predicate va = new Predicate(lvalue, Operator.GTEQ, l.get(0)); 337 Predicate vb = new Predicate(lvalue, Operator.LTEQ, l.get(1)); 338 Predicate pred = new Predicate(va, Operator.AND, vb); 339 if (!positive) { 340 pred = new Predicate(pred, Operator.NOT, null); 341 } 342 return walkExpression(pred); 343 } 344 345 public Boolean walkIn(Operand lvalue, Operand rvalue, boolean positive) { 346 Object right = walkOperand(rvalue); 347 if (!(right instanceof List)) { 348 throw new QueryParseException("Invalid IN rhs: " + rvalue); 349 } 350 if (isMixinTypes(lvalue)) { 351 return walkMixinTypes((List<String>) right, positive); 352 } 353 Object left = walkOperand(lvalue); 354 Boolean in = inMaybeList(left, (List<Object>) right); 355 return positive ? in : not(in); 356 } 357 358 public Object walkOperand(Operand op) { 359 if (op instanceof Literal) { 360 return walkLiteral((Literal) op); 361 } else if (op instanceof LiteralList) { 362 return walkLiteralList((LiteralList) op); 363 } else if (op instanceof Function) { 364 return walkFunction((Function) op); 365 } else if (op instanceof Expression) { 366 return walkExpression((Expression) op); 367 } else if (op instanceof Reference) { 368 return walkReference((Reference) op); 369 } else { 370 throw new QueryParseException("Unknown operand: " + op); 371 } 372 } 373 374 public Object walkLiteral(Literal lit) { 375 if (lit instanceof BooleanLiteral) { 376 return walkBooleanLiteral((BooleanLiteral) lit); 377 } else if (lit instanceof DateLiteral) { 378 return walkDateLiteral((DateLiteral) lit); 379 } else if (lit instanceof DoubleLiteral) { 380 return walkDoubleLiteral((DoubleLiteral) lit); 381 } else if (lit instanceof IntegerLiteral) { 382 return walkIntegerLiteral((IntegerLiteral) lit); 383 } else if (lit instanceof StringLiteral) { 384 return walkStringLiteral((StringLiteral) lit); 385 } else { 386 throw new QueryParseException("Unknown literal: " + lit); 387 } 388 } 389 390 public Boolean walkBooleanLiteral(BooleanLiteral lit) { 391 return Boolean.valueOf(lit.value); 392 } 393 394 public Calendar walkDateLiteral(DateLiteral lit) { 395 if (lit.onlyDate) { 396 Calendar date = lit.toCalendar(); 397 if (date != null) { 398 date.set(Calendar.HOUR_OF_DAY, 0); 399 date.set(Calendar.MINUTE, 0); 400 date.set(Calendar.SECOND, 0); 401 date.set(Calendar.MILLISECOND, 0); 402 } 403 return date; 404 } else { 405 return lit.toCalendar(); 406 } 407 } 408 409 public Double walkDoubleLiteral(DoubleLiteral lit) { 410 return Double.valueOf(lit.value); 411 } 412 413 public Long walkIntegerLiteral(IntegerLiteral lit) { 414 return Long.valueOf(lit.value); 415 } 416 417 public String walkStringLiteral(StringLiteral lit) { 418 return lit.value; 419 } 420 421 public List<Object> walkLiteralList(LiteralList litList) { 422 List<Object> list = new ArrayList<Object>(litList.size()); 423 for (Literal lit : litList) { 424 list.add(walkLiteral(lit)); 425 } 426 return list; 427 } 428 429 public Boolean walkLike(Operand lvalue, Operand rvalue, boolean positive, boolean caseInsensitive) { 430 Object left = walkOperand(lvalue); 431 Object right = walkOperand(rvalue); 432 if (!(right instanceof String)) { 433 throw new QueryParseException("Invalid LIKE rhs: " + rvalue); 434 } 435 return likeMaybeList(left, (String) right, positive, caseInsensitive); 436 } 437 438 public Object walkFunction(Function func) { 439 throw new UnsupportedOperationException("Function"); 440 } 441 442 public Boolean walkStartsWith(Operand lvalue, Operand rvalue) { 443 if (!(lvalue instanceof Reference)) { 444 throw new QueryParseException("Invalid STARTSWITH query, left hand side must be a property: " + lvalue); 445 } 446 String name = ((Reference) lvalue).name; 447 if (!(rvalue instanceof StringLiteral)) { 448 throw new QueryParseException( 449 "Invalid STARTSWITH query, right hand side must be a literal path: " + rvalue); 450 } 451 String path = ((StringLiteral) rvalue).value; 452 if (path.length() > 1 && path.endsWith("/")) { 453 path = path.substring(0, path.length() - 1); 454 } 455 456 if (NXQL.ECM_PATH.equals(name)) { 457 return walkStartsWithPath(path); 458 } else { 459 return walkStartsWithNonPath(lvalue, path); 460 } 461 } 462 463 protected Boolean walkStartsWithPath(String path) { 464 // resolve path 465 String ancestorId = pathResolver.getIdForPath(path); 466 // don't return early on null ancestorId, we want to walk all references deterministically 467 Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS)); 468 if (ancestorId == null) { 469 // no such path 470 return FALSE; 471 } 472 if (ancestorIds == null) { 473 // placeless 474 return FALSE; 475 } 476 for (Object id : ancestorIds) { 477 if (ancestorId.equals(id)) { 478 return TRUE; 479 } 480 } 481 return FALSE; 482 } 483 484 protected Boolean walkStartsWithNonPath(Operand lvalue, String path) { 485 Object left = walkReference((Reference) lvalue); 486 // exact match 487 Boolean bool = eqMaybeList(left, path); 488 if (TRUE.equals(bool)) { 489 return TRUE; 490 } 491 // prefix match TODO escape % chars 492 String pattern = path + "/%"; 493 return likeMaybeList(left, pattern, true, false); 494 } 495 496 /** 497 * Evaluates a reference over the context state. 498 * 499 * @param ref the reference 500 */ 501 public abstract Object walkReference(Reference ref); 502 503 protected boolean isMixinTypes(Operand op) { 504 if (!(op instanceof Reference)) { 505 return false; 506 } 507 return ((Reference) op).name.equals(NXQL.ECM_MIXINTYPE); 508 } 509 510 protected Boolean bool(Object value) { 511 if (value == null) { 512 return null; 513 } 514 if (!(value instanceof Boolean)) { 515 throw new QueryParseException("Not a boolean: " + value); 516 } 517 return (Boolean) value; 518 } 519 520 // ternary logic 521 protected Boolean not(Boolean value) { 522 if (value == null) { 523 return null; 524 } 525 return !value; 526 } 527 528 // ternary logic 529 protected Boolean and(Boolean left, Boolean right) { 530 if (TRUE.equals(left)) { 531 return right; 532 } else { 533 return left; 534 } 535 } 536 537 // ternary logic 538 protected Boolean or(Boolean left, Boolean right) { 539 if (TRUE.equals(left)) { 540 return left; 541 } else { 542 return right; 543 } 544 } 545 546 // ternary logic 547 protected Boolean eq(Object left, Object right) { 548 if (left == null || right == null) { 549 return null; 550 } 551 return left.equals(right); 552 } 553 554 // ternary logic 555 protected Boolean in(Object left, List<Object> right) { 556 if (left == null) { 557 return null; 558 } 559 boolean hasNull = false; 560 for (Object r : right) { 561 if (r == null) { 562 hasNull = true; 563 } else if (left.equals(r)) { 564 return TRUE; 565 } 566 } 567 return hasNull ? null : FALSE; 568 } 569 570 protected Integer cmp(Operand lvalue, Operand rvalue) { 571 Object left = walkOperand(lvalue); 572 Object right = walkOperand(rvalue); 573 return cmp(left, right); 574 } 575 576 // ternary logic 577 protected Integer cmp(Object left, Object right) { 578 if (left == null || right == null) { 579 return null; 580 } 581 if (!(left instanceof Comparable)) { 582 throw new QueryParseException("Not a comparable: " + left); 583 } 584 return ((Comparable<Object>) left).compareTo(right); 585 } 586 587 // ternary logic 588 protected Boolean like(Object left, String right, boolean caseInsensitive) { 589 if (left == null || right == null) { 590 return null; 591 } 592 if (!(left instanceof String)) { 593 throw new QueryParseException("Invalid LIKE lhs: " + left); 594 } 595 String value = (String) left; 596 if (caseInsensitive) { 597 value = value.toLowerCase(); 598 right = right.toLowerCase(); 599 } 600 String regex = likeToRegex(right); 601 boolean match = Pattern.matches(regex.toString(), value); 602 return match; 603 } 604 605 /** 606 * Turns a NXQL LIKE pattern into a regex. 607 * <p> 608 * % and _ are standard wildcards, and \ escapes them. 609 * 610 * @since 7.4 611 */ 612 public static String likeToRegex(String like) { 613 StringBuilder regex = new StringBuilder(); 614 char[] chars = like.toCharArray(); 615 boolean escape = false; 616 for (int i = 0; i < chars.length; i++) { 617 char c = chars[i]; 618 boolean escapeNext = false; 619 switch (c) { 620 case '%': 621 if (escape) { 622 regex.append(c); 623 } else { 624 regex.append(".*"); 625 } 626 break; 627 case '_': 628 if (escape) { 629 regex.append(c); 630 } else { 631 regex.append("."); 632 } 633 break; 634 case '\\': 635 if (escape) { 636 regex.append("\\\\"); // backslash escaped for regexp 637 } else { 638 escapeNext = true; 639 } 640 break; 641 default: 642 // escape mostly everything just in case 643 if (!CharUtils.isAsciiAlphanumeric(c)) { 644 regex.append("\\"); 645 } 646 regex.append(c); 647 break; 648 } 649 escape = escapeNext; 650 } 651 if (escape) { 652 // invalid string terminated by escape character, ignore 653 } 654 return regex.toString(); 655 } 656 657 // if list, use EXIST (SELECT 1 FROM left WHERE left.item = right) 658 protected Boolean eqMaybeList(Object left, Object right) { 659 if (left instanceof Object[]) { 660 for (Object l : ((Object[]) left)) { 661 Boolean eq = eq(l, right); 662 if (TRUE.equals(eq)) { 663 return TRUE; 664 } 665 } 666 return FALSE; 667 } else { 668 return eq(left, right); 669 } 670 } 671 672 // if list, use EXIST (SELECT 1 FROM left WHERE left.item IN right) 673 protected Boolean inMaybeList(Object left, List<Object> right) { 674 if (left instanceof Object[]) { 675 for (Object l : ((Object[]) left)) { 676 Boolean in = in(l, right); 677 if (TRUE.equals(in)) { 678 return TRUE; 679 } 680 } 681 return FALSE; 682 } else { 683 return in(left, right); 684 } 685 } 686 687 protected Boolean likeMaybeList(Object left, String right, boolean positive, boolean caseInsensitive) { 688 if (left instanceof Object[]) { 689 for (Object l : ((Object[]) left)) { 690 Boolean like = like(l, right, caseInsensitive); 691 if (TRUE.equals(like)) { 692 return Boolean.valueOf(positive); 693 } 694 } 695 return Boolean.valueOf(!positive); 696 } else { 697 Boolean like = like(left, right, caseInsensitive); 698 return positive ? like : not(like); 699 } 700 } 701 702 /** 703 * Matches the mixin types against a list of values. 704 * <p> 705 * Used for: 706 * <ul> 707 * <li>ecm:mixinTypes = 'foo' 708 * <li>ecm:mixinTypes != 'foo' 709 * <li>ecm:mixinTypes IN ('foo', 'bar') 710 * <li>ecm:mixinTypes NOT IN ('foo', 'bar') 711 * </ul> 712 * 713 * @param mixins the mixin(s) to match 714 * @param include {@code true} for = and IN 715 * @since 7.4 716 */ 717 public abstract Boolean walkMixinTypes(List<String> mixins, boolean include); 718 719 /* 720 * ----- simple parsing, don't try to be exhaustive ----- 721 */ 722 723 private static final Pattern WORD_PATTERN = Pattern.compile("[\\s\\p{Punct}]+"); 724 725 private static final String UNACCENTED = "aaaaaaaceeeeiiii\u00f0nooooo\u00f7ouuuuy\u00fey"; 726 727 private static final String STOP_WORDS_STR = "a an are and as at be by for from how " // 728 + "i in is it of on or that the this to was what when where who will with " // 729 + "car donc est il ils je la le les mais ni nous or ou pour tu un une vous " // 730 + "www com net org"; 731 732 private static final Set<String> STOP_WORDS = new HashSet<>(Arrays.asList(StringUtils.split(STOP_WORDS_STR, ' '))); 733 734 /** 735 * Checks if the fulltext combination of string1 and string2 matches the query expression. 736 */ 737 protected static Boolean fulltext(String string1, String string2, String queryString) { 738 if (queryString == null || (string1 == null && string2 == null)) { 739 return null; 740 } 741 // query 742 List<String> query = new ArrayList<String>(); 743 String phrase = null; 744 for (String word : StringUtils.split(queryString.toLowerCase(), ' ')) { 745 if (WORD_PATTERN.matcher(word).matches()) { 746 continue; 747 } 748 if (phrase != null) { 749 if (word.endsWith(PHRASE_QUOTE)) { 750 phrase += " " + word.substring(0, word.length() - 1); 751 query.add(phrase); 752 phrase = null; 753 } else { 754 phrase += " " + word; 755 } 756 } else { 757 if (word.startsWith(PHRASE_QUOTE)) { 758 phrase = word.substring(1); 759 } else { 760 if (word.startsWith("+")) { 761 word = word.substring(1); 762 } 763 query.add(word); 764 } 765 } 766 } 767 if (query.isEmpty()) { 768 return FALSE; 769 } 770 // fulltext 771 Set<String> fulltext = new HashSet<String>(); 772 fulltext.addAll(parseFullText(string1)); 773 fulltext.addAll(parseFullText(string2)); 774 775 return Boolean.valueOf(fulltext(fulltext, query)); 776 } 777 778 private static Set<String> parseFullText(String string) { 779 if (string == null) { 780 return Collections.emptySet(); 781 } 782 Set<String> set = new HashSet<String>(); 783 for (String word : WORD_PATTERN.split(string)) { 784 String w = parseWord(word); 785 if (w != null) { 786 set.add(w.toLowerCase()); 787 } 788 } 789 return set; 790 } 791 792 private static String parseWord(String string) { 793 int len = string.length(); 794 if (len < 3) { 795 return null; 796 } 797 StringBuilder buf = new StringBuilder(len); 798 for (int i = 0; i < len; i++) { 799 char c = Character.toLowerCase(string.charAt(i)); 800 if (c == '\u00e6') { 801 buf.append("ae"); 802 } else if (c >= '\u00e0' && c <= '\u00ff') { 803 buf.append(UNACCENTED.charAt((c) - 0xe0)); 804 } else if (c == '\u0153') { 805 buf.append("oe"); 806 } else { 807 buf.append(c); 808 } 809 } 810 // simple heuristic to remove plurals 811 int l = buf.length(); 812 if (l > 3 && buf.charAt(l - 1) == 's') { 813 buf.setLength(l - 1); 814 } 815 String word = buf.toString(); 816 if (STOP_WORDS.contains(word)) { 817 return null; 818 } 819 return word; 820 } 821 822 // matches "foo OR bar baz" as "foo OR (bar AND baz)" 823 protected static boolean fulltext(Set<String> fulltext, List<String> query) { 824 boolean andMatch = true; 825 for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext(); ) { 826 String word = it.next(); 827 boolean match; 828 if (word.endsWith("*") || word.endsWith("%")) { 829 // prefix match 830 match = false; 831 String prefix = word.substring(0, word.length() - 2); 832 for (String candidate : fulltext) { 833 if (candidate.startsWith(prefix)) { 834 match = true; 835 break; 836 } 837 } 838 } else { 839 if (word.startsWith("-")) { 840 word = word.substring(1);// 841 match = !fulltext.contains(word); 842 } else { 843 match = fulltext.contains(word); 844 } 845 } 846 if (!match) { 847 andMatch = false; 848 } 849 if (it.hasNext() && it.peek().equals(OR)) { 850 // end of AND group 851 // swallow OR 852 it.next(); 853 // return if the previous AND group matched 854 if (andMatch) { 855 return true; 856 } 857 // else start next AND group 858 andMatch = true; 859 } 860 } 861 return andMatch; 862 } 863 864 // matches "foo OR bar baz" as "(foo OR bar) AND baz" 865 protected static boolean fulltext1(Set<String> fulltext, List<String> query) { 866 boolean inOr = false; // if we're in a OR group 867 boolean orMatch = false; // value of the OR group 868 for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext(); ) { 869 String word = it.next(); 870 if (it.hasNext() && it.peek().equals(OR)) { 871 inOr = true; 872 orMatch = false; 873 } 874 boolean match; 875 if (word.endsWith("*") || word.endsWith("%")) { 876 // prefix match 877 match = false; 878 String prefix = word.substring(0, word.length() - 2); 879 for (String candidate : fulltext) { 880 if (candidate.startsWith(prefix)) { 881 match = true; 882 break; 883 } 884 } 885 } else { 886 if (word.startsWith("-")) { 887 word = word.substring(1);// 888 match = !fulltext.contains(word); 889 } else { 890 match = fulltext.contains(word); 891 } 892 } 893 if (inOr) { 894 if (match) { 895 orMatch = true; 896 } 897 if (it.hasNext() && it.peek().equals(OR)) { 898 // swallow OR and keep going in OR group 899 it.next(); 900 continue; 901 } 902 // finish OR group 903 match = orMatch; 904 inOr = false; 905 } 906 if (!match) { 907 return false; 908 } 909 } 910 if (inOr) { 911 // trailing OR, ignore and finish previous group 912 if (!orMatch) { 913 return false; 914 } 915 } 916 return true; 917 } 918 919}