/*
 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.storage;

import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Deque;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.lang.CharUtils;
import org.apache.commons.lang.StringUtils;
import org.nuxeo.ecm.core.query.QueryParseException;
import org.nuxeo.ecm.core.query.sql.NXQL;
import org.nuxeo.ecm.core.query.sql.model.BooleanLiteral;
import org.nuxeo.ecm.core.query.sql.model.DateLiteral;
import org.nuxeo.ecm.core.query.sql.model.DoubleLiteral;
import org.nuxeo.ecm.core.query.sql.model.Expression;
import org.nuxeo.ecm.core.query.sql.model.Function;
import org.nuxeo.ecm.core.query.sql.model.IntegerLiteral;
import org.nuxeo.ecm.core.query.sql.model.Literal;
import org.nuxeo.ecm.core.query.sql.model.LiteralList;
import org.nuxeo.ecm.core.query.sql.model.MultiExpression;
import org.nuxeo.ecm.core.query.sql.model.Operand;
import org.nuxeo.ecm.core.query.sql.model.Operator;
import org.nuxeo.ecm.core.query.sql.model.Predicate;
import org.nuxeo.ecm.core.query.sql.model.Reference;
import org.nuxeo.ecm.core.query.sql.model.StringLiteral;

import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;

/**
 * Evaluator for an {@link Expression}.
 * <p>
 * The evaluator walks the expression tree and computes its value. References are resolved by the concrete subclass
 * through {@link #walkReference}. Boolean results use three-valued logic: {@code null} stands for "unknown".
 *
 * @since 5.9.4
 */
public abstract class ExpressionEvaluator {

    /** pseudo NXQL to resolve ancestor ids. */
    public static final String NXQL_ECM_ANCESTOR_IDS = "ecm:__ancestorIds";

    /** pseudo NXQL to resolve internal path. */
    public static final String NXQL_ECM_PATH = "ecm:__path";

    /** pseudo NXQL to resolve read acls. */
    public static final String NXQL_ECM_READ_ACL = "ecm:__read_acl";

    /** pseudo NXQL to resolve the simple fulltext. */
    public static final String NXQL_ECM_FULLTEXT_SIMPLE = "ecm:__fulltextSimple";

    /** pseudo NXQL to resolve the binary fulltext. */
    public static final String NXQL_ECM_FULLTEXT_BINARY = "ecm:__fulltextBinary";

    protected static final String DATE_CAST = "DATE";

    protected static final String PHRASE_QUOTE = "\"";

    protected static final String NEG_PHRASE_QUOTE = "-\"";

    /** The OR keyword of a fulltext query, in lowercase because queries are lowercased before being parsed. */
    protected static final String OR = "or";

    /**
     * Interface for a class that knows how to resolve a path into an id.
     */
    public interface PathResolver {
        /**
         * Returns the id for a given path.
         *
         * @param path the path
         * @return the id, or {@code null} if not found
         */
        String getIdForPath(String path);
    }

    public final PathResolver pathResolver;

    /** The principals passed at construction time, or {@code null} if none were passed. */
    public final Set<String> principals;

    public final boolean fulltextSearchDisabled;

    /** Set to {@code true} once a standard {@code ecm:fulltext} clause has been evaluated. */
    public boolean hasFulltext;

    /**
     * Creates an evaluator.
     *
     * @param pathResolver the resolver used to turn paths into ids
     * @param principals the principals, may be {@code null}
     * @param fulltextSearchDisabled {@code true} if fulltext clauses must be rejected
     */
    public ExpressionEvaluator(PathResolver pathResolver, String[] principals, boolean fulltextSearchDisabled) {
        this.pathResolver = pathResolver;
        this.principals = principals == null ? null : new HashSet<>(Arrays.asList(principals));
        this.fulltextSearchDisabled = fulltextSearchDisabled;
    }

    /**
     * Evaluates an expression by dispatching on its left-hand reference name and its operator.
     *
     * @param expr the expression
     * @return the value of the expression
     * @throws QueryParseException if the expression is invalid or its operator is unknown
     * @throws UnsupportedOperationException for arithmetic operators
     */
    public Object walkExpression(Expression expr) {
        Operator op = expr.operator;
        Operand lvalue = expr.lvalue;
        Operand rvalue = expr.rvalue;
        Reference ref = lvalue instanceof Reference ? (Reference) lvalue : null;
        String name = ref != null ? ref.name : null;
        String cast = ref != null ? ref.cast : null;
        if (DATE_CAST.equals(cast)) {
            checkDateLiteralForCast(rvalue, name);
        }
        // special references and STARTSWITH are checked before the generic operators
        if (op == Operator.STARTSWITH) {
            return walkStartsWith(lvalue, rvalue);
        } else if (NXQL.ECM_PATH.equals(name)) {
            return walkEcmPath(op, rvalue);
        } else if (NXQL.ECM_ANCESTORID.equals(name)) {
            return walkAncestorId(op, rvalue);
        } else if (name != null && name.startsWith(NXQL.ECM_FULLTEXT) && !NXQL.ECM_FULLTEXT_JOBID.equals(name)) {
            return walkEcmFulltext(name, op, rvalue);
        } else if (op == Operator.SUM) {
            throw new UnsupportedOperationException("SUM");
        } else if (op == Operator.SUB) {
            throw new UnsupportedOperationException("SUB");
        } else if (op == Operator.MUL) {
            throw new UnsupportedOperationException("MUL");
        } else if (op == Operator.DIV) {
            throw new UnsupportedOperationException("DIV");
        } else if (op == Operator.LT) {
            return walkLt(lvalue, rvalue);
        } else if (op == Operator.GT) {
            return walkGt(lvalue, rvalue);
        } else if (op == Operator.EQ) {
            return walkEq(lvalue, rvalue);
        } else if (op == Operator.NOTEQ) {
            return walkNotEq(lvalue, rvalue);
        } else if (op == Operator.LTEQ) {
            return walkLtEq(lvalue, rvalue);
        } else if (op == Operator.GTEQ) {
            return walkGtEq(lvalue, rvalue);
        } else if (op == Operator.AND) {
            if (expr instanceof MultiExpression) {
                return walkMultiExpression((MultiExpression) expr);
            } else {
                return walkAnd(lvalue, rvalue);
            }
        } else if (op == Operator.NOT) {
            return walkNot(lvalue);
        } else if (op == Operator.OR) {
            return walkOr(lvalue, rvalue);
        } else if (op == Operator.LIKE) {
            return walkLike(lvalue, rvalue, true, false);
        } else if (op == Operator.ILIKE) {
            return walkLike(lvalue, rvalue, true, true);
        } else if (op == Operator.NOTLIKE) {
            return walkLike(lvalue, rvalue, false, false);
        } else if (op == Operator.NOTILIKE) {
            return walkLike(lvalue, rvalue, false, true);
        } else if (op == Operator.IN) {
            return walkIn(lvalue, rvalue, true);
        } else if (op == Operator.NOTIN) {
            return walkIn(lvalue, rvalue, false);
        } else if (op == Operator.ISNULL) {
            return walkIsNull(lvalue);
        } else if (op == Operator.ISNOTNULL) {
            return walkIsNotNull(lvalue);
        } else if (op == Operator.BETWEEN) {
            return walkBetween(lvalue, rvalue, true);
        } else if (op == Operator.NOTBETWEEN) {
            return walkBetween(lvalue, rvalue, false);
        } else {
            throw new QueryParseException("Unknown operator: " + op);
        }
    }

    /**
     * Checks that a DATE cast is not compared to a date literal carrying a time part.
     *
     * @param value the right-hand operand
     * @param name the name of the cast reference, used in the error message
     * @throws QueryParseException if the operand is a date literal that is not date-only
     */
    protected void checkDateLiteralForCast(Operand value, String name) {
        if (value instanceof DateLiteral && !((DateLiteral) value).onlyDate) {
            throw new QueryParseException("DATE() cast must be used with DATE literal, not TIMESTAMP: " + name);
        }
    }

    /**
     * Evaluates {@code ecm:path = 'path'} or {@code ecm:path <> 'path'} by resolving the path to an id and comparing
     * it to the value of the {@code ecm:uuid} reference.
     *
     * @return {@code FALSE} if the path cannot be resolved (for both operators), otherwise the comparison result
     * @throws QueryParseException if the operator is not = or <>, or the right-hand side is not a string literal
     */
    protected Boolean walkEcmPath(Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.NOTEQ) {
            throw new QueryParseException(NXQL.ECM_PATH + " requires = or <> operator");
        }
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(NXQL.ECM_PATH + " requires literal path as right argument");
        }
        String path = ((StringLiteral) rvalue).value;
        if (path.length() > 1 && path.endsWith("/")) {
            path = path.substring(0, path.length() - 1);
        }
        String id = pathResolver.getIdForPath(path);
        // the reference is walked even when the path does not resolve
        Object right = walkReference(new Reference(NXQL.ECM_UUID));
        if (id == null) {
            return FALSE;
        }
        Boolean eq = eq(id, right);
        return op == Operator.EQ ? eq : not(eq);
    }

    /**
     * Evaluates {@code ecm:ancestorId = 'id'} or {@code ecm:ancestorId <> 'id'} against the ancestor ids reference.
     *
     * @return whether the id is among the ancestor ids, negated for {@code <>}; a {@code null} ancestor list
     *         (placeless) counts as not found
     * @throws QueryParseException if the operator is not = or <>, or the right-hand side is not a string literal
     */
    protected Boolean walkAncestorId(Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.NOTEQ) {
            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires = or <> operator");
        }
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires literal id as right argument");
        }
        String ancestorId = ((StringLiteral) rvalue).value;
        Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS));
        boolean positive = op == Operator.EQ;
        boolean found = false;
        if (ancestorIds != null) { // null means placeless
            for (Object id : ancestorIds) {
                if (ancestorId.equals(id)) {
                    found = true;
                    break;
                }
            }
        }
        return Boolean.valueOf(found == positive);
    }

    /**
     * Evaluates a fulltext clause, either on the standard fulltext ({@code ecm:fulltext}) or on a secondary index
     * with an explicit field ({@code ecm:fulltext.field}), which is evaluated as a case-insensitive LIKE on that
     * field.
     *
     * @throws QueryParseException if the operator or right-hand side is invalid, if fulltext search is disabled, or
     *             if the secondary index syntax is incorrect
     */
    protected Boolean walkEcmFulltext(String name, Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.LIKE) {
            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires = or LIKE operator");
        }
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires literal string as right argument");
        }
        if (fulltextSearchDisabled) {
            throw new QueryParseException("Fulltext search disabled by configuration");
        }
        String query = ((StringLiteral) rvalue).value;
        if (name.equals(NXQL.ECM_FULLTEXT)) {
            // standard fulltext query
            hasFulltext = true;
            String simple = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_SIMPLE));
            String binary = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_BINARY));
            return fulltext(simple, binary, query);
        } else {
            // secondary index match with explicit field
            // do a regexp on the field
            if (name.charAt(NXQL.ECM_FULLTEXT.length()) != '.') {
                throw new QueryParseException(name + " has incorrect syntax for a secondary fulltext index");
            }
            String prop = name.substring(NXQL.ECM_FULLTEXT.length() + 1);
            String ft = query.replace(" ", "%");
            return walkLike(new Reference(prop), new StringLiteral(ft), true, true);
        }
    }

    /** Evaluates NOT of the operand, with three-valued logic. */
    public Boolean walkNot(Operand value) {
        return not(bool(walkOperand(value)));
    }

    /** Evaluates whether the operand's value is {@code null}. */
    public Boolean walkIsNull(Operand value) {
        return Boolean.valueOf(walkOperand(value) == null);
    }

    /** Evaluates whether the operand's value is not {@code null}. */
    public Boolean walkIsNotNull(Operand value) {
        return Boolean.valueOf(walkOperand(value) != null);
    }

    /**
     * Evaluates the conjunction of all the values of a multi-expression, with three-valued logic.
     * <p>
     * An expression without values evaluates to {@code TRUE}.
     */
    public Boolean walkMultiExpression(MultiExpression expr) {
        Boolean res = TRUE;
        for (Operand value : expr.values) {
            Boolean bool = bool(walkOperand(value));
            // don't short-circuit on null, we want to walk all references deterministically
            res = and(res, bool);
        }
        return res;
    }

    /** Evaluates AND of both operands (both are always walked), with three-valued logic. */
    public Boolean walkAnd(Operand lvalue, Operand rvalue) {
        Boolean left = bool(walkOperand(lvalue));
        Boolean right = bool(walkOperand(rvalue));
        return and(left, right);
    }

    /** Evaluates OR of both operands (both are always walked), with three-valued logic. */
    public Boolean walkOr(Operand lvalue, Operand rvalue) {
        Boolean left = bool(walkOperand(lvalue));
        Boolean right = bool(walkOperand(rvalue));
        return or(left, right);
    }

    /**
     * Evaluates equality. Mixin types are delegated to {@link #walkMixinTypes}; an array on the left-hand side
     * matches if any of its elements is equal to the right-hand side.
     *
     * @throws QueryParseException if the left-hand side is the mixin types and the right-hand side is not a string
     */
    public Boolean walkEq(Operand lvalue, Operand rvalue) {
        Object right = walkOperand(rvalue);
        if (isMixinTypes(lvalue)) {
            if (!(right instanceof String)) {
                throw new QueryParseException("Invalid EQ rhs: " + rvalue);
            }
            return walkMixinTypes(Collections.singletonList((String) right), true);
        }
        Object left = walkOperand(lvalue);
        return eqMaybeList(left, right);
    }

    /**
     * Evaluates inequality, as the negation of {@link #walkEq} except for mixin types which are delegated to
     * {@link #walkMixinTypes}.
     *
     * @throws QueryParseException if the left-hand side is the mixin types and the right-hand side is not a string
     */
    public Boolean walkNotEq(Operand lvalue, Operand rvalue) {
        if (isMixinTypes(lvalue)) {
            Object right = walkOperand(rvalue);
            if (!(right instanceof String)) {
                throw new QueryParseException("Invalid NE rhs: " + rvalue);
            }
            return walkMixinTypes(Collections.singletonList((String) right), false);
        }
        return not(walkEq(lvalue, rvalue));
    }

    /** Evaluates {@code <}; the result is {@code null} if either side is {@code null}. */
    public Boolean walkLt(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : cmp < 0;
    }

    /** Evaluates {@code >}; the result is {@code null} if either side is {@code null}. */
    public Boolean walkGt(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : cmp > 0;
    }

    /** Evaluates {@code <=}; the result is {@code null} if either side is {@code null}. */
    public Boolean walkLtEq(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : cmp <= 0;
    }

    /** Evaluates {@code >=}; the result is {@code null} if either side is {@code null}. */
    public Boolean walkGtEq(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : cmp >= 0;
    }

    /**
     * Evaluates BETWEEN / NOT BETWEEN by rewriting it as {@code lvalue >= a AND lvalue <= b} (negated if not
     * positive) and evaluating the rewritten predicate.
     *
     * @param rvalue the bounds, which must be a {@link LiteralList} holding the lower then the upper bound
     * @param positive {@code true} for BETWEEN, {@code false} for NOT BETWEEN
     */
    public Object walkBetween(Operand lvalue, Operand rvalue, boolean positive) {
        LiteralList bounds = (LiteralList) rvalue;
        Predicate lower = new Predicate(lvalue, Operator.GTEQ, bounds.get(0));
        Predicate upper = new Predicate(lvalue, Operator.LTEQ, bounds.get(1));
        Predicate pred = new Predicate(lower, Operator.AND, upper);
        if (!positive) {
            pred = new Predicate(pred, Operator.NOT, null);
        }
        return walkExpression(pred);
    }

    /**
     * Evaluates IN / NOT IN. Mixin types are delegated to {@link #walkMixinTypes}.
     *
     * @param positive {@code true} for IN, {@code false} for NOT IN
     * @throws QueryParseException if the right-hand side does not evaluate to a list
     */
    @SuppressWarnings("unchecked")
    public Boolean walkIn(Operand lvalue, Operand rvalue, boolean positive) {
        Object right = walkOperand(rvalue);
        if (!(right instanceof List)) {
            throw new QueryParseException("Invalid IN rhs: " + rvalue);
        }
        if (isMixinTypes(lvalue)) {
            return walkMixinTypes((List<String>) right, positive);
        }
        Object left = walkOperand(lvalue);
        Boolean in = inMaybeList(left, (List<Object>) right);
        return positive ? in : not(in);
    }

    /**
     * Evaluates an operand by dispatching on its type.
     *
     * @throws QueryParseException if the operand type is unknown
     */
    public Object walkOperand(Operand op) {
        if (op instanceof Literal) {
            return walkLiteral((Literal) op);
        } else if (op instanceof LiteralList) {
            return walkLiteralList((LiteralList) op);
        } else if (op instanceof Function) {
            return walkFunction((Function) op);
        } else if (op instanceof Expression) {
            return walkExpression((Expression) op);
        } else if (op instanceof Reference) {
            return walkReference((Reference) op);
        } else {
            throw new QueryParseException("Unknown operand: " + op);
        }
    }

    /**
     * Evaluates a literal by dispatching on its type.
     *
     * @throws QueryParseException if the literal type is unknown
     */
    public Object walkLiteral(Literal lit) {
        if (lit instanceof BooleanLiteral) {
            return walkBooleanLiteral((BooleanLiteral) lit);
        } else if (lit instanceof DateLiteral) {
            return walkDateLiteral((DateLiteral) lit);
        } else if (lit instanceof DoubleLiteral) {
            return walkDoubleLiteral((DoubleLiteral) lit);
        } else if (lit instanceof IntegerLiteral) {
            return walkIntegerLiteral((IntegerLiteral) lit);
        } else if (lit instanceof StringLiteral) {
            return walkStringLiteral((StringLiteral) lit);
        } else {
            throw new QueryParseException("Unknown literal: " + lit);
        }
    }

    public Boolean walkBooleanLiteral(BooleanLiteral lit) {
        return Boolean.valueOf(lit.value);
    }

    /**
     * Converts a date literal to a calendar. For a date-only literal the time fields are reset to midnight.
     */
    public Calendar walkDateLiteral(DateLiteral lit) {
        Calendar date = lit.toCalendar();
        if (lit.onlyDate && date != null) {
            date.set(Calendar.HOUR_OF_DAY, 0);
            date.set(Calendar.MINUTE, 0);
            date.set(Calendar.SECOND, 0);
            date.set(Calendar.MILLISECOND, 0);
        }
        return date;
    }

    public Double walkDoubleLiteral(DoubleLiteral lit) {
        return Double.valueOf(lit.value);
    }

    public Long walkIntegerLiteral(IntegerLiteral lit) {
        return Long.valueOf(lit.value);
    }

    public String walkStringLiteral(StringLiteral lit) {
        return lit.value;
    }

    /** Evaluates each literal of the list, in order. */
    public List<Object> walkLiteralList(LiteralList litList) {
        List<Object> list = new ArrayList<>(litList.size());
        for (Literal lit : litList) {
            list.add(walkLiteral(lit));
        }
        return list;
    }

    /**
     * Evaluates LIKE and its variants.
     *
     * @param positive {@code false} for the NOT variants
     * @param caseInsensitive {@code true} for the ILIKE variants
     * @throws QueryParseException if the right-hand side does not evaluate to a string
     */
    public Boolean walkLike(Operand lvalue, Operand rvalue, boolean positive, boolean caseInsensitive) {
        Object left = walkOperand(lvalue);
        Object right = walkOperand(rvalue);
        if (!(right instanceof String)) {
            throw new QueryParseException("Invalid LIKE rhs: " + rvalue);
        }
        return likeMaybeList(left, (String) right, positive, caseInsensitive);
    }

    /**
     * Evaluates a function. This implementation always throws.
     *
     * @throws UnsupportedOperationException always
     */
    public Object walkFunction(Function func) {
        throw new UnsupportedOperationException("Function");
    }

    /**
     * Evaluates STARTSWITH, either on {@code ecm:path} (ancestor-based) or on another property (prefix-based).
     * <p>
     * A trailing slash on the path is ignored, except for the root path.
     *
     * @throws QueryParseException if the left-hand side is not a reference or the right-hand side is not a string
     *             literal
     */
    public Boolean walkStartsWith(Operand lvalue, Operand rvalue) {
        if (!(lvalue instanceof Reference)) {
            throw new QueryParseException("Invalid STARTSWITH query, left hand side must be a property: " + lvalue);
        }
        String name = ((Reference) lvalue).name;
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(
                    "Invalid STARTSWITH query, right hand side must be a literal path: " + rvalue);
        }
        String path = ((StringLiteral) rvalue).value;
        if (path.length() > 1 && path.endsWith("/")) {
            path = path.substring(0, path.length() - 1);
        }

        if (NXQL.ECM_PATH.equals(name)) {
            return walkStartsWithPath(path);
        } else {
            return walkStartsWithNonPath(lvalue, path);
        }
    }

    /**
     * Evaluates {@code ecm:path STARTSWITH 'path'} by checking whether the id of the path is among the ancestor ids.
     *
     * @return {@code TRUE} if it is, {@code FALSE} otherwise (including when the path does not resolve or there are
     *         no ancestor ids)
     */
    protected Boolean walkStartsWithPath(String path) {
        // resolve path
        String ancestorId = pathResolver.getIdForPath(path);
        // don't return early on null ancestorId, we want to walk all references deterministically
        Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS));
        if (ancestorId == null) {
            // no such path
            return FALSE;
        }
        if (ancestorIds == null) {
            // placeless
            return FALSE;
        }
        for (Object id : ancestorIds) {
            if (ancestorId.equals(id)) {
                return TRUE;
            }
        }
        return FALSE;
    }

    /**
     * Evaluates STARTSWITH on a non-path property: matches if the value is the path itself or starts with the path
     * followed by a slash.
     */
    protected Boolean walkStartsWithNonPath(Operand lvalue, String path) {
        Object left = walkReference((Reference) lvalue);
        // exact match
        Boolean bool = eqMaybeList(left, path);
        if (TRUE.equals(bool)) {
            return TRUE;
        }
        // prefix match; the path is a literal so its LIKE special characters must not act as wildcards
        String pattern = escapeLike(path) + "/%";
        return likeMaybeList(left, pattern, true, false);
    }

    /**
     * Escapes the LIKE special characters (%, _ and \) of a literal string with a backslash, which is the escape
     * character understood by {@link #likeToRegex}.
     */
    private static String escapeLike(String value) {
        StringBuilder buf = new StringBuilder(value.length() + 2);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            if (c == '\\' || c == '%' || c == '_') {
                buf.append('\\');
            }
            buf.append(c);
        }
        return buf.toString();
    }

    /**
     * Evaluates a reference over the context state.
     *
     * @param ref the reference
     */
    public abstract Object walkReference(Reference ref);

    /** Checks if the operand is a reference to the mixin types. */
    protected boolean isMixinTypes(Operand op) {
        if (!(op instanceof Reference)) {
            return false;
        }
        // constant first so that a reference without a name is not a NullPointerException
        return NXQL.ECM_MIXINTYPE.equals(((Reference) op).name);
    }

    /**
     * Casts a value to a boolean.
     *
     * @return the boolean, or {@code null} if the value is {@code null}
     * @throws QueryParseException if the value is neither {@code null} nor a boolean
     */
    protected Boolean bool(Object value) {
        if (value == null) {
            return null;
        }
        if (!(value instanceof Boolean)) {
            throw new QueryParseException("Not a boolean: " + value);
        }
        return (Boolean) value;
    }

    /** Three-valued NOT: the negation of unknown ({@code null}) is unknown. */
    protected Boolean not(Boolean value) {
        if (value == null) {
            return null;
        }
        return !value;
    }

    /**
     * Three-valued AND: false if either side is false, otherwise unknown ({@code null}) if either side is unknown,
     * otherwise true.
     */
    protected Boolean and(Boolean left, Boolean right) {
        if (FALSE.equals(left) || FALSE.equals(right)) {
            // false wins even over unknown
            return FALSE;
        }
        if (left == null || right == null) {
            return null;
        }
        return TRUE;
    }

    /**
     * Three-valued OR: true if either side is true, otherwise unknown ({@code null}) if either side is unknown,
     * otherwise false.
     */
    protected Boolean or(Boolean left, Boolean right) {
        if (TRUE.equals(left) || TRUE.equals(right)) {
            // true wins even over unknown
            return TRUE;
        }
        if (left == null || right == null) {
            return null;
        }
        return FALSE;
    }

    /**
     * Three-valued equality: unknown ({@code null}) if either side is {@code null}.
     * <p>
     * Calendars are compared by instant.
     */
    protected Boolean eq(Object left, Object right) {
        if (left == null || right == null) {
            return null;
        }
        if (left instanceof Calendar && right instanceof Calendar) {
            // avoid timezone issues (NXP-20260)
            return ((Calendar) left).getTimeInMillis() == ((Calendar) right).getTimeInMillis();
        }
        return left.equals(right);
    }

    /**
     * Three-valued IN: unknown ({@code null}) if the left side is {@code null}, or if there is no match and the
     * list contains a {@code null}.
     */
    protected Boolean in(Object left, List<Object> right) {
        if (left == null) {
            return null;
        }
        boolean hasNull = false;
        for (Object r : right) {
            if (r == null) {
                hasNull = true;
            } else if (TRUE.equals(eq(left, r))) {
                // eq() rather than equals() so that IN compares the same way as = does
                return TRUE;
            }
        }
        return hasNull ? null : FALSE;
    }

    /** Evaluates both operands and compares their values. */
    protected Integer cmp(Operand lvalue, Operand rvalue) {
        Object left = walkOperand(lvalue);
        Object right = walkOperand(rvalue);
        return cmp(left, right);
    }

    /**
     * Three-valued comparison.
     *
     * @return the result of {@code left.compareTo(right)}, or {@code null} if either side is {@code null}
     * @throws QueryParseException if the left side is not a {@link Comparable}
     */
    @SuppressWarnings("unchecked")
    protected Integer cmp(Object left, Object right) {
        if (left == null || right == null) {
            return null;
        }
        if (!(left instanceof Comparable)) {
            throw new QueryParseException("Not a comparable: " + left);
        }
        return ((Comparable<Object>) left).compareTo(right);
    }

    /**
     * Three-valued LIKE: unknown ({@code null}) if either side is {@code null}.
     *
     * @param left the value to match, which must be a string
     * @param right the LIKE pattern
     * @param caseInsensitive {@code true} to lowercase both sides before matching
     * @throws QueryParseException if the left side is not a string
     */
    protected Boolean like(Object left, String right, boolean caseInsensitive) {
        if (left == null || right == null) {
            return null;
        }
        if (!(left instanceof String)) {
            throw new QueryParseException("Invalid LIKE lhs: " + left);
        }
        String value = (String) left;
        if (caseInsensitive) {
            value = value.toLowerCase();
            right = right.toLowerCase();
        }
        String regex = likeToRegex(right);
        // DOTALL because the LIKE wildcards match any character, including line terminators
        return Boolean.valueOf(Pattern.compile(regex, Pattern.DOTALL).matcher(value).matches());
    }

    /**
     * Turns a NXQL LIKE pattern into a regex.
     * <p>
     * % and _ are standard wildcards, and \ escapes them.
     *
     * @since 7.4
     */
    public static String likeToRegex(String like) {
        StringBuilder regex = new StringBuilder();
        boolean escape = false;
        for (char c : like.toCharArray()) {
            boolean escapeNext = false;
            switch (c) {
            case '%':
                if (escape) {
                    regex.append(c);
                } else {
                    regex.append(".*");
                }
                break;
            case '_':
                if (escape) {
                    regex.append(c);
                } else {
                    regex.append(".");
                }
                break;
            case '\\':
                if (escape) {
                    regex.append("\\\\"); // backslash escaped for regexp
                } else {
                    escapeNext = true;
                }
                break;
            default:
                // escape mostly everything just in case
                if (!CharUtils.isAsciiAlphanumeric(c)) {
                    regex.append("\\");
                }
                regex.append(c);
                break;
            }
            escape = escapeNext;
        }
        // a string terminated by a lone escape character is invalid, the escape is ignored
        return regex.toString();
    }

    // if list, use EXIST (SELECT 1 FROM left WHERE left.item = right)
    protected Boolean eqMaybeList(Object left, Object right) {
        if (left instanceof Object[]) {
            for (Object l : ((Object[]) left)) {
                Boolean eq = eq(l, right);
                if (TRUE.equals(eq)) {
                    return TRUE;
                }
            }
            return FALSE;
        } else {
            return eq(left, right);
        }
    }

    // if list, use EXIST (SELECT 1 FROM left WHERE left.item IN right)
    protected Boolean inMaybeList(Object left, List<Object> right) {
        if (left instanceof Object[]) {
            for (Object l : ((Object[]) left)) {
                Boolean in = in(l, right);
                if (TRUE.equals(in)) {
                    return TRUE;
                }
            }
            return FALSE;
        } else {
            return in(left, right);
        }
    }

    /**
     * Evaluates LIKE / NOT LIKE on a value that may be an array, in which case the LIKE matches if any element
     * matches.
     *
     * @param positive {@code false} to negate the result
     */
    protected Boolean likeMaybeList(Object left, String right, boolean positive, boolean caseInsensitive) {
        if (left instanceof Object[]) {
            for (Object l : ((Object[]) left)) {
                Boolean like = like(l, right, caseInsensitive);
                if (TRUE.equals(like)) {
                    return Boolean.valueOf(positive);
                }
            }
            return Boolean.valueOf(!positive);
        } else {
            Boolean like = like(left, right, caseInsensitive);
            return positive ? like : not(like);
        }
    }

    /**
     * Matches the mixin types against a list of values.
     * <p>
     * Used for:
     * <ul>
     * <li>ecm:mixinTypes = 'foo'
     * <li>ecm:mixinTypes != 'foo'
     * <li>ecm:mixinTypes IN ('foo', 'bar')
     * <li>ecm:mixinTypes NOT IN ('foo', 'bar')
     * </ul>
     *
     * @param mixins the mixin(s) to match
     * @param include {@code true} for = and IN
     * @since 7.4
     */
    public abstract Boolean walkMixinTypes(List<String> mixins, boolean include);

    /*
     * ----- simple parsing, don't try to be exhaustive -----
     */

    /** Separator between words: any run of whitespace and punctuation. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[\\s\\p{Punct}]+");

    /** Replacement characters for the characters U+00E0 to U+00FF, indexed by offset from U+00E0. */
    private static final String UNACCENTED = "aaaaaaaceeeeiiii\u00f0nooooo\u00f7ouuuuy\u00fey";

    private static final String STOP_WORDS_STR = "a an are and as at be by for from how " //
            + "i in is it of on or that the this to was what when where who will with " //
            + "car donc est il ils je la le les mais ni nous or ou pour tu un une vous " //
            + "www com net org";

    private static final Set<String> STOP_WORDS = new HashSet<>(Arrays.asList(StringUtils.split(STOP_WORDS_STR, ' ')));

    /**
     * Checks if the fulltext combination of string1 and string2 matches the query expression.
     *
     * @return {@code null} if the query is {@code null} or both strings are {@code null}, {@code FALSE} if the query
     *         has no usable word, otherwise whether the query matches
     */
    protected static Boolean fulltext(String string1, String string2, String queryString) {
        if (queryString == null || (string1 == null && string2 == null)) {
            return null;
        }
        // query
        List<String> query = new ArrayList<>();
        String phrase = null; // the phrase being accumulated, when inside quotes
        int phraseWordCount = 1;
        int maxPhraseWordCount = 1; // maximum number of words in a phrase
        for (String word : StringUtils.split(queryString.toLowerCase(), ' ')) {
            if (WORD_PATTERN.matcher(word).matches()) {
                // only punctuation
                continue;
            }
            if (phrase != null) {
                if (word.endsWith(PHRASE_QUOTE)) {
                    // end of phrase
                    phrase += " " + word.substring(0, word.length() - 1);
                    query.add(phrase);
                    phraseWordCount++;
                    if (maxPhraseWordCount < phraseWordCount) {
                        maxPhraseWordCount = phraseWordCount;
                    }
                    phrase = null;
                    phraseWordCount = 1;
                } else {
                    phrase += " " + word;
                    phraseWordCount++;
                }
            } else {
                if (word.startsWith(PHRASE_QUOTE)) {
                    phrase = word.substring(1);
                } else if (word.startsWith(NEG_PHRASE_QUOTE)) {
                    phrase = "-" + word.substring(2);
                } else {
                    if (word.startsWith("+")) {
                        word = word.substring(1);
                    }
                    query.add(word);
                }
            }
        }
        if (query.isEmpty()) {
            return FALSE;
        }
        // fulltext
        Set<String> fulltext = new HashSet<>();
        fulltext.addAll(parseFullText(string1, maxPhraseWordCount));
        fulltext.addAll(parseFullText(string2, maxPhraseWordCount));

        return Boolean.valueOf(fulltext(fulltext, query));
    }

    /**
     * Parses a text into the set of its normalized words, plus all its phrases of 2 to phraseSize consecutive words
     * when phraseSize is greater than 1.
     */
    private static Set<String> parseFullText(String string, int phraseSize) {
        if (string == null) {
            return Collections.emptySet();
        }
        Set<String> set = new HashSet<>();
        Deque<String> phraseWords = new LinkedList<>(); // sliding window of the last words
        for (String word : WORD_PATTERN.split(string)) {
            word = parseWord(word);
            if (word != null) {
                word = word.toLowerCase();
                set.add(word);
                if (phraseSize > 1) {
                    phraseWords.addLast(word);
                    if (phraseWords.size() > 1) {
                        if (phraseWords.size() > phraseSize) {
                            phraseWords.removeFirst();
                        }
                        addPhraseWords(set, phraseWords);
                    }
                }
            }
        }
        // the shorter phrases at the end of the text
        while (phraseWords.size() > 2) {
            phraseWords.removeFirst();
            addPhraseWords(set, phraseWords);
        }
        return set;
    }

    /**
     * Adds to the set all the sub-phrases from the start of the phraseWords.
     */
    private static void addPhraseWords(Set<String> set, Deque<String> phraseWords) {
        String[] array = phraseWords.toArray(new String[0]);
        for (int len = 2; len <= array.length; len++) {
            String phrase = StringUtils.join(array, ' ', 0, len);
            set.add(phrase);
        }
    }

    /**
     * Normalizes a word: lowercases it, removes accents, and removes a final 's' from words longer than 3
     * characters.
     *
     * @return the normalized word, or {@code null} if the word is shorter than 3 characters or is a stop word
     */
    private static String parseWord(String string) {
        int len = string.length();
        if (len < 3) {
            return null;
        }
        StringBuilder buf = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            char c = Character.toLowerCase(string.charAt(i));
            if (c == '\u00e6') {
                buf.append("ae");
            } else if (c >= '\u00e0' && c <= '\u00ff') {
                buf.append(UNACCENTED.charAt(c - 0xe0));
            } else if (c == '\u0153') {
                buf.append("oe");
            } else {
                buf.append(c);
            }
        }
        // simple heuristic to remove plurals
        int l = buf.length();
        if (l > 3 && buf.charAt(l - 1) == 's') {
            buf.setLength(l - 1);
        }
        String word = buf.toString();
        if (STOP_WORDS.contains(word)) {
            return null;
        }
        return word;
    }

    /**
     * Checks if a single query word matches the fulltext.
     * <p>
     * A word ending with * or % is a prefix match, a word starting with - is a negative match, any other word is an
     * exact match.
     */
    private static boolean matchesWord(Set<String> fulltext, String word) {
        if (word.endsWith("*") || word.endsWith("%")) {
            // prefix match: strip only the trailing wildcard character
            String prefix = word.substring(0, word.length() - 1);
            for (String candidate : fulltext) {
                if (candidate.startsWith(prefix)) {
                    return true;
                }
            }
            return false;
        }
        if (word.startsWith("-")) {
            return !fulltext.contains(word.substring(1));
        }
        return fulltext.contains(word);
    }

    // matches "foo OR bar baz" as "foo OR (bar AND baz)"
    protected static boolean fulltext(Set<String> fulltext, List<String> query) {
        boolean andMatch = true;
        for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext();) {
            String word = it.next();
            if (!matchesWord(fulltext, word)) {
                andMatch = false;
            }
            if (it.hasNext() && it.peek().equals(OR)) {
                // end of AND group
                // swallow OR
                it.next();
                // return if the previous AND group matched
                if (andMatch) {
                    return true;
                }
                // else start next AND group
                andMatch = true;
            }
        }
        return andMatch;
    }

    // matches "foo OR bar baz" as "(foo OR bar) AND baz"
    protected static boolean fulltext1(Set<String> fulltext, List<String> query) {
        boolean inOr = false; // if we're in a OR group
        boolean orMatch = false; // value of the OR group
        for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext();) {
            String word = it.next();
            boolean beforeOr = it.hasNext() && it.peek().equals(OR);
            if (beforeOr && !inOr) {
                // start of a OR group; not reset inside a group, to keep the matches of "a OR b OR c"
                inOr = true;
                orMatch = false;
            }
            boolean match = matchesWord(fulltext, word);
            if (inOr) {
                if (match) {
                    orMatch = true;
                }
                if (beforeOr) {
                    // swallow OR and keep going in OR group
                    it.next();
                    continue;
                }
                // finish OR group
                match = orMatch;
                inOr = false;
            }
            if (!match) {
                return false;
            }
        }
        if (inOr) {
            // trailing OR, ignore and finish previous group
            if (!orMatch) {
                return false;
            }
        }
        return true;
    }

}