/*
 * (C) Copyright 2014-2018 Nuxeo (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.storage;

import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;
import static org.nuxeo.ecm.core.trash.TrashService.Feature.TRASHED_STATE_IS_DEDICATED_PROPERTY;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Deque;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.lang3.CharUtils;
import org.apache.commons.lang3.StringUtils;
import org.nuxeo.ecm.core.api.LifeCycleConstants;
import org.nuxeo.ecm.core.query.QueryParseException;
import org.nuxeo.ecm.core.query.sql.NXQL;
import org.nuxeo.ecm.core.query.sql.model.BooleanLiteral;
import org.nuxeo.ecm.core.query.sql.model.DateLiteral;
import org.nuxeo.ecm.core.query.sql.model.DoubleLiteral;
import org.nuxeo.ecm.core.query.sql.model.Expression;
import org.nuxeo.ecm.core.query.sql.model.Function;
import org.nuxeo.ecm.core.query.sql.model.IntegerLiteral;
import org.nuxeo.ecm.core.query.sql.model.Literal;
import org.nuxeo.ecm.core.query.sql.model.LiteralList;
import org.nuxeo.ecm.core.query.sql.model.MultiExpression;
import org.nuxeo.ecm.core.query.sql.model.Operand;
import org.nuxeo.ecm.core.query.sql.model.Operator;
import org.nuxeo.ecm.core.query.sql.model.Predicate;
import org.nuxeo.ecm.core.query.sql.model.Reference;
import org.nuxeo.ecm.core.query.sql.model.StringLiteral;
import org.nuxeo.ecm.core.trash.TrashService;
import org.nuxeo.runtime.api.Framework;

import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;

/**
 * Evaluator for an {@link Expression}.
 * <p>
 * The evaluator walks the expression tree and computes its value. Boolean results use three-valued (ternary) logic:
 * {@code TRUE}, {@code FALSE}, or {@code null} for "unknown". Subclasses provide the actual values of references
 * through {@link #walkReference(Reference)} and the mixin matching through {@link #walkMixinTypes(List, boolean)}.
 *
 * @since 5.9.4
 */
public abstract class ExpressionEvaluator {

    /** pseudo NXQL to resolve ancestor ids. */
    public static final String NXQL_ECM_ANCESTOR_IDS = "ecm:__ancestorIds";

    /** pseudo NXQL to resolve internal path. */
    public static final String NXQL_ECM_PATH = "ecm:__path";

    /** pseudo NXQL to resolve read acls. */
    public static final String NXQL_ECM_READ_ACL = "ecm:__read_acl";

    /** pseudo NXQL to resolve the simple fulltext string. */
    public static final String NXQL_ECM_FULLTEXT_SIMPLE = "ecm:__fulltextSimple";

    /** pseudo NXQL to resolve the binary fulltext string. */
    public static final String NXQL_ECM_FULLTEXT_BINARY = "ecm:__fulltextBinary";

    protected static final String DATE_CAST = "DATE";

    protected static final String PHRASE_QUOTE = "\"";

    protected static final String NEG_PHRASE_QUOTE = "-\"";

    protected static final String OR = "or";

    /**
     * Interface for a class that knows how to resolve a path into an id.
     */
    public interface PathResolver {
        /**
         * Returns the id for a given path.
         *
         * @param path the path
         * @return the id, or {@code null} if not found
         */
        String getIdForPath(String path);
    }

    /** Resolver used to turn paths into ids for {@code ecm:path} comparisons. */
    public final PathResolver pathResolver;

    /** The principals passed at construction, or {@code null} if none were passed. */
    public final Set<String> principals;

    /** When {@code true}, any fulltext expression is rejected with a {@link QueryParseException}. */
    public final boolean fulltextSearchDisabled;

    /** Set to {@code true} once a standard {@code ecm:fulltext} expression has been evaluated. */
    public boolean hasFulltext;

    /**
     * Creates an evaluator.
     *
     * @param pathResolver the resolver from path to id
     * @param principals the principals, may be {@code null}
     * @param fulltextSearchDisabled whether fulltext expressions must be rejected
     */
    public ExpressionEvaluator(PathResolver pathResolver, String[] principals, boolean fulltextSearchDisabled) {
        this.pathResolver = pathResolver;
        this.principals = principals == null ? null : new HashSet<>(Arrays.asList(principals));
        this.fulltextSearchDisabled = fulltextSearchDisabled;
    }

    /**
     * Evaluates an expression by dispatching on its left-hand reference name (for the special {@code ecm:} properties)
     * and then on its operator.
     *
     * @param expr the expression
     * @return the value of the expression
     * @throws QueryParseException if the operator is unknown or the expression is malformed
     * @throws UnsupportedOperationException for arithmetic operators (SUM, SUB, MUL, DIV)
     */
    public Object walkExpression(Expression expr) {
        Operator op = expr.operator;
        Operand lvalue = expr.lvalue;
        Operand rvalue = expr.rvalue;
        Reference ref = lvalue instanceof Reference ? (Reference) lvalue : null;
        String name = ref != null ? ref.name : null;
        String cast = ref != null ? ref.cast : null;
        if (DATE_CAST.equals(cast)) {
            checkDateLiteralForCast(rvalue, name);
        }
        if (op == Operator.STARTSWITH) {
            return walkStartsWith(lvalue, rvalue);
        } else if (NXQL.ECM_PATH.equals(name)) {
            return walkEcmPath(op, rvalue);
        } else if (NXQL.ECM_ANCESTORID.equals(name)) {
            return walkAncestorId(op, rvalue);
        } else if (NXQL.ECM_ISTRASHED.equals(name)) {
            return walkIsTrashed(op, rvalue);
        } else if (name != null && name.startsWith(NXQL.ECM_FULLTEXT) && !NXQL.ECM_FULLTEXT_JOBID.equals(name)) {
            return walkEcmFulltext(name, op, rvalue);
        } else if (op == Operator.SUM) {
            throw new UnsupportedOperationException("SUM");
        } else if (op == Operator.SUB) {
            throw new UnsupportedOperationException("SUB");
        } else if (op == Operator.MUL) {
            throw new UnsupportedOperationException("MUL");
        } else if (op == Operator.DIV) {
            throw new UnsupportedOperationException("DIV");
        } else if (op == Operator.LT) {
            return walkLt(lvalue, rvalue);
        } else if (op == Operator.GT) {
            return walkGt(lvalue, rvalue);
        } else if (op == Operator.EQ) {
            return walkEq(lvalue, rvalue);
        } else if (op == Operator.NOTEQ) {
            return walkNotEq(lvalue, rvalue);
        } else if (op == Operator.LTEQ) {
            return walkLtEq(lvalue, rvalue);
        } else if (op == Operator.GTEQ) {
            return walkGtEq(lvalue, rvalue);
        } else if (op == Operator.AND) {
            if (expr instanceof MultiExpression) {
                return walkMultiExpression((MultiExpression) expr);
            } else {
                return walkAnd(lvalue, rvalue);
            }
        } else if (op == Operator.NOT) {
            return walkNot(lvalue);
        } else if (op == Operator.OR) {
            return walkOr(lvalue, rvalue);
        } else if (op == Operator.LIKE) {
            return walkLike(lvalue, rvalue, true, false);
        } else if (op == Operator.ILIKE) {
            return walkLike(lvalue, rvalue, true, true);
        } else if (op == Operator.NOTLIKE) {
            return walkLike(lvalue, rvalue, false, false);
        } else if (op == Operator.NOTILIKE) {
            return walkLike(lvalue, rvalue, false, true);
        } else if (op == Operator.IN) {
            return walkIn(lvalue, rvalue, true);
        } else if (op == Operator.NOTIN) {
            return walkIn(lvalue, rvalue, false);
        } else if (op == Operator.ISNULL) {
            return walkIsNull(lvalue);
        } else if (op == Operator.ISNOTNULL) {
            return walkIsNotNull(lvalue);
        } else if (op == Operator.BETWEEN) {
            return walkBetween(lvalue, rvalue, true);
        } else if (op == Operator.NOTBETWEEN) {
            return walkBetween(lvalue, rvalue, false);
        } else {
            throw new QueryParseException("Unknown operator: " + op);
        }
    }

    /**
     * Checks that a value compared to a {@code DATE()}-cast reference is not a TIMESTAMP literal.
     *
     * @param value the right-hand operand
     * @param name the name of the cast reference, used in the error message
     * @throws QueryParseException if the value is a date literal that is not date-only
     */
    protected void checkDateLiteralForCast(Operand value, String name) {
        if (value instanceof DateLiteral && !((DateLiteral) value).onlyDate) {
            throw new QueryParseException("DATE() cast must be used with DATE literal, not TIMESTAMP: " + name);
        }
    }

    /**
     * Evaluates {@code ecm:path = 'path'} or {@code ecm:path <> 'path'} by resolving the path to an id and comparing
     * it to the document id.
     *
     * @return {@code FALSE} if the path does not resolve, otherwise the (possibly negated) comparison result
     * @throws QueryParseException if the operator is not = or <>, or the right operand is not a string literal
     */
    protected Boolean walkEcmPath(Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.NOTEQ) {
            throw new QueryParseException(NXQL.ECM_PATH + " requires = or <> operator");
        }
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(NXQL.ECM_PATH + " requires literal path as right argument");
        }
        String path = stripTrailingSlash(((StringLiteral) rvalue).value);
        String id = pathResolver.getIdForPath(path);
        // the reference is walked before the null check so that references are always walked the same way
        Object right = walkReference(new Reference(NXQL.ECM_UUID));
        if (id == null) {
            return FALSE;
        }
        Boolean eq = eq(id, right);
        return op == Operator.EQ ? eq : not(eq);
    }

    /**
     * Evaluates {@code ecm:ancestorId = 'id'} or {@code ecm:ancestorId <> 'id'} against the ancestor ids of the
     * document.
     *
     * @throws QueryParseException if the operator is not = or <>, or the right operand is not a string literal
     */
    protected Boolean walkAncestorId(Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.NOTEQ) {
            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires = or <> operator");
        }
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(NXQL.ECM_ANCESTORID + " requires literal id as right argument");
        }
        String ancestorId = ((StringLiteral) rvalue).value;
        Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS));
        boolean found = false;
        // null ancestor ids means placeless: the ancestor is never found
        if (ancestorIds != null) {
            for (Object id : ancestorIds) {
                if (ancestorId.equals(id)) {
                    found = true;
                    break;
                }
            }
        }
        boolean positive = op == Operator.EQ;
        return Boolean.valueOf(found == positive);
    }

    /**
     * Evaluates a fulltext expression, either on the standard {@code ecm:fulltext} index or on a secondary index
     * designated as {@code ecm:fulltext.property}, in which case a case-insensitive LIKE on the property is done.
     *
     * @throws QueryParseException if the operator or right operand is invalid, if fulltext search is disabled, or if
     *             the secondary index syntax is incorrect
     */
    protected Boolean walkEcmFulltext(String name, Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.LIKE) {
            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires = or LIKE operator");
        }
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(NXQL.ECM_FULLTEXT + " requires literal string as right argument");
        }
        if (fulltextSearchDisabled) {
            throw new QueryParseException("Fulltext search disabled by configuration");
        }
        String query = ((StringLiteral) rvalue).value;
        if (name.equals(NXQL.ECM_FULLTEXT)) {
            // standard fulltext query
            hasFulltext = true;
            String simple = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_SIMPLE));
            String binary = (String) walkReference(new Reference(NXQL_ECM_FULLTEXT_BINARY));
            return fulltext(simple, binary, query);
        } else {
            // secondary index match with explicit field
            // do a regexp on the field
            if (name.charAt(NXQL.ECM_FULLTEXT.length()) != '.') {
                throw new QueryParseException(name + " has incorrect syntax for a secondary fulltext index");
            }
            String prop = name.substring(NXQL.ECM_FULLTEXT.length() + 1);
            String ft = query.replace(" ", "%");
            return walkLike(new Reference(prop), new StringLiteral(ft), true, true);
        }
    }

    /**
     * Evaluates {@code ecm:isTrashed = 0|1} (or {@code <>}) by rewriting it as a comparison on either the dedicated
     * trashed property or the lifecycle state, depending on the {@link TrashService} feature.
     *
     * @throws QueryParseException if the operator is not = or <>, or the right operand is not the literal 0 or 1
     */
    protected Boolean walkIsTrashed(Operator op, Operand rvalue) {
        if (op != Operator.EQ && op != Operator.NOTEQ) {
            throw new QueryParseException(NXQL.ECM_ISTRASHED + " requires = or <> operator");
        }
        if (!(rvalue instanceof IntegerLiteral)) {
            throw new QueryParseException(NXQL.ECM_ISTRASHED + " requires literal 0 or 1 as right argument");
        }
        long v = ((IntegerLiteral) rvalue).value;
        if (v != 0 && v != 1) {
            throw new QueryParseException(NXQL.ECM_ISTRASHED + " requires literal 0 or 1 as right argument");
        }
        Reference ref;
        Literal val;
        TrashService trashService = Framework.getService(TrashService.class);
        if (trashService.hasFeature(TRASHED_STATE_IS_DEDICATED_PROPERTY)) {
            ref = new Reference(NXQL.ECM_ISTRASHED);
            val = new BooleanLiteral(true); // give true to match equalsDeleted mechanism
        } else {
            ref = new Reference(NXQL.ECM_LIFECYCLESTATE);
            val = new StringLiteral(LifeCycleConstants.DELETED_STATE);
        }
        // "= 1" and "<> 0" both mean "is trashed"
        boolean equalsDeleted = (op == Operator.EQ) ^ (v == 0);
        if (equalsDeleted) {
            return walkEq(ref, val);
        } else {
            return walkNotEq(ref, val);
        }
    }

    /** Evaluates the ternary negation of an operand. */
    public Boolean walkNot(Operand value) {
        return not(bool(walkOperand(value)));
    }

    /** Returns whether the operand evaluates to {@code null}. */
    public Boolean walkIsNull(Operand value) {
        return Boolean.valueOf(walkOperand(value) == null);
    }

    /** Returns whether the operand evaluates to something other than {@code null}. */
    public Boolean walkIsNotNull(Operand value) {
        return Boolean.valueOf(walkOperand(value) != null);
    }

    /**
     * Evaluates the ternary conjunction of all the values of a multi-expression.
     *
     * @return the conjunction, {@code TRUE} if there are no values
     */
    // ternary logic
    public Boolean walkMultiExpression(MultiExpression expr) {
        Boolean res = TRUE;
        for (Operand value : expr.values) {
            Boolean bool = bool(walkOperand(value));
            // don't short-circuit on null, we want to walk all references deterministically
            res = and(res, bool);
        }
        return res;
    }

    /** Evaluates the ternary conjunction of two operands; both operands are always walked. */
    public Boolean walkAnd(Operand lvalue, Operand rvalue) {
        Boolean left = bool(walkOperand(lvalue));
        Boolean right = bool(walkOperand(rvalue));
        return and(left, right);
    }

    /** Evaluates the ternary disjunction of two operands; both operands are always walked. */
    public Boolean walkOr(Operand lvalue, Operand rvalue) {
        Boolean left = bool(walkOperand(lvalue));
        Boolean right = bool(walkOperand(rvalue));
        return or(left, right);
    }

    /**
     * Evaluates an equality. Mixin types are delegated to {@link #walkMixinTypes(List, boolean)}; if the left value is
     * an array, the result is true if any element is equal.
     *
     * @throws QueryParseException if the left operand is the mixin types and the right value is not a string
     */
    public Boolean walkEq(Operand lvalue, Operand rvalue) {
        Object right = walkOperand(rvalue);
        if (isMixinTypes(lvalue)) {
            if (!(right instanceof String)) {
                throw new QueryParseException("Invalid EQ rhs: " + rvalue);
            }
            return walkMixinTypes(Collections.singletonList((String) right), true);
        }
        Object left = walkOperand(lvalue);
        return eqMaybeList(left, right);
    }

    /**
     * Evaluates an inequality, as the ternary negation of {@link #walkEq(Operand, Operand)} except for mixin types
     * which are delegated to {@link #walkMixinTypes(List, boolean)}.
     *
     * @throws QueryParseException if the left operand is the mixin types and the right value is not a string
     */
    public Boolean walkNotEq(Operand lvalue, Operand rvalue) {
        if (isMixinTypes(lvalue)) {
            Object right = walkOperand(rvalue);
            if (!(right instanceof String)) {
                throw new QueryParseException("Invalid NE rhs: " + rvalue);
            }
            return walkMixinTypes(Collections.singletonList((String) right), false);
        }
        return not(walkEq(lvalue, rvalue));
    }

    /** Evaluates {@code lvalue < rvalue}, {@code null} if either side is {@code null}. */
    public Boolean walkLt(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : cmp < 0;
    }

    /** Evaluates {@code lvalue > rvalue}, {@code null} if either side is {@code null}. */
    public Boolean walkGt(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : cmp > 0;
    }

    /** Evaluates {@code lvalue <= rvalue}, {@code null} if either side is {@code null}. */
    public Boolean walkLtEq(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : cmp <= 0;
    }

    /** Evaluates {@code lvalue >= rvalue}, {@code null} if either side is {@code null}. */
    public Boolean walkGtEq(Operand lvalue, Operand rvalue) {
        Integer cmp = cmp(lvalue, rvalue);
        return cmp == null ? null : cmp >= 0;
    }

    /**
     * Evaluates a BETWEEN by rewriting it as {@code lvalue >= a AND lvalue <= b}, negated if not positive.
     *
     * @param rvalue a literal list whose first two elements are the bounds
     * @param positive {@code true} for BETWEEN, {@code false} for NOT BETWEEN
     * @throws QueryParseException if the right operand is not a literal list of at least two elements
     */
    public Object walkBetween(Operand lvalue, Operand rvalue, boolean positive) {
        if (!(rvalue instanceof LiteralList) || ((LiteralList) rvalue).size() < 2) {
            // fail like the other walkers do instead of with a ClassCastException / IndexOutOfBoundsException
            throw new QueryParseException("Invalid BETWEEN rhs: " + rvalue);
        }
        LiteralList l = (LiteralList) rvalue;
        Predicate va = new Predicate(lvalue, Operator.GTEQ, l.get(0));
        Predicate vb = new Predicate(lvalue, Operator.LTEQ, l.get(1));
        Predicate pred = new Predicate(va, Operator.AND, vb);
        if (!positive) {
            pred = new Predicate(pred, Operator.NOT, null);
        }
        return walkExpression(pred);
    }

    /**
     * Evaluates an IN or NOT IN. Mixin types are delegated to {@link #walkMixinTypes(List, boolean)}.
     *
     * @param positive {@code true} for IN, {@code false} for NOT IN
     * @throws QueryParseException if the right operand does not evaluate to a list
     */
    // casts are on the list built by walkLiteralList from the literals of the query
    @SuppressWarnings("unchecked")
    public Boolean walkIn(Operand lvalue, Operand rvalue, boolean positive) {
        Object right = walkOperand(rvalue);
        if (!(right instanceof List)) {
            throw new QueryParseException("Invalid IN rhs: " + rvalue);
        }
        if (isMixinTypes(lvalue)) {
            return walkMixinTypes((List<String>) right, positive);
        }
        Object left = walkOperand(lvalue);
        Boolean in = inMaybeList(left, (List<Object>) right);
        return positive ? in : not(in);
    }

    /**
     * Evaluates an operand by dispatching on its type.
     *
     * @throws QueryParseException if the operand type is unknown
     */
    public Object walkOperand(Operand op) {
        if (op instanceof Literal) {
            return walkLiteral((Literal) op);
        } else if (op instanceof LiteralList) {
            return walkLiteralList((LiteralList) op);
        } else if (op instanceof Function) {
            return walkFunction((Function) op);
        } else if (op instanceof Expression) {
            return walkExpression((Expression) op);
        } else if (op instanceof Reference) {
            return walkReference((Reference) op);
        } else {
            throw new QueryParseException("Unknown operand: " + op);
        }
    }

    /**
     * Evaluates a literal by dispatching on its type.
     *
     * @throws QueryParseException if the literal type is unknown
     */
    public Object walkLiteral(Literal lit) {
        if (lit instanceof BooleanLiteral) {
            return walkBooleanLiteral((BooleanLiteral) lit);
        } else if (lit instanceof DateLiteral) {
            return walkDateLiteral((DateLiteral) lit);
        } else if (lit instanceof DoubleLiteral) {
            return walkDoubleLiteral((DoubleLiteral) lit);
        } else if (lit instanceof IntegerLiteral) {
            return walkIntegerLiteral((IntegerLiteral) lit);
        } else if (lit instanceof StringLiteral) {
            return walkStringLiteral((StringLiteral) lit);
        } else {
            throw new QueryParseException("Unknown literal: " + lit);
        }
    }

    /** Returns the value of a boolean literal. */
    public Boolean walkBooleanLiteral(BooleanLiteral lit) {
        return Boolean.valueOf(lit.value);
    }

    /**
     * Returns the calendar for a date literal. For a date-only literal, the time fields (hour, minute, second,
     * millisecond) are reset to zero.
     */
    public Calendar walkDateLiteral(DateLiteral lit) {
        Calendar date = lit.toCalendar();
        if (lit.onlyDate && date != null) {
            date.set(Calendar.HOUR_OF_DAY, 0);
            date.set(Calendar.MINUTE, 0);
            date.set(Calendar.SECOND, 0);
            date.set(Calendar.MILLISECOND, 0);
        }
        return date;
    }

    /** Returns the value of a double literal. */
    public Double walkDoubleLiteral(DoubleLiteral lit) {
        return Double.valueOf(lit.value);
    }

    /** Returns the value of an integer literal, as a {@link Long}. */
    public Long walkIntegerLiteral(IntegerLiteral lit) {
        return Long.valueOf(lit.value);
    }

    /** Returns the value of a string literal. */
    public String walkStringLiteral(StringLiteral lit) {
        return lit.value;
    }

    /** Returns the list of the values of the literals of a literal list. */
    public List<Object> walkLiteralList(LiteralList litList) {
        List<Object> list = new ArrayList<>(litList.size());
        for (Literal lit : litList) {
            list.add(walkLiteral(lit));
        }
        return list;
    }

    /**
     * Evaluates a LIKE, ILIKE, NOT LIKE or NOT ILIKE.
     *
     * @param positive {@code false} for the NOT variants
     * @param caseInsensitive {@code true} for the ILIKE variants
     * @throws QueryParseException if the right operand does not evaluate to a string
     */
    public Boolean walkLike(Operand lvalue, Operand rvalue, boolean positive, boolean caseInsensitive) {
        Object left = walkOperand(lvalue);
        Object right = walkOperand(rvalue);
        if (!(right instanceof String)) {
            throw new QueryParseException("Invalid LIKE rhs: " + rvalue);
        }
        return likeMaybeList(left, (String) right, positive, caseInsensitive);
    }

    /**
     * Evaluates a function. This implementation does not support functions.
     *
     * @throws UnsupportedOperationException always
     */
    public Object walkFunction(Function func) {
        throw new UnsupportedOperationException("Function");
    }

    /**
     * Evaluates a STARTSWITH. For {@code ecm:path} the ancestors of the document are checked, for other properties a
     * string prefix match on path segments is done.
     *
     * @throws QueryParseException if the left operand is not a reference or the right operand is not a string literal
     */
    public Boolean walkStartsWith(Operand lvalue, Operand rvalue) {
        if (!(lvalue instanceof Reference)) {
            throw new QueryParseException("Invalid STARTSWITH query, left hand side must be a property: " + lvalue);
        }
        String name = ((Reference) lvalue).name;
        if (!(rvalue instanceof StringLiteral)) {
            throw new QueryParseException(
                    "Invalid STARTSWITH query, right hand side must be a literal path: " + rvalue);
        }
        String path = stripTrailingSlash(((StringLiteral) rvalue).value);

        if (NXQL.ECM_PATH.equals(name)) {
            return walkStartsWithPath(path);
        } else {
            return walkStartsWithNonPath(lvalue, path);
        }
    }

    /**
     * Returns whether the id resolved from the given path is one of the ancestor ids of the document.
     *
     * @return {@code FALSE} if the path does not resolve or the document has no ancestor ids
     */
    protected Boolean walkStartsWithPath(String path) {
        // resolve path
        String ancestorId = pathResolver.getIdForPath(path);
        // don't return early on null ancestorId, we want to walk all references deterministically
        Object[] ancestorIds = (Object[]) walkReference(new Reference(NXQL_ECM_ANCESTOR_IDS));
        if (ancestorId == null) {
            // no such path
            return FALSE;
        }
        if (ancestorIds == null) {
            // placeless
            return FALSE;
        }
        for (Object id : ancestorIds) {
            if (ancestorId.equals(id)) {
                return TRUE;
            }
        }
        return FALSE;
    }

    /**
     * Returns whether the value of the reference is equal to the path, or starts with the path followed by a slash.
     */
    protected Boolean walkStartsWithNonPath(Operand lvalue, String path) {
        Object left = walkReference((Reference) lvalue);
        // exact match
        Boolean bool = eqMaybeList(left, path);
        if (TRUE.equals(bool)) {
            return TRUE;
        }
        // prefix match; the path is escaped so that %, _ and \ in it are matched literally
        String pattern = escapeLike(path) + "/%";
        return likeMaybeList(left, pattern, true, false);
    }

    /** Removes the trailing slash of a path, unless the path is the single slash. */
    private static String stripTrailingSlash(String path) {
        if (path.length() > 1 && path.endsWith("/")) {
            return path.substring(0, path.length() - 1);
        }
        return path;
    }

    /** Escapes the LIKE special characters (%, _ and \) with a backslash, as understood by {@link #likeToRegex}. */
    private static String escapeLike(String string) {
        StringBuilder buf = new StringBuilder(string.length() + 2);
        for (int i = 0; i < string.length(); i++) {
            char c = string.charAt(i);
            if (c == '\\' || c == '%' || c == '_') {
                buf.append('\\');
            }
            buf.append(c);
        }
        return buf.toString();
    }

    /**
     * Evaluates a reference over the context state.
     *
     * @param ref the reference
     */
    public abstract Object walkReference(Reference ref);

    /** Returns whether the operand is a reference to the mixin types. */
    protected boolean isMixinTypes(Operand op) {
        if (!(op instanceof Reference)) {
            return false;
        }
        return NXQL.ECM_MIXINTYPE.equals(((Reference) op).name);
    }

    /**
     * Casts a value to a {@link Boolean}.
     *
     * @return the boolean, or {@code null} if the value is {@code null}
     * @throws QueryParseException if the value is neither {@code null} nor a boolean
     */
    protected Boolean bool(Object value) {
        if (value == null) {
            return null;
        }
        if (!(value instanceof Boolean)) {
            throw new QueryParseException("Not a boolean: " + value);
        }
        return (Boolean) value;
    }

    /** Ternary NOT: the negation of {@code null} (unknown) is {@code null}. */
    // ternary logic
    protected Boolean not(Boolean value) {
        if (value == null) {
            return null;
        }
        return !value;
    }

    /**
     * Ternary AND: {@code FALSE} if either side is {@code FALSE}, otherwise {@code null} if either side is
     * {@code null} (unknown), otherwise {@code TRUE}.
     */
    // ternary logic
    protected Boolean and(Boolean left, Boolean right) {
        if (FALSE.equals(left) || FALSE.equals(right)) {
            // FALSE wins even against unknown
            return FALSE;
        }
        if (left == null || right == null) {
            return null;
        }
        return TRUE;
    }

    /**
     * Ternary OR: {@code TRUE} if either side is {@code TRUE}, otherwise {@code null} if either side is {@code null}
     * (unknown), otherwise {@code FALSE}.
     */
    // ternary logic
    protected Boolean or(Boolean left, Boolean right) {
        if (TRUE.equals(left) || TRUE.equals(right)) {
            // TRUE wins even against unknown
            return TRUE;
        }
        if (left == null || right == null) {
            return null;
        }
        return FALSE;
    }

    /**
     * Ternary equality: {@code null} if either side is {@code null}. Calendars are compared by their time in
     * milliseconds.
     */
    // ternary logic
    protected Boolean eq(Object left, Object right) {
        if (left == null || right == null) {
            return null;
        }
        if (left instanceof Calendar && right instanceof Calendar) {
            // avoid timezone issues (NXP-20260)
            return ((Calendar) left).getTimeInMillis() == ((Calendar) right).getTimeInMillis();
        }
        return left.equals(right);
    }

    /**
     * Ternary IN: {@code null} if the left value is {@code null}, {@code TRUE} if it is equal (in the sense of
     * {@link #eq(Object, Object)}) to one of the values, otherwise {@code null} if one of the values is {@code null},
     * otherwise {@code FALSE}.
     */
    // ternary logic
    protected Boolean in(Object left, List<Object> right) {
        if (left == null) {
            return null;
        }
        boolean hasNull = false;
        for (Object r : right) {
            if (r == null) {
                hasNull = true;
            } else if (TRUE.equals(eq(left, r))) {
                // eq() is used instead of equals() so that Calendars get the same treatment as for =
                return TRUE;
            }
        }
        return hasNull ? null : FALSE;
    }

    /** Compares the values of two operands, see {@link #cmp(Object, Object)}. */
    protected Integer cmp(Operand lvalue, Operand rvalue) {
        Object left = walkOperand(lvalue);
        Object right = walkOperand(rvalue);
        return cmp(left, right);
    }

    /**
     * Ternary comparison.
     *
     * @return the result of {@code left.compareTo(right)}, or {@code null} if either side is {@code null}
     * @throws QueryParseException if the left value is not a {@link Comparable}
     */
    // ternary logic
    // the cast only serves to call compareTo, which does its own type check on the argument
    @SuppressWarnings("unchecked")
    protected Integer cmp(Object left, Object right) {
        if (left == null || right == null) {
            return null;
        }
        if (!(left instanceof Comparable)) {
            throw new QueryParseException("Not a comparable: " + left);
        }
        return ((Comparable<Object>) left).compareTo(right);
    }

    /**
     * Ternary LIKE.
     *
     * @param left the value, which must be a string
     * @param right the LIKE pattern
     * @param caseInsensitive whether both sides are lowercased before matching
     * @return whether the whole value matches the pattern, or {@code null} if either side is {@code null}
     * @throws QueryParseException if the left value is not a string
     */
    // ternary logic
    protected Boolean like(Object left, String right, boolean caseInsensitive) {
        if (left == null || right == null) {
            return null;
        }
        if (!(left instanceof String)) {
            throw new QueryParseException("Invalid LIKE lhs: " + left);
        }
        String value = (String) left;
        if (caseInsensitive) {
            value = value.toLowerCase();
            right = right.toLowerCase();
        }
        String regex = likeToRegex(right);
        // DOTALL so that the % and _ wildcards also match line terminators in multi-line values
        return Pattern.compile(regex, Pattern.DOTALL).matcher(value).matches();
    }

    /**
     * Turns a NXQL LIKE pattern into a regex.
     * <p>
     * % and _ are standard wildcards, and \ escapes them.
     *
     * @since 7.4
     */
    public static String likeToRegex(String like) {
        StringBuilder regex = new StringBuilder();
        char[] chars = like.toCharArray();
        boolean escape = false;
        for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            boolean escapeNext = false;
            switch (c) {
            case '%':
                if (escape) {
                    regex.append(c);
                } else {
                    regex.append(".*");
                }
                break;
            case '_':
                if (escape) {
                    regex.append(c);
                } else {
                    regex.append(".");
                }
                break;
            case '\\':
                if (escape) {
                    regex.append("\\\\"); // backslash escaped for regexp
                } else {
                    escapeNext = true;
                }
                break;
            default:
                // escape mostly everything just in case
                if (!CharUtils.isAsciiAlphanumeric(c)) {
                    regex.append("\\");
                }
                regex.append(c);
                break;
            }
            escape = escapeNext;
        }
        // if escape is still set here, the string was terminated by an escape character: it is ignored
        return regex.toString();
    }

    /**
     * Equality where the left value may be an array, in which case the result is {@code TRUE} if any element is equal
     * and {@code FALSE} otherwise.
     */
    // if list, use EXIST (SELECT 1 FROM left WHERE left.item = right)
    protected Boolean eqMaybeList(Object left, Object right) {
        if (left instanceof Object[]) {
            for (Object l : ((Object[]) left)) {
                Boolean eq = eq(l, right);
                if (TRUE.equals(eq)) {
                    return TRUE;
                }
            }
            return FALSE;
        } else {
            return eq(left, right);
        }
    }

    /**
     * IN where the left value may be an array, in which case the result is {@code TRUE} if any element is in the
     * values and {@code FALSE} otherwise.
     */
    // if list, use EXIST (SELECT 1 FROM left WHERE left.item IN right)
    protected Boolean inMaybeList(Object left, List<Object> right) {
        if (left instanceof Object[]) {
            for (Object l : ((Object[]) left)) {
                Boolean in = in(l, right);
                if (TRUE.equals(in)) {
                    return TRUE;
                }
            }
            return FALSE;
        } else {
            return in(left, right);
        }
    }

    /**
     * LIKE where the left value may be an array, in which case the match is true if any element matches.
     *
     * @param positive {@code false} to negate the match
     * @param caseInsensitive whether the match is case-insensitive
     */
    protected Boolean likeMaybeList(Object left, String right, boolean positive, boolean caseInsensitive) {
        if (left instanceof Object[]) {
            for (Object l : ((Object[]) left)) {
                Boolean like = like(l, right, caseInsensitive);
                if (TRUE.equals(like)) {
                    return Boolean.valueOf(positive);
                }
            }
            return Boolean.valueOf(!positive);
        } else {
            Boolean like = like(left, right, caseInsensitive);
            return positive ? like : not(like);
        }
    }

    /**
     * Matches the mixin types against a list of values.
     * <p>
     * Used for:
     * <ul>
     * <li>ecm:mixinTypes = 'foo'
     * <li>ecm:mixinTypes != 'foo'
     * <li>ecm:mixinTypes IN ('foo', 'bar')
     * <li>ecm:mixinTypes NOT IN ('foo', 'bar')
     * </ul>
     *
     * @param mixins the mixin(s) to match
     * @param include {@code true} for = and IN
     * @since 7.4
     */
    public abstract Boolean walkMixinTypes(List<String> mixins, boolean include);

    /*
     * ----- simple parsing, don't try to be exhaustive -----
     */

    /** Separator between words: any run of whitespace and punctuation. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[\\s\\p{Punct}]+");

    /** Replacement characters for the characters U+00E0 to U+00FF, indexed by their offset from U+00E0. */
    private static final String UNACCENTED = "aaaaaaaceeeeiiii\u00f0nooooo\u00f7ouuuuy\u00fey";

    private static final String STOP_WORDS_STR = "a an are and as at be by for from how " //
            + "i in is it of on or that the this to was what when where who will with " //
            + "car donc est il ils je la le les mais ni nous or ou pour tu un une vous " //
            + "www com net org";

    private static final Set<String> STOP_WORDS = new HashSet<>(Arrays.asList(StringUtils.split(STOP_WORDS_STR, ' ')));

    /**
     * Checks if the fulltext combination of string1 and string2 matches the query expression.
     *
     * @return {@code null} if the query is {@code null} or both strings are {@code null}, {@code FALSE} if the query
     *         has no words, otherwise whether the query matches
     */
    protected static Boolean fulltext(String string1, String string2, String queryString) {
        if (queryString == null || (string1 == null && string2 == null)) {
            return null;
        }
        // query
        List<String> query = new ArrayList<>();
        String phrase = null; // the quoted phrase being accumulated, if any
        int phraseWordCount = 1;
        int maxPhraseWordCount = 1; // maximum number of words in a phrase
        for (String word : StringUtils.split(queryString.toLowerCase(), ' ')) {
            if (WORD_PATTERN.matcher(word).matches()) {
                // only whitespace and punctuation
                continue;
            }
            if (phrase != null) {
                if (word.endsWith(PHRASE_QUOTE)) {
                    // end of phrase
                    phrase += " " + word.substring(0, word.length() - 1);
                    query.add(phrase);
                    phraseWordCount++;
                    if (maxPhraseWordCount < phraseWordCount) {
                        maxPhraseWordCount = phraseWordCount;
                    }
                    phrase = null;
                    phraseWordCount = 1;
                } else {
                    phrase += " " + word;
                    phraseWordCount++;
                }
            } else {
                if (word.startsWith(PHRASE_QUOTE)) {
                    phrase = word.substring(1);
                } else if (word.startsWith(NEG_PHRASE_QUOTE)) {
                    phrase = "-" + word.substring(2);
                } else {
                    if (word.startsWith("+")) {
                        word = word.substring(1);
                    }
                    query.add(word);
                }
            }
        }
        if (query.isEmpty()) {
            return FALSE;
        }
        // fulltext
        Set<String> fulltext = new HashSet<>();
        fulltext.addAll(parseFullText(string1, maxPhraseWordCount));
        fulltext.addAll(parseFullText(string2, maxPhraseWordCount));

        return Boolean.valueOf(fulltext(fulltext, query));
    }

    /**
     * Parses a string into the set of its normalized words and, if phraseSize is greater than 1, of its phrases of at
     * most phraseSize consecutive words.
     */
    private static Set<String> parseFullText(String string, int phraseSize) {
        if (string == null) {
            return Collections.emptySet();
        }
        Set<String> set = new HashSet<>();
        Deque<String> phraseWords = new LinkedList<>();
        for (String word : WORD_PATTERN.split(string)) {
            word = parseWord(word);
            if (word != null) {
                word = word.toLowerCase();
                set.add(word);
                if (phraseSize > 1) {
                    // sliding window of the last phraseSize words
                    phraseWords.addLast(word);
                    if (phraseWords.size() > 1) {
                        if (phraseWords.size() > phraseSize) {
                            phraseWords.removeFirst();
                        }
                        addPhraseWords(set, phraseWords);
                    }
                }
            }
        }
        // phrases shorter than the window at the end of the string
        while (phraseWords.size() > 2) {
            phraseWords.removeFirst();
            addPhraseWords(set, phraseWords);
        }
        return set;
    }

    /**
     * Adds to the set all the sub-phrases from the start of the phraseWords.
     */
    private static void addPhraseWords(Set<String> set, Deque<String> phraseWords) {
        String[] array = phraseWords.toArray(new String[0]);
        for (int len = 2; len <= array.length; len++) {
            String phrase = StringUtils.join(array, ' ', 0, len);
            set.add(phrase);
        }
    }

    /**
     * Normalizes a word: lowercases it, replaces the characters U+00E0 to U+00FF and the oe ligature by unaccented
     * equivalents, and removes a final 's' from words longer than 3 characters.
     *
     * @return the normalized word, or {@code null} if the word has less than 3 characters or is a stop word
     */
    private static String parseWord(String string) {
        int len = string.length();
        if (len < 3) {
            return null;
        }
        StringBuilder buf = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            char c = Character.toLowerCase(string.charAt(i));
            if (c == '\u00e6') {
                buf.append("ae");
            } else if (c >= '\u00e0' && c <= '\u00ff') {
                buf.append(UNACCENTED.charAt((c) - 0xe0));
            } else if (c == '\u0153') {
                buf.append("oe");
            } else {
                buf.append(c);
            }
        }
        // simple heuristic to remove plurals
        int l = buf.length();
        if (l > 3 && buf.charAt(l - 1) == 's') {
            buf.setLength(l - 1);
        }
        String word = buf.toString();
        if (STOP_WORDS.contains(word)) {
            return null;
        }
        return word;
    }

    /**
     * Checks whether a single query word matches the fulltext words.
     * <p>
     * A word ending with * or % is a prefix match, a word starting with - is a negative match, any other word is an
     * exact match.
     */
    private static boolean matchesWord(Set<String> fulltext, String word) {
        if (word.endsWith("*") || word.endsWith("%")) {
            // prefix match: only the one-character wildcard suffix is removed
            String prefix = word.substring(0, word.length() - 1);
            for (String candidate : fulltext) {
                if (candidate.startsWith(prefix)) {
                    return true;
                }
            }
            return false;
        }
        if (word.startsWith("-")) {
            return !fulltext.contains(word.substring(1));
        }
        return fulltext.contains(word);
    }

    /**
     * Checks whether the query words match the fulltext words, where OR has a lower precedence than the implicit AND.
     *
     * @param fulltext the words and phrases of the fulltext
     * @param query the query words, where the word {@code or} separates groups of words ANDed together
     */
    // matches "foo OR bar baz" as "foo OR (bar AND baz)"
    protected static boolean fulltext(Set<String> fulltext, List<String> query) {
        boolean andMatch = true;
        for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext();) {
            String word = it.next();
            if (!matchesWord(fulltext, word)) {
                andMatch = false;
            }
            if (it.hasNext() && it.peek().equals(OR)) {
                // end of AND group
                // swallow OR
                it.next();
                // return if the previous AND group matched
                if (andMatch) {
                    return true;
                }
                // else start next AND group
                andMatch = true;
            }
        }
        return andMatch;
    }

    /**
     * Checks whether the query words match the fulltext words, where OR has a higher precedence than the implicit
     * AND.
     *
     * @param fulltext the words and phrases of the fulltext
     * @param query the query words, where the word {@code or} joins alternatives
     */
    // matches "foo OR bar baz" as "(foo OR bar) AND baz"
    protected static boolean fulltext1(Set<String> fulltext, List<String> query) {
        boolean inOr = false; // if we're in a OR group
        boolean orMatch = false; // value of the OR group
        for (PeekingIterator<String> it = Iterators.peekingIterator(query.iterator()); it.hasNext();) {
            String word = it.next();
            if (!inOr && it.hasNext() && it.peek().equals(OR)) {
                // start of a new OR group; not reset when already in one, to keep earlier matches of "a OR b OR c"
                inOr = true;
                orMatch = false;
            }
            boolean match = matchesWord(fulltext, word);
            if (inOr) {
                if (match) {
                    orMatch = true;
                }
                if (it.hasNext() && it.peek().equals(OR)) {
                    // swallow OR and keep going in OR group
                    it.next();
                    continue;
                }
                // finish OR group
                match = orMatch;
                inOr = false;
            }
            if (!match) {
                return false;
            }
        }
        if (inOr) {
            // trailing OR, ignore and finish previous group
            if (!orMatch) {
                return false;
            }
        }
        return true;
    }

}