/*
 * (C) Copyright 2015 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.blob;

import static java.util.stream.Collectors.toList;
import static org.nuxeo.ecm.core.model.Session.PROP_ALLOW_DELETE_UNDELETABLE_DOCUMENTS;

import java.time.Instant;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.function.IntPredicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.DocumentSecurityException;
import org.nuxeo.ecm.core.api.model.PropertyNotFoundException;
import org.nuxeo.ecm.core.api.repository.RepositoryManager;
import org.nuxeo.ecm.core.model.Document;
import org.nuxeo.ecm.core.model.Document.BlobAccessor;
import org.nuxeo.runtime.api.Framework;

/**
 * Default blob dispatcher, that uses the repository name as the blob provider.
 * <p>
 * Alternatively, it can be configured through properties to dispatch to a blob provider based on document properties
 * instead of the repository name.
 * <p>
 * Each property name is a list of comma-separated clauses, and the property value is the id of the blob provider to
 * use when all the clauses match. A clause consists of a property, an operator and a value. The property can be a
 * {@link Document} xpath, {@code ecm:repositoryName}, {@code ecm:path}, {@code ecm:isRecord}, or, to match the current
 * blob being dispatched, {@code blob:name}, {@code blob:mime-type}, {@code blob:encoding}, {@code blob:digest},
 * {@code blob:length} or {@code blob:xpath}.
 * <p>
 * Comma-separated clauses are ANDed together. The first rule whose clauses all match wins. The special name
 * {@code default} defines the default provider, and must be present.
 * <p>
 * Available operators between property and value are {@code =}, {@code !=}, {@code <}, {@code <=}, {@code >},
 * {@code >=}, {@code ~} and {@code ^}.
 * <p>
 * The operators {@code =}, {@code !=}, {@code <}, {@code <=}, {@code >} and {@code >=} work as numeric operators if
 * the property is numeric, otherwise as string comparison operators.
 * <p>
 * The operator {@code ~} does glob matching using {@code ?} to match a single arbitrary character, and {@code *} to
 * match any number of characters (including none). The operator {@code ^} does full regexp matching.
 * <p>
 * For example, to dispatch to the "first" provider if dc:format is "video", to the "second" provider if the blob's
 * MIME type is "video/mp4", to the "third" provider if the blob is stored as a secondary attached file, to the
 * "fourth" provider if the document is in the default repository and its lifecycle state is "approved", to the
 * "fifth" provider if the blob's document is stored under an "images" folder, and otherwise to the "other" provider:
 *
 * <pre>
 * &lt;property name="dc:format=video"&gt;first&lt;/property&gt;
 * &lt;property name="blob:mime-type=video/mp4"&gt;second&lt;/property&gt;
 * &lt;property name="blob:xpath~files/*&#47;file"&gt;third&lt;/property&gt;
 * &lt;property name="ecm:repositoryName=default,ecm:lifeCycleState=approved"&gt;fourth&lt;/property&gt;
 * &lt;property name="ecm:path^.*&#47;images/.*"&gt;fifth&lt;/property&gt;
 * &lt;property name="default"&gt;other&lt;/property&gt;
 * </pre>
 * <p>
 * You can make use of a record blob provider by using:
 *
 * <pre>
 * &lt;property name="records"&gt;records&lt;/property&gt;
 * &lt;property name="default"&gt;other&lt;/property&gt;
 * </pre>
 *
 * @since 7.3
 */
public class DefaultBlobDispatcher implements BlobDispatcher {

    private static final Log log = LogFactory.getLog(DefaultBlobDispatcher.class);

    /** Property name designating the default blob provider. */
    protected static final String NAME_DEFAULT = "default";

    /** Property name that is a shorthand for {@link #RECORDS_CLAUSE}. */
    protected static final String NAME_RECORDS = "records";

    // this is a low-level xpath, without schema prefix
    protected static final String MAIN_BLOB_XPATH = "content";

    // name="records" is equivalent to the following clause:
    protected static final String RECORDS_CLAUSE = "ecm:isRecord=true,blob:xpath=" + MAIN_BLOB_XPATH;

    /** Splits a clause into property (group 1), operator (group 2) and value (group 3). */
    protected static final Pattern NAME_PATTERN = Pattern.compile("(.*?)(=|!=|<=|<|>=|>|~|\\^)(.*)");

    /** Pseudo-property for the repository name. */
    protected static final String REPOSITORY_NAME = "ecm:repositoryName";

    /** Pseudo-property for the document path. */
    protected static final String PATH = "ecm:path";

    /**
     * Pseudo-property for the record state.
     *
     * @since 11.1
     */
    protected static final String IS_RECORD = "ecm:isRecord";

    /** Prefix of the pseudo-properties that refer to the blob being dispatched. */
    protected static final String BLOB_PREFIX = "blob:";

    protected static final String BLOB_NAME = "name";

    protected static final String BLOB_MIME_TYPE = "mime-type";

    protected static final String BLOB_ENCODING = "encoding";

    protected static final String BLOB_DIGEST = "digest";

    protected static final String BLOB_LENGTH = "length";

    protected static final String BLOB_XPATH = "xpath";

    /** Operators usable in a clause. */
    protected enum Op {
        EQ, NEQ, LT, LTE, GT, GTE, GLOB, RE;
    }

    /**
     * A single parsed clause: property, operator and value.
     * <p>
     * The value is a compiled {@link Pattern} for {@link Op#GLOB} and {@link Op#RE}, and the raw string otherwise.
     */
    protected static class Clause {
        public final String xpath;

        public final Op op;

        public final Object value;

        public Clause(String xpath, Op op, Object value) {
            this.xpath = xpath;
            this.op = op;
            this.value = value;
        }
    }

    /** A list of clauses (ANDed together) and the blob provider to use when they all match. */
    protected static class Rule {
        public final List<Clause> clauses;

        public final String providerId;

        public Rule(List<Clause> clauses, String providerId) {
            this.clauses = clauses;
            this.providerId = providerId;
        }
    }

    // default to true when initialize is not called (default instance)
    protected boolean useRepositoryName = true;

    /** Rules parsed from the configuration, tried in order. */
    protected List<Rule> rules;

    /** All the properties referenced by at least one rule clause. */
    protected Set<String> rulesXPaths;

    /** All the blob provider ids found in the configuration, including the default one. */
    protected Set<String> providerIds;

    /** Lazily fetched repository names, used only when dispatching on the repository name. */
    protected List<String> repositoryNames;

    protected String defaultProviderId;

    /**
     * Parses the dispatcher configuration.
     * <p>
     * Each key is either {@code default}, {@code records} or a comma-separated list of clauses; each value is a blob
     * provider id. Clauses that cannot be parsed are logged and dropped, and a key with no valid clause left produces
     * no rule. If there are properties but none of them is {@code default}, an error is logged and the dispatcher
     * falls back to using the repository name.
     *
     * @param properties the configuration properties
     */
    @Override
    public void initialize(Map<String, String> properties) {
        providerIds = new HashSet<>();
        rulesXPaths = new HashSet<>();
        rules = new ArrayList<>();
        for (Entry<String, String> entry : properties.entrySet()) {
            String key = entry.getKey();
            String providerId = entry.getValue();
            providerIds.add(providerId);
            // the "records" shorthand is expanded before anything else
            String clausesString = key.equals(NAME_RECORDS) ? RECORDS_CLAUSE : key;
            if (clausesString.equals(NAME_DEFAULT)) {
                defaultProviderId = providerId;
                continue;
            }
            List<Clause> clauses = Arrays.stream(clausesString.split(","))
                                         .map(this::getClause)
                                         .filter(Objects::nonNull)
                                         .collect(toList());
            if (clauses.isEmpty()) {
                continue;
            }
            rules.add(new Rule(clauses, providerId));
            for (Clause clause : clauses) {
                rulesXPaths.add(clause.xpath);
            }
        }
        if (providerIds.isEmpty()) {
            // nothing configured
            useRepositoryName = true;
        } else if (defaultProviderId == null) {
            log.error("Invalid dispatcher configuration, missing default, configuration will be ignored");
            useRepositoryName = true;
        } else {
            useRepositoryName = false;
        }
    }

    /**
     * Parses one clause of the form {@code property operator value}.
     *
     * @param name the clause text
     * @return the parsed clause, or {@code null} (after logging an error) if the text is not a valid clause
     */
    protected Clause getClause(String name) {
        Matcher matcher = NAME_PATTERN.matcher(name);
        if (!matcher.matches()) {
            log.error("Invalid dispatcher configuration property name: " + name);
            return null;
        }
        String xpath = matcher.group(1);
        String ops = matcher.group(2);
        String operand = matcher.group(3);
        switch (ops) {
        case "=":
            return new Clause(xpath, Op.EQ, operand);
        case "!=":
            return new Clause(xpath, Op.NEQ, operand);
        case "<":
            return new Clause(xpath, Op.LT, operand);
        case "<=":
            return new Clause(xpath, Op.LTE, operand);
        case ">":
            return new Clause(xpath, Op.GT, operand);
        case ">=":
            return new Clause(xpath, Op.GTE, operand);
        case "~":
            // glob and regexp operands are compiled once, at configuration time
            return new Clause(xpath, Op.GLOB, getPatternFromGlob(operand));
        case "^":
            return new Clause(xpath, Op.RE, Pattern.compile(operand));
        default:
            log.error("Invalid dispatcher configuration operator: " + ops);
            return null;
        }
    }

    /**
     * Turns a glob into an equivalent regular expression.
     *
     * @param glob the glob, where {@code ?} matches one character and {@code *} matches any number of characters
     * @return the compiled pattern
     */
    protected Pattern getPatternFromGlob(String glob) {
        // Pattern.quote wraps everything between \Q and \E, so each wildcard is handled by
        // closing the quoting, inserting the corresponding regex, and reopening the quoting
        String quoted = Pattern.quote(glob);
        String singleCharExpanded = quoted.replace("?", "\\E.\\Q");
        String regex = singleCharExpanded.replace("*", "\\E.*\\Q");
        return Pattern.compile(regex);
    }

    /**
     * Returns the ids of the blob providers this dispatcher may dispatch to: the repository names when dispatching on
     * the repository name, otherwise the provider ids found in the configuration.
     */
    @Override
    public Collection<String> getBlobProviderIds() {
        if (!useRepositoryName) {
            return providerIds;
        }
        if (repositoryNames == null) {
            // fetched on first use only
            repositoryNames = Framework.getService(RepositoryManager.class).getRepositoryNames();
        }
        return repositoryNames;
    }

    /**
     * Finds the blob provider for a blob by returning the provider of the first rule whose clauses all match, or the
     * default provider if no rule matches.
     *
     * @param doc the document holding the blob
     * @param blob the blob
     * @param blobXPath the xpath of the blob in the document
     * @return the blob provider id
     */
    protected String getProviderId(Document doc, Blob blob, String blobXPath) {
        if (useRepositoryName) {
            return doc.getRepositoryName();
        }
        for (Rule rule : rules) {
            if (matchesAllClauses(rule, doc, blob, blobXPath)) {
                return rule.providerId;
            }
        }
        return defaultProviderId;
    }

    /** Checks whether every clause of the rule matches; a clause on a missing property never matches. */
    private boolean matchesAllClauses(Rule rule, Document doc, Blob blob, String blobXPath) {
        for (Clause clause : rule.clauses) {
            Object raw;
            try {
                raw = getValue(doc, blob, blobXPath, clause);
            } catch (PropertyNotFoundException e) {
                return false;
            }
            if (!match(convert(raw), clause)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Gets the actual value of the property a clause refers to.
     *
     * @param doc the document holding the blob
     * @param blob the blob
     * @param blobXPath the xpath of the blob in the document
     * @param clause the clause
     * @return the value, which may be {@code null}
     * @throws PropertyNotFoundException if the property is an unknown {@code blob:} pseudo-property or cannot be
     *             found on the document
     */
    protected Object getValue(Document doc, Blob blob, String blobXPath, Clause clause) {
        String xpath = clause.xpath;
        switch (xpath) {
        case REPOSITORY_NAME:
            return doc.getRepositoryName();
        case PATH:
            return doc.getPath();
        case IS_RECORD:
            return doc.isRecord();
        default:
            break;
        }
        if (xpath.startsWith(BLOB_PREFIX)) {
            String blobProperty = xpath.substring(BLOB_PREFIX.length());
            switch (blobProperty) {
            case BLOB_NAME:
                return blob.getFilename();
            case BLOB_MIME_TYPE:
                return blob.getMimeType();
            case BLOB_ENCODING:
                return blob.getEncoding();
            case BLOB_DIGEST:
                return blob.getDigest();
            case BLOB_LENGTH:
                return blob.getLength();
            case BLOB_XPATH:
                return blobXPath;
            default:
                log.error("Invalid dispatcher configuration property name: " + xpath);
                throw new PropertyNotFoundException(xpath);
            }
        }
        // regular document property: try the low-level value first, then the property value
        try {
            return doc.getValue(xpath);
        } catch (PropertyNotFoundException e) {
            return doc.getPropertyValue(xpath); // may still throw PropertyNotFoundException
        }
    }

    /**
     * Normalizes a value before comparison: a {@link Calendar} becomes an {@link Instant}, anything else is returned
     * unchanged.
     */
    protected Object convert(Object value) {
        return value instanceof Calendar ? ((Calendar) value).toInstant() : value;
    }

    /**
     * Checks whether a value satisfies a clause.
     *
     * @param value the actual (converted) value, which may be {@code null}
     * @param clause the clause
     * @return {@code true} if the value matches
     */
    protected boolean match(Object value, Clause clause) {
        Op op = clause.op;
        if (op == Op.GLOB || op == Op.RE) {
            // both operators hold a compiled pattern that must match the whole string form of the value
            Matcher matcher = ((Pattern) clause.value).matcher(String.valueOf(value));
            return matcher.matches();
        }
        IntPredicate outcome;
        switch (op) {
        case EQ:
            outcome = cmp -> cmp == 0;
            break;
        case NEQ:
            outcome = cmp -> cmp != 0;
            break;
        case LT:
            outcome = cmp -> cmp < 0;
            break;
        case LTE:
            outcome = cmp -> cmp <= 0;
            break;
        case GT:
            outcome = cmp -> cmp > 0;
            break;
        case GTE:
            outcome = cmp -> cmp >= 0;
            break;
        default:
            throw new AssertionError("notreached");
        }
        boolean equality = op == Op.EQ || op == Op.NEQ;
        return compare(value, clause, equality, outcome);
    }

    /**
     * Compares an actual value to the (string) value of a clause and tests the comparison result.
     * <p>
     * {@link Long}, {@link Double} and {@link Instant} values are compared with the clause value parsed to the same
     * type; if it cannot be parsed, ordering operators do not match and equality operators see the values as
     * different. Any other non-null value is compared through its string form.
     *
     * @param a the actual value, which may be {@code null}
     * @param clause the clause, whose value must be a string
     * @param eqneq {@code true} for the equality operators, {@code false} for the ordering operators
     * @param predicate the test to apply to the comparison result
     * @return {@code true} if the comparison satisfies the predicate
     */
    protected boolean compare(Object a, Clause clause, boolean eqneq, IntPredicate predicate) {
        String expected = (String) clause.value;
        int cmp;
        if (a == null) {
            cmp = compareNull(expected, eqneq);
        } else if (a instanceof Long || a instanceof Double || a instanceof Instant) {
            try {
                cmp = compareTyped(a, expected);
            } catch (NumberFormatException | DateTimeParseException e) {
                if (!eqneq) {
                    return false; // no match
                }
                cmp = 1; // different
            }
        } else {
            cmp = String.valueOf(a).compareTo(expected);
        }
        return predicate.test(cmp);
    }

    /**
     * Compares a null actual value to the expected string: as the string "null" for equality operators (backward
     * compat), otherwise as 0 if the expected string is a number, else as the empty string.
     */
    private static int compareNull(String expected, boolean eqneq) {
        if (eqneq) {
            return "null".compareTo(expected);
        }
        try {
            return Long.compare(0L, Long.parseLong(expected));
        } catch (NumberFormatException notLong) {
            try {
                return Double.compare(0d, Double.parseDouble(expected));
            } catch (NumberFormatException notDouble) {
                return "".compareTo(expected);
            }
        }
    }

    /** Compares a Long, Double or Instant to the expected string parsed to the same type. */
    private static int compareTyped(Object a, String expected) {
        if (a instanceof Long) {
            return Long.compare((Long) a, Long.parseLong(expected));
        }
        if (a instanceof Double) {
            return Double.compare((Double) a, Double.parseDouble(expected));
        }
        return ((Instant) a).compareTo(Instant.parse(expected));
    }

    /**
     * Returns the blob provider to use for a blob key without provider prefix: the repository name when dispatching on
     * the repository name, otherwise the default provider.
     */
    @Override
    public String getBlobProvider(String repositoryName) {
        // the default is useful for legacy blobs created without prefix before dispatch was configured
        return useRepositoryName ? repositoryName : defaultProviderId;
    }

    /**
     * Decides which blob provider a blob must be written to.
     *
     * @param doc the document holding the blob
     * @param blob the blob
     * @param xpath the xpath of the blob in the document
     * @return the dispatch, flagged {@code true} only when the provider was chosen through the configured rules
     */
    @Override
    public BlobDispatch getBlobProvider(Document doc, Blob blob, String xpath) {
        return useRepositoryName ? new BlobDispatch(doc.getRepositoryName(), false)
                : new BlobDispatch(getProviderId(doc, blob, xpath), true);
    }

    /**
     * Re-checks the dispatch of all the blobs of a document when at least one of the changed xpaths is used by a
     * rule.
     *
     * @param doc the changed document
     * @param xpaths the changed xpaths
     */
    @Override
    public void notifyChanges(Document doc, Set<String> xpaths) {
        if (useRepositoryName) {
            return;
        }
        boolean ruleImpacted = rulesXPaths.stream().anyMatch(xpath -> xpaths.contains(xpath));
        if (ruleImpacted) {
            doc.visitBlobs(accessor -> checkBlob(doc, accessor));
        }
    }

    /**
     * Checks if the blob is stored in the expected blob provider to which it's supposed to be dispatched. If not,
     * store it in the correct one (and maybe remove it from the previous one if it makes sense).
     */
    protected void checkBlob(Document doc, BlobAccessor accessor) {
        Blob blob = accessor.getBlob();
        if (blob instanceof ManagedBlob) {
            String xpath = accessor.getXPath();
            // compare current provider with expected
            String currentProviderId = ((ManagedBlob) blob).getProviderId();
            String expectedProviderId = getProviderId(doc, blob, xpath);
            if (!currentProviderId.equals(expectedProviderId)) {
                // re-dispatch blob to new blob provider
                // this calls back into blobProvider.writeBlob for the expected blob provider
                accessor.setBlob(blob);
                // if old blob provider is in record mode, delete from it
                deleteBlobIfRecord(currentProviderId, doc, xpath);
            }
        }
    }

    /** Re-checks the blobs of the document as if its {@code ecm:isRecord} pseudo-property had changed. */
    @Override
    public void notifyMakeRecord(Document doc) {
        Set<String> changed = Collections.singleton(IS_RECORD);
        notifyChanges(doc, changed);
    }

    /** Re-checks the blobs of the copy as if its {@code ecm:isRecord} pseudo-property had changed. */
    @Override
    public void notifyAfterCopy(Document doc) {
        Set<String> changed = Collections.singleton(IS_RECORD);
        notifyChanges(doc, changed);
    }

    // TODO move this to caller

    /**
     * Before a document is removed, deletes its main blob from the blob provider if that provider is in record mode.
     */
    @Override
    public void notifyBeforeRemove(Document doc) {
        Object value;
        try {
            value = doc.getValue(MAIN_BLOB_XPATH);
        } catch (PropertyNotFoundException e) {
            // no main blob property on this document
            return;
        }
        Blob blob = (Blob) value;
        if (blob instanceof ManagedBlob) {
            String blobProviderId = ((ManagedBlob) blob).getProviderId();
            deleteBlobIfRecord(blobProviderId, doc, MAIN_BLOB_XPATH);
        }
    }

    /**
     * Deletes the blob at the given xpath from the given blob provider, but only if that provider exists and is in
     * record mode.
     *
     * @throws DocumentSecurityException if the blob is not allowed to be deleted
     */
    protected void deleteBlobIfRecord(String blobProviderId, Document doc, String xpath) {
        BlobProvider blobProvider = Framework.getService(BlobManager.class).getBlobProvider(blobProviderId);
        if (blobProvider == null || !blobProvider.isRecordMode()) {
            return;
        }
        checkBlobCanBeDeleted(doc, xpath);
        blobProvider.deleteBlob(new BlobContext(doc, xpath));
    }

    /**
     * Refuses the deletion of the main blob of a document under retention or legal hold.
     *
     * @throws DocumentSecurityException if the deletion is refused
     */
    protected void checkBlobCanBeDeleted(Document doc, String xpath) {
        if (!MAIN_BLOB_XPATH.equals(xpath) || !doc.isUnderRetentionOrLegalHold()) {
            return;
        }
        if (Framework.isBooleanPropertyTrue(PROP_ALLOW_DELETE_UNDELETABLE_DOCUMENTS)) {
            // in unit tests allow this
            return;
        }
        throw new DocumentSecurityException(
                "Cannot remove main blob from document " + doc.getUUID() + ", it is under retention / hold");
    }

}