001/* 002 * (C) Copyright 2014-2016 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019package org.nuxeo.ecm.core.storage.dbs; 020 021import static java.lang.Boolean.FALSE; 022import static java.lang.Boolean.TRUE; 023import static org.nuxeo.ecm.core.api.security.SecurityConstants.BROWSE; 024import static org.nuxeo.ecm.core.api.security.SecurityConstants.EVERYONE; 025import static org.nuxeo.ecm.core.api.security.SecurityConstants.UNSUPPORTED_ACL; 026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_GRANT; 027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_PERMISSION; 028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_USER; 029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL; 030import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP; 031import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ANCESTOR_IDS; 032import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_JOBID; 033import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ID; 034import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_PROXY; 035import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_VERSION; 036import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_MIXIN_TYPES; 037import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_NAME; 038import static 
org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PARENT_ID; 039import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_POS; 040import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PREFIX; 041import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PRIMARY_TYPE; 042import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_IDS; 043import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_TARGET_ID; 044import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_VERSION_SERIES_ID; 045import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_READ_ACL; 046import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_VERSION_SERIES_ID; 047 048import java.io.Serializable; 049import java.util.ArrayList; 050import java.util.Arrays; 051import java.util.Collections; 052import java.util.HashMap; 053import java.util.HashSet; 054import java.util.LinkedHashSet; 055import java.util.LinkedList; 056import java.util.List; 057import java.util.Map; 058import java.util.Map.Entry; 059import java.util.Set; 060 061import org.apache.commons.lang.StringUtils; 062import org.apache.commons.logging.Log; 063import org.apache.commons.logging.LogFactory; 064import org.nuxeo.ecm.core.api.ConcurrentUpdateException; 065import org.nuxeo.ecm.core.api.repository.RepositoryManager; 066import org.nuxeo.ecm.core.schema.SchemaManager; 067import org.nuxeo.ecm.core.schema.types.Schema; 068import org.nuxeo.ecm.core.security.SecurityService; 069import org.nuxeo.ecm.core.storage.DefaultFulltextParser; 070import org.nuxeo.ecm.core.storage.FulltextConfiguration; 071import org.nuxeo.ecm.core.storage.FulltextParser; 072import org.nuxeo.ecm.core.storage.FulltextUpdaterWork; 073import org.nuxeo.ecm.core.storage.FulltextUpdaterWork.IndexAndText; 074import org.nuxeo.ecm.core.storage.State; 075import org.nuxeo.ecm.core.storage.State.ListDiff; 076import org.nuxeo.ecm.core.storage.State.StateDiff; 077import org.nuxeo.ecm.core.storage.StateHelper; 078import 
org.nuxeo.ecm.core.work.api.Work;
import org.nuxeo.ecm.core.work.api.WorkManager;
import org.nuxeo.ecm.core.work.api.WorkManager.Scheduling;
import org.nuxeo.runtime.api.Framework;

/**
 * Transactional state for a session.
 * <p>
 * Until {@code save()} is called, data lives in the transient map.
 * <p>
 * Upon save, data is written to the repository, even though it has not yet been committed (this means that other
 * sessions can read uncommitted data). It's also kept in an undo log in order for rollback to be possible.
 * <p>
 * On commit, the undo log is forgotten. On rollback, the undo log is replayed.
 *
 * @since 5.9.4
 */
public class DBSTransactionState {

    private static final Log log = LogFactory.getLog(DBSTransactionState.class);

    /**
     * Marker key stored in an undo log {@link State} meaning "re-create this document completely from the stored
     * state" instead of applying it as an update. The embedded NUL characters make a clash with a real property key
     * impossible.
     */
    private static final String KEY_UNDOLOG_CREATE = "__UNDOLOG_CREATE__\0\0";

    protected final DBSRepository repository;

    protected final DBSSession session;

    /** Retrieved and created document state. */
    protected Map<String, DBSDocumentState> transientStates = new HashMap<>();

    /** Ids of documents created but not yet saved. */
    protected Set<String> transientCreated = new LinkedHashSet<>();

    /**
     * Undo log.
     * <p>
     * A map of document ids to null or State. The value is null when the document has to be deleted when applying the
     * undo log. Otherwise the value is a State. If the State contains the key {@link #KEY_UNDOLOG_CREATE} then the
     * state must be re-created completely when applying the undo log, otherwise just applied as an update.
     * <p>
     * Null when there is no active transaction.
     */
    protected Map<String, State> undoLog;

    /** The set of permissions implying BROWSE, used when computing Read ACLs. */
    protected final Set<String> browsePermissions;

    public DBSTransactionState(DBSRepository repository, DBSSession session) {
        this.repository = repository;
        this.session = session;
        SecurityService securityService = Framework.getLocalService(SecurityService.class);
        browsePermissions = new HashSet<>(Arrays.asList(securityService.getPermissionsToCheck(BROWSE)));
    }

    /**
     * New transient state for something just read from the repository.
     */
    protected DBSDocumentState newTransientState(State state) {
        if (state == null) {
            return null;
        }
        String id = (String) state.get(KEY_ID);
        if (transientStates.containsKey(id)) {
            // callers are expected to have checked the transient space first
            throw new IllegalStateException("Already transient: " + id);
        }
        DBSDocumentState docState = new DBSDocumentState(state); // copy
        transientStates.put(id, docState);
        return docState;
    }

    /**
     * Returns a state and marks it as transient, because it's about to be modified or returned to user code (where it
     * may be modified).
     */
    public DBSDocumentState getStateForUpdate(String id) {
        // check transient state
        DBSDocumentState docState = transientStates.get(id);
        if (docState != null) {
            return docState;
        }
        // fetch from repository
        State state = repository.readState(id);
        return newTransientState(state);
    }

    /**
     * Returns a state which won't be modified.
164 */ 165 // TODO in some cases it's good to have this kept in memory instead of 166 // rereading from database every time 167 // XXX getStateForReadOneShot 168 public State getStateForRead(String id) { 169 // check transient state 170 DBSDocumentState docState = transientStates.get(id); 171 if (docState != null) { 172 return docState.getState(); 173 } 174 // fetch from repository 175 return repository.readState(id); 176 } 177 178 /** 179 * Returns states and marks them transient, because they're about to be returned to user code (where they may be 180 * modified). 181 */ 182 public List<DBSDocumentState> getStatesForUpdate(List<String> ids) { 183 // check which ones we have to fetch from repository 184 List<String> idsToFetch = new LinkedList<>(); 185 for (String id : ids) { 186 // check transient state 187 DBSDocumentState docState = transientStates.get(id); 188 if (docState != null) { 189 continue; 190 } 191 // will have to fetch it 192 idsToFetch.add(id); 193 } 194 if (!idsToFetch.isEmpty()) { 195 List<State> states = repository.readStates(idsToFetch); 196 for (State state : states) { 197 newTransientState(state); 198 } 199 } 200 // everything now fetched in transient 201 List<DBSDocumentState> docStates = new ArrayList<>(ids.size()); 202 for (String id : ids) { 203 DBSDocumentState docState = transientStates.get(id); 204 if (docState == null) { 205 if (log.isTraceEnabled()) { 206 log.trace("Cannot fetch document with id: " + id, new Throwable("debug stack trace")); 207 } 208 continue; 209 } 210 docStates.add(docState); 211 } 212 return docStates; 213 } 214 215 // XXX TODO for update or for read? 
216 public DBSDocumentState getChildState(String parentId, String name) { 217 Set<String> seen = new HashSet<String>(); 218 // check transient state 219 for (DBSDocumentState docState : transientStates.values()) { 220 seen.add(docState.getId()); 221 if (!parentId.equals(docState.getParentId())) { 222 continue; 223 } 224 if (!name.equals(docState.getName())) { 225 continue; 226 } 227 return docState; 228 } 229 // fetch from repository 230 State state = repository.readChildState(parentId, name, seen); 231 return newTransientState(state); 232 } 233 234 public boolean hasChild(String parentId, String name) { 235 // check transient state 236 for (DBSDocumentState docState : transientStates.values()) { 237 if (!parentId.equals(docState.getParentId())) { 238 continue; 239 } 240 if (!name.equals(docState.getName())) { 241 continue; 242 } 243 return true; 244 } 245 // check repository 246 return repository.hasChild(parentId, name, Collections.emptySet()); 247 } 248 249 public List<DBSDocumentState> getChildrenStates(String parentId) { 250 List<DBSDocumentState> docStates = new LinkedList<>(); 251 Set<String> seen = new HashSet<>(); 252 // check transient state 253 for (DBSDocumentState docState : transientStates.values()) { 254 seen.add(docState.getId()); 255 if (!parentId.equals(docState.getParentId())) { 256 continue; 257 } 258 docStates.add(docState); 259 } 260 // fetch from repository 261 List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen); 262 for (State state : states) { 263 docStates.add(newTransientState(state)); 264 } 265 return docStates; 266 } 267 268 public List<String> getChildrenIds(String parentId) { 269 List<String> children = new ArrayList<>(); 270 Set<String> seen = new HashSet<>(); 271 // check transient state 272 for (DBSDocumentState docState : transientStates.values()) { 273 String id = docState.getId(); 274 seen.add(id); 275 if (!parentId.equals(docState.getParentId())) { 276 continue; 277 } 278 children.add(id); 279 } 280 // 
fetch from repository 281 List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen); 282 for (State state : states) { 283 children.add((String) state.get(KEY_ID)); 284 } 285 return new ArrayList<>(children); 286 } 287 288 public boolean hasChildren(String parentId) { 289 Set<String> seen = new HashSet<>(); 290 // check transient state 291 for (DBSDocumentState docState : transientStates.values()) { 292 seen.add(docState.getId()); 293 if (!parentId.equals(docState.getParentId())) { 294 continue; 295 } 296 return true; 297 } 298 // check repository 299 return repository.queryKeyValuePresence(KEY_PARENT_ID, parentId, seen); 300 } 301 302 public DBSDocumentState createChild(String id, String parentId, String name, Long pos, String typeName) { 303 // id may be not-null for import 304 if (id == null) { 305 id = repository.generateNewId(); 306 } 307 transientCreated.add(id); 308 DBSDocumentState docState = new DBSDocumentState(); 309 transientStates.put(id, docState); 310 docState.put(KEY_ID, id); 311 docState.put(KEY_PARENT_ID, parentId); 312 docState.put(KEY_ANCESTOR_IDS, getAncestorIds(parentId)); 313 docState.put(KEY_NAME, name); 314 docState.put(KEY_POS, pos); 315 docState.put(KEY_PRIMARY_TYPE, typeName); 316 // update read acls for new doc 317 updateDocumentReadAcls(id); 318 return docState; 319 } 320 321 /** Gets ancestors including id itself. 
*/ 322 protected Object[] getAncestorIds(String id) { 323 if (id == null) { 324 return null; 325 } 326 State state = getStateForRead(id); 327 if (state == null) { 328 throw new RuntimeException("No such id: " + id); 329 } 330 Object[] ancestors = (Object[]) state.get(KEY_ANCESTOR_IDS); 331 if (ancestors == null) { 332 return new Object[] { id }; 333 } else { 334 Object[] newAncestors = new Object[ancestors.length + 1]; 335 System.arraycopy(ancestors, 0, newAncestors, 0, ancestors.length); 336 newAncestors[ancestors.length] = id; 337 return newAncestors; 338 } 339 } 340 341 /** 342 * Copies the document into a newly-created object. 343 * <p> 344 * The copy is automatically saved. 345 */ 346 public DBSDocumentState copy(String id) { 347 DBSDocumentState copyState = new DBSDocumentState(getStateForRead(id)); 348 String copyId = repository.generateNewId(); 349 copyState.put(KEY_ID, copyId); 350 copyState.put(KEY_PROXY_IDS, null); // no proxies to this new doc 351 // other fields updated by the caller 352 transientStates.put(copyId, copyState); 353 transientCreated.add(copyId); 354 return copyState; 355 } 356 357 /** 358 * Updates ancestors recursively after a move. 359 * <p> 360 * Recursing from given doc, replace the first ndel ancestors with those passed. 361 * <p> 362 * Doesn't check transient (assumes save is done). The modifications are automatically saved. 
363 */ 364 public void updateAncestors(String id, int ndel, Object[] ancestorIds) { 365 int nadd = ancestorIds.length; 366 Set<String> ids = getSubTree(id, null, null); 367 ids.add(id); 368 for (String cid : ids) { 369 // XXX TODO oneShot update, don't pollute transient space 370 DBSDocumentState docState = getStateForUpdate(cid); 371 Object[] ancestors = (Object[]) docState.get(KEY_ANCESTOR_IDS); 372 Object[] newAncestors; 373 if (ancestors == null) { 374 newAncestors = ancestorIds.clone(); 375 } else { 376 newAncestors = new Object[ancestors.length - ndel + nadd]; 377 System.arraycopy(ancestorIds, 0, newAncestors, 0, nadd); 378 System.arraycopy(ancestors, ndel, newAncestors, nadd, ancestors.length - ndel); 379 } 380 docState.put(KEY_ANCESTOR_IDS, newAncestors); 381 } 382 } 383 384 /** 385 * Updates the Read ACLs recursively on a document. 386 */ 387 public void updateTreeReadAcls(String id) { 388 // versions too XXX TODO 389 Set<String> ids = getSubTree(id, null, null); 390 ids.add(id); 391 ids.forEach(this::updateDocumentReadAcls); 392 } 393 394 /** 395 * Updates the Read ACLs on a document (not recursively) 396 */ 397 protected void updateDocumentReadAcls(String id) { 398 // XXX TODO oneShot update, don't pollute transient space 399 DBSDocumentState docState = getStateForUpdate(id); 400 docState.put(KEY_READ_ACL, getReadACL(docState)); 401 } 402 403 /** 404 * Gets the Read ACL (flat list of users having browse permission, including inheritance) on a document. 
     */
    protected String[] getReadACL(DBSDocumentState docState) {
        Set<String> racls = new HashSet<>();
        State state = docState.getState();
        // walk up the inheritance chain, accumulating users granted a browse-implying permission
        LOOP: do {
            @SuppressWarnings("unchecked")
            List<Serializable> aclList = (List<Serializable>) state.get(KEY_ACP);
            if (aclList != null) {
                for (Serializable aclSer : aclList) {
                    State aclMap = (State) aclSer;
                    @SuppressWarnings("unchecked")
                    List<Serializable> aceList = (List<Serializable>) aclMap.get(KEY_ACL);
                    for (Serializable aceSer : aceList) {
                        State aceMap = (State) aceSer;
                        String username = (String) aceMap.get(KEY_ACE_USER);
                        String permission = (String) aceMap.get(KEY_ACE_PERMISSION);
                        Boolean granted = (Boolean) aceMap.get(KEY_ACE_GRANT);
                        if (TRUE.equals(granted) && browsePermissions.contains(permission)) {
                            racls.add(username);
                        }
                        if (FALSE.equals(granted)) {
                            // a deny on anyone but EVERYONE cannot be expressed in a flat Read ACL;
                            // mark the ACL as unsupported so security is re-checked at read time
                            if (!EVERYONE.equals(username)) {
                                // TODO log
                                racls.add(UNSUPPORTED_ACL);
                            }
                            // first deny stops all further inheritance (labeled break exits the do/while)
                            break LOOP;
                        }
                    }
                }
            }
            // get parent (a version inherits from its version series instead of a parent folder)
            if (TRUE.equals(state.get(KEY_IS_VERSION))) {
                String versionSeriesId = (String) state.get(KEY_VERSION_SERIES_ID);
                state = versionSeriesId == null ? null : getStateForRead(versionSeriesId);
            } else {
                String parentId = (String) state.get(KEY_PARENT_ID);
                state = parentId == null ? null : getStateForRead(parentId);
            }
        } while (state != null);

        // sort to have canonical order
        List<String> racl = new ArrayList<>(racls);
        Collections.sort(racl);
        return racl.toArray(new String[racl.size()]);
    }

    /**
     * Gets all the ids under a given one, recursively.
     * <p>
     * Doesn't check transient (assumes save is done).
455 * 456 * @param id the root of the tree (not included in results) 457 * @param proxyTargets returns a map of proxy to target among the documents found 458 * @param targetProxies returns a map of target to proxies among the document found 459 */ 460 protected Set<String> getSubTree(String id, Map<String, String> proxyTargets, Map<String, Object[]> targetProxies) { 461 Set<String> ids = new HashSet<>(); 462 // check repository 463 repository.queryKeyValueArray(KEY_ANCESTOR_IDS, id, ids, proxyTargets, targetProxies); 464 return ids; 465 } 466 467 public List<DBSDocumentState> getKeyValuedStates(String key, Object value) { 468 List<DBSDocumentState> docStates = new LinkedList<>(); 469 Set<String> seen = new HashSet<>(); 470 // check transient state 471 for (DBSDocumentState docState : transientStates.values()) { 472 seen.add(docState.getId()); 473 if (!value.equals(docState.get(key))) { 474 continue; 475 } 476 docStates.add(docState); 477 } 478 // fetch from repository 479 List<State> states = repository.queryKeyValue(key, value, seen); 480 for (State state : states) { 481 docStates.add(newTransientState(state)); 482 } 483 return docStates; 484 } 485 486 public List<DBSDocumentState> getKeyValuedStates(String key1, Object value1, String key2, Object value2) { 487 List<DBSDocumentState> docStates = new LinkedList<>(); 488 Set<String> seen = new HashSet<>(); 489 // check transient state 490 for (DBSDocumentState docState : transientStates.values()) { 491 seen.add(docState.getId()); 492 if (!(value1.equals(docState.get(key1)) && value2.equals(docState.get(key2)))) { 493 continue; 494 } 495 docStates.add(docState); 496 } 497 // fetch from repository 498 List<State> states = repository.queryKeyValue(key1, value1, key2, value2, seen); 499 for (State state : states) { 500 docStates.add(newTransientState(state)); 501 } 502 return docStates; 503 } 504 505 /** 506 * Removes a list of documents. 507 * <p> 508 * Called after a {@link #save} has been done. 
     */
    public void removeStates(Set<String> ids) {
        if (undoLog != null) {
            // record enough information to re-create the documents on rollback
            for (String id : ids) {
                if (undoLog.containsKey(id)) {
                    // there's already a create or an update in the undo log
                    State oldUndo = undoLog.get(id);
                    if (oldUndo == null) {
                        // create + delete -> forget
                        undoLog.remove(id);
                    } else {
                        // update + delete -> original old state to re-create
                        oldUndo.put(KEY_UNDOLOG_CREATE, TRUE);
                    }
                } else {
                    // just delete -> store old state to re-create
                    State oldState = StateHelper.deepCopy(getStateForRead(id));
                    oldState.put(KEY_UNDOLOG_CREATE, TRUE);
                    undoLog.put(id, oldState);
                }
            }
        }
        for (String id : ids) {
            transientStates.remove(id);
        }
        repository.deleteStates(ids);
    }

    /**
     * Writes transient state to database.
     * <p>
     * An undo log is kept in order to rollback the transaction later if needed.
     */
    public void save() {
        // proxies must be refreshed from their targets before anything is written
        updateProxies();
        List<Work> works;
        if (!repository.isFulltextDisabled()) {
            // TODO getting fulltext already does a getStateChange
            works = getFulltextWorks();
        } else {
            works = Collections.emptyList();
        }
        // 1. flush the creations, in creation order
        List<State> statesToCreate = new ArrayList<>();
        for (String id : transientCreated) { // ordered
            DBSDocumentState docState = transientStates.get(id);
            docState.setNotDirty();
            if (undoLog != null) {
                undoLog.put(id, null); // marker to denote create
            }
            statesToCreate.add(docState.getState());
        }
        if (!statesToCreate.isEmpty()) {
            repository.createStates(statesToCreate);
        }
        // 2. flush the updates on everything else that is dirty
        for (DBSDocumentState docState : transientStates.values()) {
            String id = docState.getId();
            if (transientCreated.contains(id)) {
                continue; // already done
            }
            StateDiff diff = docState.getStateChange();
            if (diff != null) {
                if (undoLog != null) {
                    if (!undoLog.containsKey(id)) {
                        // save the pre-update state so rollback can restore it
                        undoLog.put(id, StateHelper.deepCopy(docState.getOriginalState()));
                    }
                    // else there's already a create or an update in the undo log so original info is enough
                }
                repository.updateState(id, diff);
            }
            docState.setNotDirty();
        }
        transientCreated.clear();
        scheduleWork(works);
    }

    /**
     * Replays the undo log against the repository: deletes saved creations, re-creates saved deletions, and reverts
     * saved updates by diffing against the current repository state.
     */
    protected void applyUndoLog() {
        Set<String> deletes = new HashSet<>();
        for (Entry<String, State> es : undoLog.entrySet()) {
            String id = es.getKey();
            State state = es.getValue();
            if (state == null) {
                // null marker: the document was created in this transaction, delete it
                deletes.add(id);
            } else {
                boolean recreate = state.remove(KEY_UNDOLOG_CREATE) != null;
                if (recreate) {
                    repository.createState(state);
                } else {
                    // undo update
                    State currentState = repository.readState(id);
                    if (currentState != null) {
                        StateDiff diff = StateHelper.diff(currentState, state);
                        if (!diff.isEmpty()) {
                            repository.updateState(id, diff);
                        }
                    }
                    // else we expected to read a current state but it was concurrently deleted...
                    // in that case leave it deleted
                }
            }
        }
        if (!deletes.isEmpty()) {
            repository.deleteStates(deletes);
        }
    }

    /**
     * Checks if the changed documents are proxy targets, and updates the proxies if that's the case.
     */
    protected void updateProxies() {
        for (String id : transientCreated) { // ordered
            DBSDocumentState docState = transientStates.get(id);
            updateProxies(docState);
        }
        // copy as we may modify proxies
        // (updateProxy adds states to transientStates; iterate over a snapshot of the keys
        // to avoid ConcurrentModificationException)
        for (String id : transientStates.keySet().toArray(new String[0])) {
            DBSDocumentState docState = transientStates.get(id);
            if (transientCreated.contains(id)) {
                continue; // already done
            }
            if (docState.isDirty()) {
                updateProxies(docState);
            }
        }
    }

    /**
     * Updates all proxies pointing to the given target, if any.
     */
    protected void updateProxies(DBSDocumentState target) {
        Object[] proxyIds = (Object[]) target.get(KEY_PROXY_IDS);
        if (proxyIds != null) {
            for (Object proxyId : proxyIds) {
                try {
                    updateProxy(target, (String) proxyId);
                } catch (ConcurrentUpdateException e) {
                    // enrich the exception with the target doc id before propagating
                    e.addInfo("On doc " + target.getId());
                    throw e;
                }
            }
        }
    }

    /**
     * Updates the state of a proxy based on its target.
     */
    protected void updateProxy(DBSDocumentState target, String proxyId) {
        DBSDocumentState proxy = getStateForUpdate(proxyId);
        if (proxy == null) {
            throw new ConcurrentUpdateException("Proxy " + proxyId + " concurrently deleted");
        }
        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
        // clear all proxy data (everything that is not proxy-specific or placeful)
        for (String key : proxy.getState().keyArray()) {
            if (!isProxySpecific(key, schemaManager)) {
                proxy.put(key, null);
            }
        }
        // copy from target
        for (Entry<String, Serializable> en : target.getState().entrySet()) {
            String key = en.getKey();
            if (!isProxySpecific(key, schemaManager)) {
                proxy.put(key, StateHelper.deepCopy(en.getValue()));
            }
        }
    }

    /**
     * Things that we don't touch on a proxy when updating it.
     */
    protected boolean isProxySpecific(String key, SchemaManager schemaManager) {
        // all cases fall through to the same "return true"
        switch (key) {
        // these are placeful stuff
        case KEY_ID:
        case KEY_PARENT_ID:
        case KEY_ANCESTOR_IDS:
        case KEY_NAME:
        case KEY_POS:
        case KEY_ACP:
        case KEY_READ_ACL:
        // these are proxy-specific
        case KEY_IS_PROXY:
        case KEY_PROXY_TARGET_ID:
        case KEY_PROXY_VERSION_SERIES_ID:
        case KEY_IS_VERSION:
        case KEY_PROXY_IDS:
            return true;
        }
        // schema-prefixed properties: ask the schema manager whether the whole schema is proxy-specific
        int p = key.indexOf(':');
        if (p == -1) {
            // no prefix, assume not proxy-specific
            return false;
        }
        String prefix = key.substring(0, p);
        Schema schema = schemaManager.getSchemaFromPrefix(prefix);
        if (schema == null) {
            // some schemas use their name as the prefix
            schema = schemaManager.getSchema(prefix);
            if (schema == null) {
                // unknown prefix, assume not proxy-specific
                return false;
            }
        }
        return schemaManager.isProxySchema(schema.getName(), null); // type unused
    }

    /**
     * Called when created in a transaction.
     *
     * @since 7.4
     */
    public void begin() {
        // an empty undo log marks the transaction as active
        undoLog = new HashMap<>();
    }

    /**
     * Saves and flushes to database.
     */
    public void commit() {
        save();
        commitSave();
    }

    /**
     * Commits the saved state to the database.
     */
    protected void commitSave() {
        // clear transient, this means that after this references to states will be stale
        // TODO mark states as invalid
        clearTransient();
        // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway
        undoLog = null;
    }

    /**
     * Rolls back the save state by applying the undo log.
740 */ 741 public void rollback() { 742 clearTransient(); 743 applyUndoLog(); 744 // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway 745 undoLog = null; 746 } 747 748 protected void clearTransient() { 749 transientStates.clear(); 750 transientCreated.clear(); 751 } 752 753 /** 754 * Gets the fulltext updates to do. Called at save() time. 755 * 756 * @return a list of {@link Work} instances to schedule post-commit. 757 */ 758 protected List<Work> getFulltextWorks() { 759 Set<String> docsWithDirtyStrings = new HashSet<>(); 760 Set<String> docsWithDirtyBinaries = new HashSet<>(); 761 findDirtyDocuments(docsWithDirtyStrings, docsWithDirtyBinaries); 762 if (docsWithDirtyStrings.isEmpty() && docsWithDirtyBinaries.isEmpty()) { 763 return Collections.emptyList(); 764 } 765 List<Work> works = new LinkedList<>(); 766 getFulltextSimpleWorks(works, docsWithDirtyStrings); 767 getFulltextBinariesWorks(works, docsWithDirtyBinaries); 768 return works; 769 } 770 771 /** 772 * Finds the documents having dirty text or dirty binaries that have to be reindexed as fulltext. 
773 * 774 * @param docsWithDirtyStrings set of ids, updated by this method 775 * @param docWithDirtyBinaries set of ids, updated by this method 776 */ 777 protected void findDirtyDocuments(Set<String> docsWithDirtyStrings, Set<String> docWithDirtyBinaries) { 778 for (DBSDocumentState docState : transientStates.values()) { 779 State originalState = docState.getOriginalState(); 780 State state = docState.getState(); 781 if (originalState == state) { 782 continue; 783 } 784 StateDiff diff = StateHelper.diff(originalState, state); 785 if (diff.isEmpty()) { 786 continue; 787 } 788 StateDiff rdiff = StateHelper.diff(state, originalState); 789 // we do diffs in both directions to capture removal of complex list elements, 790 // for instance for {foo: [{bar: baz}] -> {foo: []} 791 // diff paths = foo and rdiff paths = foo/*/bar 792 Set<String> paths = new HashSet<>(); 793 DirtyPathsFinder dirtyPathsFinder = new DirtyPathsFinder(paths); 794 dirtyPathsFinder.findDirtyPaths(diff); 795 dirtyPathsFinder.findDirtyPaths(rdiff); 796 FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration(); 797 boolean dirtyStrings = false; 798 boolean dirtyBinaries = false; 799 for (String path : paths) { 800 Set<String> indexesSimple = fulltextConfiguration.indexesByPropPathSimple.get(path); 801 if (indexesSimple != null && !indexesSimple.isEmpty()) { 802 dirtyStrings = true; 803 if (dirtyBinaries) { 804 break; 805 } 806 } 807 Set<String> indexesBinary = fulltextConfiguration.indexesByPropPathBinary.get(path); 808 if (indexesBinary != null && !indexesBinary.isEmpty()) { 809 dirtyBinaries = true; 810 if (dirtyStrings) { 811 break; 812 } 813 } 814 } 815 if (dirtyStrings) { 816 docsWithDirtyStrings.add(docState.getId()); 817 } 818 if (dirtyBinaries) { 819 docWithDirtyBinaries.add(docState.getId()); 820 } 821 } 822 } 823 824 /** 825 * Iterates on a state diff to find the paths corresponding to dirty values. 
     *
     * @since 7.10-HF04, 8.1
     */
    protected static class DirtyPathsFinder {

        /** Accumulates the dirty property paths found (shared with the caller). */
        protected Set<String> paths;

        public DirtyPathsFinder(Set<String> paths) {
            this.paths = paths;
        }

        public void findDirtyPaths(StateDiff value) {
            findDirtyPaths(value, null);
        }

        // dispatches on the runtime type of the value; a scalar leaf records the path as dirty
        protected void findDirtyPaths(Object value, String path) {
            if (value instanceof Object[]) {
                findDirtyPaths((Object[]) value, path);
            } else if (value instanceof List) {
                findDirtyPaths((List<?>) value, path);
            } else if (value instanceof ListDiff) {
                findDirtyPaths((ListDiff) value, path);
            } else if (value instanceof State) {
                findDirtyPaths((State) value, path);
            } else {
                paths.add(path);
            }
        }

        protected void findDirtyPaths(Object[] value, String path) {
            // list/array elements are denoted by "/*" in property paths
            String newPath = path + "/*";
            for (Object v : value) {
                findDirtyPaths(v, newPath);
            }
        }

        protected void findDirtyPaths(List<?> value, String path) {
            String newPath = path + "/*";
            for (Object v : value) {
                findDirtyPaths(v, newPath);
            }
        }

        protected void findDirtyPaths(ListDiff value, String path) {
            String newPath = path + "/*";
            if (value.diff != null) {
                findDirtyPaths(value.diff, newPath);
            }
            if (value.rpush != null) {
                findDirtyPaths(value.rpush, newPath);
            }
        }

        protected void findDirtyPaths(State value, String path) {
            for (Entry<String, Serializable> es : value.entrySet()) {
                String key = es.getKey();
                Serializable v = es.getValue();
                String newPath = path == null ? key : path + "/" + key;
                findDirtyPaths(v, newPath);
            }
        }
    }

    /**
     * Creates the simple-text fulltext update works for the given documents, parsing their string properties in-process.
     */
    protected void getFulltextSimpleWorks(List<Work> works, Set<String> docsWithDirtyStrings) {
        // TODO XXX make configurable, see also FulltextExtractorWork
        FulltextParser fulltextParser = new DefaultFulltextParser();
        FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();
        if (fulltextConfiguration.fulltextSearchDisabled) {
            return;
        }
        // update simpletext on documents with dirty strings
        for (String id : docsWithDirtyStrings) {
            if (id == null) {
                // cannot happen, but has been observed :(
                log.error("Got null doc id in fulltext update, cannot happen");
                continue;
            }
            DBSDocumentState docState = getStateForUpdate(id);
            if (docState == null) {
                // cannot happen
                continue;
            }
            String documentType = docState.getPrimaryType();
            // Object[] mixinTypes = (Object[]) docState.get(KEY_MIXIN_TYPES);

            if (!fulltextConfiguration.isFulltextIndexable(documentType)) {
                continue;
            }
            // mark the doc as having a fulltext job in progress
            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
            FulltextFinder fulltextFinder = new FulltextFinder(fulltextParser, docState, session);
            List<IndexAndText> indexesAndText = new LinkedList<>();
            for (String indexName : fulltextConfiguration.indexNames) {
                // TODO paths from config
                String text = fulltextFinder.findFulltext(indexName);
                indexesAndText.add(new IndexAndText(indexName, text));
            }
            if (!indexesAndText.isEmpty()) {
                Work work = new FulltextUpdaterWork(repository.getName(), id, true, false, indexesAndText);
                works.add(work);
            }
        }
    }

    /**
     * Creates the binary fulltext extraction works for the given documents; actual extraction is done asynchronously.
     */
    protected void getFulltextBinariesWorks(List<Work> works, Set<String> docWithDirtyBinaries) {
        if (docWithDirtyBinaries.isEmpty()) {
            return;
        }

        FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();

        // mark indexing in progress, so that future copies (including versions)
        // will be indexed as well
        for (String id : docWithDirtyBinaries) {
            DBSDocumentState docState = getStateForUpdate(id);
            if (docState == null) {
                // cannot happen
                continue;
            }
            if (!fulltextConfiguration.isFulltextIndexable(docState.getPrimaryType())) {
                continue;
            }
            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
        }

        // FulltextExtractorWork does fulltext extraction using converters
        // and then schedules a FulltextUpdaterWork to write the results
        // single-threaded
        for (String id : docWithDirtyBinaries) {
            // don't exclude proxies
            Work work = new DBSFulltextExtractorWork(repository.getName(), id);
            works.add(work);
        }
    }

    /**
     * Extracts the fulltext strings of one document by walking its state tree.
     */
    protected static class FulltextFinder {

        protected final FulltextParser fulltextParser;

        protected final DBSDocumentState document;

        protected final DBSSession session;

        protected final String documentType;

        protected final Object[] mixinTypes;

        /**
         * Prepares parsing for one document.
         */
        public FulltextFinder(FulltextParser fulltextParser, DBSDocumentState document, DBSSession session) {
            this.fulltextParser = fulltextParser;
            this.document = document;
            this.session = session;
            if (document == null) { // null in tests
                documentType = null;
                mixinTypes = null;
            } else {
                documentType = document.getPrimaryType();
                mixinTypes = (Object[]) document.get(KEY_MIXIN_TYPES);
            }
        }

        /**
         * Parses the document for one index.
         */
        public String findFulltext(String indexName) {
            // TODO indexName
            // TODO paths
            List<String> strings = new ArrayList<>();
            findFulltext(indexName, document.getState(), strings);
            return StringUtils.join(strings, ' ');
        }

        protected void findFulltext(String indexName, State state, List<String> strings) {
            for (Entry<String, Serializable> en : state.entrySet()) {
                String key = en.getKey();
                // skip system (prefixed) keys, except the ones explicitly allowed below
                if (key.startsWith(KEY_PREFIX)) {
                    switch (key) {
                    // allow indexing of this:
                    case DBSDocument.KEY_NAME:
                        break;
                    default:
                        continue;
                    }
                }
                Serializable value = en.getValue();
                if (value instanceof State) {
                    // complex property: recurse
                    State s = (State) value;
                    findFulltext(indexName, s, strings);
                } else if (value instanceof List) {
                    // complex list: recurse on each element
                    @SuppressWarnings("unchecked")
                    List<State> v = (List<State>) value;
                    for (State s : v) {
                        findFulltext(indexName, s, strings);
                    }
                } else if (value instanceof Object[]) {
                    Object[] ar = (Object[]) value;
                    for (Object v : ar) {
                        if (v instanceof String) {
                            fulltextParser.parse((String) v, null, strings);
                        } else {
                            // arrays are homogeneous, no need to continue
                            break;
                        }
                    }
                } else {
                    if (value instanceof String) {
                        fulltextParser.parse((String) value, null, strings);
                    }
                }
            }
        }
    }

    /**
     * Schedules the given works post-commit, if high-level sessions are available.
     */
    protected void scheduleWork(List<Work> works) {
        // do async fulltext indexing only if high-level sessions are available
        RepositoryManager repositoryManager = Framework.getLocalService(RepositoryManager.class);
        if (repositoryManager != null && !works.isEmpty()) {
            WorkManager workManager = Framework.getLocalService(WorkManager.class);
            for (Work work : works) {
                // schedule work post-commit
                // in non-tx mode, this may execute it nearly immediately
                workManager.schedule(work, Scheduling.IF_NOT_SCHEDULED, true);
            }
        }
    }
}