001/* 002 * (C) Copyright 2014-2016 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019package org.nuxeo.ecm.core.storage.dbs; 020 021import static java.lang.Boolean.FALSE; 022import static java.lang.Boolean.TRUE; 023import static org.nuxeo.ecm.core.api.security.SecurityConstants.BROWSE; 024import static org.nuxeo.ecm.core.api.security.SecurityConstants.EVERYONE; 025import static org.nuxeo.ecm.core.api.security.SecurityConstants.UNSUPPORTED_ACL; 026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.INITIAL_CHANGE_TOKEN; 027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_GRANT; 028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_PERMISSION; 029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_USER; 030import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL; 031import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP; 032import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ANCESTOR_IDS; 033import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_CHANGE_TOKEN; 034import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_JOBID; 035import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ID; 036import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_PROXY; 037import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_VERSION; 
038import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_MIXIN_TYPES; 039import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_NAME; 040import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PARENT_ID; 041import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_POS; 042import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PREFIX; 043import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PRIMARY_TYPE; 044import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_IDS; 045import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_TARGET_ID; 046import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_VERSION_SERIES_ID; 047import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_READ_ACL; 048import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_VERSION_SERIES_ID; 049 050import java.io.Serializable; 051import java.util.ArrayList; 052import java.util.Arrays; 053import java.util.Collections; 054import java.util.HashMap; 055import java.util.HashSet; 056import java.util.LinkedHashSet; 057import java.util.LinkedList; 058import java.util.List; 059import java.util.Map; 060import java.util.Map.Entry; 061import java.util.Set; 062 063import org.apache.commons.lang.StringUtils; 064import org.apache.commons.logging.Log; 065import org.apache.commons.logging.LogFactory; 066import org.nuxeo.ecm.core.api.ConcurrentUpdateException; 067import org.nuxeo.ecm.core.api.repository.RepositoryManager; 068import org.nuxeo.ecm.core.schema.SchemaManager; 069import org.nuxeo.ecm.core.schema.types.Schema; 070import org.nuxeo.ecm.core.security.SecurityService; 071import org.nuxeo.ecm.core.storage.DefaultFulltextParser; 072import org.nuxeo.ecm.core.storage.FulltextConfiguration; 073import org.nuxeo.ecm.core.storage.FulltextParser; 074import org.nuxeo.ecm.core.storage.FulltextUpdaterWork; 075import org.nuxeo.ecm.core.storage.FulltextUpdaterWork.IndexAndText; 076import org.nuxeo.ecm.core.storage.State; 077import 
org.nuxeo.ecm.core.storage.State.ListDiff;
import org.nuxeo.ecm.core.storage.State.StateDiff;
import org.nuxeo.ecm.core.storage.StateHelper;
import org.nuxeo.ecm.core.work.api.Work;
import org.nuxeo.ecm.core.work.api.WorkManager;
import org.nuxeo.ecm.core.work.api.WorkManager.Scheduling;
import org.nuxeo.runtime.api.Framework;

/**
 * Transactional state for a session.
 * <p>
 * Until {@code save()} is called, data lives in the transient map.
 * <p>
 * Upon save, data is written to the repository, even though it has not yet been committed (this means that other
 * sessions can read uncommitted data). It's also kept in an undo log in order for rollback to be possible.
 * <p>
 * On commit, the undo log is forgotten. On rollback, the undo log is replayed.
 *
 * @since 5.9.4
 */
public class DBSTransactionState {

    private static final Log log = LogFactory.getLog(DBSTransactionState.class);

    // marker key stored in an undo log State meaning "re-create this state completely on rollback";
    // the embedded NUL characters make collision with a real property key unlikely
    private static final String KEY_UNDOLOG_CREATE = "__UNDOLOG_CREATE__\0\0";

    protected final DBSRepository repository;

    protected final DBSSession session;

    /** Retrieved and created document state. */
    protected Map<String, DBSDocumentState> transientStates = new HashMap<>();

    /** Ids of documents created but not yet saved. */
    protected Set<String> transientCreated = new LinkedHashSet<>();

    /**
     * Undo log.
     * <p>
     * A map of document ids to null or State. The value is null when the document has to be deleted when applying the
     * undo log. Otherwise the value is a State. If the State contains the key {@link #KEY_UNDOLOG_CREATE} then the
     * state must be re-created completely when applying the undo log, otherwise just applied as an update.
     * <p>
     * Null when there is no active transaction.
     */
    protected Map<String, State> undoLog;

    /** Permissions that imply Browse, resolved once at construction time. */
    protected final Set<String> browsePermissions;

    public DBSTransactionState(DBSRepository repository, DBSSession session) {
        this.repository = repository;
        this.session = session;
        SecurityService securityService = Framework.getLocalService(SecurityService.class);
        browsePermissions = new HashSet<>(Arrays.asList(securityService.getPermissionsToCheck(BROWSE)));
    }

    /**
     * New transient state for something just read from the repository.
     *
     * @param state the state just read, or {@code null}
     * @return a transient wrapper registered in {@link #transientStates}, or {@code null} if {@code state} was
     *         {@code null}
     * @throws IllegalStateException if the id is already present in the transient space
     */
    protected DBSDocumentState newTransientState(State state) {
        if (state == null) {
            return null;
        }
        String id = (String) state.get(KEY_ID);
        if (transientStates.containsKey(id)) {
            throw new IllegalStateException("Already transient: " + id);
        }
        DBSDocumentState docState = new DBSDocumentState(state); // copy
        transientStates.put(id, docState);
        return docState;
    }

    /**
     * Returns a state and marks it as transient, because it's about to be modified or returned to user code (where it
     * may be modified).
     */
    public DBSDocumentState getStateForUpdate(String id) {
        // check transient state
        DBSDocumentState docState = transientStates.get(id);
        if (docState != null) {
            return docState;
        }
        // fetch from repository
        State state = repository.readState(id);
        return newTransientState(state);
    }

    /**
     * Returns a state which won't be modified.
     */
    // TODO in some cases it's good to have this kept in memory instead of
    // rereading from database every time
    // XXX getStateForReadOneShot
    public State getStateForRead(String id) {
        // check transient state
        DBSDocumentState docState = transientStates.get(id);
        if (docState != null) {
            return docState.getState();
        }
        // fetch from repository
        return repository.readState(id);
    }

    /**
     * Returns states and marks them transient, because they're about to be returned to user code (where they may be
     * modified).
     *
     * @param ids the ids to fetch; ids not found anywhere are skipped (with a trace log)
     */
    public List<DBSDocumentState> getStatesForUpdate(List<String> ids) {
        // check which ones we have to fetch from repository
        List<String> idsToFetch = new LinkedList<>();
        for (String id : ids) {
            // check transient state
            DBSDocumentState docState = transientStates.get(id);
            if (docState != null) {
                continue;
            }
            // will have to fetch it
            idsToFetch.add(id);
        }
        if (!idsToFetch.isEmpty()) {
            List<State> states = repository.readStates(idsToFetch);
            for (State state : states) {
                newTransientState(state);
            }
        }
        // everything now fetched in transient
        List<DBSDocumentState> docStates = new ArrayList<>(ids.size());
        for (String id : ids) {
            DBSDocumentState docState = transientStates.get(id);
            if (docState == null) {
                if (log.isTraceEnabled()) {
                    log.trace("Cannot fetch document with id: " + id, new Throwable("debug stack trace"));
                }
                continue;
            }
            docStates.add(docState);
        }
        return docStates;
    }

    // XXX TODO for update or for read?
    public DBSDocumentState getChildState(String parentId, String name) {
        // check transient state
        for (DBSDocumentState docState : transientStates.values()) {
            if (!parentId.equals(docState.getParentId())) {
                continue;
            }
            if (!name.equals(docState.getName())) {
                continue;
            }
            return docState;
        }
        // fetch from repository
        State state = repository.readChildState(parentId, name, Collections.emptySet());
        if (state == null) {
            return null;
        }
        String id = (String) state.get(KEY_ID);
        if (transientStates.containsKey(id)) {
            // found transient, even though we already checked
            // that means that in-memory it's not a child, but in-database it's a child (was moved)
            // -> ignore the database state
            return null;
        }
        return newTransientState(state);
    }

    public boolean hasChild(String parentId, String name) {
        // check transient state
        for (DBSDocumentState docState : transientStates.values()) {
            if (!parentId.equals(docState.getParentId())) {
                continue;
            }
            if (!name.equals(docState.getName())) {
                continue;
            }
            return true;
        }
        // check repository
        return repository.hasChild(parentId, name, Collections.emptySet());
    }

    public List<DBSDocumentState> getChildrenStates(String parentId) {
        List<DBSDocumentState> docStates = new LinkedList<>();
        Set<String> seen = new HashSet<>();
        // check transient state
        for (DBSDocumentState docState : transientStates.values()) {
            if (!parentId.equals(docState.getParentId())) {
                continue;
            }
            docStates.add(docState);
            seen.add(docState.getId());
        }
        // fetch from repository
        List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen);
        for (State state : states) {
            String id = (String) state.get(KEY_ID);
            if (transientStates.containsKey(id)) {
                // found transient, even though we passed an exclusion list for known children
                // that means that
in-memory it's not a child, but in-database it's a child (was moved) 277 // -> ignore the database state 278 continue; 279 } 280 docStates.add(newTransientState(state)); 281 } 282 return docStates; 283 } 284 285 public List<String> getChildrenIds(String parentId) { 286 List<String> children = new ArrayList<>(); 287 Set<String> seen = new HashSet<>(); 288 // check transient state 289 for (DBSDocumentState docState : transientStates.values()) { 290 String id = docState.getId(); 291 if (!parentId.equals(docState.getParentId())) { 292 continue; 293 } 294 seen.add(id); 295 children.add(id); 296 } 297 // fetch from repository 298 List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen); 299 for (State state : states) { 300 String id = (String) state.get(KEY_ID); 301 if (transientStates.containsKey(id)) { 302 // found transient, even though we passed an exclusion list for known children 303 // that means that in-memory it's not a child, but in-database it's a child (was moved) 304 // -> ignore the database state 305 continue; 306 } 307 children.add(id); 308 } 309 return new ArrayList<>(children); 310 } 311 312 public boolean hasChildren(String parentId) { 313 // check transient state 314 for (DBSDocumentState docState : transientStates.values()) { 315 if (!parentId.equals(docState.getParentId())) { 316 continue; 317 } 318 return true; 319 } 320 // check repository 321 return repository.queryKeyValuePresence(KEY_PARENT_ID, parentId, Collections.emptySet()); 322 } 323 324 public DBSDocumentState createChild(String id, String parentId, String name, Long pos, String typeName) { 325 // id may be not-null for import 326 if (id == null) { 327 id = repository.generateNewId(); 328 } 329 transientCreated.add(id); 330 DBSDocumentState docState = new DBSDocumentState(); 331 transientStates.put(id, docState); 332 docState.put(KEY_ID, id); 333 docState.put(KEY_PARENT_ID, parentId); 334 docState.put(KEY_ANCESTOR_IDS, getAncestorIds(parentId)); 335 
docState.put(KEY_NAME, name); 336 docState.put(KEY_POS, pos); 337 docState.put(KEY_PRIMARY_TYPE, typeName); 338 // update read acls for new doc 339 updateDocumentReadAcls(id); 340 return docState; 341 } 342 343 /** Gets ancestors including id itself. */ 344 protected Object[] getAncestorIds(String id) { 345 if (id == null) { 346 return null; 347 } 348 State state = getStateForRead(id); 349 if (state == null) { 350 throw new RuntimeException("No such id: " + id); 351 } 352 Object[] ancestors = (Object[]) state.get(KEY_ANCESTOR_IDS); 353 if (ancestors == null) { 354 return new Object[] { id }; 355 } else { 356 Object[] newAncestors = new Object[ancestors.length + 1]; 357 System.arraycopy(ancestors, 0, newAncestors, 0, ancestors.length); 358 newAncestors[ancestors.length] = id; 359 return newAncestors; 360 } 361 } 362 363 /** 364 * Copies the document into a newly-created object. 365 * <p> 366 * The copy is automatically saved. 367 */ 368 public DBSDocumentState copy(String id) { 369 DBSDocumentState copyState = new DBSDocumentState(getStateForRead(id)); 370 String copyId = repository.generateNewId(); 371 copyState.put(KEY_ID, copyId); 372 copyState.put(KEY_PROXY_IDS, null); // no proxies to this new doc 373 // other fields updated by the caller 374 transientStates.put(copyId, copyState); 375 transientCreated.add(copyId); 376 return copyState; 377 } 378 379 /** 380 * Updates ancestors recursively after a move. 381 * <p> 382 * Recursing from given doc, replace the first ndel ancestors with those passed. 383 * <p> 384 * Doesn't check transient (assumes save is done). The modifications are automatically saved. 
 */
    public void updateAncestors(String id, int ndel, Object[] ancestorIds) {
        int nadd = ancestorIds.length;
        Set<String> ids = getSubTree(id, null, null);
        ids.add(id);
        for (String cid : ids) {
            // XXX TODO oneShot update, don't pollute transient space
            DBSDocumentState docState = getStateForUpdate(cid);
            Object[] ancestors = (Object[]) docState.get(KEY_ANCESTOR_IDS);
            Object[] newAncestors;
            if (ancestors == null) {
                // no previous ancestors: the new chain is exactly the passed-in one
                newAncestors = ancestorIds.clone();
            } else {
                // replace the first ndel entries with the nadd new ones, keep the tail
                newAncestors = new Object[ancestors.length - ndel + nadd];
                System.arraycopy(ancestorIds, 0, newAncestors, 0, nadd);
                System.arraycopy(ancestors, ndel, newAncestors, nadd, ancestors.length - ndel);
            }
            docState.put(KEY_ANCESTOR_IDS, newAncestors);
        }
    }

    /**
     * Updates the Read ACLs recursively on a document.
     */
    public void updateTreeReadAcls(String id) {
        // versions too XXX TODO
        Set<String> ids = getSubTree(id, null, null);
        ids.add(id);
        ids.forEach(this::updateDocumentReadAcls);
    }

    /**
     * Updates the Read ACLs on a document (not recursively)
     */
    protected void updateDocumentReadAcls(String id) {
        // XXX TODO oneShot update, don't pollute transient space
        DBSDocumentState docState = getStateForUpdate(id);
        docState.put(KEY_READ_ACL, getReadACL(docState));
    }

    /**
     * Gets the Read ACL (flat list of users having browse permission, including inheritance) on a document.
     */
    protected String[] getReadACL(DBSDocumentState docState) {
        Set<String> racls = new HashSet<>();
        State state = docState.getState();
        // walk up the inheritance chain, accumulating users granted a browse permission
        LOOP: do {
            @SuppressWarnings("unchecked")
            List<Serializable> aclList = (List<Serializable>) state.get(KEY_ACP);
            if (aclList != null) {
                for (Serializable aclSer : aclList) {
                    State aclMap = (State) aclSer;
                    @SuppressWarnings("unchecked")
                    List<Serializable> aceList = (List<Serializable>) aclMap.get(KEY_ACL);
                    for (Serializable aceSer : aceList) {
                        State aceMap = (State) aceSer;
                        String username = (String) aceMap.get(KEY_ACE_USER);
                        String permission = (String) aceMap.get(KEY_ACE_PERMISSION);
                        Boolean granted = (Boolean) aceMap.get(KEY_ACE_GRANT);
                        if (TRUE.equals(granted) && browsePermissions.contains(permission)) {
                            racls.add(username);
                        }
                        if (FALSE.equals(granted)) {
                            if (!EVERYONE.equals(username)) {
                                // TODO log
                                // a deny on a specific user cannot be expressed in a flat user list
                                racls.add(UNSUPPORTED_ACL);
                            }
                            // a deny ends evaluation, including ancestor inheritance
                            break LOOP;
                        }
                    }
                }
            }
            // get parent
            if (TRUE.equals(state.get(KEY_IS_VERSION))) {
                // a version inherits from its version series, not from a folder parent
                String versionSeriesId = (String) state.get(KEY_VERSION_SERIES_ID);
                state = versionSeriesId == null ? null : getStateForRead(versionSeriesId);
            } else {
                String parentId = (String) state.get(KEY_PARENT_ID);
                state = parentId == null ? null : getStateForRead(parentId);
            }
        } while (state != null);

        // sort to have canonical order
        List<String> racl = new ArrayList<>(racls);
        Collections.sort(racl);
        return racl.toArray(new String[racl.size()]);
    }

    /**
     * Gets all the ids under a given one, recursively.
     * <p>
     * Doesn't check transient (assumes save is done).
477 * 478 * @param id the root of the tree (not included in results) 479 * @param proxyTargets returns a map of proxy to target among the documents found 480 * @param targetProxies returns a map of target to proxies among the document found 481 */ 482 protected Set<String> getSubTree(String id, Map<String, String> proxyTargets, Map<String, Object[]> targetProxies) { 483 Set<String> ids = new HashSet<>(); 484 // check repository 485 repository.queryKeyValueArray(KEY_ANCESTOR_IDS, id, ids, proxyTargets, targetProxies); 486 return ids; 487 } 488 489 public List<DBSDocumentState> getKeyValuedStates(String key, Object value) { 490 List<DBSDocumentState> docStates = new LinkedList<>(); 491 Set<String> seen = new HashSet<>(); 492 // check transient state 493 for (DBSDocumentState docState : transientStates.values()) { 494 if (!value.equals(docState.get(key))) { 495 continue; 496 } 497 docStates.add(docState); 498 seen.add(docState.getId()); 499 } 500 // fetch from repository 501 List<State> states = repository.queryKeyValue(key, value, seen); 502 for (State state : states) { 503 docStates.add(newTransientState(state)); 504 } 505 return docStates; 506 } 507 508 public List<DBSDocumentState> getKeyValuedStates(String key1, Object value1, String key2, Object value2) { 509 List<DBSDocumentState> docStates = new LinkedList<>(); 510 Set<String> seen = new HashSet<>(); 511 // check transient state 512 for (DBSDocumentState docState : transientStates.values()) { 513 seen.add(docState.getId()); 514 if (!(value1.equals(docState.get(key1)) && value2.equals(docState.get(key2)))) { 515 continue; 516 } 517 docStates.add(docState); 518 } 519 // fetch from repository 520 List<State> states = repository.queryKeyValue(key1, value1, key2, value2, seen); 521 for (State state : states) { 522 docStates.add(newTransientState(state)); 523 } 524 return docStates; 525 } 526 527 /** 528 * Removes a list of documents. 529 * <p> 530 * Called after a {@link #save} has been done. 
 */
    public void removeStates(Set<String> ids) {
        if (undoLog != null) {
            for (String id : ids) {
                if (undoLog.containsKey(id)) {
                    // there's already a create or an update in the undo log
                    State oldUndo = undoLog.get(id);
                    if (oldUndo == null) {
                        // create + delete -> forget
                        undoLog.remove(id);
                    } else {
                        // update + delete -> original old state to re-create
                        oldUndo.put(KEY_UNDOLOG_CREATE, TRUE);
                    }
                } else {
                    // just delete -> store old state to re-create
                    State oldState = StateHelper.deepCopy(getStateForRead(id));
                    oldState.put(KEY_UNDOLOG_CREATE, TRUE);
                    undoLog.put(id, oldState);
                }
            }
        }
        for (String id : ids) {
            transientStates.remove(id);
        }
        repository.deleteStates(ids);
    }

    /**
     * Writes transient state to database.
     * <p>
     * An undo log is kept in order to rollback the transaction later if needed.
     */
    public void save() {
        updateProxies();
        List<Work> works;
        if (!repository.isFulltextDisabled()) {
            // TODO getting fulltext already does a getStateChange
            works = getFulltextWorks();
        } else {
            works = Collections.emptyList();
        }
        // first pass: creations, in creation order
        List<State> statesToCreate = new ArrayList<>();
        for (String id : transientCreated) { // ordered
            DBSDocumentState docState = transientStates.get(id);
            docState.setNotDirty();
            if (undoLog != null) {
                undoLog.put(id, null); // marker to denote create
            }
            State state = docState.getState();
            state.put(KEY_CHANGE_TOKEN, INITIAL_CHANGE_TOKEN);
            statesToCreate.add(state);
        }
        if (!statesToCreate.isEmpty()) {
            repository.createStates(statesToCreate);
        }
        // second pass: updates of documents that were only modified
        for (DBSDocumentState docState : transientStates.values()) {
            String id = docState.getId();
            if (transientCreated.contains(id)) {
                continue; // already done
            }
            StateDiff diff = docState.getStateChange();
            if (diff != null) {
                if (undoLog != null) {
                    if (!undoLog.containsKey(id)) {
                        undoLog.put(id, StateHelper.deepCopy(docState.getOriginalState()));
                    }
                    // else there's already a create or an update in the undo log so original info is enough
                }
                ChangeTokenUpdater changeTokenUpdater;
                if (session.changeTokenEnabled) {
                    changeTokenUpdater = new ChangeTokenUpdater(docState);
                } else {
                    changeTokenUpdater = null;
                }
                repository.updateState(id, diff, changeTokenUpdater);
            }
            docState.setNotDirty();
        }
        transientCreated.clear();
        scheduleWork(works);
    }

    /**
     * Logic to get the conditions to use to match and update a change token.
     * <p>
     * This may be called several times for a single DBS document update, because the low-level storage may need several
     * database updates for a single high-level update in some cases.
     *
     * @since 9.1
     */
    public static class ChangeTokenUpdater {

        protected final DBSDocumentState docState;

        // the token expected to be found in the database by the next conditional update
        protected String oldToken;

        public ChangeTokenUpdater(DBSDocumentState docState) {
            this.docState = docState;
            oldToken = (String) docState.getOriginalState().get(KEY_CHANGE_TOKEN);
        }

        /**
         * Gets the conditions to use to match a change token.
         */
        public Map<String, Serializable> getConditions() {
            return Collections.singletonMap(KEY_CHANGE_TOKEN, oldToken);
        }

        /**
         * Gets the updates to make to write the updated change token.
         */
        public Map<String, Serializable> getUpdates() {
            String newToken;
            if (oldToken == null) {
                // document without change token, just created
                newToken = INITIAL_CHANGE_TOKEN;
            } else {
                newToken = updateChangeToken(oldToken);
            }
            // also store the new token in the state (without marking dirty), for the next update
            docState.getState().put(KEY_CHANGE_TOKEN, newToken);
            oldToken = newToken;
            return Collections.singletonMap(KEY_CHANGE_TOKEN, newToken);
        }

        /** Updates a change token to its new value.
         */
        protected String updateChangeToken(String token) {
            // tokens are monotonically increasing integers encoded as strings
            return Long.toString(Long.parseLong(token) + 1);
        }
    }

    // replays the undo log against the repository: deletes creations, re-creates deletions, reverts updates
    protected void applyUndoLog() {
        Set<String> deletes = new HashSet<>();
        for (Entry<String, State> es : undoLog.entrySet()) {
            String id = es.getKey();
            State state = es.getValue();
            if (state == null) {
                // null marker = document was created in the transaction -> delete it
                deletes.add(id);
            } else {
                boolean recreate = state.remove(KEY_UNDOLOG_CREATE) != null;
                if (recreate) {
                    repository.createState(state);
                } else {
                    // undo update
                    State currentState = repository.readState(id);
                    if (currentState != null) {
                        StateDiff diff = StateHelper.diff(currentState, state);
                        if (!diff.isEmpty()) {
                            repository.updateState(id, diff, null);
                        }
                    }
                    // else we expected to read a current state but it was concurrently deleted...
                    // in that case leave it deleted
                }
            }
        }
        if (!deletes.isEmpty()) {
            repository.deleteStates(deletes);
        }
    }

    /**
     * Checks if the changed documents are proxy targets, and updates the proxies if that's the case.
     */
    protected void updateProxies() {
        for (String id : transientCreated) { // ordered
            DBSDocumentState docState = transientStates.get(id);
            updateProxies(docState);
        }
        // copy as we may modify proxies
        for (String id : transientStates.keySet().toArray(new String[0])) {
            DBSDocumentState docState = transientStates.get(id);
            if (transientCreated.contains(id)) {
                continue; // already done
            }
            if (docState.isDirty()) {
                updateProxies(docState);
            }
        }
    }

    // updates each proxy pointing at the given target, logging (not rethrowing) concurrent update errors
    protected void updateProxies(DBSDocumentState target) {
        Object[] proxyIds = (Object[]) target.get(KEY_PROXY_IDS);
        if (proxyIds != null) {
            for (Object proxyId : proxyIds) {
                try {
                    updateProxy(target, (String) proxyId);
                } catch (ConcurrentUpdateException e) {
                    e.addInfo("On doc " + target.getId());
                    log.error(e, e);
                    // do not throw, this avoids crashing the session
                }
            }
        }
    }

    /**
     * Updates the state of a proxy based on its target.
     */
    protected void updateProxy(DBSDocumentState target, String proxyId) {
        DBSDocumentState proxy = getStateForUpdate(proxyId);
        if (proxy == null) {
            throw new ConcurrentUpdateException("Proxy " + proxyId + " concurrently deleted");
        }
        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
        // clear all proxy data
        for (String key : proxy.getState().keyArray()) {
            if (!isProxySpecific(key, schemaManager)) {
                proxy.put(key, null);
            }
        }
        // copy from target
        for (Entry<String, Serializable> en : target.getState().entrySet()) {
            String key = en.getKey();
            if (!isProxySpecific(key, schemaManager)) {
                proxy.put(key, StateHelper.deepCopy(en.getValue()));
            }
        }
    }

    /**
     * Things that we don't touch on a proxy when updating it.
754 */ 755 protected boolean isProxySpecific(String key, SchemaManager schemaManager) { 756 switch (key) { 757 // these are placeful stuff 758 case KEY_ID: 759 case KEY_PARENT_ID: 760 case KEY_ANCESTOR_IDS: 761 case KEY_NAME: 762 case KEY_POS: 763 case KEY_ACP: 764 case KEY_READ_ACL: 765 // these are proxy-specific 766 case KEY_IS_PROXY: 767 case KEY_PROXY_TARGET_ID: 768 case KEY_PROXY_VERSION_SERIES_ID: 769 case KEY_IS_VERSION: 770 case KEY_PROXY_IDS: 771 return true; 772 } 773 int p = key.indexOf(':'); 774 if (p == -1) { 775 // no prefix, assume not proxy-specific 776 return false; 777 } 778 String prefix = key.substring(0, p); 779 Schema schema = schemaManager.getSchemaFromPrefix(prefix); 780 if (schema == null) { 781 schema = schemaManager.getSchema(prefix); 782 if (schema == null) { 783 // unknown prefix, assume not proxy-specific 784 return false; 785 } 786 } 787 return schemaManager.isProxySchema(schema.getName(), null); // type unused 788 } 789 790 /** 791 * Called when created in a transaction. 792 * 793 * @since 7.4 794 */ 795 public void begin() { 796 undoLog = new HashMap<>(); 797 repository.begin(); 798 } 799 800 /** 801 * Saves and flushes to database. 802 */ 803 public void commit() { 804 save(); 805 commitSave(); 806 repository.commit(); 807 } 808 809 /** 810 * Commits the saved state to the database. 811 */ 812 protected void commitSave() { 813 // clear transient, this means that after this references to states will be stale 814 // TODO mark states as invalid 815 clearTransient(); 816 // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway 817 undoLog = null; 818 } 819 820 /** 821 * Rolls back the save state by applying the undo log. 
 */
    public void rollback() {
        clearTransient();
        applyUndoLog();
        // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway
        undoLog = null;
        repository.rollback();
    }

    // forgets all transient (unsaved) state
    protected void clearTransient() {
        transientStates.clear();
        transientCreated.clear();
    }

    /**
     * Gets the fulltext updates to do. Called at save() time.
     *
     * @return a list of {@link Work} instances to schedule post-commit.
     */
    protected List<Work> getFulltextWorks() {
        Set<String> docsWithDirtyStrings = new HashSet<>();
        Set<String> docsWithDirtyBinaries = new HashSet<>();
        findDirtyDocuments(docsWithDirtyStrings, docsWithDirtyBinaries);
        if (docsWithDirtyStrings.isEmpty() && docsWithDirtyBinaries.isEmpty()) {
            return Collections.emptyList();
        }
        List<Work> works = new LinkedList<>();
        getFulltextSimpleWorks(works, docsWithDirtyStrings);
        getFulltextBinariesWorks(works, docsWithDirtyBinaries);
        return works;
    }

    /**
     * Finds the documents having dirty text or dirty binaries that have to be reindexed as fulltext.
856 * 857 * @param docsWithDirtyStrings set of ids, updated by this method 858 * @param docWithDirtyBinaries set of ids, updated by this method 859 */ 860 protected void findDirtyDocuments(Set<String> docsWithDirtyStrings, Set<String> docWithDirtyBinaries) { 861 for (DBSDocumentState docState : transientStates.values()) { 862 State originalState = docState.getOriginalState(); 863 State state = docState.getState(); 864 if (originalState == state) { 865 continue; 866 } 867 StateDiff diff = StateHelper.diff(originalState, state); 868 if (diff.isEmpty()) { 869 continue; 870 } 871 StateDiff rdiff = StateHelper.diff(state, originalState); 872 // we do diffs in both directions to capture removal of complex list elements, 873 // for instance for {foo: [{bar: baz}] -> {foo: []} 874 // diff paths = foo and rdiff paths = foo/*/bar 875 Set<String> paths = new HashSet<>(); 876 DirtyPathsFinder dirtyPathsFinder = new DirtyPathsFinder(paths); 877 dirtyPathsFinder.findDirtyPaths(diff); 878 dirtyPathsFinder.findDirtyPaths(rdiff); 879 FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration(); 880 boolean dirtyStrings = false; 881 boolean dirtyBinaries = false; 882 for (String path : paths) { 883 Set<String> indexesSimple = fulltextConfiguration.indexesByPropPathSimple.get(path); 884 if (indexesSimple != null && !indexesSimple.isEmpty()) { 885 dirtyStrings = true; 886 if (dirtyBinaries) { 887 break; 888 } 889 } 890 Set<String> indexesBinary = fulltextConfiguration.indexesByPropPathBinary.get(path); 891 if (indexesBinary != null && !indexesBinary.isEmpty()) { 892 dirtyBinaries = true; 893 if (dirtyStrings) { 894 break; 895 } 896 } 897 } 898 if (dirtyStrings) { 899 docsWithDirtyStrings.add(docState.getId()); 900 } 901 if (dirtyBinaries) { 902 docWithDirtyBinaries.add(docState.getId()); 903 } 904 } 905 } 906 907 /** 908 * Iterates on a state diff to find the paths corresponding to dirty values. 
909 * 910 * @since 7.10-HF04, 8.1 911 */ 912 protected static class DirtyPathsFinder { 913 914 protected Set<String> paths; 915 916 public DirtyPathsFinder(Set<String> paths) { 917 this.paths = paths; 918 } 919 920 public void findDirtyPaths(StateDiff value) { 921 findDirtyPaths(value, null); 922 } 923 924 protected void findDirtyPaths(Object value, String path) { 925 if (value instanceof Object[]) { 926 findDirtyPaths((Object[]) value, path); 927 } else if (value instanceof List) { 928 findDirtyPaths((List<?>) value, path); 929 } else if (value instanceof ListDiff) { 930 findDirtyPaths((ListDiff) value, path); 931 } else if (value instanceof State) { 932 findDirtyPaths((State) value, path); 933 } else { 934 paths.add(path); 935 } 936 } 937 938 protected void findDirtyPaths(Object[] value, String path) { 939 String newPath = path + "/*"; 940 for (Object v : value) { 941 findDirtyPaths(v, newPath); 942 } 943 } 944 945 protected void findDirtyPaths(List<?> value, String path) { 946 String newPath = path + "/*"; 947 for (Object v : value) { 948 findDirtyPaths(v, newPath); 949 } 950 } 951 952 protected void findDirtyPaths(ListDiff value, String path) { 953 String newPath = path + "/*"; 954 if (value.diff != null) { 955 findDirtyPaths(value.diff, newPath); 956 } 957 if (value.rpush != null) { 958 findDirtyPaths(value.rpush, newPath); 959 } 960 } 961 962 protected void findDirtyPaths(State value, String path) { 963 for (Entry<String, Serializable> es : value.entrySet()) { 964 String key = es.getKey(); 965 Serializable v = es.getValue(); 966 String newPath = path == null ? 
key : path + "/" + key; 967 findDirtyPaths(v, newPath); 968 } 969 } 970 } 971 972 protected void getFulltextSimpleWorks(List<Work> works, Set<String> docsWithDirtyStrings) { 973 // TODO XXX make configurable, see also FulltextExtractorWork 974 FulltextParser fulltextParser = new DefaultFulltextParser(); 975 FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration(); 976 if (fulltextConfiguration.fulltextSearchDisabled) { 977 return; 978 } 979 // update simpletext on documents with dirty strings 980 for (String id : docsWithDirtyStrings) { 981 if (id == null) { 982 // cannot happen, but has been observed :( 983 log.error("Got null doc id in fulltext update, cannot happen"); 984 continue; 985 } 986 DBSDocumentState docState = getStateForUpdate(id); 987 if (docState == null) { 988 // cannot happen 989 continue; 990 } 991 String documentType = docState.getPrimaryType(); 992 // Object[] mixinTypes = (Object[]) docState.get(KEY_MIXIN_TYPES); 993 994 if (!fulltextConfiguration.isFulltextIndexable(documentType)) { 995 continue; 996 } 997 docState.put(KEY_FULLTEXT_JOBID, docState.getId()); 998 FulltextFinder fulltextFinder = new FulltextFinder(fulltextParser, docState, session); 999 List<IndexAndText> indexesAndText = new LinkedList<>(); 1000 for (String indexName : fulltextConfiguration.indexNames) { 1001 // TODO paths from config 1002 String text = fulltextFinder.findFulltext(indexName); 1003 indexesAndText.add(new IndexAndText(indexName, text)); 1004 } 1005 if (!indexesAndText.isEmpty()) { 1006 Work work = new FulltextUpdaterWork(repository.getName(), id, true, false, indexesAndText); 1007 works.add(work); 1008 } 1009 } 1010 } 1011 1012 protected void getFulltextBinariesWorks(List<Work> works, Set<String> docWithDirtyBinaries) { 1013 if (docWithDirtyBinaries.isEmpty()) { 1014 return; 1015 } 1016 1017 FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration(); 1018 1019 // mark indexing in progress, so that future copies 
(including versions) 1020 // will be indexed as well 1021 for (String id : docWithDirtyBinaries) { 1022 DBSDocumentState docState = getStateForUpdate(id); 1023 if (docState == null) { 1024 // cannot happen 1025 continue; 1026 } 1027 if (!fulltextConfiguration.isFulltextIndexable(docState.getPrimaryType())) { 1028 continue; 1029 } 1030 docState.put(KEY_FULLTEXT_JOBID, docState.getId()); 1031 } 1032 1033 // FulltextExtractorWork does fulltext extraction using converters 1034 // and then schedules a FulltextUpdaterWork to write the results 1035 // single-threaded 1036 for (String id : docWithDirtyBinaries) { 1037 // don't exclude proxies 1038 Work work = new DBSFulltextExtractorWork(repository.getName(), id); 1039 works.add(work); 1040 } 1041 } 1042 1043 protected static class FulltextFinder { 1044 1045 protected final FulltextParser fulltextParser; 1046 1047 protected final DBSDocumentState document; 1048 1049 protected final DBSSession session; 1050 1051 protected final String documentType; 1052 1053 protected final Object[] mixinTypes; 1054 1055 /** 1056 * Prepares parsing for one document. 1057 */ 1058 public FulltextFinder(FulltextParser fulltextParser, DBSDocumentState document, DBSSession session) { 1059 this.fulltextParser = fulltextParser; 1060 this.document = document; 1061 this.session = session; 1062 if (document == null) { 1063 documentType = null; 1064 mixinTypes = null; 1065 } else { // null in tests 1066 documentType = document.getPrimaryType(); 1067 mixinTypes = (Object[]) document.get(KEY_MIXIN_TYPES); 1068 } 1069 } 1070 1071 /** 1072 * Parses the document for one index. 
         */
        public String findFulltext(String indexName) {
            // TODO indexName
            // TODO paths
            List<String> strings = new ArrayList<>();
            findFulltext(indexName, document.getState(), strings);
            // single space-separated string is what the index expects
            return StringUtils.join(strings, ' ');
        }

        // Recursively collects the indexable strings of a state subtree into
        // the given accumulator.
        protected void findFulltext(String indexName, State state, List<String> strings) {
            for (Entry<String, Serializable> en : state.entrySet()) {
                String key = en.getKey();
                if (key.startsWith(KEY_PREFIX)) {
                    // system (prefixed) properties are skipped, with the
                    // exceptions enumerated in the switch below
                    switch (key) {
                    // allow indexing of this:
                    case DBSDocument.KEY_NAME:
                        break;
                    default:
                        continue;
                    }
                }
                Serializable value = en.getValue();
                if (value instanceof State) {
                    // complex property: recurse
                    State s = (State) value;
                    findFulltext(indexName, s, strings);
                } else if (value instanceof List) {
                    // complex list property: recurse on each element
                    @SuppressWarnings("unchecked")
                    List<State> v = (List<State>) value;
                    for (State s : v) {
                        findFulltext(indexName, s, strings);
                    }
                } else if (value instanceof Object[]) {
                    Object[] ar = (Object[]) value;
                    for (Object v : ar) {
                        if (v instanceof String) {
                            fulltextParser.parse((String) v, null, strings);
                        } else {
                            // arrays are homogeneous, no need to continue
                            break;
                        }
                    }
                } else {
                    // scalar: only strings are indexed
                    if (value instanceof String) {
                        fulltextParser.parse((String) value, null, strings);
                    }
                }
            }
        }
    }

    /**
     * Schedules the given works with the work manager.
     */
    protected void scheduleWork(List<Work> works) {
        // do async fulltext indexing only if high-level sessions are available
        RepositoryManager repositoryManager = Framework.getLocalService(RepositoryManager.class);
        if (repositoryManager != null && !works.isEmpty()) {
            WorkManager workManager = Framework.getLocalService(WorkManager.class);
            for (Work work : works) {
                // schedule work post-commit
                // in non-tx mode, this may execute it nearly immediately
                workManager.schedule(work, Scheduling.IF_NOT_SCHEDULED, true);
            }
        }
    }
}