/*
 * (C) Copyright 2014-2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.storage.dbs;

import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;
import static org.nuxeo.ecm.core.api.security.SecurityConstants.BROWSE;
import static org.nuxeo.ecm.core.api.security.SecurityConstants.EVERYONE;
import static org.nuxeo.ecm.core.api.security.SecurityConstants.UNSUPPORTED_ACL;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_GRANT;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_PERMISSION;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_USER;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ANCESTOR_IDS;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_JOBID;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ID;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_PROXY;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_VERSION;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_MIXIN_TYPES;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_NAME;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PARENT_ID;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_POS;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PREFIX;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PRIMARY_TYPE;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_IDS;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_TARGET_ID;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_VERSION_SERIES_ID;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_READ_ACL;
import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_VERSION_SERIES_ID;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.ConcurrentUpdateException;
import org.nuxeo.ecm.core.api.repository.RepositoryManager;
import org.nuxeo.ecm.core.schema.SchemaManager;
import org.nuxeo.ecm.core.schema.types.Schema;
import org.nuxeo.ecm.core.security.SecurityService;
import org.nuxeo.ecm.core.storage.DefaultFulltextParser;
import org.nuxeo.ecm.core.storage.FulltextConfiguration;
import org.nuxeo.ecm.core.storage.FulltextParser;
import org.nuxeo.ecm.core.storage.FulltextUpdaterWork;
import org.nuxeo.ecm.core.storage.FulltextUpdaterWork.IndexAndText;
import org.nuxeo.ecm.core.storage.State;
import org.nuxeo.ecm.core.storage.State.ListDiff;
import org.nuxeo.ecm.core.storage.State.StateDiff;
import org.nuxeo.ecm.core.storage.StateHelper;
import org.nuxeo.ecm.core.work.api.Work;
import org.nuxeo.ecm.core.work.api.WorkManager;
import org.nuxeo.ecm.core.work.api.WorkManager.Scheduling;
import org.nuxeo.runtime.api.Framework;

/**
 * Transactional state for a session.
 * <p>
 * Until {@code save()} is called, data lives in the transient map.
 * <p>
 * Upon save, data is written to the repository, even though it has not yet been committed (this means that other
 * sessions can read uncommitted data). It's also kept in an undo log in order for rollback to be possible.
 * <p>
 * On commit, the undo log is forgotten. On rollback, the undo log is replayed.
 *
 * @since 5.9.4
 */
public class DBSTransactionState {

    private static final Log log = LogFactory.getLog(DBSTransactionState.class);

    /**
     * Marker key stored in an undo log {@link State} to denote that the document must be fully re-created (not just
     * updated) when the undo log is applied. The value cannot collide with a real property name.
     */
    private static final String KEY_UNDOLOG_CREATE = "__UNDOLOG_CREATE__\0\0";

    protected final DBSRepository repository;

    protected final DBSSession session;

    /** Retrieved and created document state. */
    protected Map<String, DBSDocumentState> transientStates = new HashMap<>();

    /** Ids of documents created but not yet saved. */
    protected Set<String> transientCreated = new LinkedHashSet<>();

    /**
     * Undo log.
     * <p>
     * A map of document ids to null or State. The value is null when the document has to be deleted when applying the
     * undo log. Otherwise the value is a State. If the State contains the key {@link #KEY_UNDOLOG_CREATE} then the
     * state must be re-created completely when applying the undo log, otherwise just applied as an update.
     * <p>
     * Null when there is no active transaction.
     */
    protected Map<String, State> undoLog;

    /** Permissions implying Browse, precomputed once and used when building Read ACLs. */
    protected final Set<String> browsePermissions;

    /**
     * Creates the transactional state for a session.
     *
     * @param repository the backing repository
     * @param session the owning session
     */
    public DBSTransactionState(DBSRepository repository, DBSSession session) {
        this.repository = repository;
        this.session = session;
        // getService, consistent with the lookup style used in updateProxy
        SecurityService securityService = Framework.getService(SecurityService.class);
        browsePermissions = new HashSet<>(Arrays.asList(securityService.getPermissionsToCheck(BROWSE)));
    }

    /**
     * New transient state for something just read from the repository.
     *
     * @param state the state read from the repository, or {@code null}
     * @return the transient state, or {@code null} if {@code state} was {@code null}
     * @throws IllegalStateException if the document is already present in the transient space
     */
    protected DBSDocumentState newTransientState(State state) {
        if (state == null) {
            return null;
        }
        String id = (String) state.get(KEY_ID);
        if (transientStates.containsKey(id)) {
            throw new IllegalStateException("Already transient: " + id);
        }
        DBSDocumentState docState = new DBSDocumentState(state); // copy
        transientStates.put(id, docState);
        return docState;
    }

    /**
     * Returns a state and marks it as transient, because it's about to be modified or returned to user code (where it
     * may be modified).
     *
     * @param id the document id
     * @return the transient state, or {@code null} if the document does not exist
     */
    public DBSDocumentState getStateForUpdate(String id) {
        // check transient state
        DBSDocumentState docState = transientStates.get(id);
        if (docState != null) {
            return docState;
        }
        // fetch from repository
        State state = repository.readState(id);
        return newTransientState(state);
    }

    /**
     * Returns a state which won't be modified.
     *
     * @param id the document id
     * @return the state, or {@code null} if the document does not exist
     */
    // TODO in some cases it's good to have this kept in memory instead of
    // rereading from database every time
    // XXX getStateForReadOneShot
    public State getStateForRead(String id) {
        // check transient state
        DBSDocumentState docState = transientStates.get(id);
        if (docState != null) {
            return docState.getState();
        }
        // fetch from repository
        return repository.readState(id);
    }

    /**
     * Returns states and marks them transient, because they're about to be returned to user code (where they may be
     * modified).
     *
     * @param ids the document ids
     * @return the transient states; ids that cannot be fetched are skipped (with a trace log)
     */
    public List<DBSDocumentState> getStatesForUpdate(List<String> ids) {
        // check which ones we have to fetch from repository
        List<String> idsToFetch = new LinkedList<>();
        for (String id : ids) {
            // check transient state
            DBSDocumentState docState = transientStates.get(id);
            if (docState != null) {
                continue;
            }
            // will have to fetch it
            idsToFetch.add(id);
        }
        if (!idsToFetch.isEmpty()) {
            List<State> states = repository.readStates(idsToFetch);
            for (State state : states) {
                newTransientState(state);
            }
        }
        // everything now fetched in transient
        List<DBSDocumentState> docStates = new ArrayList<>(ids.size());
        for (String id : ids) {
            DBSDocumentState docState = transientStates.get(id);
            if (docState == null) {
                if (log.isTraceEnabled()) {
                    log.trace("Cannot fetch document with id: " + id, new Throwable("debug stack trace"));
                }
                continue;
            }
            docStates.add(docState);
        }
        return docStates;
    }

    // XXX TODO for update or for read?
216 public DBSDocumentState getChildState(String parentId, String name) { 217 // check transient state 218 for (DBSDocumentState docState : transientStates.values()) { 219 if (!parentId.equals(docState.getParentId())) { 220 continue; 221 } 222 if (!name.equals(docState.getName())) { 223 continue; 224 } 225 return docState; 226 } 227 // fetch from repository 228 State state = repository.readChildState(parentId, name, Collections.emptySet()); 229 return newTransientState(state); 230 } 231 232 public boolean hasChild(String parentId, String name) { 233 // check transient state 234 for (DBSDocumentState docState : transientStates.values()) { 235 if (!parentId.equals(docState.getParentId())) { 236 continue; 237 } 238 if (!name.equals(docState.getName())) { 239 continue; 240 } 241 return true; 242 } 243 // check repository 244 return repository.hasChild(parentId, name, Collections.emptySet()); 245 } 246 247 public List<DBSDocumentState> getChildrenStates(String parentId) { 248 List<DBSDocumentState> docStates = new LinkedList<>(); 249 Set<String> seen = new HashSet<>(); 250 // check transient state 251 for (DBSDocumentState docState : transientStates.values()) { 252 if (!parentId.equals(docState.getParentId())) { 253 continue; 254 } 255 docStates.add(docState); 256 seen.add(docState.getId()); 257 } 258 // fetch from repository 259 List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen); 260 for (State state : states) { 261 docStates.add(newTransientState(state)); 262 } 263 return docStates; 264 } 265 266 public List<String> getChildrenIds(String parentId) { 267 List<String> children = new ArrayList<>(); 268 Set<String> seen = new HashSet<>(); 269 // check transient state 270 for (DBSDocumentState docState : transientStates.values()) { 271 String id = docState.getId(); 272 if (!parentId.equals(docState.getParentId())) { 273 continue; 274 } 275 seen.add(id); 276 children.add(id); 277 } 278 // fetch from repository 279 List<State> states = 
repository.queryKeyValue(KEY_PARENT_ID, parentId, seen); 280 for (State state : states) { 281 children.add((String) state.get(KEY_ID)); 282 } 283 return new ArrayList<>(children); 284 } 285 286 public boolean hasChildren(String parentId) { 287 // check transient state 288 for (DBSDocumentState docState : transientStates.values()) { 289 if (!parentId.equals(docState.getParentId())) { 290 continue; 291 } 292 return true; 293 } 294 // check repository 295 return repository.queryKeyValuePresence(KEY_PARENT_ID, parentId, Collections.emptySet()); 296 } 297 298 public DBSDocumentState createChild(String id, String parentId, String name, Long pos, String typeName) { 299 // id may be not-null for import 300 if (id == null) { 301 id = repository.generateNewId(); 302 } 303 transientCreated.add(id); 304 DBSDocumentState docState = new DBSDocumentState(); 305 transientStates.put(id, docState); 306 docState.put(KEY_ID, id); 307 docState.put(KEY_PARENT_ID, parentId); 308 docState.put(KEY_ANCESTOR_IDS, getAncestorIds(parentId)); 309 docState.put(KEY_NAME, name); 310 docState.put(KEY_POS, pos); 311 docState.put(KEY_PRIMARY_TYPE, typeName); 312 // update read acls for new doc 313 updateDocumentReadAcls(id); 314 return docState; 315 } 316 317 /** Gets ancestors including id itself. */ 318 protected Object[] getAncestorIds(String id) { 319 if (id == null) { 320 return null; 321 } 322 State state = getStateForRead(id); 323 if (state == null) { 324 throw new RuntimeException("No such id: " + id); 325 } 326 Object[] ancestors = (Object[]) state.get(KEY_ANCESTOR_IDS); 327 if (ancestors == null) { 328 return new Object[] { id }; 329 } else { 330 Object[] newAncestors = new Object[ancestors.length + 1]; 331 System.arraycopy(ancestors, 0, newAncestors, 0, ancestors.length); 332 newAncestors[ancestors.length] = id; 333 return newAncestors; 334 } 335 } 336 337 /** 338 * Copies the document into a newly-created object. 339 * <p> 340 * The copy is automatically saved. 
341 */ 342 public DBSDocumentState copy(String id) { 343 DBSDocumentState copyState = new DBSDocumentState(getStateForRead(id)); 344 String copyId = repository.generateNewId(); 345 copyState.put(KEY_ID, copyId); 346 copyState.put(KEY_PROXY_IDS, null); // no proxies to this new doc 347 // other fields updated by the caller 348 transientStates.put(copyId, copyState); 349 transientCreated.add(copyId); 350 return copyState; 351 } 352 353 /** 354 * Updates ancestors recursively after a move. 355 * <p> 356 * Recursing from given doc, replace the first ndel ancestors with those passed. 357 * <p> 358 * Doesn't check transient (assumes save is done). The modifications are automatically saved. 359 */ 360 public void updateAncestors(String id, int ndel, Object[] ancestorIds) { 361 int nadd = ancestorIds.length; 362 Set<String> ids = getSubTree(id, null, null); 363 ids.add(id); 364 for (String cid : ids) { 365 // XXX TODO oneShot update, don't pollute transient space 366 DBSDocumentState docState = getStateForUpdate(cid); 367 Object[] ancestors = (Object[]) docState.get(KEY_ANCESTOR_IDS); 368 Object[] newAncestors; 369 if (ancestors == null) { 370 newAncestors = ancestorIds.clone(); 371 } else { 372 newAncestors = new Object[ancestors.length - ndel + nadd]; 373 System.arraycopy(ancestorIds, 0, newAncestors, 0, nadd); 374 System.arraycopy(ancestors, ndel, newAncestors, nadd, ancestors.length - ndel); 375 } 376 docState.put(KEY_ANCESTOR_IDS, newAncestors); 377 } 378 } 379 380 /** 381 * Updates the Read ACLs recursively on a document. 
382 */ 383 public void updateTreeReadAcls(String id) { 384 // versions too XXX TODO 385 Set<String> ids = getSubTree(id, null, null); 386 ids.add(id); 387 ids.forEach(this::updateDocumentReadAcls); 388 } 389 390 /** 391 * Updates the Read ACLs on a document (not recursively) 392 */ 393 protected void updateDocumentReadAcls(String id) { 394 // XXX TODO oneShot update, don't pollute transient space 395 DBSDocumentState docState = getStateForUpdate(id); 396 docState.put(KEY_READ_ACL, getReadACL(docState)); 397 } 398 399 /** 400 * Gets the Read ACL (flat list of users having browse permission, including inheritance) on a document. 401 */ 402 protected String[] getReadACL(DBSDocumentState docState) { 403 Set<String> racls = new HashSet<>(); 404 State state = docState.getState(); 405 LOOP: do { 406 @SuppressWarnings("unchecked") 407 List<Serializable> aclList = (List<Serializable>) state.get(KEY_ACP); 408 if (aclList != null) { 409 for (Serializable aclSer : aclList) { 410 State aclMap = (State) aclSer; 411 @SuppressWarnings("unchecked") 412 List<Serializable> aceList = (List<Serializable>) aclMap.get(KEY_ACL); 413 for (Serializable aceSer : aceList) { 414 State aceMap = (State) aceSer; 415 String username = (String) aceMap.get(KEY_ACE_USER); 416 String permission = (String) aceMap.get(KEY_ACE_PERMISSION); 417 Boolean granted = (Boolean) aceMap.get(KEY_ACE_GRANT); 418 if (TRUE.equals(granted) && browsePermissions.contains(permission)) { 419 racls.add(username); 420 } 421 if (FALSE.equals(granted)) { 422 if (!EVERYONE.equals(username)) { 423 // TODO log 424 racls.add(UNSUPPORTED_ACL); 425 } 426 break LOOP; 427 } 428 } 429 } 430 } 431 // get parent 432 if (TRUE.equals(state.get(KEY_IS_VERSION))) { 433 String versionSeriesId = (String) state.get(KEY_VERSION_SERIES_ID); 434 state = versionSeriesId == null ? null : getStateForRead(versionSeriesId); 435 } else { 436 String parentId = (String) state.get(KEY_PARENT_ID); 437 state = parentId == null ? 
null : getStateForRead(parentId); 438 } 439 } while (state != null); 440 441 // sort to have canonical order 442 List<String> racl = new ArrayList<>(racls); 443 Collections.sort(racl); 444 return racl.toArray(new String[racl.size()]); 445 } 446 447 /** 448 * Gets all the ids under a given one, recursively. 449 * <p> 450 * Doesn't check transient (assumes save is done). 451 * 452 * @param id the root of the tree (not included in results) 453 * @param proxyTargets returns a map of proxy to target among the documents found 454 * @param targetProxies returns a map of target to proxies among the document found 455 */ 456 protected Set<String> getSubTree(String id, Map<String, String> proxyTargets, Map<String, Object[]> targetProxies) { 457 Set<String> ids = new HashSet<>(); 458 // check repository 459 repository.queryKeyValueArray(KEY_ANCESTOR_IDS, id, ids, proxyTargets, targetProxies); 460 return ids; 461 } 462 463 public List<DBSDocumentState> getKeyValuedStates(String key, Object value) { 464 List<DBSDocumentState> docStates = new LinkedList<>(); 465 Set<String> seen = new HashSet<>(); 466 // check transient state 467 for (DBSDocumentState docState : transientStates.values()) { 468 if (!value.equals(docState.get(key))) { 469 continue; 470 } 471 docStates.add(docState); 472 seen.add(docState.getId()); 473 } 474 // fetch from repository 475 List<State> states = repository.queryKeyValue(key, value, seen); 476 for (State state : states) { 477 docStates.add(newTransientState(state)); 478 } 479 return docStates; 480 } 481 482 public List<DBSDocumentState> getKeyValuedStates(String key1, Object value1, String key2, Object value2) { 483 List<DBSDocumentState> docStates = new LinkedList<>(); 484 Set<String> seen = new HashSet<>(); 485 // check transient state 486 for (DBSDocumentState docState : transientStates.values()) { 487 seen.add(docState.getId()); 488 if (!(value1.equals(docState.get(key1)) && value2.equals(docState.get(key2)))) { 489 continue; 490 } 491 
docStates.add(docState); 492 } 493 // fetch from repository 494 List<State> states = repository.queryKeyValue(key1, value1, key2, value2, seen); 495 for (State state : states) { 496 docStates.add(newTransientState(state)); 497 } 498 return docStates; 499 } 500 501 /** 502 * Removes a list of documents. 503 * <p> 504 * Called after a {@link #save} has been done. 505 */ 506 public void removeStates(Set<String> ids) { 507 if (undoLog != null) { 508 for (String id : ids) { 509 if (undoLog.containsKey(id)) { 510 // there's already a create or an update in the undo log 511 State oldUndo = undoLog.get(id); 512 if (oldUndo == null) { 513 // create + delete -> forget 514 undoLog.remove(id); 515 } else { 516 // update + delete -> original old state to re-create 517 oldUndo.put(KEY_UNDOLOG_CREATE, TRUE); 518 } 519 } else { 520 // just delete -> store old state to re-create 521 State oldState = StateHelper.deepCopy(getStateForRead(id)); 522 oldState.put(KEY_UNDOLOG_CREATE, TRUE); 523 undoLog.put(id, oldState); 524 } 525 } 526 } 527 for (String id : ids) { 528 transientStates.remove(id); 529 } 530 repository.deleteStates(ids); 531 } 532 533 /** 534 * Writes transient state to database. 535 * <p> 536 * An undo log is kept in order to rollback the transaction later if needed. 
537 */ 538 public void save() { 539 updateProxies(); 540 List<Work> works; 541 if (!repository.isFulltextDisabled()) { 542 // TODO getting fulltext already does a getStateChange 543 works = getFulltextWorks(); 544 } else { 545 works = Collections.emptyList(); 546 } 547 List<State> statesToCreate = new ArrayList<>(); 548 for (String id : transientCreated) { // ordered 549 DBSDocumentState docState = transientStates.get(id); 550 docState.setNotDirty(); 551 if (undoLog != null) { 552 undoLog.put(id, null); // marker to denote create 553 } 554 statesToCreate.add(docState.getState()); 555 } 556 if (!statesToCreate.isEmpty()) { 557 repository.createStates(statesToCreate); 558 } 559 for (DBSDocumentState docState : transientStates.values()) { 560 String id = docState.getId(); 561 if (transientCreated.contains(id)) { 562 continue; // already done 563 } 564 StateDiff diff = docState.getStateChange(); 565 if (diff != null) { 566 if (undoLog != null) { 567 if (!undoLog.containsKey(id)) { 568 undoLog.put(id, StateHelper.deepCopy(docState.getOriginalState())); 569 } 570 // else there's already a create or an update in the undo log so original info is enough 571 } 572 repository.updateState(id, diff); 573 } 574 docState.setNotDirty(); 575 } 576 transientCreated.clear(); 577 scheduleWork(works); 578 } 579 580 protected void applyUndoLog() { 581 Set<String> deletes = new HashSet<>(); 582 for (Entry<String, State> es : undoLog.entrySet()) { 583 String id = es.getKey(); 584 State state = es.getValue(); 585 if (state == null) { 586 deletes.add(id); 587 } else { 588 boolean recreate = state.remove(KEY_UNDOLOG_CREATE) != null; 589 if (recreate) { 590 repository.createState(state); 591 } else { 592 // undo update 593 State currentState = repository.readState(id); 594 if (currentState != null) { 595 StateDiff diff = StateHelper.diff(currentState, state); 596 if (!diff.isEmpty()) { 597 repository.updateState(id, diff); 598 } 599 } 600 // else we expected to read a current state but it was 
concurrently deleted... 601 // in that case leave it deleted 602 } 603 } 604 } 605 if (!deletes.isEmpty()) { 606 repository.deleteStates(deletes); 607 } 608 } 609 610 /** 611 * Checks if the changed documents are proxy targets, and updates the proxies if that's the case. 612 */ 613 protected void updateProxies() { 614 for (String id : transientCreated) { // ordered 615 DBSDocumentState docState = transientStates.get(id); 616 updateProxies(docState); 617 } 618 // copy as we may modify proxies 619 for (String id : transientStates.keySet().toArray(new String[0])) { 620 DBSDocumentState docState = transientStates.get(id); 621 if (transientCreated.contains(id)) { 622 continue; // already done 623 } 624 if (docState.isDirty()) { 625 updateProxies(docState); 626 } 627 } 628 } 629 630 protected void updateProxies(DBSDocumentState target) { 631 Object[] proxyIds = (Object[]) target.get(KEY_PROXY_IDS); 632 if (proxyIds != null) { 633 for (Object proxyId : proxyIds) { 634 try { 635 updateProxy(target, (String) proxyId); 636 } catch (ConcurrentUpdateException e) { 637 e.addInfo("On doc " + target.getId()); 638 log.error(e, e); 639 // do not throw, this avoids crashing the session 640 } 641 } 642 } 643 } 644 645 /** 646 * Updates the state of a proxy based on its target. 
647 */ 648 protected void updateProxy(DBSDocumentState target, String proxyId) { 649 DBSDocumentState proxy = getStateForUpdate(proxyId); 650 if (proxy == null) { 651 throw new ConcurrentUpdateException("Proxy " + proxyId + " concurrently deleted"); 652 } 653 SchemaManager schemaManager = Framework.getService(SchemaManager.class); 654 // clear all proxy data 655 for (String key : proxy.getState().keyArray()) { 656 if (!isProxySpecific(key, schemaManager)) { 657 proxy.put(key, null); 658 } 659 } 660 // copy from target 661 for (Entry<String, Serializable> en : target.getState().entrySet()) { 662 String key = en.getKey(); 663 if (!isProxySpecific(key, schemaManager)) { 664 proxy.put(key, StateHelper.deepCopy(en.getValue())); 665 } 666 } 667 } 668 669 /** 670 * Things that we don't touch on a proxy when updating it. 671 */ 672 protected boolean isProxySpecific(String key, SchemaManager schemaManager) { 673 switch (key) { 674 // these are placeful stuff 675 case KEY_ID: 676 case KEY_PARENT_ID: 677 case KEY_ANCESTOR_IDS: 678 case KEY_NAME: 679 case KEY_POS: 680 case KEY_ACP: 681 case KEY_READ_ACL: 682 // these are proxy-specific 683 case KEY_IS_PROXY: 684 case KEY_PROXY_TARGET_ID: 685 case KEY_PROXY_VERSION_SERIES_ID: 686 case KEY_IS_VERSION: 687 case KEY_PROXY_IDS: 688 return true; 689 } 690 int p = key.indexOf(':'); 691 if (p == -1) { 692 // no prefix, assume not proxy-specific 693 return false; 694 } 695 String prefix = key.substring(0, p); 696 Schema schema = schemaManager.getSchemaFromPrefix(prefix); 697 if (schema == null) { 698 schema = schemaManager.getSchema(prefix); 699 if (schema == null) { 700 // unknown prefix, assume not proxy-specific 701 return false; 702 } 703 } 704 return schemaManager.isProxySchema(schema.getName(), null); // type unused 705 } 706 707 /** 708 * Called when created in a transaction. 
709 * 710 * @since 7.4 711 */ 712 public void begin() { 713 undoLog = new HashMap<>(); 714 repository.begin(); 715 } 716 717 /** 718 * Saves and flushes to database. 719 */ 720 public void commit() { 721 save(); 722 commitSave(); 723 repository.commit(); 724 } 725 726 /** 727 * Commits the saved state to the database. 728 */ 729 protected void commitSave() { 730 // clear transient, this means that after this references to states will be stale 731 // TODO mark states as invalid 732 clearTransient(); 733 // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway 734 undoLog = null; 735 } 736 737 /** 738 * Rolls back the save state by applying the undo log. 739 */ 740 public void rollback() { 741 clearTransient(); 742 applyUndoLog(); 743 // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway 744 undoLog = null; 745 repository.rollback(); 746 } 747 748 protected void clearTransient() { 749 transientStates.clear(); 750 transientCreated.clear(); 751 } 752 753 /** 754 * Gets the fulltext updates to do. Called at save() time. 755 * 756 * @return a list of {@link Work} instances to schedule post-commit. 757 */ 758 protected List<Work> getFulltextWorks() { 759 Set<String> docsWithDirtyStrings = new HashSet<>(); 760 Set<String> docsWithDirtyBinaries = new HashSet<>(); 761 findDirtyDocuments(docsWithDirtyStrings, docsWithDirtyBinaries); 762 if (docsWithDirtyStrings.isEmpty() && docsWithDirtyBinaries.isEmpty()) { 763 return Collections.emptyList(); 764 } 765 List<Work> works = new LinkedList<>(); 766 getFulltextSimpleWorks(works, docsWithDirtyStrings); 767 getFulltextBinariesWorks(works, docsWithDirtyBinaries); 768 return works; 769 } 770 771 /** 772 * Finds the documents having dirty text or dirty binaries that have to be reindexed as fulltext. 
773 * 774 * @param docsWithDirtyStrings set of ids, updated by this method 775 * @param docWithDirtyBinaries set of ids, updated by this method 776 */ 777 protected void findDirtyDocuments(Set<String> docsWithDirtyStrings, Set<String> docWithDirtyBinaries) { 778 for (DBSDocumentState docState : transientStates.values()) { 779 State originalState = docState.getOriginalState(); 780 State state = docState.getState(); 781 if (originalState == state) { 782 continue; 783 } 784 StateDiff diff = StateHelper.diff(originalState, state); 785 if (diff.isEmpty()) { 786 continue; 787 } 788 StateDiff rdiff = StateHelper.diff(state, originalState); 789 // we do diffs in both directions to capture removal of complex list elements, 790 // for instance for {foo: [{bar: baz}] -> {foo: []} 791 // diff paths = foo and rdiff paths = foo/*/bar 792 Set<String> paths = new HashSet<>(); 793 DirtyPathsFinder dirtyPathsFinder = new DirtyPathsFinder(paths); 794 dirtyPathsFinder.findDirtyPaths(diff); 795 dirtyPathsFinder.findDirtyPaths(rdiff); 796 FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration(); 797 boolean dirtyStrings = false; 798 boolean dirtyBinaries = false; 799 for (String path : paths) { 800 Set<String> indexesSimple = fulltextConfiguration.indexesByPropPathSimple.get(path); 801 if (indexesSimple != null && !indexesSimple.isEmpty()) { 802 dirtyStrings = true; 803 if (dirtyBinaries) { 804 break; 805 } 806 } 807 Set<String> indexesBinary = fulltextConfiguration.indexesByPropPathBinary.get(path); 808 if (indexesBinary != null && !indexesBinary.isEmpty()) { 809 dirtyBinaries = true; 810 if (dirtyStrings) { 811 break; 812 } 813 } 814 } 815 if (dirtyStrings) { 816 docsWithDirtyStrings.add(docState.getId()); 817 } 818 if (dirtyBinaries) { 819 docWithDirtyBinaries.add(docState.getId()); 820 } 821 } 822 } 823 824 /** 825 * Iterates on a state diff to find the paths corresponding to dirty values. 
826 * 827 * @since 7.10-HF04, 8.1 828 */ 829 protected static class DirtyPathsFinder { 830 831 protected Set<String> paths; 832 833 public DirtyPathsFinder(Set<String> paths) { 834 this.paths = paths; 835 } 836 837 public void findDirtyPaths(StateDiff value) { 838 findDirtyPaths(value, null); 839 } 840 841 protected void findDirtyPaths(Object value, String path) { 842 if (value instanceof Object[]) { 843 findDirtyPaths((Object[]) value, path); 844 } else if (value instanceof List) { 845 findDirtyPaths((List<?>) value, path); 846 } else if (value instanceof ListDiff) { 847 findDirtyPaths((ListDiff) value, path); 848 } else if (value instanceof State) { 849 findDirtyPaths((State) value, path); 850 } else { 851 paths.add(path); 852 } 853 } 854 855 protected void findDirtyPaths(Object[] value, String path) { 856 String newPath = path + "/*"; 857 for (Object v : value) { 858 findDirtyPaths(v, newPath); 859 } 860 } 861 862 protected void findDirtyPaths(List<?> value, String path) { 863 String newPath = path + "/*"; 864 for (Object v : value) { 865 findDirtyPaths(v, newPath); 866 } 867 } 868 869 protected void findDirtyPaths(ListDiff value, String path) { 870 String newPath = path + "/*"; 871 if (value.diff != null) { 872 findDirtyPaths(value.diff, newPath); 873 } 874 if (value.rpush != null) { 875 findDirtyPaths(value.rpush, newPath); 876 } 877 } 878 879 protected void findDirtyPaths(State value, String path) { 880 for (Entry<String, Serializable> es : value.entrySet()) { 881 String key = es.getKey(); 882 Serializable v = es.getValue(); 883 String newPath = path == null ? 
key : path + "/" + key; 884 findDirtyPaths(v, newPath); 885 } 886 } 887 } 888 889 protected void getFulltextSimpleWorks(List<Work> works, Set<String> docsWithDirtyStrings) { 890 // TODO XXX make configurable, see also FulltextExtractorWork 891 FulltextParser fulltextParser = new DefaultFulltextParser(); 892 FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration(); 893 if (fulltextConfiguration.fulltextSearchDisabled) { 894 return; 895 } 896 // update simpletext on documents with dirty strings 897 for (String id : docsWithDirtyStrings) { 898 if (id == null) { 899 // cannot happen, but has been observed :( 900 log.error("Got null doc id in fulltext update, cannot happen"); 901 continue; 902 } 903 DBSDocumentState docState = getStateForUpdate(id); 904 if (docState == null) { 905 // cannot happen 906 continue; 907 } 908 String documentType = docState.getPrimaryType(); 909 // Object[] mixinTypes = (Object[]) docState.get(KEY_MIXIN_TYPES); 910 911 if (!fulltextConfiguration.isFulltextIndexable(documentType)) { 912 continue; 913 } 914 docState.put(KEY_FULLTEXT_JOBID, docState.getId()); 915 FulltextFinder fulltextFinder = new FulltextFinder(fulltextParser, docState, session); 916 List<IndexAndText> indexesAndText = new LinkedList<>(); 917 for (String indexName : fulltextConfiguration.indexNames) { 918 // TODO paths from config 919 String text = fulltextFinder.findFulltext(indexName); 920 indexesAndText.add(new IndexAndText(indexName, text)); 921 } 922 if (!indexesAndText.isEmpty()) { 923 Work work = new FulltextUpdaterWork(repository.getName(), id, true, false, indexesAndText); 924 works.add(work); 925 } 926 } 927 } 928 929 protected void getFulltextBinariesWorks(List<Work> works, Set<String> docWithDirtyBinaries) { 930 if (docWithDirtyBinaries.isEmpty()) { 931 return; 932 } 933 934 FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration(); 935 936 // mark indexing in progress, so that future copies (including versions) 
937 // will be indexed as well 938 for (String id : docWithDirtyBinaries) { 939 DBSDocumentState docState = getStateForUpdate(id); 940 if (docState == null) { 941 // cannot happen 942 continue; 943 } 944 if (!fulltextConfiguration.isFulltextIndexable(docState.getPrimaryType())) { 945 continue; 946 } 947 docState.put(KEY_FULLTEXT_JOBID, docState.getId()); 948 } 949 950 // FulltextExtractorWork does fulltext extraction using converters 951 // and then schedules a FulltextUpdaterWork to write the results 952 // single-threaded 953 for (String id : docWithDirtyBinaries) { 954 // don't exclude proxies 955 Work work = new DBSFulltextExtractorWork(repository.getName(), id); 956 works.add(work); 957 } 958 } 959 960 protected static class FulltextFinder { 961 962 protected final FulltextParser fulltextParser; 963 964 protected final DBSDocumentState document; 965 966 protected final DBSSession session; 967 968 protected final String documentType; 969 970 protected final Object[] mixinTypes; 971 972 /** 973 * Prepares parsing for one document. 974 */ 975 public FulltextFinder(FulltextParser fulltextParser, DBSDocumentState document, DBSSession session) { 976 this.fulltextParser = fulltextParser; 977 this.document = document; 978 this.session = session; 979 if (document == null) { 980 documentType = null; 981 mixinTypes = null; 982 } else { // null in tests 983 documentType = document.getPrimaryType(); 984 mixinTypes = (Object[]) document.get(KEY_MIXIN_TYPES); 985 } 986 } 987 988 /** 989 * Parses the document for one index. 
990 */ 991 public String findFulltext(String indexName) { 992 // TODO indexName 993 // TODO paths 994 List<String> strings = new ArrayList<>(); 995 findFulltext(indexName, document.getState(), strings); 996 return StringUtils.join(strings, ' '); 997 } 998 999 protected void findFulltext(String indexName, State state, List<String> strings) { 1000 for (Entry<String, Serializable> en : state.entrySet()) { 1001 String key = en.getKey(); 1002 if (key.startsWith(KEY_PREFIX)) { 1003 switch (key) { 1004 // allow indexing of this: 1005 case DBSDocument.KEY_NAME: 1006 break; 1007 default: 1008 continue; 1009 } 1010 } 1011 Serializable value = en.getValue(); 1012 if (value instanceof State) { 1013 State s = (State) value; 1014 findFulltext(indexName, s, strings); 1015 } else if (value instanceof List) { 1016 @SuppressWarnings("unchecked") 1017 List<State> v = (List<State>) value; 1018 for (State s : v) { 1019 findFulltext(indexName, s, strings); 1020 } 1021 } else if (value instanceof Object[]) { 1022 Object[] ar = (Object[]) value; 1023 for (Object v : ar) { 1024 if (v instanceof String) { 1025 fulltextParser.parse((String) v, null, strings); 1026 } else { 1027 // arrays are homogeneous, no need to continue 1028 break; 1029 } 1030 } 1031 } else { 1032 if (value instanceof String) { 1033 fulltextParser.parse((String) value, null, strings); 1034 } 1035 } 1036 } 1037 } 1038 } 1039 1040 protected void scheduleWork(List<Work> works) { 1041 // do async fulltext indexing only if high-level sessions are available 1042 RepositoryManager repositoryManager = Framework.getLocalService(RepositoryManager.class); 1043 if (repositoryManager != null && !works.isEmpty()) { 1044 WorkManager workManager = Framework.getLocalService(WorkManager.class); 1045 for (Work work : works) { 1046 // schedule work post-commit 1047 // in non-tx mode, this may execute it nearly immediately 1048 workManager.schedule(work, Scheduling.IF_NOT_SCHEDULED, true); 1049 } 1050 } 1051 } 1052 1053}