001/* 002 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019package org.nuxeo.ecm.core.storage.dbs; 020 021import static java.lang.Boolean.FALSE; 022import static java.lang.Boolean.TRUE; 023import static org.nuxeo.ecm.core.api.security.SecurityConstants.BROWSE; 024import static org.nuxeo.ecm.core.api.security.SecurityConstants.EVERYONE; 025import static org.nuxeo.ecm.core.api.security.SecurityConstants.UNSUPPORTED_ACL; 026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_GRANT; 027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_PERMISSION; 028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_USER; 029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL; 030import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP; 031import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ANCESTOR_IDS; 032import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_JOBID; 033import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ID; 034import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_PROXY; 035import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_VERSION; 036import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_MIXIN_TYPES; 037import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_NAME; 038import static 
org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PARENT_ID; 039import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_POS; 040import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PREFIX; 041import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PRIMARY_TYPE; 042import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_IDS; 043import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_TARGET_ID; 044import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_VERSION_SERIES_ID; 045import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_READ_ACL; 046import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_VERSION_SERIES_ID; 047 048import java.io.Serializable; 049import java.util.ArrayList; 050import java.util.Arrays; 051import java.util.Collections; 052import java.util.HashMap; 053import java.util.HashSet; 054import java.util.LinkedHashSet; 055import java.util.LinkedList; 056import java.util.List; 057import java.util.Map; 058import java.util.Map.Entry; 059import java.util.Set; 060 061import org.apache.commons.lang.StringUtils; 062import org.apache.commons.logging.Log; 063import org.apache.commons.logging.LogFactory; 064import org.nuxeo.ecm.core.api.ConcurrentUpdateException; 065import org.nuxeo.ecm.core.api.repository.RepositoryManager; 066import org.nuxeo.ecm.core.schema.SchemaManager; 067import org.nuxeo.ecm.core.schema.types.Schema; 068import org.nuxeo.ecm.core.security.SecurityService; 069import org.nuxeo.ecm.core.storage.State.ListDiff; 070import org.nuxeo.ecm.core.storage.State.StateDiff; 071import org.nuxeo.ecm.core.storage.StateHelper; 072import org.nuxeo.ecm.core.storage.DefaultFulltextParser; 073import org.nuxeo.ecm.core.storage.FulltextConfiguration; 074import org.nuxeo.ecm.core.storage.FulltextParser; 075import org.nuxeo.ecm.core.storage.FulltextUpdaterWork; 076import org.nuxeo.ecm.core.storage.State; 077import org.nuxeo.ecm.core.storage.FulltextUpdaterWork.IndexAndText; 078import 
org.nuxeo.ecm.core.work.api.Work;
import org.nuxeo.ecm.core.work.api.WorkManager;
import org.nuxeo.ecm.core.work.api.WorkManager.Scheduling;
import org.nuxeo.runtime.api.Framework;

/**
 * Transactional state for a session.
 * <p>
 * Until {@code save()} is called, data lives in the transient map.
 * <p>
 * Upon save, data is written to the repository, even though it has not yet been committed (this means that other
 * sessions can read uncommitted data). It's also kept in an undo log in order for rollback to be possible.
 * <p>
 * On commit, the undo log is forgotten. On rollback, the undo log is replayed.
 *
 * @since 5.9.4
 */
public class DBSTransactionState {

    private static final Log log = LogFactory.getLog(DBSTransactionState.class);

    // Marker key put in an undo-log State to mean "re-create the whole document"
    // instead of "apply as an update"; the \0 characters avoid any collision with real keys.
    private static final String KEY_UNDOLOG_CREATE = "__UNDOLOG_CREATE__\0\0";

    /** Low-level repository through which all reads and writes are done. */
    protected final DBSRepository repository;

    /** The high-level session owning this transactional state. */
    protected final DBSSession session;

    /** Retrieved and created document state. */
    protected Map<String, DBSDocumentState> transientStates = new HashMap<String, DBSDocumentState>();

    /** Ids of documents created but not yet saved. */
    protected Set<String> transientCreated = new LinkedHashSet<String>();

    /**
     * Undo log.
     * <p>
     * A map of document ids to null or State. The value is null when the document has to be deleted when applying the
     * undo log. Otherwise the value is a State. If the State contains the key {@link #KEY_UNDOLOG_CREATE} then the
     * state must be re-created completely when applying the undo log, otherwise just applied as an update.
     * <p>
     * Null when there is no active transaction.
119 */ 120 protected Map<String, State> undoLog; 121 122 protected final Set<String> browsePermissions; 123 124 public DBSTransactionState(DBSRepository repository, DBSSession session) { 125 this.repository = repository; 126 this.session = session; 127 SecurityService securityService = Framework.getLocalService(SecurityService.class); 128 browsePermissions = new HashSet<>(Arrays.asList(securityService.getPermissionsToCheck(BROWSE))); 129 } 130 131 /** 132 * New transient state for something just read from the repository. 133 */ 134 protected DBSDocumentState newTransientState(State state) { 135 if (state == null) { 136 return null; 137 } 138 String id = (String) state.get(KEY_ID); 139 if (transientStates.containsKey(id)) { 140 throw new IllegalStateException("Already transient: " + id); 141 } 142 DBSDocumentState docState = new DBSDocumentState(state); // copy 143 transientStates.put(id, docState); 144 return docState; 145 } 146 147 /** 148 * Returns a state and marks it as transient, because it's about to be modified or returned to user code (where it 149 * may be modified). 150 */ 151 public DBSDocumentState getStateForUpdate(String id) { 152 // check transient state 153 DBSDocumentState docState = transientStates.get(id); 154 if (docState != null) { 155 return docState; 156 } 157 // fetch from repository 158 State state = repository.readState(id); 159 return newTransientState(state); 160 } 161 162 /** 163 * Returns a state which won't be modified. 
164 */ 165 // TODO in some cases it's good to have this kept in memory instead of 166 // rereading from database every time 167 // XXX getStateForReadOneShot 168 public State getStateForRead(String id) { 169 // check transient state 170 DBSDocumentState docState = transientStates.get(id); 171 if (docState != null) { 172 return docState.getState(); 173 } 174 // fetch from repository 175 return repository.readState(id); 176 } 177 178 /** 179 * Returns states and marks them transient, because they're about to be returned to user code (where they may be 180 * modified). 181 */ 182 public List<DBSDocumentState> getStatesForUpdate(List<String> ids) { 183 // check which ones we have to fetch from repository 184 List<String> idsToFetch = new LinkedList<String>(); 185 for (String id : ids) { 186 // check transient state 187 DBSDocumentState docState = transientStates.get(id); 188 if (docState != null) { 189 continue; 190 } 191 // will have to fetch it 192 idsToFetch.add(id); 193 } 194 if (!idsToFetch.isEmpty()) { 195 List<State> states = repository.readStates(idsToFetch); 196 for (State state : states) { 197 newTransientState(state); 198 } 199 } 200 // everything now fetched in transient 201 List<DBSDocumentState> docStates = new ArrayList<DBSDocumentState>(ids.size()); 202 for (String id : ids) { 203 DBSDocumentState docState = transientStates.get(id); 204 if (docState != null) { 205 docStates.add(docState); 206 } else { 207 log.warn("Cannot fetch document with id: " + id, new Throwable("debug stack trace")); 208 } 209 } 210 return docStates; 211 } 212 213 // XXX TODO for update or for read? 
214 public DBSDocumentState getChildState(String parentId, String name) { 215 Set<String> seen = new HashSet<String>(); 216 // check transient state 217 for (DBSDocumentState docState : transientStates.values()) { 218 seen.add(docState.getId()); 219 if (!parentId.equals(docState.getParentId())) { 220 continue; 221 } 222 if (!name.equals(docState.getName())) { 223 continue; 224 } 225 return docState; 226 } 227 // fetch from repository 228 State state = repository.readChildState(parentId, name, seen); 229 return newTransientState(state); 230 } 231 232 public boolean hasChild(String parentId, String name) { 233 Set<String> seen = new HashSet<String>(); 234 // check transient state 235 for (DBSDocumentState docState : transientStates.values()) { 236 seen.add(docState.getId()); 237 if (!parentId.equals(docState.getParentId())) { 238 continue; 239 } 240 if (!name.equals(docState.getName())) { 241 continue; 242 } 243 return true; 244 } 245 // check repository 246 return repository.hasChild(parentId, name, seen); 247 } 248 249 public List<DBSDocumentState> getChildrenStates(String parentId) { 250 List<DBSDocumentState> docStates = new LinkedList<DBSDocumentState>(); 251 Set<String> seen = new HashSet<String>(); 252 // check transient state 253 for (DBSDocumentState docState : transientStates.values()) { 254 seen.add(docState.getId()); 255 if (!parentId.equals(docState.getParentId())) { 256 continue; 257 } 258 docStates.add(docState); 259 } 260 // fetch from repository 261 List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen); 262 for (State state : states) { 263 docStates.add(newTransientState(state)); 264 } 265 return docStates; 266 } 267 268 public List<String> getChildrenIds(String parentId) { 269 List<String> children = new ArrayList<String>(); 270 Set<String> seen = new HashSet<String>(); 271 // check transient state 272 for (DBSDocumentState docState : transientStates.values()) { 273 String id = docState.getId(); 274 seen.add(id); 275 if 
(!parentId.equals(docState.getParentId())) { 276 continue; 277 } 278 children.add(id); 279 } 280 // fetch from repository 281 List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen); 282 for (State state : states) { 283 children.add((String) state.get(KEY_ID)); 284 } 285 return new ArrayList<String>(children); 286 } 287 288 public boolean hasChildren(String parentId) { 289 Set<String> seen = new HashSet<String>(); 290 // check transient state 291 for (DBSDocumentState docState : transientStates.values()) { 292 seen.add(docState.getId()); 293 if (!parentId.equals(docState.getParentId())) { 294 continue; 295 } 296 return true; 297 } 298 // check repository 299 return repository.queryKeyValuePresence(KEY_PARENT_ID, parentId, seen); 300 } 301 302 public DBSDocumentState createChild(String id, String parentId, String name, Long pos, String typeName) { 303 // id may be not-null for import 304 if (id == null) { 305 id = repository.generateNewId(); 306 } 307 transientCreated.add(id); 308 DBSDocumentState docState = new DBSDocumentState(); 309 transientStates.put(id, docState); 310 docState.put(KEY_ID, id); 311 docState.put(KEY_PARENT_ID, parentId); 312 docState.put(KEY_ANCESTOR_IDS, getAncestorIds(parentId)); 313 docState.put(KEY_NAME, name); 314 docState.put(KEY_POS, pos); 315 docState.put(KEY_PRIMARY_TYPE, typeName); 316 // update read acls for new doc 317 updateReadAcls(id); 318 return docState; 319 } 320 321 /** Gets ancestors including id itself. 
*/ 322 protected Object[] getAncestorIds(String id) { 323 if (id == null) { 324 return null; 325 } 326 State state = getStateForRead(id); 327 if (state == null) { 328 throw new RuntimeException("No such id: " + id); 329 } 330 Object[] ancestors = (Object[]) state.get(KEY_ANCESTOR_IDS); 331 if (ancestors == null) { 332 return new Object[] { id }; 333 } else { 334 Object[] newAncestors = new Object[ancestors.length + 1]; 335 System.arraycopy(ancestors, 0, newAncestors, 0, ancestors.length); 336 newAncestors[ancestors.length] = id; 337 return newAncestors; 338 } 339 } 340 341 /** 342 * Copies the document into a newly-created object. 343 * <p> 344 * The copy is automatically saved. 345 */ 346 public DBSDocumentState copy(String id) { 347 DBSDocumentState copyState = new DBSDocumentState(getStateForRead(id)); 348 String copyId = repository.generateNewId(); 349 copyState.put(KEY_ID, copyId); 350 copyState.put(KEY_PROXY_IDS, null); // no proxies to this new doc 351 // other fields updated by the caller 352 transientStates.put(copyId, copyState); 353 transientCreated.add(copyId); 354 return copyState; 355 } 356 357 /** 358 * Updates ancestors recursively after a move. 359 * <p> 360 * Recursing from given doc, replace the first ndel ancestors with those passed. 361 * <p> 362 * Doesn't check transient (assumes save is done). The modifications are automatically saved. 
     */
    public void updateAncestors(String id, int ndel, Object[] ancestorIds) {
        int nadd = ancestorIds.length;
        Set<String> ids = getSubTree(id, null, null);
        ids.add(id); // include the moved document itself
        for (String cid : ids) {
            // XXX TODO oneShot update, don't pollute transient space
            DBSDocumentState docState = getStateForUpdate(cid);
            Object[] ancestors = (Object[]) docState.get(KEY_ANCESTOR_IDS);
            Object[] newAncestors;
            if (ancestors == null) {
                newAncestors = ancestorIds.clone();
            } else {
                // splice: drop the first ndel entries, prepend the nadd new ones
                newAncestors = new Object[ancestors.length - ndel + nadd];
                System.arraycopy(ancestorIds, 0, newAncestors, 0, nadd);
                System.arraycopy(ancestors, ndel, newAncestors, nadd, ancestors.length - ndel);
            }
            docState.put(KEY_ANCESTOR_IDS, newAncestors);
        }
    }

    /**
     * Updates the Read ACLs recursively on a document.
     */
    public void updateReadAcls(String id) {
        // versions too XXX TODO
        Set<String> ids = getSubTree(id, null, null);
        ids.add(id); // include the document itself
        for (String cid : ids) {
            // XXX TODO oneShot update, don't pollute transient space
            DBSDocumentState docState = getStateForUpdate(cid);
            docState.put(KEY_READ_ACL, getReadACL(docState));
        }
    }

    /**
     * Gets the Read ACL (flat list of users having browse permission, including inheritance) on a document.
     */
    protected String[] getReadACL(DBSDocumentState docState) {
        Set<String> racls = new HashSet<>();
        State state = docState.getState();
        // walk up the inheritance chain (parent, or version series for a version)
        LOOP: do {
            @SuppressWarnings("unchecked")
            List<Serializable> aclList = (List<Serializable>) state.get(KEY_ACP);
            if (aclList != null) {
                for (Serializable aclSer : aclList) {
                    State aclMap = (State) aclSer;
                    @SuppressWarnings("unchecked")
                    List<Serializable> aceList = (List<Serializable>) aclMap.get(KEY_ACL);
                    for (Serializable aceSer : aceList) {
                        State aceMap = (State) aceSer;
                        String username = (String) aceMap.get(KEY_ACE_USER);
                        String permission = (String) aceMap.get(KEY_ACE_PERMISSION);
                        Boolean granted = (Boolean) aceMap.get(KEY_ACE_GRANT);
                        if (TRUE.equals(granted) && browsePermissions.contains(permission)) {
                            racls.add(username);
                        }
                        // a deny ACE stops the walk: anything below it cannot be summarized
                        // as a flat user list, except a deny to EVERYONE which is a clean cut-off
                        if (FALSE.equals(granted)) {
                            if (!EVERYONE.equals(username)) {
                                // TODO log
                                racls.add(UNSUPPORTED_ACL);
                            }
                            break LOOP;
                        }
                    }
                }
            }
            // get parent
            if (TRUE.equals(state.get(KEY_IS_VERSION))) {
                String versionSeriesId = (String) state.get(KEY_VERSION_SERIES_ID);
                state = versionSeriesId == null ? null : getStateForRead(versionSeriesId);
            } else {
                String parentId = (String) state.get(KEY_PARENT_ID);
                state = parentId == null ? null : getStateForRead(parentId);
            }
        } while (state != null);

        // sort to have canonical order
        List<String> racl = new ArrayList<>(racls);
        Collections.sort(racl);
        return racl.toArray(new String[racl.size()]);
    }

    /**
     * Gets all the ids under a given one, recursively.
     * <p>
     * Doesn't check transient (assumes save is done).
450 * 451 * @param id the root of the tree (not included in results) 452 * @param proxyTargets returns a map of proxy to target among the documents found 453 * @param targetProxies returns a map of target to proxies among the document found 454 */ 455 protected Set<String> getSubTree(String id, Map<String, String> proxyTargets, Map<String, Object[]> targetProxies) { 456 Set<String> ids = new HashSet<String>(); 457 // check repository 458 repository.queryKeyValueArray(KEY_ANCESTOR_IDS, id, ids, proxyTargets, targetProxies); 459 return ids; 460 } 461 462 public List<DBSDocumentState> getKeyValuedStates(String key, Object value) { 463 List<DBSDocumentState> docStates = new LinkedList<DBSDocumentState>(); 464 Set<String> seen = new HashSet<String>(); 465 // check transient state 466 for (DBSDocumentState docState : transientStates.values()) { 467 seen.add(docState.getId()); 468 if (!value.equals(docState.get(key))) { 469 continue; 470 } 471 docStates.add(docState); 472 } 473 // fetch from repository 474 List<State> states = repository.queryKeyValue(key, value, seen); 475 for (State state : states) { 476 docStates.add(newTransientState(state)); 477 } 478 return docStates; 479 } 480 481 public List<DBSDocumentState> getKeyValuedStates(String key1, Object value1, String key2, Object value2) { 482 List<DBSDocumentState> docStates = new LinkedList<DBSDocumentState>(); 483 Set<String> seen = new HashSet<String>(); 484 // check transient state 485 for (DBSDocumentState docState : transientStates.values()) { 486 seen.add(docState.getId()); 487 if (!(value1.equals(docState.get(key1)) && value2.equals(docState.get(key2)))) { 488 continue; 489 } 490 docStates.add(docState); 491 } 492 // fetch from repository 493 List<State> states = repository.queryKeyValue(key1, value1, key2, value2, seen); 494 for (State state : states) { 495 docStates.add(newTransientState(state)); 496 } 497 return docStates; 498 } 499 500 /** 501 * Removes a list of documents. 
     * <p>
     * Called after a {@link #save} has been done.
     */
    public void removeStates(Set<String> ids) {
        if (undoLog != null) {
            for (String id : ids) {
                if (undoLog.containsKey(id)) {
                    // there's already a create or an update in the undo log
                    State oldUndo = undoLog.get(id);
                    if (oldUndo == null) {
                        // create + delete -> forget
                        undoLog.remove(id);
                    } else {
                        // update + delete -> original old state to re-create
                        oldUndo.put(KEY_UNDOLOG_CREATE, TRUE);
                    }
                } else {
                    // just delete -> store old state to re-create
                    State oldState = StateHelper.deepCopy(getStateForRead(id));
                    oldState.put(KEY_UNDOLOG_CREATE, TRUE);
                    undoLog.put(id, oldState);
                }
            }
        }
        for (String id : ids) {
            transientStates.remove(id);
        }
        repository.deleteStates(ids);
    }

    /**
     * Writes transient state to database.
     * <p>
     * An undo log is kept in order to rollback the transaction later if needed.
     */
    public void save() {
        updateProxies();
        List<Work> works;
        if (!repository.isFulltextDisabled()) {
            // TODO getting fulltext already does a getStateChange
            works = getFulltextWorks();
        } else {
            works = Collections.emptyList();
        }
        // first pass: creations, in creation order
        for (String id : transientCreated) { // ordered
            DBSDocumentState docState = transientStates.get(id);
            docState.setNotDirty();
            if (undoLog != null) {
                undoLog.put(id, null); // marker to denote create
            }
            repository.createState(docState.getState());
        }
        // second pass: updates of everything else that is dirty
        for (DBSDocumentState docState : transientStates.values()) {
            String id = docState.getId();
            if (transientCreated.contains(id)) {
                continue; // already done
            }
            StateDiff diff = docState.getStateChange();
            if (diff != null) {
                if (undoLog != null) {
                    if (!undoLog.containsKey(id)) {
                        undoLog.put(id, StateHelper.deepCopy(docState.getOriginalState()));
                    }
                    // else there's already a create or an update in the undo log so original info is enough
                }
                repository.updateState(id, diff);
            }
            docState.setNotDirty();
        }
        transientCreated.clear();
        scheduleWork(works);
    }

    /**
     * Replays the undo log against the repository, restoring the pre-transaction state: re-creates deleted documents,
     * deletes created ones, and reverts updates by diffing the current state against the saved original.
     */
    protected void applyUndoLog() {
        Set<String> deletes = new HashSet<>();
        for (Entry<String, State> es : undoLog.entrySet()) {
            String id = es.getKey();
            State state = es.getValue();
            if (state == null) {
                // null marker: the document was created in this transaction, delete it
                deletes.add(id);
            } else {
                boolean recreate = state.remove(KEY_UNDOLOG_CREATE) != null;
                if (recreate) {
                    repository.createState(state);
                } else {
                    // undo update
                    State currentState = repository.readState(id);
                    if (currentState != null) {
                        StateDiff diff = StateHelper.diff(currentState, state);
                        if (!diff.isEmpty()) {
                            repository.updateState(id, diff);
                        }
                    }
                    // else we expected to read a current state but it was concurrently deleted...
                    // in that case leave it deleted
                }
            }
        }
        if (!deletes.isEmpty()) {
            repository.deleteStates(deletes);
        }
    }

    /**
     * Checks if the changed documents are proxy targets, and updates the proxies if that's the case.
607 */ 608 protected void updateProxies() { 609 for (String id : transientCreated) { // ordered 610 DBSDocumentState docState = transientStates.get(id); 611 updateProxies(docState); 612 } 613 // copy as we may modify proxies 614 for (String id : transientStates.keySet().toArray(new String[0])) { 615 DBSDocumentState docState = transientStates.get(id); 616 if (transientCreated.contains(id)) { 617 continue; // already done 618 } 619 if (docState.isDirty()) { 620 updateProxies(docState); 621 } 622 } 623 } 624 625 protected void updateProxies(DBSDocumentState target) { 626 Object[] proxyIds = (Object[]) target.get(KEY_PROXY_IDS); 627 if (proxyIds != null) { 628 for (Object proxyId : proxyIds) { 629 try { 630 updateProxy(target, (String) proxyId); 631 } catch (ConcurrentUpdateException e) { 632 e.addInfo("On doc " + target.getId()); 633 throw e; 634 } 635 } 636 } 637 } 638 639 /** 640 * Updates the state of a proxy based on its target. 641 */ 642 protected void updateProxy(DBSDocumentState target, String proxyId) { 643 DBSDocumentState proxy = getStateForUpdate(proxyId); 644 if (proxy == null) { 645 throw new ConcurrentUpdateException("Proxy " + proxyId + " concurrently deleted"); 646 } 647 SchemaManager schemaManager = Framework.getService(SchemaManager.class); 648 // clear all proxy data 649 for (String key : proxy.getState().keyArray()) { 650 if (!isProxySpecific(key, schemaManager)) { 651 proxy.put(key, null); 652 } 653 } 654 // copy from target 655 for (Entry<String, Serializable> en : target.getState().entrySet()) { 656 String key = en.getKey(); 657 if (!isProxySpecific(key, schemaManager)) { 658 proxy.put(key, StateHelper.deepCopy(en.getValue())); 659 } 660 } 661 } 662 663 /** 664 * Things that we don't touch on a proxy when updating it. 
665 */ 666 protected boolean isProxySpecific(String key, SchemaManager schemaManager) { 667 switch (key) { 668 // these are placeful stuff 669 case KEY_ID: 670 case KEY_PARENT_ID: 671 case KEY_ANCESTOR_IDS: 672 case KEY_NAME: 673 case KEY_POS: 674 case KEY_ACP: 675 case KEY_READ_ACL: 676 // these are proxy-specific 677 case KEY_IS_PROXY: 678 case KEY_PROXY_TARGET_ID: 679 case KEY_PROXY_VERSION_SERIES_ID: 680 case KEY_IS_VERSION: 681 case KEY_PROXY_IDS: 682 return true; 683 } 684 int p = key.indexOf(':'); 685 if (p == -1) { 686 // no prefix, assume not proxy-specific 687 return false; 688 } 689 String prefix = key.substring(0, p); 690 Schema schema = schemaManager.getSchemaFromPrefix(prefix); 691 if (schema == null) { 692 schema = schemaManager.getSchema(prefix); 693 if (schema == null) { 694 // unknown prefix, assume not proxy-specific 695 return false; 696 } 697 } 698 return schemaManager.isProxySchema(schema.getName(), null); // type unused 699 } 700 701 /** 702 * Called when created in a transaction. 703 * 704 * @since 7.4 705 */ 706 public void begin() { 707 undoLog = new HashMap<String, State>(); 708 } 709 710 /** 711 * Saves and flushes to database. 712 */ 713 public void commit() { 714 save(); 715 commitSave(); 716 } 717 718 /** 719 * Commits the saved state to the database. 720 */ 721 protected void commitSave() { 722 // clear transient, this means that after this references to states will be stale 723 // TODO mark states as invalid 724 clearTransient(); 725 // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway 726 undoLog = null; 727 } 728 729 /** 730 * Rolls back the save state by applying the undo log. 
731 */ 732 public void rollback() { 733 clearTransient(); 734 applyUndoLog(); 735 // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway 736 undoLog = null; 737 } 738 739 protected void clearTransient() { 740 transientStates.clear(); 741 transientCreated.clear(); 742 } 743 744 /** 745 * Gets the fulltext updates to do. Called at save() time. 746 * 747 * @return a list of {@link Work} instances to schedule post-commit. 748 */ 749 protected List<Work> getFulltextWorks() { 750 Set<String> docsWithDirtyStrings = new HashSet<String>(); 751 Set<String> docsWithDirtyBinaries = new HashSet<String>(); 752 findDirtyDocuments(docsWithDirtyStrings, docsWithDirtyBinaries); 753 if (docsWithDirtyStrings.isEmpty() && docsWithDirtyBinaries.isEmpty()) { 754 return Collections.emptyList(); 755 } 756 List<Work> works = new LinkedList<Work>(); 757 getFulltextSimpleWorks(works, docsWithDirtyStrings); 758 getFulltextBinariesWorks(works, docsWithDirtyBinaries); 759 return works; 760 } 761 762 /** 763 * Finds the documents having dirty text or dirty binaries that have to be reindexed as fulltext. 
764 * 765 * @param docsWithDirtyStrings set of ids, updated by this method 766 * @param docWithDirtyBinaries set of ids, updated by this method 767 */ 768 protected void findDirtyDocuments(Set<String> docsWithDirtyStrings, Set<String> docWithDirtyBinaries) { 769 for (DBSDocumentState docState : transientStates.values()) { 770 State originalState = docState.getOriginalState(); 771 State state = docState.getState(); 772 if (originalState == state) { 773 continue; 774 } 775 StateDiff diff = StateHelper.diff(originalState, state); 776 if (diff.isEmpty()) { 777 continue; 778 } 779 StateDiff rdiff = StateHelper.diff(state, originalState); 780 // we do diffs in both directions to capture removal of complex list elements, 781 // for instance for {foo: [{bar: baz}] -> {foo: []} 782 // diff paths = foo and rdiff paths = foo/*/bar 783 Set<String> paths = new HashSet<>(); 784 DirtyPathsFinder dirtyPathsFinder = new DirtyPathsFinder(paths); 785 dirtyPathsFinder.findDirtyPaths(diff); 786 dirtyPathsFinder.findDirtyPaths(rdiff); 787 FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration(); 788 boolean dirtyStrings = false; 789 boolean dirtyBinaries = false; 790 for (String path : paths) { 791 Set<String> indexesSimple = fulltextConfiguration.indexesByPropPathSimple.get(path); 792 if (indexesSimple != null && !indexesSimple.isEmpty()) { 793 dirtyStrings = true; 794 if (dirtyBinaries) { 795 break; 796 } 797 } 798 Set<String> indexesBinary= fulltextConfiguration.indexesByPropPathBinary.get(path); 799 if (indexesBinary != null && !indexesBinary.isEmpty()) { 800 dirtyBinaries = true; 801 if (dirtyStrings) { 802 break; 803 } 804 } 805 } 806 if (dirtyStrings) { 807 docsWithDirtyStrings.add(docState.getId()); 808 } 809 if (dirtyBinaries) { 810 docWithDirtyBinaries.add(docState.getId()); 811 } 812 } 813 } 814 815 /** 816 * Iterates on a state diff to find the paths corresponding to dirty values. 
     *
     * @since 7.10-HF04, 8.1
     */
    protected static class DirtyPathsFinder {

        // accumulator for found paths, shared with the caller
        protected Set<String> paths;

        public DirtyPathsFinder(Set<String> paths) {
            this.paths = paths;
        }

        /** Entry point: collects into {@link #paths} the property paths touched by the given diff. */
        public void findDirtyPaths(StateDiff value) {
            findDirtyPaths(value, (String) null);
        }

        /** Dispatches on the runtime type of the value; scalar leaves record the current path. */
        protected void findDirtyPaths(Object value, String path) {
            if (value instanceof Object[]) {
                findDirtyPaths((Object[]) value, path);
            } else if (value instanceof List) {
                findDirtyPaths((List<?>) value, path);
            } else if (value instanceof ListDiff) {
                findDirtyPaths((ListDiff) value, path);
            } else if (value instanceof State) {
                findDirtyPaths((State) value, path);
            } else {
                paths.add(path);
            }
        }

        /** Array elements are recorded under the wildcard path segment. */
        protected void findDirtyPaths(Object[] value, String path) {
            String newPath = path + "/*";
            for (Object v : value) {
                findDirtyPaths(v, newPath);
            }
        }

        /** List elements are recorded under the wildcard path segment. */
        protected void findDirtyPaths(List<?> value, String path) {
            String newPath = path + "/*";
            for (Object v : value) {
                findDirtyPaths(v, newPath);
            }
        }

        /** A list diff contributes both its per-element diffs and its pushed elements. */
        protected void findDirtyPaths(ListDiff value, String path) {
            String newPath = path + "/*";
            if (value.diff != null) {
                findDirtyPaths(value.diff, newPath);
            }
            if (value.rpush != null) {
                findDirtyPaths(value.rpush, newPath);
            }
        }

        /** Map entries extend the path with their key ("a/b/c" form). */
        protected void findDirtyPaths(State value, String path) {
            for (Entry<String, Serializable> es : value.entrySet()) {
                String key = es.getKey();
                Serializable v = es.getValue();
                String newPath = path == null ? key : path + "/" + key;
                findDirtyPaths(v, newPath);
            }
        }
    }

    /**
     * Builds the fulltext updater works for documents whose simple (string) properties are dirty, and marks those
     * documents with a fulltext job id.
     */
    protected void getFulltextSimpleWorks(List<Work> works, Set<String> docsWithDirtyStrings) {
        // TODO XXX make configurable, see also FulltextExtractorWork
        FulltextParser fulltextParser = new DefaultFulltextParser();
        FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();
        // update simpletext on documents with dirty strings
        for (String id : docsWithDirtyStrings) {
            if (id == null) {
                // cannot happen, but has been observed :(
                log.error("Got null doc id in fulltext update, cannot happen");
                continue;
            }
            DBSDocumentState docState = getStateForUpdate(id);
            if (docState == null) {
                // cannot happen
                continue;
            }
            String documentType = docState.getPrimaryType();
            // Object[] mixinTypes = (Object[]) docState.get(KEY_MIXIN_TYPES);

            if (!fulltextConfiguration.isFulltextIndexable(documentType)) {
                continue;
            }
            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
            FulltextFinder fulltextFinder = new FulltextFinder(fulltextParser, docState, session);
            List<IndexAndText> indexesAndText = new LinkedList<IndexAndText>();
            for (String indexName : fulltextConfiguration.indexNames) {
                // TODO paths from config
                String text = fulltextFinder.findFulltext(indexName);
                indexesAndText.add(new IndexAndText(indexName, text));
            }
            if (!indexesAndText.isEmpty()) {
                Work work = new FulltextUpdaterWork(repository.getName(), id, true, false, indexesAndText);
                works.add(work);
            }
        }
    }

    /**
     * Builds the fulltext extractor works for documents whose binary properties are dirty, marking those documents
     * with a fulltext job id first.
     */
    protected void getFulltextBinariesWorks(List<Work> works, Set<String> docWithDirtyBinaries) {
        if (docWithDirtyBinaries.isEmpty()) {
            return;
        }

        FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();

        // mark indexing in progress, so that future copies (including versions)
        // will be indexed as well
        for (String id : docWithDirtyBinaries) {
            DBSDocumentState docState = getStateForUpdate(id);
            if (docState == null) {
                // cannot happen
                continue;
            }
            if (!fulltextConfiguration.isFulltextIndexable(docState.getPrimaryType())) {
                continue;
            }
            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
        }

        // FulltextExtractorWork does fulltext extraction using converters
        // and then schedules a FulltextUpdaterWork to write the results
        // single-threaded
        for (String id : docWithDirtyBinaries) {
            // don't exclude proxies
            Work work = new DBSFulltextExtractorWork(repository.getName(), id);
            works.add(work);
        }
    }

    /** Walks a document's state to extract the strings to be fulltext-indexed. */
    protected static class FulltextFinder {

        protected final FulltextParser fulltextParser;

        // the document being parsed; may be null in tests
        protected final DBSDocumentState document;

        protected final DBSSession session;

        // primary type of the document, or null when document is null
        protected final String documentType;

        // mixin types of the document, or null when document is null
        protected final Object[] mixinTypes;

        /**
         * Prepares parsing for one document.
         */
        public FulltextFinder(FulltextParser fulltextParser, DBSDocumentState document, DBSSession session) {
            this.fulltextParser = fulltextParser;
            this.document = document;
            this.session = session;
            if (document == null) {
                // null in tests
                documentType = null;
                mixinTypes = null;
            } else { // null in tests
                documentType = document.getPrimaryType();
                mixinTypes = (Object[]) document.get(KEY_MIXIN_TYPES);
            }
        }

        /**
         * Parses the document for one index.
         */
        public String findFulltext(String indexName) {
            // TODO indexName
            // TODO paths
            List<String> strings = new ArrayList<String>();
            findFulltext(indexName, document.getState(), strings);
            // space-joined concatenation of all extracted strings
            return StringUtils.join(strings, ' ');
        }

        /** Recursively collects the indexable strings of a state into the given list. */
        protected void findFulltext(String indexName, State state, List<String> strings) {
            for (Entry<String, Serializable> en : state.entrySet()) {
                String key = en.getKey();
                // skip internal (system) keys, except the few explicitly allowed below
                if (key.startsWith(KEY_PREFIX)) {
                    switch (key) {
                    // allow indexing of this:
                    case DBSDocument.KEY_NAME:
                        break;
                    default:
                        continue;
                    }
                }
                Serializable value = en.getValue();
                if (value instanceof State) {
                    // complex property: recurse
                    State s = (State) value;
                    findFulltext(indexName, s, strings);
                } else if (value instanceof List) {
                    // list of complex properties: recurse into each
                    @SuppressWarnings("unchecked")
                    List<State> v = (List<State>) value;
                    for (State s : v) {
                        findFulltext(indexName, s, strings);
                    }
                } else if (value instanceof Object[]) {
                    // array property: only string arrays are indexable
                    Object[] ar = (Object[]) value;
                    for (Object v : ar) {
                        if (v instanceof String) {
                            fulltextParser.parse((String) v, null, strings);
                        } else {
                            // arrays are homogeneous, no need to continue
                            break;
                        }
                    }
                } else {
                    // scalar property: only strings are indexable
                    if (value instanceof String) {
                        fulltextParser.parse((String) value, null, strings);
                    }
                }
            }
        }
    }

    /**
     * Schedules the given works for post-commit execution.
     */
    protected void scheduleWork(List<Work> works) {
        // do async fulltext indexing only if high-level sessions are available
        RepositoryManager repositoryManager = Framework.getLocalService(RepositoryManager.class);
        if (repositoryManager != null && !works.isEmpty()) {
            WorkManager workManager = Framework.getLocalService(WorkManager.class);
            for (Work work : works) {
                // schedule work post-commit
                // in non-tx mode, this may execute it nearly immediately
                workManager.schedule(work, Scheduling.IF_NOT_SCHEDULED, true);
            }
        }
    }

}