001/*
002 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage.dbs;
020
021import static java.lang.Boolean.FALSE;
022import static java.lang.Boolean.TRUE;
023import static org.nuxeo.ecm.core.api.security.SecurityConstants.BROWSE;
024import static org.nuxeo.ecm.core.api.security.SecurityConstants.EVERYONE;
025import static org.nuxeo.ecm.core.api.security.SecurityConstants.UNSUPPORTED_ACL;
026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_GRANT;
027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_PERMISSION;
028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_USER;
029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL;
030import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP;
031import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ANCESTOR_IDS;
032import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_JOBID;
033import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ID;
034import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_PROXY;
035import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_VERSION;
036import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_MIXIN_TYPES;
037import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_NAME;
038import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PARENT_ID;
039import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_POS;
040import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PREFIX;
041import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PRIMARY_TYPE;
042import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_IDS;
043import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_TARGET_ID;
044import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_VERSION_SERIES_ID;
045import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_READ_ACL;
046import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_VERSION_SERIES_ID;
047
048import java.io.Serializable;
049import java.util.ArrayList;
050import java.util.Arrays;
051import java.util.Collections;
052import java.util.HashMap;
053import java.util.HashSet;
054import java.util.LinkedHashSet;
055import java.util.LinkedList;
056import java.util.List;
057import java.util.Map;
058import java.util.Map.Entry;
059import java.util.Set;
060
061import org.apache.commons.lang.StringUtils;
062import org.apache.commons.logging.Log;
063import org.apache.commons.logging.LogFactory;
064import org.nuxeo.ecm.core.api.ConcurrentUpdateException;
065import org.nuxeo.ecm.core.api.repository.RepositoryManager;
066import org.nuxeo.ecm.core.schema.SchemaManager;
067import org.nuxeo.ecm.core.schema.types.Schema;
068import org.nuxeo.ecm.core.security.SecurityService;
069import org.nuxeo.ecm.core.storage.State.ListDiff;
070import org.nuxeo.ecm.core.storage.State.StateDiff;
071import org.nuxeo.ecm.core.storage.StateHelper;
072import org.nuxeo.ecm.core.storage.DefaultFulltextParser;
073import org.nuxeo.ecm.core.storage.FulltextConfiguration;
074import org.nuxeo.ecm.core.storage.FulltextParser;
075import org.nuxeo.ecm.core.storage.FulltextUpdaterWork;
076import org.nuxeo.ecm.core.storage.State;
077import org.nuxeo.ecm.core.storage.FulltextUpdaterWork.IndexAndText;
078import org.nuxeo.ecm.core.work.api.Work;
079import org.nuxeo.ecm.core.work.api.WorkManager;
080import org.nuxeo.ecm.core.work.api.WorkManager.Scheduling;
081import org.nuxeo.runtime.api.Framework;
082
083/**
084 * Transactional state for a session.
085 * <p>
086 * Until {@code save()} is called, data lives in the transient map.
087 * <p>
088 * Upon save, data is written to the repository, even though it has not yet been committed (this means that other
089 * sessions can read uncommitted data). It's also kept in an undo log in order for rollback to be possible.
090 * <p>
091 * On commit, the undo log is forgotten. On rollback, the undo log is replayed.
092 *
093 * @since 5.9.4
094 */
095public class DBSTransactionState {
096
097    private static final Log log = LogFactory.getLog(DBSTransactionState.class);
098
099    private static final String KEY_UNDOLOG_CREATE = "__UNDOLOG_CREATE__\0\0";
100
101    protected final DBSRepository repository;
102
103    protected final DBSSession session;
104
105    /** Retrieved and created document state. */
106    protected Map<String, DBSDocumentState> transientStates = new HashMap<String, DBSDocumentState>();
107
108    /** Ids of documents created but not yet saved. */
109    protected Set<String> transientCreated = new LinkedHashSet<String>();
110
111    /**
112     * Undo log.
113     * <p>
114     * A map of document ids to null or State. The value is null when the document has to be deleted when applying the
115     * undo log. Otherwise the value is a State. If the State contains the key {@link #KEY_UNDOLOG_CREATE} then the
116     * state must be re-created completely when applying the undo log, otherwise just applied as an update.
117     * <p>
118     * Null when there is no active transaction.
119     */
120    protected Map<String, State> undoLog;
121
122    protected final Set<String> browsePermissions;
123
124    public DBSTransactionState(DBSRepository repository, DBSSession session) {
125        this.repository = repository;
126        this.session = session;
127        SecurityService securityService = Framework.getLocalService(SecurityService.class);
128        browsePermissions = new HashSet<>(Arrays.asList(securityService.getPermissionsToCheck(BROWSE)));
129    }
130
131    /**
132     * New transient state for something just read from the repository.
133     */
134    protected DBSDocumentState newTransientState(State state) {
135        if (state == null) {
136            return null;
137        }
138        String id = (String) state.get(KEY_ID);
139        if (transientStates.containsKey(id)) {
140            throw new IllegalStateException("Already transient: " + id);
141        }
142        DBSDocumentState docState = new DBSDocumentState(state); // copy
143        transientStates.put(id, docState);
144        return docState;
145    }
146
147    /**
148     * Returns a state and marks it as transient, because it's about to be modified or returned to user code (where it
149     * may be modified).
150     */
151    public DBSDocumentState getStateForUpdate(String id) {
152        // check transient state
153        DBSDocumentState docState = transientStates.get(id);
154        if (docState != null) {
155            return docState;
156        }
157        // fetch from repository
158        State state = repository.readState(id);
159        return newTransientState(state);
160    }
161
162    /**
163     * Returns a state which won't be modified.
164     */
165    // TODO in some cases it's good to have this kept in memory instead of
166    // rereading from database every time
167    // XXX getStateForReadOneShot
168    public State getStateForRead(String id) {
169        // check transient state
170        DBSDocumentState docState = transientStates.get(id);
171        if (docState != null) {
172            return docState.getState();
173        }
174        // fetch from repository
175        return repository.readState(id);
176    }
177
178    /**
179     * Returns states and marks them transient, because they're about to be returned to user code (where they may be
180     * modified).
181     */
182    public List<DBSDocumentState> getStatesForUpdate(List<String> ids) {
183        // check which ones we have to fetch from repository
184        List<String> idsToFetch = new LinkedList<String>();
185        for (String id : ids) {
186            // check transient state
187            DBSDocumentState docState = transientStates.get(id);
188            if (docState != null) {
189                continue;
190            }
191            // will have to fetch it
192            idsToFetch.add(id);
193        }
194        if (!idsToFetch.isEmpty()) {
195            List<State> states = repository.readStates(idsToFetch);
196            for (State state : states) {
197                newTransientState(state);
198            }
199        }
200        // everything now fetched in transient
201        List<DBSDocumentState> docStates = new ArrayList<DBSDocumentState>(ids.size());
202        for (String id : ids) {
203            DBSDocumentState docState = transientStates.get(id);
204            if (docState != null) {
205                docStates.add(docState);
206            } else {
207                log.warn("Cannot fetch document with id: " + id, new Throwable("debug stack trace"));
208            }
209        }
210        return docStates;
211    }
212
213    // XXX TODO for update or for read?
214    public DBSDocumentState getChildState(String parentId, String name) {
215        Set<String> seen = new HashSet<String>();
216        // check transient state
217        for (DBSDocumentState docState : transientStates.values()) {
218            seen.add(docState.getId());
219            if (!parentId.equals(docState.getParentId())) {
220                continue;
221            }
222            if (!name.equals(docState.getName())) {
223                continue;
224            }
225            return docState;
226        }
227        // fetch from repository
228        State state = repository.readChildState(parentId, name, seen);
229        return newTransientState(state);
230    }
231
232    public boolean hasChild(String parentId, String name) {
233        Set<String> seen = new HashSet<String>();
234        // check transient state
235        for (DBSDocumentState docState : transientStates.values()) {
236            seen.add(docState.getId());
237            if (!parentId.equals(docState.getParentId())) {
238                continue;
239            }
240            if (!name.equals(docState.getName())) {
241                continue;
242            }
243            return true;
244        }
245        // check repository
246        return repository.hasChild(parentId, name, seen);
247    }
248
249    public List<DBSDocumentState> getChildrenStates(String parentId) {
250        List<DBSDocumentState> docStates = new LinkedList<DBSDocumentState>();
251        Set<String> seen = new HashSet<String>();
252        // check transient state
253        for (DBSDocumentState docState : transientStates.values()) {
254            seen.add(docState.getId());
255            if (!parentId.equals(docState.getParentId())) {
256                continue;
257            }
258            docStates.add(docState);
259        }
260        // fetch from repository
261        List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen);
262        for (State state : states) {
263            docStates.add(newTransientState(state));
264        }
265        return docStates;
266    }
267
268    public List<String> getChildrenIds(String parentId) {
269        List<String> children = new ArrayList<String>();
270        Set<String> seen = new HashSet<String>();
271        // check transient state
272        for (DBSDocumentState docState : transientStates.values()) {
273            String id = docState.getId();
274            seen.add(id);
275            if (!parentId.equals(docState.getParentId())) {
276                continue;
277            }
278            children.add(id);
279        }
280        // fetch from repository
281        List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen);
282        for (State state : states) {
283            children.add((String) state.get(KEY_ID));
284        }
285        return new ArrayList<String>(children);
286    }
287
288    public boolean hasChildren(String parentId) {
289        Set<String> seen = new HashSet<String>();
290        // check transient state
291        for (DBSDocumentState docState : transientStates.values()) {
292            seen.add(docState.getId());
293            if (!parentId.equals(docState.getParentId())) {
294                continue;
295            }
296            return true;
297        }
298        // check repository
299        return repository.queryKeyValuePresence(KEY_PARENT_ID, parentId, seen);
300    }
301
302    public DBSDocumentState createChild(String id, String parentId, String name, Long pos, String typeName) {
303        // id may be not-null for import
304        if (id == null) {
305            id = repository.generateNewId();
306        }
307        transientCreated.add(id);
308        DBSDocumentState docState = new DBSDocumentState();
309        transientStates.put(id, docState);
310        docState.put(KEY_ID, id);
311        docState.put(KEY_PARENT_ID, parentId);
312        docState.put(KEY_ANCESTOR_IDS, getAncestorIds(parentId));
313        docState.put(KEY_NAME, name);
314        docState.put(KEY_POS, pos);
315        docState.put(KEY_PRIMARY_TYPE, typeName);
316        // update read acls for new doc
317        updateReadAcls(id);
318        return docState;
319    }
320
321    /** Gets ancestors including id itself. */
322    protected Object[] getAncestorIds(String id) {
323        if (id == null) {
324            return null;
325        }
326        State state = getStateForRead(id);
327        if (state == null) {
328            throw new RuntimeException("No such id: " + id);
329        }
330        Object[] ancestors = (Object[]) state.get(KEY_ANCESTOR_IDS);
331        if (ancestors == null) {
332            return new Object[] { id };
333        } else {
334            Object[] newAncestors = new Object[ancestors.length + 1];
335            System.arraycopy(ancestors, 0, newAncestors, 0, ancestors.length);
336            newAncestors[ancestors.length] = id;
337            return newAncestors;
338        }
339    }
340
341    /**
342     * Copies the document into a newly-created object.
343     * <p>
344     * The copy is automatically saved.
345     */
346    public DBSDocumentState copy(String id) {
347        DBSDocumentState copyState = new DBSDocumentState(getStateForRead(id));
348        String copyId = repository.generateNewId();
349        copyState.put(KEY_ID, copyId);
350        copyState.put(KEY_PROXY_IDS, null); // no proxies to this new doc
351        // other fields updated by the caller
352        transientStates.put(copyId, copyState);
353        transientCreated.add(copyId);
354        return copyState;
355    }
356
357    /**
358     * Updates ancestors recursively after a move.
359     * <p>
360     * Recursing from given doc, replace the first ndel ancestors with those passed.
361     * <p>
362     * Doesn't check transient (assumes save is done). The modifications are automatically saved.
363     */
364    public void updateAncestors(String id, int ndel, Object[] ancestorIds) {
365        int nadd = ancestorIds.length;
366        Set<String> ids = getSubTree(id, null, null);
367        ids.add(id);
368        for (String cid : ids) {
369            // XXX TODO oneShot update, don't pollute transient space
370            DBSDocumentState docState = getStateForUpdate(cid);
371            Object[] ancestors = (Object[]) docState.get(KEY_ANCESTOR_IDS);
372            Object[] newAncestors;
373            if (ancestors == null) {
374                newAncestors = ancestorIds.clone();
375            } else {
376                newAncestors = new Object[ancestors.length - ndel + nadd];
377                System.arraycopy(ancestorIds, 0, newAncestors, 0, nadd);
378                System.arraycopy(ancestors, ndel, newAncestors, nadd, ancestors.length - ndel);
379            }
380            docState.put(KEY_ANCESTOR_IDS, newAncestors);
381        }
382    }
383
384    /**
385     * Updates the Read ACLs recursively on a document.
386     */
387    public void updateReadAcls(String id) {
388        // versions too XXX TODO
389        Set<String> ids = getSubTree(id, null, null);
390        ids.add(id);
391        for (String cid : ids) {
392            // XXX TODO oneShot update, don't pollute transient space
393            DBSDocumentState docState = getStateForUpdate(cid);
394            docState.put(KEY_READ_ACL, getReadACL(docState));
395        }
396    }
397
398    /**
399     * Gets the Read ACL (flat list of users having browse permission, including inheritance) on a document.
400     */
401    protected String[] getReadACL(DBSDocumentState docState) {
402        Set<String> racls = new HashSet<>();
403        State state = docState.getState();
404        LOOP: do {
405            @SuppressWarnings("unchecked")
406            List<Serializable> aclList = (List<Serializable>) state.get(KEY_ACP);
407            if (aclList != null) {
408                for (Serializable aclSer : aclList) {
409                    State aclMap = (State) aclSer;
410                    @SuppressWarnings("unchecked")
411                    List<Serializable> aceList = (List<Serializable>) aclMap.get(KEY_ACL);
412                    for (Serializable aceSer : aceList) {
413                        State aceMap = (State) aceSer;
414                        String username = (String) aceMap.get(KEY_ACE_USER);
415                        String permission = (String) aceMap.get(KEY_ACE_PERMISSION);
416                        Boolean granted = (Boolean) aceMap.get(KEY_ACE_GRANT);
417                        if (TRUE.equals(granted) && browsePermissions.contains(permission)) {
418                            racls.add(username);
419                        }
420                        if (FALSE.equals(granted)) {
421                            if (!EVERYONE.equals(username)) {
422                                // TODO log
423                                racls.add(UNSUPPORTED_ACL);
424                            }
425                            break LOOP;
426                        }
427                    }
428                }
429            }
430            // get parent
431            if (TRUE.equals(state.get(KEY_IS_VERSION))) {
432                String versionSeriesId = (String) state.get(KEY_VERSION_SERIES_ID);
433                state = versionSeriesId == null ? null : getStateForRead(versionSeriesId);
434            } else {
435                String parentId = (String) state.get(KEY_PARENT_ID);
436                state = parentId == null ? null : getStateForRead(parentId);
437            }
438        } while (state != null);
439
440        // sort to have canonical order
441        List<String> racl = new ArrayList<>(racls);
442        Collections.sort(racl);
443        return racl.toArray(new String[racl.size()]);
444    }
445
446    /**
447     * Gets all the ids under a given one, recursively.
448     * <p>
449     * Doesn't check transient (assumes save is done).
450     *
451     * @param id the root of the tree (not included in results)
452     * @param proxyTargets returns a map of proxy to target among the documents found
453     * @param targetProxies returns a map of target to proxies among the document found
454     */
455    protected Set<String> getSubTree(String id, Map<String, String> proxyTargets, Map<String, Object[]> targetProxies) {
456        Set<String> ids = new HashSet<String>();
457        // check repository
458        repository.queryKeyValueArray(KEY_ANCESTOR_IDS, id, ids, proxyTargets, targetProxies);
459        return ids;
460    }
461
462    public List<DBSDocumentState> getKeyValuedStates(String key, Object value) {
463        List<DBSDocumentState> docStates = new LinkedList<DBSDocumentState>();
464        Set<String> seen = new HashSet<String>();
465        // check transient state
466        for (DBSDocumentState docState : transientStates.values()) {
467            seen.add(docState.getId());
468            if (!value.equals(docState.get(key))) {
469                continue;
470            }
471            docStates.add(docState);
472        }
473        // fetch from repository
474        List<State> states = repository.queryKeyValue(key, value, seen);
475        for (State state : states) {
476            docStates.add(newTransientState(state));
477        }
478        return docStates;
479    }
480
481    public List<DBSDocumentState> getKeyValuedStates(String key1, Object value1, String key2, Object value2) {
482        List<DBSDocumentState> docStates = new LinkedList<DBSDocumentState>();
483        Set<String> seen = new HashSet<String>();
484        // check transient state
485        for (DBSDocumentState docState : transientStates.values()) {
486            seen.add(docState.getId());
487            if (!(value1.equals(docState.get(key1)) && value2.equals(docState.get(key2)))) {
488                continue;
489            }
490            docStates.add(docState);
491        }
492        // fetch from repository
493        List<State> states = repository.queryKeyValue(key1, value1, key2, value2, seen);
494        for (State state : states) {
495            docStates.add(newTransientState(state));
496        }
497        return docStates;
498    }
499
500    /**
501     * Removes a list of documents.
502     * <p>
503     * Called after a {@link #save} has been done.
504     */
505    public void removeStates(Set<String> ids) {
506        if (undoLog != null) {
507            for (String id : ids) {
508                if (undoLog.containsKey(id)) {
509                    // there's already a create or an update in the undo log
510                    State oldUndo = undoLog.get(id);
511                    if (oldUndo == null) {
512                        // create + delete -> forget
513                        undoLog.remove(id);
514                    } else {
515                        // update + delete -> original old state to re-create
516                        oldUndo.put(KEY_UNDOLOG_CREATE, TRUE);
517                    }
518                } else {
519                    // just delete -> store old state to re-create
520                    State oldState = StateHelper.deepCopy(getStateForRead(id));
521                    oldState.put(KEY_UNDOLOG_CREATE, TRUE);
522                    undoLog.put(id, oldState);
523                }
524            }
525        }
526        for (String id : ids) {
527            transientStates.remove(id);
528        }
529        repository.deleteStates(ids);
530    }
531
532    /**
533     * Writes transient state to database.
534     * <p>
535     * An undo log is kept in order to rollback the transaction later if needed.
536     */
537    public void save() {
538        updateProxies();
539        List<Work> works;
540        if (!repository.isFulltextDisabled()) {
541            // TODO getting fulltext already does a getStateChange
542            works = getFulltextWorks();
543        } else {
544            works = Collections.emptyList();
545        }
546        for (String id : transientCreated) { // ordered
547            DBSDocumentState docState = transientStates.get(id);
548            docState.setNotDirty();
549            if (undoLog != null) {
550                undoLog.put(id, null); // marker to denote create
551            }
552            repository.createState(docState.getState());
553        }
554        for (DBSDocumentState docState : transientStates.values()) {
555            String id = docState.getId();
556            if (transientCreated.contains(id)) {
557                continue; // already done
558            }
559            StateDiff diff = docState.getStateChange();
560            if (diff != null) {
561                if (undoLog != null) {
562                    if (!undoLog.containsKey(id)) {
563                        undoLog.put(id, StateHelper.deepCopy(docState.getOriginalState()));
564                    }
565                    // else there's already a create or an update in the undo log so original info is enough
566                }
567                repository.updateState(id, diff);
568            }
569            docState.setNotDirty();
570        }
571        transientCreated.clear();
572        scheduleWork(works);
573    }
574
575    protected void applyUndoLog() {
576        Set<String> deletes = new HashSet<>();
577        for (Entry<String, State> es : undoLog.entrySet()) {
578            String id = es.getKey();
579            State state = es.getValue();
580            if (state == null) {
581                deletes.add(id);
582            } else {
583                boolean recreate = state.remove(KEY_UNDOLOG_CREATE) != null;
584                if (recreate) {
585                    repository.createState(state);
586                } else {
587                    // undo update
588                    State currentState = repository.readState(id);
589                    if (currentState != null) {
590                        StateDiff diff = StateHelper.diff(currentState, state);
591                        if (!diff.isEmpty()) {
592                            repository.updateState(id, diff);
593                        }
594                    }
595                    // else we expected to read a current state but it was concurrently deleted...
596                    // in that case leave it deleted
597                }
598            }
599        }
600        if (!deletes.isEmpty()) {
601            repository.deleteStates(deletes);
602        }
603    }
604
605    /**
606     * Checks if the changed documents are proxy targets, and updates the proxies if that's the case.
607     */
608    protected void updateProxies() {
609        for (String id : transientCreated) { // ordered
610            DBSDocumentState docState = transientStates.get(id);
611            updateProxies(docState);
612        }
613        // copy as we may modify proxies
614        for (String id : transientStates.keySet().toArray(new String[0])) {
615            DBSDocumentState docState = transientStates.get(id);
616            if (transientCreated.contains(id)) {
617                continue; // already done
618            }
619            if (docState.isDirty()) {
620                updateProxies(docState);
621            }
622        }
623    }
624
625    protected void updateProxies(DBSDocumentState target) {
626        Object[] proxyIds = (Object[]) target.get(KEY_PROXY_IDS);
627        if (proxyIds != null) {
628            for (Object proxyId : proxyIds) {
629                try {
630                    updateProxy(target, (String) proxyId);
631                } catch (ConcurrentUpdateException e) {
632                    e.addInfo("On doc " + target.getId());
633                    throw e;
634                }
635            }
636        }
637    }
638
639    /**
640     * Updates the state of a proxy based on its target.
641     */
642    protected void updateProxy(DBSDocumentState target, String proxyId) {
643        DBSDocumentState proxy = getStateForUpdate(proxyId);
644        if (proxy == null) {
645            throw new ConcurrentUpdateException("Proxy " + proxyId + " concurrently deleted");
646        }
647        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
648        // clear all proxy data
649        for (String key : proxy.getState().keyArray()) {
650            if (!isProxySpecific(key, schemaManager)) {
651                proxy.put(key, null);
652            }
653        }
654        // copy from target
655        for (Entry<String, Serializable> en : target.getState().entrySet()) {
656            String key = en.getKey();
657            if (!isProxySpecific(key, schemaManager)) {
658                proxy.put(key, StateHelper.deepCopy(en.getValue()));
659            }
660        }
661    }
662
663    /**
664     * Things that we don't touch on a proxy when updating it.
665     */
666    protected boolean isProxySpecific(String key, SchemaManager schemaManager) {
667        switch (key) {
668        // these are placeful stuff
669        case KEY_ID:
670        case KEY_PARENT_ID:
671        case KEY_ANCESTOR_IDS:
672        case KEY_NAME:
673        case KEY_POS:
674        case KEY_ACP:
675        case KEY_READ_ACL:
676            // these are proxy-specific
677        case KEY_IS_PROXY:
678        case KEY_PROXY_TARGET_ID:
679        case KEY_PROXY_VERSION_SERIES_ID:
680        case KEY_IS_VERSION:
681        case KEY_PROXY_IDS:
682            return true;
683        }
684        int p = key.indexOf(':');
685        if (p == -1) {
686            // no prefix, assume not proxy-specific
687            return false;
688        }
689        String prefix = key.substring(0, p);
690        Schema schema = schemaManager.getSchemaFromPrefix(prefix);
691        if (schema == null) {
692            schema = schemaManager.getSchema(prefix);
693            if (schema == null) {
694                // unknown prefix, assume not proxy-specific
695                return false;
696            }
697        }
698        return schemaManager.isProxySchema(schema.getName(), null); // type unused
699    }
700
701    /**
702     * Called when created in a transaction.
703     *
704     * @since 7.4
705     */
706    public void begin() {
707        undoLog = new HashMap<String, State>();
708    }
709
710    /**
711     * Saves and flushes to database.
712     */
713    public void commit() {
714        save();
715        commitSave();
716    }
717
718    /**
719     * Commits the saved state to the database.
720     */
721    protected void commitSave() {
722        // clear transient, this means that after this references to states will be stale
723        // TODO mark states as invalid
724        clearTransient();
725        // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway
726        undoLog = null;
727    }
728
729    /**
730     * Rolls back the save state by applying the undo log.
731     */
732    public void rollback() {
733        clearTransient();
734        applyUndoLog();
735        // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway
736        undoLog = null;
737    }
738
739    protected void clearTransient() {
740        transientStates.clear();
741        transientCreated.clear();
742    }
743
744    /**
745     * Gets the fulltext updates to do. Called at save() time.
746     *
747     * @return a list of {@link Work} instances to schedule post-commit.
748     */
749    protected List<Work> getFulltextWorks() {
750        Set<String> docsWithDirtyStrings = new HashSet<String>();
751        Set<String> docsWithDirtyBinaries = new HashSet<String>();
752        findDirtyDocuments(docsWithDirtyStrings, docsWithDirtyBinaries);
753        if (docsWithDirtyStrings.isEmpty() && docsWithDirtyBinaries.isEmpty()) {
754            return Collections.emptyList();
755        }
756        List<Work> works = new LinkedList<Work>();
757        getFulltextSimpleWorks(works, docsWithDirtyStrings);
758        getFulltextBinariesWorks(works, docsWithDirtyBinaries);
759        return works;
760    }
761
762    /**
763     * Finds the documents having dirty text or dirty binaries that have to be reindexed as fulltext.
764     *
765     * @param docsWithDirtyStrings set of ids, updated by this method
766     * @param docWithDirtyBinaries set of ids, updated by this method
767     */
768    protected void findDirtyDocuments(Set<String> docsWithDirtyStrings, Set<String> docWithDirtyBinaries) {
769        for (DBSDocumentState docState : transientStates.values()) {
770            State originalState = docState.getOriginalState();
771            State state = docState.getState();
772            if (originalState == state) {
773                continue;
774            }
775            StateDiff diff = StateHelper.diff(originalState, state);
776            if (diff.isEmpty()) {
777                continue;
778            }
779            StateDiff rdiff = StateHelper.diff(state, originalState);
780            // we do diffs in both directions to capture removal of complex list elements,
781            // for instance for {foo: [{bar: baz}] -> {foo: []}
782            // diff paths = foo and rdiff paths = foo/*/bar
783            Set<String> paths = new HashSet<>();
784            DirtyPathsFinder dirtyPathsFinder = new DirtyPathsFinder(paths);
785            dirtyPathsFinder.findDirtyPaths(diff);
786            dirtyPathsFinder.findDirtyPaths(rdiff);
787            FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();
788            boolean dirtyStrings = false;
789            boolean dirtyBinaries = false;
790            for (String path : paths) {
791                Set<String> indexesSimple = fulltextConfiguration.indexesByPropPathSimple.get(path);
792                if (indexesSimple != null && !indexesSimple.isEmpty()) {
793                    dirtyStrings = true;
794                    if (dirtyBinaries) {
795                        break;
796                    }
797                }
798                Set<String> indexesBinary= fulltextConfiguration.indexesByPropPathBinary.get(path);
799                if (indexesBinary != null && !indexesBinary.isEmpty()) {
800                    dirtyBinaries = true;
801                    if (dirtyStrings) {
802                        break;
803                    }
804                }
805            }
806            if (dirtyStrings) {
807                docsWithDirtyStrings.add(docState.getId());
808            }
809            if (dirtyBinaries) {
810                docWithDirtyBinaries.add(docState.getId());
811            }
812        }
813    }
814
815    /**
816     * Iterates on a state diff to find the paths corresponding to dirty values.
817     *
818     * @since 7.10-HF04, 8.1
819     */
820    protected static class DirtyPathsFinder {
821
822        protected Set<String> paths;
823
824        public DirtyPathsFinder(Set<String> paths) {
825            this.paths = paths;
826        }
827
828        public void findDirtyPaths(StateDiff value) {
829            findDirtyPaths(value, (String) null);
830        }
831
832        protected void findDirtyPaths(Object value, String path) {
833            if (value instanceof Object[]) {
834                findDirtyPaths((Object[]) value, path);
835            } else if (value instanceof List) {
836                findDirtyPaths((List<?>) value, path);
837            } else if (value instanceof ListDiff) {
838                findDirtyPaths((ListDiff) value, path);
839            } else if (value instanceof State) {
840                findDirtyPaths((State) value, path);
841            } else {
842                paths.add(path);
843            }
844        }
845
846        protected void findDirtyPaths(Object[] value, String path) {
847            String newPath = path + "/*";
848            for (Object v : value) {
849                findDirtyPaths(v, newPath);
850            }
851        }
852
853        protected void findDirtyPaths(List<?> value, String path) {
854            String newPath = path + "/*";
855            for (Object v : value) {
856                findDirtyPaths(v, newPath);
857            }
858        }
859
860        protected void findDirtyPaths(ListDiff value, String path) {
861            String newPath = path + "/*";
862            if (value.diff != null) {
863                findDirtyPaths(value.diff, newPath);
864            }
865            if (value.rpush != null) {
866                findDirtyPaths(value.rpush, newPath);
867            }
868        }
869
870        protected void findDirtyPaths(State value, String path) {
871            for (Entry<String, Serializable> es : value.entrySet()) {
872                String key = es.getKey();
873                Serializable v = es.getValue();
874                String newPath = path == null ? key : path + "/" + key;
875                findDirtyPaths(v, newPath);
876            }
877        }
878    }
879
880    protected void getFulltextSimpleWorks(List<Work> works, Set<String> docsWithDirtyStrings) {
881        // TODO XXX make configurable, see also FulltextExtractorWork
882        FulltextParser fulltextParser = new DefaultFulltextParser();
883        FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();
884        // update simpletext on documents with dirty strings
885        for (String id : docsWithDirtyStrings) {
886            if (id == null) {
887                // cannot happen, but has been observed :(
888                log.error("Got null doc id in fulltext update, cannot happen");
889                continue;
890            }
891            DBSDocumentState docState = getStateForUpdate(id);
892            if (docState == null) {
893                // cannot happen
894                continue;
895            }
896            String documentType = docState.getPrimaryType();
897            // Object[] mixinTypes = (Object[]) docState.get(KEY_MIXIN_TYPES);
898
899            if (!fulltextConfiguration.isFulltextIndexable(documentType)) {
900                continue;
901            }
902            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
903            FulltextFinder fulltextFinder = new FulltextFinder(fulltextParser, docState, session);
904            List<IndexAndText> indexesAndText = new LinkedList<IndexAndText>();
905            for (String indexName : fulltextConfiguration.indexNames) {
906                // TODO paths from config
907                String text = fulltextFinder.findFulltext(indexName);
908                indexesAndText.add(new IndexAndText(indexName, text));
909            }
910            if (!indexesAndText.isEmpty()) {
911                Work work = new FulltextUpdaterWork(repository.getName(), id, true, false, indexesAndText);
912                works.add(work);
913            }
914        }
915    }
916
917    protected void getFulltextBinariesWorks(List<Work> works, Set<String> docWithDirtyBinaries) {
918        if (docWithDirtyBinaries.isEmpty()) {
919            return;
920        }
921
922        FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();
923
924        // mark indexing in progress, so that future copies (including versions)
925        // will be indexed as well
926        for (String id : docWithDirtyBinaries) {
927            DBSDocumentState docState = getStateForUpdate(id);
928            if (docState == null) {
929                // cannot happen
930                continue;
931            }
932            if (!fulltextConfiguration.isFulltextIndexable(docState.getPrimaryType())) {
933                continue;
934            }
935            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
936        }
937
938        // FulltextExtractorWork does fulltext extraction using converters
939        // and then schedules a FulltextUpdaterWork to write the results
940        // single-threaded
941        for (String id : docWithDirtyBinaries) {
942            // don't exclude proxies
943            Work work = new DBSFulltextExtractorWork(repository.getName(), id);
944            works.add(work);
945        }
946    }
947
948    protected static class FulltextFinder {
949
950        protected final FulltextParser fulltextParser;
951
952        protected final DBSDocumentState document;
953
954        protected final DBSSession session;
955
956        protected final String documentType;
957
958        protected final Object[] mixinTypes;
959
960        /**
961         * Prepares parsing for one document.
962         */
963        public FulltextFinder(FulltextParser fulltextParser, DBSDocumentState document, DBSSession session) {
964            this.fulltextParser = fulltextParser;
965            this.document = document;
966            this.session = session;
967            if (document == null) {
968                documentType = null;
969                mixinTypes = null;
970            } else { // null in tests
971                documentType = document.getPrimaryType();
972                mixinTypes = (Object[]) document.get(KEY_MIXIN_TYPES);
973            }
974        }
975
976        /**
977         * Parses the document for one index.
978         */
979        public String findFulltext(String indexName) {
980            // TODO indexName
981            // TODO paths
982            List<String> strings = new ArrayList<String>();
983            findFulltext(indexName, document.getState(), strings);
984            return StringUtils.join(strings, ' ');
985        }
986
987        protected void findFulltext(String indexName, State state, List<String> strings) {
988            for (Entry<String, Serializable> en : state.entrySet()) {
989                String key = en.getKey();
990                if (key.startsWith(KEY_PREFIX)) {
991                    switch (key) {
992                    // allow indexing of this:
993                    case DBSDocument.KEY_NAME:
994                        break;
995                    default:
996                        continue;
997                    }
998                }
999                Serializable value = en.getValue();
1000                if (value instanceof State) {
1001                    State s = (State) value;
1002                    findFulltext(indexName, s, strings);
1003                } else if (value instanceof List) {
1004                    @SuppressWarnings("unchecked")
1005                    List<State> v = (List<State>) value;
1006                    for (State s : v) {
1007                        findFulltext(indexName, s, strings);
1008                    }
1009                } else if (value instanceof Object[]) {
1010                    Object[] ar = (Object[]) value;
1011                    for (Object v : ar) {
1012                        if (v instanceof String) {
1013                            fulltextParser.parse((String) v, null, strings);
1014                        } else {
1015                            // arrays are homogeneous, no need to continue
1016                            break;
1017                        }
1018                    }
1019                } else {
1020                    if (value instanceof String) {
1021                        fulltextParser.parse((String) value, null, strings);
1022                    }
1023                }
1024            }
1025        }
1026    }
1027
1028    protected void scheduleWork(List<Work> works) {
1029        // do async fulltext indexing only if high-level sessions are available
1030        RepositoryManager repositoryManager = Framework.getLocalService(RepositoryManager.class);
1031        if (repositoryManager != null && !works.isEmpty()) {
1032            WorkManager workManager = Framework.getLocalService(WorkManager.class);
1033            for (Work work : works) {
1034                // schedule work post-commit
1035                // in non-tx mode, this may execute it nearly immediately
1036                workManager.schedule(work, Scheduling.IF_NOT_SCHEDULED, true);
1037            }
1038        }
1039    }
1040
1041}