001/*
002 * (C) Copyright 2014-2016 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.core.storage.dbs;
020
021import static java.lang.Boolean.FALSE;
022import static java.lang.Boolean.TRUE;
023import static org.nuxeo.ecm.core.api.security.SecurityConstants.BROWSE;
024import static org.nuxeo.ecm.core.api.security.SecurityConstants.EVERYONE;
025import static org.nuxeo.ecm.core.api.security.SecurityConstants.UNSUPPORTED_ACL;
026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_GRANT;
027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_PERMISSION;
028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_USER;
029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL;
030import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP;
031import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ANCESTOR_IDS;
032import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_JOBID;
033import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ID;
034import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_PROXY;
035import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_VERSION;
036import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_MIXIN_TYPES;
037import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_NAME;
038import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PARENT_ID;
039import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_POS;
040import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PREFIX;
041import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PRIMARY_TYPE;
042import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_IDS;
043import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_TARGET_ID;
044import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_VERSION_SERIES_ID;
045import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_READ_ACL;
046import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_VERSION_SERIES_ID;
047
048import java.io.Serializable;
049import java.util.ArrayList;
050import java.util.Arrays;
051import java.util.Collections;
052import java.util.HashMap;
053import java.util.HashSet;
054import java.util.LinkedHashSet;
055import java.util.LinkedList;
056import java.util.List;
057import java.util.Map;
058import java.util.Map.Entry;
059import java.util.Set;
060
061import org.apache.commons.lang.StringUtils;
062import org.apache.commons.logging.Log;
063import org.apache.commons.logging.LogFactory;
064import org.nuxeo.ecm.core.api.ConcurrentUpdateException;
065import org.nuxeo.ecm.core.api.repository.RepositoryManager;
066import org.nuxeo.ecm.core.schema.SchemaManager;
067import org.nuxeo.ecm.core.schema.types.Schema;
068import org.nuxeo.ecm.core.security.SecurityService;
069import org.nuxeo.ecm.core.storage.DefaultFulltextParser;
070import org.nuxeo.ecm.core.storage.FulltextConfiguration;
071import org.nuxeo.ecm.core.storage.FulltextParser;
072import org.nuxeo.ecm.core.storage.FulltextUpdaterWork;
073import org.nuxeo.ecm.core.storage.FulltextUpdaterWork.IndexAndText;
074import org.nuxeo.ecm.core.storage.State;
075import org.nuxeo.ecm.core.storage.State.ListDiff;
076import org.nuxeo.ecm.core.storage.State.StateDiff;
077import org.nuxeo.ecm.core.storage.StateHelper;
078import org.nuxeo.ecm.core.work.api.Work;
079import org.nuxeo.ecm.core.work.api.WorkManager;
080import org.nuxeo.ecm.core.work.api.WorkManager.Scheduling;
081import org.nuxeo.runtime.api.Framework;
082
083/**
084 * Transactional state for a session.
085 * <p>
086 * Until {@code save()} is called, data lives in the transient map.
087 * <p>
088 * Upon save, data is written to the repository, even though it has not yet been committed (this means that other
089 * sessions can read uncommitted data). It's also kept in an undo log in order for rollback to be possible.
090 * <p>
091 * On commit, the undo log is forgotten. On rollback, the undo log is replayed.
092 *
093 * @since 5.9.4
094 */
095public class DBSTransactionState {
096
097    private static final Log log = LogFactory.getLog(DBSTransactionState.class);
098
099    private static final String KEY_UNDOLOG_CREATE = "__UNDOLOG_CREATE__\0\0"; // marker key flagging an undo-log state as "re-create"
100
101    protected final DBSRepository repository; // repository the states are read from and written to
102
103    protected final DBSSession session; // session passed at construction
104
105    /** Retrieved and created document states, keyed by document id. */
106    protected Map<String, DBSDocumentState> transientStates = new HashMap<>();
107
108    /** Ids of documents created but not yet saved, kept in creation order. */
109    protected Set<String> transientCreated = new LinkedHashSet<>();
110
111    /**
112     * Undo log.
113     * <p>
114     * A map of document ids to null or State. The value is null when the document has to be deleted when applying the
115     * undo log. Otherwise the value is a State. If the State contains the key {@link #KEY_UNDOLOG_CREATE} then the
116     * state must be re-created completely when applying the undo log, otherwise just applied as an update.
117     * <p>
118     * Null when there is no active transaction (set by {@code begin()}, reset by commit and rollback).
119     */
120    protected Map<String, State> undoLog;
121
122    protected final Set<String> browsePermissions; // permissions reported by the SecurityService for BROWSE, used for Read ACLs
123
124    public DBSTransactionState(DBSRepository repository, DBSSession session) {
125        this.repository = repository;
126        this.session = session;
127        SecurityService securityService = Framework.getLocalService(SecurityService.class);
128        browsePermissions = new HashSet<>(Arrays.asList(securityService.getPermissionsToCheck(BROWSE)));
129    }
130
131    /**
132     * New transient state for something just read from the repository.
133     */
134    protected DBSDocumentState newTransientState(State state) {
135        if (state == null) {
136            return null;
137        }
138        String id = (String) state.get(KEY_ID);
139        if (transientStates.containsKey(id)) {
140            throw new IllegalStateException("Already transient: " + id);
141        }
142        DBSDocumentState docState = new DBSDocumentState(state); // copy
143        transientStates.put(id, docState);
144        return docState;
145    }
146
147    /**
148     * Returns a state and marks it as transient, because it's about to be modified or returned to user code (where it
149     * may be modified).
150     */
151    public DBSDocumentState getStateForUpdate(String id) {
152        // check transient state
153        DBSDocumentState docState = transientStates.get(id);
154        if (docState != null) {
155            return docState;
156        }
157        // fetch from repository
158        State state = repository.readState(id);
159        return newTransientState(state);
160    }
161
162    /**
163     * Returns a state which won't be modified.
164     */
165    // TODO in some cases it's good to have this kept in memory instead of
166    // rereading from database every time
167    // XXX getStateForReadOneShot
168    public State getStateForRead(String id) {
169        // check transient state
170        DBSDocumentState docState = transientStates.get(id);
171        if (docState != null) {
172            return docState.getState();
173        }
174        // fetch from repository
175        return repository.readState(id);
176    }
177
178    /**
179     * Returns states and marks them transient, because they're about to be returned to user code (where they may be
180     * modified).
181     */
182    public List<DBSDocumentState> getStatesForUpdate(List<String> ids) {
183        // check which ones we have to fetch from repository
184        List<String> idsToFetch = new LinkedList<>();
185        for (String id : ids) {
186            // check transient state
187            DBSDocumentState docState = transientStates.get(id);
188            if (docState != null) {
189                continue;
190            }
191            // will have to fetch it
192            idsToFetch.add(id);
193        }
194        if (!idsToFetch.isEmpty()) {
195            List<State> states = repository.readStates(idsToFetch);
196            for (State state : states) {
197                newTransientState(state);
198            }
199        }
200        // everything now fetched in transient
201        List<DBSDocumentState> docStates = new ArrayList<>(ids.size());
202        for (String id : ids) {
203            DBSDocumentState docState = transientStates.get(id);
204            if (docState == null) {
205                if (log.isTraceEnabled()) {
206                    log.trace("Cannot fetch document with id: " + id, new Throwable("debug stack trace"));
207                }
208                continue;
209            }
210            docStates.add(docState);
211        }
212        return docStates;
213    }
214
215    // XXX TODO for update or for read?
216    public DBSDocumentState getChildState(String parentId, String name) {
217        // check transient state
218        for (DBSDocumentState docState : transientStates.values()) {
219            if (!parentId.equals(docState.getParentId())) {
220                continue;
221            }
222            if (!name.equals(docState.getName())) {
223                continue;
224            }
225            return docState;
226        }
227        // fetch from repository
228        State state = repository.readChildState(parentId, name, Collections.emptySet());
229        return newTransientState(state);
230    }
231
232    public boolean hasChild(String parentId, String name) {
233        // check transient state
234        for (DBSDocumentState docState : transientStates.values()) {
235            if (!parentId.equals(docState.getParentId())) {
236                continue;
237            }
238            if (!name.equals(docState.getName())) {
239                continue;
240            }
241            return true;
242        }
243        // check repository
244        return repository.hasChild(parentId, name, Collections.emptySet());
245    }
246
247    public List<DBSDocumentState> getChildrenStates(String parentId) {
248        List<DBSDocumentState> docStates = new LinkedList<>();
249        Set<String> seen = new HashSet<>();
250        // check transient state
251        for (DBSDocumentState docState : transientStates.values()) {
252            if (!parentId.equals(docState.getParentId())) {
253                continue;
254            }
255            docStates.add(docState);
256            seen.add(docState.getId());
257        }
258        // fetch from repository
259        List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen);
260        for (State state : states) {
261            docStates.add(newTransientState(state));
262        }
263        return docStates;
264    }
265
266    public List<String> getChildrenIds(String parentId) {
267        List<String> children = new ArrayList<>();
268        Set<String> seen = new HashSet<>();
269        // check transient state
270        for (DBSDocumentState docState : transientStates.values()) {
271            String id = docState.getId();
272            if (!parentId.equals(docState.getParentId())) {
273                continue;
274            }
275            seen.add(id);
276            children.add(id);
277        }
278        // fetch from repository
279        List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen);
280        for (State state : states) {
281            children.add((String) state.get(KEY_ID));
282        }
283        return new ArrayList<>(children);
284    }
285
286    public boolean hasChildren(String parentId) {
287        // check transient state
288        for (DBSDocumentState docState : transientStates.values()) {
289            if (!parentId.equals(docState.getParentId())) {
290                continue;
291            }
292            return true;
293        }
294        // check repository
295        return repository.queryKeyValuePresence(KEY_PARENT_ID, parentId, Collections.emptySet());
296    }
297
298    public DBSDocumentState createChild(String id, String parentId, String name, Long pos, String typeName) {
299        // id may be not-null for import
300        if (id == null) {
301            id = repository.generateNewId();
302        }
303        transientCreated.add(id);
304        DBSDocumentState docState = new DBSDocumentState();
305        transientStates.put(id, docState);
306        docState.put(KEY_ID, id);
307        docState.put(KEY_PARENT_ID, parentId);
308        docState.put(KEY_ANCESTOR_IDS, getAncestorIds(parentId));
309        docState.put(KEY_NAME, name);
310        docState.put(KEY_POS, pos);
311        docState.put(KEY_PRIMARY_TYPE, typeName);
312        // update read acls for new doc
313        updateDocumentReadAcls(id);
314        return docState;
315    }
316
317    /** Gets ancestors including id itself. */
318    protected Object[] getAncestorIds(String id) {
319        if (id == null) {
320            return null;
321        }
322        State state = getStateForRead(id);
323        if (state == null) {
324            throw new RuntimeException("No such id: " + id);
325        }
326        Object[] ancestors = (Object[]) state.get(KEY_ANCESTOR_IDS);
327        if (ancestors == null) {
328            return new Object[] { id };
329        } else {
330            Object[] newAncestors = new Object[ancestors.length + 1];
331            System.arraycopy(ancestors, 0, newAncestors, 0, ancestors.length);
332            newAncestors[ancestors.length] = id;
333            return newAncestors;
334        }
335    }
336
337    /**
338     * Copies the document into a newly-created object.
339     * <p>
340     * The copy is automatically saved.
341     */
342    public DBSDocumentState copy(String id) {
343        DBSDocumentState copyState = new DBSDocumentState(getStateForRead(id));
344        String copyId = repository.generateNewId();
345        copyState.put(KEY_ID, copyId);
346        copyState.put(KEY_PROXY_IDS, null); // no proxies to this new doc
347        // other fields updated by the caller
348        transientStates.put(copyId, copyState);
349        transientCreated.add(copyId);
350        return copyState;
351    }
352
353    /**
354     * Updates ancestors recursively after a move.
355     * <p>
356     * Recursing from given doc, replace the first ndel ancestors with those passed.
357     * <p>
358     * Doesn't check transient (assumes save is done). The modifications are automatically saved.
359     */
360    public void updateAncestors(String id, int ndel, Object[] ancestorIds) {
361        int nadd = ancestorIds.length;
362        Set<String> ids = getSubTree(id, null, null);
363        ids.add(id);
364        for (String cid : ids) {
365            // XXX TODO oneShot update, don't pollute transient space
366            DBSDocumentState docState = getStateForUpdate(cid);
367            Object[] ancestors = (Object[]) docState.get(KEY_ANCESTOR_IDS);
368            Object[] newAncestors;
369            if (ancestors == null) {
370                newAncestors = ancestorIds.clone();
371            } else {
372                newAncestors = new Object[ancestors.length - ndel + nadd];
373                System.arraycopy(ancestorIds, 0, newAncestors, 0, nadd);
374                System.arraycopy(ancestors, ndel, newAncestors, nadd, ancestors.length - ndel);
375            }
376            docState.put(KEY_ANCESTOR_IDS, newAncestors);
377        }
378    }
379
380    /**
381     * Updates the Read ACLs recursively on a document.
382     */
383    public void updateTreeReadAcls(String id) {
384        // versions too XXX TODO
385        Set<String> ids = getSubTree(id, null, null);
386        ids.add(id);
387        ids.forEach(this::updateDocumentReadAcls);
388    }
389
390    /**
391     * Updates the Read ACLs on a document (not recursively)
392     */
393    protected void updateDocumentReadAcls(String id) {
394        // XXX TODO oneShot update, don't pollute transient space
395        DBSDocumentState docState = getStateForUpdate(id);
396        docState.put(KEY_READ_ACL, getReadACL(docState));
397    }
398
399    /**
400     * Gets the Read ACL (flat list of users having browse permission, including inheritance) on a document.
401     */
402    protected String[] getReadACL(DBSDocumentState docState) {
403        Set<String> racls = new HashSet<>();
404        State state = docState.getState();
405        LOOP: do {
406            @SuppressWarnings("unchecked")
407            List<Serializable> aclList = (List<Serializable>) state.get(KEY_ACP);
408            if (aclList != null) {
409                for (Serializable aclSer : aclList) {
410                    State aclMap = (State) aclSer;
411                    @SuppressWarnings("unchecked")
412                    List<Serializable> aceList = (List<Serializable>) aclMap.get(KEY_ACL);
413                    for (Serializable aceSer : aceList) {
414                        State aceMap = (State) aceSer;
415                        String username = (String) aceMap.get(KEY_ACE_USER);
416                        String permission = (String) aceMap.get(KEY_ACE_PERMISSION);
417                        Boolean granted = (Boolean) aceMap.get(KEY_ACE_GRANT);
418                        if (TRUE.equals(granted) && browsePermissions.contains(permission)) {
419                            racls.add(username);
420                        }
421                        if (FALSE.equals(granted)) {
422                            if (!EVERYONE.equals(username)) {
423                                // TODO log
424                                racls.add(UNSUPPORTED_ACL);
425                            }
426                            break LOOP;
427                        }
428                    }
429                }
430            }
431            // get parent
432            if (TRUE.equals(state.get(KEY_IS_VERSION))) {
433                String versionSeriesId = (String) state.get(KEY_VERSION_SERIES_ID);
434                state = versionSeriesId == null ? null : getStateForRead(versionSeriesId);
435            } else {
436                String parentId = (String) state.get(KEY_PARENT_ID);
437                state = parentId == null ? null : getStateForRead(parentId);
438            }
439        } while (state != null);
440
441        // sort to have canonical order
442        List<String> racl = new ArrayList<>(racls);
443        Collections.sort(racl);
444        return racl.toArray(new String[racl.size()]);
445    }
446
447    /**
448     * Gets all the ids under a given one, recursively.
449     * <p>
450     * Doesn't check transient (assumes save is done).
451     *
452     * @param id the root of the tree (not included in results)
453     * @param proxyTargets returns a map of proxy to target among the documents found
454     * @param targetProxies returns a map of target to proxies among the document found
455     */
456    protected Set<String> getSubTree(String id, Map<String, String> proxyTargets, Map<String, Object[]> targetProxies) {
457        Set<String> ids = new HashSet<>();
458        // check repository
459        repository.queryKeyValueArray(KEY_ANCESTOR_IDS, id, ids, proxyTargets, targetProxies);
460        return ids;
461    }
462
463    public List<DBSDocumentState> getKeyValuedStates(String key, Object value) {
464        List<DBSDocumentState> docStates = new LinkedList<>();
465        Set<String> seen = new HashSet<>();
466        // check transient state
467        for (DBSDocumentState docState : transientStates.values()) {
468            if (!value.equals(docState.get(key))) {
469                continue;
470            }
471            docStates.add(docState);
472            seen.add(docState.getId());
473        }
474        // fetch from repository
475        List<State> states = repository.queryKeyValue(key, value, seen);
476        for (State state : states) {
477            docStates.add(newTransientState(state));
478        }
479        return docStates;
480    }
481
482    public List<DBSDocumentState> getKeyValuedStates(String key1, Object value1, String key2, Object value2) {
483        List<DBSDocumentState> docStates = new LinkedList<>();
484        Set<String> seen = new HashSet<>();
485        // check transient state
486        for (DBSDocumentState docState : transientStates.values()) {
487            seen.add(docState.getId());
488            if (!(value1.equals(docState.get(key1)) && value2.equals(docState.get(key2)))) {
489                continue;
490            }
491            docStates.add(docState);
492        }
493        // fetch from repository
494        List<State> states = repository.queryKeyValue(key1, value1, key2, value2, seen);
495        for (State state : states) {
496            docStates.add(newTransientState(state));
497        }
498        return docStates;
499    }
500
501    /**
502     * Removes a list of documents.
503     * <p>
504     * Called after a {@link #save} has been done.
505     */
506    public void removeStates(Set<String> ids) {
507        if (undoLog != null) {
508            for (String id : ids) {
509                if (undoLog.containsKey(id)) {
510                    // there's already a create or an update in the undo log
511                    State oldUndo = undoLog.get(id);
512                    if (oldUndo == null) {
513                        // create + delete -> forget
514                        undoLog.remove(id);
515                    } else {
516                        // update + delete -> original old state to re-create
517                        oldUndo.put(KEY_UNDOLOG_CREATE, TRUE);
518                    }
519                } else {
520                    // just delete -> store old state to re-create
521                    State oldState = StateHelper.deepCopy(getStateForRead(id));
522                    oldState.put(KEY_UNDOLOG_CREATE, TRUE);
523                    undoLog.put(id, oldState);
524                }
525            }
526        }
527        for (String id : ids) {
528            transientStates.remove(id);
529        }
530        repository.deleteStates(ids);
531    }
532
533    /**
534     * Writes transient state to database.
535     * <p>
536     * An undo log is kept in order to rollback the transaction later if needed.
537     */
538    public void save() {
539        updateProxies();
540        List<Work> works;
541        if (!repository.isFulltextDisabled()) {
542            // TODO getting fulltext already does a getStateChange
543            works = getFulltextWorks();
544        } else {
545            works = Collections.emptyList();
546        }
547        List<State> statesToCreate = new ArrayList<>();
548        for (String id : transientCreated) { // ordered
549            DBSDocumentState docState = transientStates.get(id);
550            docState.setNotDirty();
551            if (undoLog != null) {
552                undoLog.put(id, null); // marker to denote create
553            }
554            statesToCreate.add(docState.getState());
555        }
556        if (!statesToCreate.isEmpty()) {
557            repository.createStates(statesToCreate);
558        }
559        for (DBSDocumentState docState : transientStates.values()) {
560            String id = docState.getId();
561            if (transientCreated.contains(id)) {
562                continue; // already done
563            }
564            StateDiff diff = docState.getStateChange();
565            if (diff != null) {
566                if (undoLog != null) {
567                    if (!undoLog.containsKey(id)) {
568                        undoLog.put(id, StateHelper.deepCopy(docState.getOriginalState()));
569                    }
570                    // else there's already a create or an update in the undo log so original info is enough
571                }
572                repository.updateState(id, diff);
573            }
574            docState.setNotDirty();
575        }
576        transientCreated.clear();
577        scheduleWork(works);
578    }
579
580    protected void applyUndoLog() {
581        Set<String> deletes = new HashSet<>();
582        for (Entry<String, State> es : undoLog.entrySet()) {
583            String id = es.getKey();
584            State state = es.getValue();
585            if (state == null) {
586                deletes.add(id);
587            } else {
588                boolean recreate = state.remove(KEY_UNDOLOG_CREATE) != null;
589                if (recreate) {
590                    repository.createState(state);
591                } else {
592                    // undo update
593                    State currentState = repository.readState(id);
594                    if (currentState != null) {
595                        StateDiff diff = StateHelper.diff(currentState, state);
596                        if (!diff.isEmpty()) {
597                            repository.updateState(id, diff);
598                        }
599                    }
600                    // else we expected to read a current state but it was concurrently deleted...
601                    // in that case leave it deleted
602                }
603            }
604        }
605        if (!deletes.isEmpty()) {
606            repository.deleteStates(deletes);
607        }
608    }
609
610    /**
611     * Checks if the changed documents are proxy targets, and updates the proxies if that's the case.
612     */
613    protected void updateProxies() {
614        for (String id : transientCreated) { // ordered
615            DBSDocumentState docState = transientStates.get(id);
616            updateProxies(docState);
617        }
618        // copy as we may modify proxies
619        for (String id : transientStates.keySet().toArray(new String[0])) {
620            DBSDocumentState docState = transientStates.get(id);
621            if (transientCreated.contains(id)) {
622                continue; // already done
623            }
624            if (docState.isDirty()) {
625                updateProxies(docState);
626            }
627        }
628    }
629
630    protected void updateProxies(DBSDocumentState target) {
631        Object[] proxyIds = (Object[]) target.get(KEY_PROXY_IDS);
632        if (proxyIds != null) {
633            for (Object proxyId : proxyIds) {
634                try {
635                    updateProxy(target, (String) proxyId);
636                } catch (ConcurrentUpdateException e) {
637                    e.addInfo("On doc " + target.getId());
638                    log.error(e, e);
639                    // do not throw, this avoids crashing the session
640                }
641            }
642        }
643    }
644
645    /**
646     * Updates the state of a proxy based on its target.
647     */
648    protected void updateProxy(DBSDocumentState target, String proxyId) {
649        DBSDocumentState proxy = getStateForUpdate(proxyId);
650        if (proxy == null) {
651            throw new ConcurrentUpdateException("Proxy " + proxyId + " concurrently deleted");
652        }
653        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
654        // clear all proxy data
655        for (String key : proxy.getState().keyArray()) {
656            if (!isProxySpecific(key, schemaManager)) {
657                proxy.put(key, null);
658            }
659        }
660        // copy from target
661        for (Entry<String, Serializable> en : target.getState().entrySet()) {
662            String key = en.getKey();
663            if (!isProxySpecific(key, schemaManager)) {
664                proxy.put(key, StateHelper.deepCopy(en.getValue()));
665            }
666        }
667    }
668
669    /**
670     * Things that we don't touch on a proxy when updating it.
671     */
672    protected boolean isProxySpecific(String key, SchemaManager schemaManager) {
673        switch (key) {
674        // these are placeful stuff
675        case KEY_ID:
676        case KEY_PARENT_ID:
677        case KEY_ANCESTOR_IDS:
678        case KEY_NAME:
679        case KEY_POS:
680        case KEY_ACP:
681        case KEY_READ_ACL:
682            // these are proxy-specific
683        case KEY_IS_PROXY:
684        case KEY_PROXY_TARGET_ID:
685        case KEY_PROXY_VERSION_SERIES_ID:
686        case KEY_IS_VERSION:
687        case KEY_PROXY_IDS:
688            return true;
689        }
690        int p = key.indexOf(':');
691        if (p == -1) {
692            // no prefix, assume not proxy-specific
693            return false;
694        }
695        String prefix = key.substring(0, p);
696        Schema schema = schemaManager.getSchemaFromPrefix(prefix);
697        if (schema == null) {
698            schema = schemaManager.getSchema(prefix);
699            if (schema == null) {
700                // unknown prefix, assume not proxy-specific
701                return false;
702            }
703        }
704        return schemaManager.isProxySchema(schema.getName(), null); // type unused
705    }
706
707    /**
708     * Called when created in a transaction: starts an empty undo log, then begins the repository transaction.
709     *
710     * @since 7.4
711     */
712    public void begin() {
713        undoLog = new HashMap<>(); // a non-null undo log is what makes save() and removeStates() record undo info
714        repository.begin();
715    }
716
717    /**
718     * Saves and flushes to database: saves the transient state, forgets it along with the undo log, then commits the repository.
719     */
720    public void commit() {
721        save();
722        commitSave(); // clears transient state and drops the undo log
723        repository.commit();
724    }
725
726    /**
727     * Commits the saved state to the database: forgets the transient state and the undo log (nothing is written here).
728     */
729    protected void commitSave() {
730        // clear transient, this means that after this references to states will be stale
731        // TODO mark states as invalid
732        clearTransient();
733        // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway
734        undoLog = null;
735    }
736
737    /**
738     * Rolls back the saved state: drops the transient state, applies the undo log, then rolls back the repository.
739     */
740    public void rollback() {
741        clearTransient(); // unsaved in-memory changes are simply forgotten
742        applyUndoLog(); // reverts what previous saves wrote to the repository
743        // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway
744        undoLog = null;
745        repository.rollback();
746    }
747
748    protected void clearTransient() { // forgets all transient document states and all pending creations
749        transientStates.clear();
750        transientCreated.clear();
751    }
752
753    /**
754     * Gets the fulltext updates to do. Called at save() time.
755     *
756     * @return a list of {@link Work} instances to schedule post-commit.
757     */
758    protected List<Work> getFulltextWorks() {
759        Set<String> docsWithDirtyStrings = new HashSet<>();
760        Set<String> docsWithDirtyBinaries = new HashSet<>();
761        findDirtyDocuments(docsWithDirtyStrings, docsWithDirtyBinaries);
762        if (docsWithDirtyStrings.isEmpty() && docsWithDirtyBinaries.isEmpty()) {
763            return Collections.emptyList();
764        }
765        List<Work> works = new LinkedList<>();
766        getFulltextSimpleWorks(works, docsWithDirtyStrings);
767        getFulltextBinariesWorks(works, docsWithDirtyBinaries);
768        return works;
769    }
770
771    /**
772     * Finds the documents having dirty text or dirty binaries that have to be reindexed as fulltext.
773     *
774     * @param docsWithDirtyStrings set of ids, updated by this method
775     * @param docWithDirtyBinaries set of ids, updated by this method
776     */
777    protected void findDirtyDocuments(Set<String> docsWithDirtyStrings, Set<String> docWithDirtyBinaries) {
778        for (DBSDocumentState docState : transientStates.values()) {
779            State originalState = docState.getOriginalState();
780            State state = docState.getState();
781            if (originalState == state) {
782                continue;
783            }
784            StateDiff diff = StateHelper.diff(originalState, state);
785            if (diff.isEmpty()) {
786                continue;
787            }
788            StateDiff rdiff = StateHelper.diff(state, originalState);
789            // we do diffs in both directions to capture removal of complex list elements,
790            // for instance for {foo: [{bar: baz}] -> {foo: []}
791            // diff paths = foo and rdiff paths = foo/*/bar
792            Set<String> paths = new HashSet<>();
793            DirtyPathsFinder dirtyPathsFinder = new DirtyPathsFinder(paths);
794            dirtyPathsFinder.findDirtyPaths(diff);
795            dirtyPathsFinder.findDirtyPaths(rdiff);
796            FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();
797            boolean dirtyStrings = false;
798            boolean dirtyBinaries = false;
799            for (String path : paths) {
800                Set<String> indexesSimple = fulltextConfiguration.indexesByPropPathSimple.get(path);
801                if (indexesSimple != null && !indexesSimple.isEmpty()) {
802                    dirtyStrings = true;
803                    if (dirtyBinaries) {
804                        break;
805                    }
806                }
807                Set<String> indexesBinary = fulltextConfiguration.indexesByPropPathBinary.get(path);
808                if (indexesBinary != null && !indexesBinary.isEmpty()) {
809                    dirtyBinaries = true;
810                    if (dirtyStrings) {
811                        break;
812                    }
813                }
814            }
815            if (dirtyStrings) {
816                docsWithDirtyStrings.add(docState.getId());
817            }
818            if (dirtyBinaries) {
819                docWithDirtyBinaries.add(docState.getId());
820            }
821        }
822    }
823
824    /**
825     * Iterates on a state diff to find the paths corresponding to dirty values.
826     *
827     * @since 7.10-HF04, 8.1
828     */
829    protected static class DirtyPathsFinder {
830
831        protected Set<String> paths;
832
833        public DirtyPathsFinder(Set<String> paths) {
834            this.paths = paths;
835        }
836
837        public void findDirtyPaths(StateDiff value) {
838            findDirtyPaths(value, null);
839        }
840
841        protected void findDirtyPaths(Object value, String path) {
842            if (value instanceof Object[]) {
843                findDirtyPaths((Object[]) value, path);
844            } else if (value instanceof List) {
845                findDirtyPaths((List<?>) value, path);
846            } else if (value instanceof ListDiff) {
847                findDirtyPaths((ListDiff) value, path);
848            } else if (value instanceof State) {
849                findDirtyPaths((State) value, path);
850            } else {
851                paths.add(path);
852            }
853        }
854
855        protected void findDirtyPaths(Object[] value, String path) {
856            String newPath = path + "/*";
857            for (Object v : value) {
858                findDirtyPaths(v, newPath);
859            }
860        }
861
862        protected void findDirtyPaths(List<?> value, String path) {
863            String newPath = path + "/*";
864            for (Object v : value) {
865                findDirtyPaths(v, newPath);
866            }
867        }
868
869        protected void findDirtyPaths(ListDiff value, String path) {
870            String newPath = path + "/*";
871            if (value.diff != null) {
872                findDirtyPaths(value.diff, newPath);
873            }
874            if (value.rpush != null) {
875                findDirtyPaths(value.rpush, newPath);
876            }
877        }
878
879        protected void findDirtyPaths(State value, String path) {
880            for (Entry<String, Serializable> es : value.entrySet()) {
881                String key = es.getKey();
882                Serializable v = es.getValue();
883                String newPath = path == null ? key : path + "/" + key;
884                findDirtyPaths(v, newPath);
885            }
886        }
887    }
888
889    protected void getFulltextSimpleWorks(List<Work> works, Set<String> docsWithDirtyStrings) {
890        // TODO XXX make configurable, see also FulltextExtractorWork
891        FulltextParser fulltextParser = new DefaultFulltextParser();
892        FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();
893        if (fulltextConfiguration.fulltextSearchDisabled) {
894            return;
895        }
896        // update simpletext on documents with dirty strings
897        for (String id : docsWithDirtyStrings) {
898            if (id == null) {
899                // cannot happen, but has been observed :(
900                log.error("Got null doc id in fulltext update, cannot happen");
901                continue;
902            }
903            DBSDocumentState docState = getStateForUpdate(id);
904            if (docState == null) {
905                // cannot happen
906                continue;
907            }
908            String documentType = docState.getPrimaryType();
909            // Object[] mixinTypes = (Object[]) docState.get(KEY_MIXIN_TYPES);
910
911            if (!fulltextConfiguration.isFulltextIndexable(documentType)) {
912                continue;
913            }
914            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
915            FulltextFinder fulltextFinder = new FulltextFinder(fulltextParser, docState, session);
916            List<IndexAndText> indexesAndText = new LinkedList<>();
917            for (String indexName : fulltextConfiguration.indexNames) {
918                // TODO paths from config
919                String text = fulltextFinder.findFulltext(indexName);
920                indexesAndText.add(new IndexAndText(indexName, text));
921            }
922            if (!indexesAndText.isEmpty()) {
923                Work work = new FulltextUpdaterWork(repository.getName(), id, true, false, indexesAndText);
924                works.add(work);
925            }
926        }
927    }
928
929    protected void getFulltextBinariesWorks(List<Work> works, Set<String> docWithDirtyBinaries) {
930        if (docWithDirtyBinaries.isEmpty()) {
931            return;
932        }
933
934        FulltextConfiguration fulltextConfiguration = repository.getFulltextConfiguration();
935
936        // mark indexing in progress, so that future copies (including versions)
937        // will be indexed as well
938        for (String id : docWithDirtyBinaries) {
939            DBSDocumentState docState = getStateForUpdate(id);
940            if (docState == null) {
941                // cannot happen
942                continue;
943            }
944            if (!fulltextConfiguration.isFulltextIndexable(docState.getPrimaryType())) {
945                continue;
946            }
947            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
948        }
949
950        // FulltextExtractorWork does fulltext extraction using converters
951        // and then schedules a FulltextUpdaterWork to write the results
952        // single-threaded
953        for (String id : docWithDirtyBinaries) {
954            // don't exclude proxies
955            Work work = new DBSFulltextExtractorWork(repository.getName(), id);
956            works.add(work);
957        }
958    }
959
960    protected static class FulltextFinder {
961
962        protected final FulltextParser fulltextParser;
963
964        protected final DBSDocumentState document;
965
966        protected final DBSSession session;
967
968        protected final String documentType;
969
970        protected final Object[] mixinTypes;
971
972        /**
973         * Prepares parsing for one document.
974         */
975        public FulltextFinder(FulltextParser fulltextParser, DBSDocumentState document, DBSSession session) {
976            this.fulltextParser = fulltextParser;
977            this.document = document;
978            this.session = session;
979            if (document == null) {
980                documentType = null;
981                mixinTypes = null;
982            } else { // null in tests
983                documentType = document.getPrimaryType();
984                mixinTypes = (Object[]) document.get(KEY_MIXIN_TYPES);
985            }
986        }
987
988        /**
989         * Parses the document for one index.
990         */
991        public String findFulltext(String indexName) {
992            // TODO indexName
993            // TODO paths
994            List<String> strings = new ArrayList<>();
995            findFulltext(indexName, document.getState(), strings);
996            return StringUtils.join(strings, ' ');
997        }
998
999        protected void findFulltext(String indexName, State state, List<String> strings) {
1000            for (Entry<String, Serializable> en : state.entrySet()) {
1001                String key = en.getKey();
1002                if (key.startsWith(KEY_PREFIX)) {
1003                    switch (key) {
1004                    // allow indexing of this:
1005                    case DBSDocument.KEY_NAME:
1006                        break;
1007                    default:
1008                        continue;
1009                    }
1010                }
1011                Serializable value = en.getValue();
1012                if (value instanceof State) {
1013                    State s = (State) value;
1014                    findFulltext(indexName, s, strings);
1015                } else if (value instanceof List) {
1016                    @SuppressWarnings("unchecked")
1017                    List<State> v = (List<State>) value;
1018                    for (State s : v) {
1019                        findFulltext(indexName, s, strings);
1020                    }
1021                } else if (value instanceof Object[]) {
1022                    Object[] ar = (Object[]) value;
1023                    for (Object v : ar) {
1024                        if (v instanceof String) {
1025                            fulltextParser.parse((String) v, null, strings);
1026                        } else {
1027                            // arrays are homogeneous, no need to continue
1028                            break;
1029                        }
1030                    }
1031                } else {
1032                    if (value instanceof String) {
1033                        fulltextParser.parse((String) value, null, strings);
1034                    }
1035                }
1036            }
1037        }
1038    }
1039
1040    protected void scheduleWork(List<Work> works) {
1041        // do async fulltext indexing only if high-level sessions are available
1042        RepositoryManager repositoryManager = Framework.getLocalService(RepositoryManager.class);
1043        if (repositoryManager != null && !works.isEmpty()) {
1044            WorkManager workManager = Framework.getLocalService(WorkManager.class);
1045            for (Work work : works) {
1046                // schedule work post-commit
1047                // in non-tx mode, this may execute it nearly immediately
1048                workManager.schedule(work, Scheduling.IF_NOT_SCHEDULED, true);
1049            }
1050        }
1051    }
1052
1053}