001/*
002 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl-2.1.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 * Contributors:
015 *     Florent Guillaume
016 */
017package org.nuxeo.ecm.core.storage.dbs;
018
019import static java.lang.Boolean.FALSE;
020import static java.lang.Boolean.TRUE;
021import static org.nuxeo.ecm.core.api.security.SecurityConstants.BROWSE;
022import static org.nuxeo.ecm.core.api.security.SecurityConstants.EVERYONE;
023import static org.nuxeo.ecm.core.api.security.SecurityConstants.UNSUPPORTED_ACL;
024import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_GRANT;
025import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_PERMISSION;
026import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACE_USER;
027import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACL;
028import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ACP;
029import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ANCESTOR_IDS;
030import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_BASE_VERSION_ID;
031import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_FULLTEXT_JOBID;
032import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_ID;
033import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_PROXY;
034import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_IS_VERSION;
035import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_MIXIN_TYPES;
036import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_NAME;
037import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PARENT_ID;
038import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_POS;
039import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PREFIX;
040import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PRIMARY_TYPE;
041import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_IDS;
042import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_TARGET_ID;
043import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_PROXY_VERSION_SERIES_ID;
044import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_READ_ACL;
045import static org.nuxeo.ecm.core.storage.dbs.DBSDocument.KEY_VERSION_SERIES_ID;
046
047import java.io.Serializable;
048import java.util.ArrayList;
049import java.util.Arrays;
050import java.util.Collections;
051import java.util.HashMap;
052import java.util.HashSet;
053import java.util.LinkedHashSet;
054import java.util.LinkedList;
055import java.util.List;
056import java.util.Map;
057import java.util.Map.Entry;
058import java.util.Set;
059
060import org.apache.commons.lang.StringUtils;
061import org.apache.commons.logging.Log;
062import org.apache.commons.logging.LogFactory;
063import org.nuxeo.ecm.core.api.ConcurrentUpdateException;
064import org.nuxeo.ecm.core.api.repository.RepositoryManager;
065import org.nuxeo.ecm.core.security.SecurityService;
066import org.nuxeo.ecm.core.storage.State.StateDiff;
067import org.nuxeo.ecm.core.storage.StateHelper;
068import org.nuxeo.ecm.core.storage.DefaultFulltextParser;
069import org.nuxeo.ecm.core.storage.FulltextConfiguration;
070import org.nuxeo.ecm.core.storage.FulltextParser;
071import org.nuxeo.ecm.core.storage.FulltextUpdaterWork;
072import org.nuxeo.ecm.core.storage.State;
073import org.nuxeo.ecm.core.storage.FulltextUpdaterWork.IndexAndText;
074import org.nuxeo.ecm.core.work.api.Work;
075import org.nuxeo.ecm.core.work.api.WorkManager;
076import org.nuxeo.ecm.core.work.api.WorkManager.Scheduling;
077import org.nuxeo.runtime.api.Framework;
078
079/**
080 * Transactional state for a session.
081 * <p>
082 * Until {@code save()} is called, data lives in the transient map.
083 * <p>
084 * Upon save, data is written to the repository, even though it has not yet been committed (this means that other
085 * sessions can read uncommitted data). It's also kept in an undo log in order for rollback to be possible.
086 * <p>
087 * On commit, the undo log is forgotten. On rollback, the undo log is replayed.
088 *
089 * @since 5.9.4
090 */
091public class DBSTransactionState {
092
093    private static final Log log = LogFactory.getLog(DBSTransactionState.class);
094
095    private static final String KEY_UNDOLOG_CREATE = "__UNDOLOG_CREATE__\0\0";
096
097    protected final DBSRepository repository;
098
099    protected final DBSSession session;
100
101    /** Retrieved and created document state. */
102    protected Map<String, DBSDocumentState> transientStates = new HashMap<String, DBSDocumentState>();
103
104    /** Ids of documents created but not yet saved. */
105    protected Set<String> transientCreated = new LinkedHashSet<String>();
106
107    /**
108     * Undo log.
109     * <p>
110     * A map of document ids to null or State. The value is null when the document has to be deleted when applying the
111     * undo log. Otherwise the value is a State. If the State contains the key {@link #KEY_UNDOLOG_CREATE} then the
112     * state must be re-created completely when applying the undo log, otherwise just applied as an update.
113     * <p>
114     * Null when there is no active transaction.
115     */
116    protected Map<String, State> undoLog;
117
118    protected final Set<String> browsePermissions;
119
120    public DBSTransactionState(DBSRepository repository, DBSSession session) {
121        this.repository = repository;
122        this.session = session;
123        SecurityService securityService = Framework.getLocalService(SecurityService.class);
124        browsePermissions = new HashSet<>(Arrays.asList(securityService.getPermissionsToCheck(BROWSE)));
125    }
126
127    protected FulltextConfiguration getFulltextConfiguration() {
128        // TODO get from DBS repo service
129        FulltextConfiguration fulltextConfiguration = new FulltextConfiguration();
130        fulltextConfiguration.indexNames.add("default");
131        fulltextConfiguration.indexesAllBinary.add("default");
132        return fulltextConfiguration;
133    }
134
135    /**
136     * New transient state for something just read from the repository.
137     */
138    protected DBSDocumentState newTransientState(State state) {
139        if (state == null) {
140            return null;
141        }
142        String id = (String) state.get(KEY_ID);
143        if (transientStates.containsKey(id)) {
144            throw new IllegalStateException("Already transient: " + id);
145        }
146        DBSDocumentState docState = new DBSDocumentState(state); // copy
147        transientStates.put(id, docState);
148        return docState;
149    }
150
151    /**
152     * Returns a state and marks it as transient, because it's about to be modified or returned to user code (where it
153     * may be modified).
154     */
155    public DBSDocumentState getStateForUpdate(String id) {
156        // check transient state
157        DBSDocumentState docState = transientStates.get(id);
158        if (docState != null) {
159            return docState;
160        }
161        // fetch from repository
162        State state = repository.readState(id);
163        return newTransientState(state);
164    }
165
166    /**
167     * Returns a state which won't be modified.
168     */
169    // TODO in some cases it's good to have this kept in memory instead of
170    // rereading from database every time
171    // XXX getStateForReadOneShot
172    public State getStateForRead(String id) {
173        // check transient state
174        DBSDocumentState docState = transientStates.get(id);
175        if (docState != null) {
176            return docState.getState();
177        }
178        // fetch from repository
179        return repository.readState(id);
180    }
181
182    /**
183     * Returns states and marks them transient, because they're about to be returned to user code (where they may be
184     * modified).
185     */
186    public List<DBSDocumentState> getStatesForUpdate(List<String> ids) {
187        // check which ones we have to fetch from repository
188        List<String> idsToFetch = new LinkedList<String>();
189        for (String id : ids) {
190            // check transient state
191            DBSDocumentState docState = transientStates.get(id);
192            if (docState != null) {
193                continue;
194            }
195            // will have to fetch it
196            idsToFetch.add(id);
197        }
198        if (!idsToFetch.isEmpty()) {
199            List<State> states = repository.readStates(idsToFetch);
200            for (State state : states) {
201                newTransientState(state);
202            }
203        }
204        // everything now fetched in transient
205        List<DBSDocumentState> docStates = new ArrayList<DBSDocumentState>(ids.size());
206        for (String id : ids) {
207            DBSDocumentState docState = transientStates.get(id);
208            if (docState != null) {
209                docStates.add(docState);
210            } else {
211                log.warn("Cannot fetch document with id: " + id, new Throwable("debug stack trace"));
212            }
213        }
214        return docStates;
215    }
216
217    // XXX TODO for update or for read?
218    public DBSDocumentState getChildState(String parentId, String name) {
219        Set<String> seen = new HashSet<String>();
220        // check transient state
221        for (DBSDocumentState docState : transientStates.values()) {
222            seen.add(docState.getId());
223            if (!parentId.equals(docState.getParentId())) {
224                continue;
225            }
226            if (!name.equals(docState.getName())) {
227                continue;
228            }
229            return docState;
230        }
231        // fetch from repository
232        State state = repository.readChildState(parentId, name, seen);
233        return newTransientState(state);
234    }
235
236    public boolean hasChild(String parentId, String name) {
237        Set<String> seen = new HashSet<String>();
238        // check transient state
239        for (DBSDocumentState docState : transientStates.values()) {
240            seen.add(docState.getId());
241            if (!parentId.equals(docState.getParentId())) {
242                continue;
243            }
244            if (!name.equals(docState.getName())) {
245                continue;
246            }
247            return true;
248        }
249        // check repository
250        return repository.hasChild(parentId, name, seen);
251    }
252
253    public List<DBSDocumentState> getChildrenStates(String parentId) {
254        List<DBSDocumentState> docStates = new LinkedList<DBSDocumentState>();
255        Set<String> seen = new HashSet<String>();
256        // check transient state
257        for (DBSDocumentState docState : transientStates.values()) {
258            seen.add(docState.getId());
259            if (!parentId.equals(docState.getParentId())) {
260                continue;
261            }
262            docStates.add(docState);
263        }
264        // fetch from repository
265        List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen);
266        for (State state : states) {
267            docStates.add(newTransientState(state));
268        }
269        return docStates;
270    }
271
272    public List<String> getChildrenIds(String parentId) {
273        List<String> children = new ArrayList<String>();
274        Set<String> seen = new HashSet<String>();
275        // check transient state
276        for (DBSDocumentState docState : transientStates.values()) {
277            String id = docState.getId();
278            seen.add(id);
279            if (!parentId.equals(docState.getParentId())) {
280                continue;
281            }
282            children.add(id);
283        }
284        // fetch from repository
285        List<State> states = repository.queryKeyValue(KEY_PARENT_ID, parentId, seen);
286        for (State state : states) {
287            children.add((String) state.get(KEY_ID));
288        }
289        return new ArrayList<String>(children);
290    }
291
292    public boolean hasChildren(String parentId) {
293        Set<String> seen = new HashSet<String>();
294        // check transient state
295        for (DBSDocumentState docState : transientStates.values()) {
296            seen.add(docState.getId());
297            if (!parentId.equals(docState.getParentId())) {
298                continue;
299            }
300            return true;
301        }
302        // check repository
303        return repository.queryKeyValuePresence(KEY_PARENT_ID, parentId, seen);
304    }
305
306    public DBSDocumentState createChild(String id, String parentId, String name, Long pos, String typeName) {
307        // id may be not-null for import
308        if (id == null) {
309            id = repository.generateNewId();
310        }
311        transientCreated.add(id);
312        DBSDocumentState docState = new DBSDocumentState();
313        transientStates.put(id, docState);
314        docState.put(KEY_ID, id);
315        docState.put(KEY_PARENT_ID, parentId);
316        docState.put(KEY_ANCESTOR_IDS, getAncestorIds(parentId));
317        docState.put(KEY_NAME, name);
318        docState.put(KEY_POS, pos);
319        docState.put(KEY_PRIMARY_TYPE, typeName);
320        // update read acls for new doc
321        updateReadAcls(id);
322        return docState;
323    }
324
325    /** Gets ancestors including id itself. */
326    protected Object[] getAncestorIds(String id) {
327        if (id == null) {
328            return null;
329        }
330        State state = getStateForRead(id);
331        if (state == null) {
332            throw new RuntimeException("No such id: " + id);
333        }
334        Object[] ancestors = (Object[]) state.get(KEY_ANCESTOR_IDS);
335        if (ancestors == null) {
336            return new Object[] { id };
337        } else {
338            Object[] newAncestors = new Object[ancestors.length + 1];
339            System.arraycopy(ancestors, 0, newAncestors, 0, ancestors.length);
340            newAncestors[ancestors.length] = id;
341            return newAncestors;
342        }
343    }
344
345    /**
346     * Copies the document into a newly-created object.
347     * <p>
348     * The copy is automatically saved.
349     */
350    public DBSDocumentState copy(String id) {
351        DBSDocumentState copyState = new DBSDocumentState(getStateForRead(id));
352        String copyId = repository.generateNewId();
353        copyState.put(KEY_ID, copyId);
354        // other fields updated by the caller
355        transientStates.put(copyId, copyState);
356        transientCreated.add(copyId);
357        return copyState;
358    }
359
360    /**
361     * Updates ancestors recursively after a move.
362     * <p>
363     * Recursing from given doc, replace the first ndel ancestors with those passed.
364     * <p>
365     * Doesn't check transient (assumes save is done). The modifications are automatically saved.
366     */
367    public void updateAncestors(String id, int ndel, Object[] ancestorIds) {
368        int nadd = ancestorIds.length;
369        Set<String> ids = getSubTree(id, null, null);
370        ids.add(id);
371        for (String cid : ids) {
372            // XXX TODO oneShot update, don't pollute transient space
373            DBSDocumentState docState = getStateForUpdate(cid);
374            Object[] ancestors = (Object[]) docState.get(KEY_ANCESTOR_IDS);
375            Object[] newAncestors;
376            if (ancestors == null) {
377                newAncestors = ancestorIds.clone();
378            } else {
379                newAncestors = new Object[ancestors.length - ndel + nadd];
380                System.arraycopy(ancestorIds, 0, newAncestors, 0, nadd);
381                System.arraycopy(ancestors, ndel, newAncestors, nadd, ancestors.length - ndel);
382            }
383            docState.put(KEY_ANCESTOR_IDS, newAncestors);
384        }
385    }
386
387    /**
388     * Updates the Read ACLs recursively on a document.
389     */
390    public void updateReadAcls(String id) {
391        // versions too XXX TODO
392        Set<String> ids = getSubTree(id, null, null);
393        ids.add(id);
394        for (String cid : ids) {
395            // XXX TODO oneShot update, don't pollute transient space
396            DBSDocumentState docState = getStateForUpdate(cid);
397            docState.put(KEY_READ_ACL, getReadACL(docState));
398        }
399    }
400
401    /**
402     * Gets the Read ACL (flat list of users having browse permission, including inheritance) on a document.
403     */
404    protected String[] getReadACL(DBSDocumentState docState) {
405        Set<String> racls = new HashSet<>();
406        State state = docState.getState();
407        LOOP: do {
408            @SuppressWarnings("unchecked")
409            List<Serializable> aclList = (List<Serializable>) state.get(KEY_ACP);
410            if (aclList != null) {
411                for (Serializable aclSer : aclList) {
412                    State aclMap = (State) aclSer;
413                    @SuppressWarnings("unchecked")
414                    List<Serializable> aceList = (List<Serializable>) aclMap.get(KEY_ACL);
415                    for (Serializable aceSer : aceList) {
416                        State aceMap = (State) aceSer;
417                        String username = (String) aceMap.get(KEY_ACE_USER);
418                        String permission = (String) aceMap.get(KEY_ACE_PERMISSION);
419                        Boolean granted = (Boolean) aceMap.get(KEY_ACE_GRANT);
420                        if (TRUE.equals(granted) && browsePermissions.contains(permission)) {
421                            racls.add(username);
422                        }
423                        if (FALSE.equals(granted)) {
424                            if (!EVERYONE.equals(username)) {
425                                // TODO log
426                                racls.add(UNSUPPORTED_ACL);
427                            }
428                            break LOOP;
429                        }
430                    }
431                }
432            }
433            // get parent
434            if (TRUE.equals(state.get(KEY_IS_VERSION))) {
435                String versionSeriesId = (String) state.get(KEY_VERSION_SERIES_ID);
436                state = versionSeriesId == null ? null : getStateForRead(versionSeriesId);
437            } else {
438                String parentId = (String) state.get(KEY_PARENT_ID);
439                state = parentId == null ? null : getStateForRead(parentId);
440            }
441        } while (state != null);
442
443        // sort to have canonical order
444        List<String> racl = new ArrayList<>(racls);
445        Collections.sort(racl);
446        return racl.toArray(new String[racl.size()]);
447    }
448
449    /**
450     * Gets all the ids under a given one, recursively.
451     * <p>
452     * Doesn't check transient (assumes save is done).
453     *
454     * @param id the root of the tree (not included in results)
455     * @param proxyTargets returns a map of proxy to target among the documents found
456     * @param targetProxies returns a map of target to proxies among the document found
457     */
458    protected Set<String> getSubTree(String id, Map<String, String> proxyTargets, Map<String, Object[]> targetProxies) {
459        Set<String> ids = new HashSet<String>();
460        // check repository
461        repository.queryKeyValueArray(KEY_ANCESTOR_IDS, id, ids, proxyTargets, targetProxies);
462        return ids;
463    }
464
465    public List<DBSDocumentState> getKeyValuedStates(String key, Object value) {
466        List<DBSDocumentState> docStates = new LinkedList<DBSDocumentState>();
467        Set<String> seen = new HashSet<String>();
468        // check transient state
469        for (DBSDocumentState docState : transientStates.values()) {
470            seen.add(docState.getId());
471            if (!value.equals(docState.get(key))) {
472                continue;
473            }
474            docStates.add(docState);
475        }
476        // fetch from repository
477        List<State> states = repository.queryKeyValue(key, value, seen);
478        for (State state : states) {
479            docStates.add(newTransientState(state));
480        }
481        return docStates;
482    }
483
484    public List<DBSDocumentState> getKeyValuedStates(String key1, Object value1, String key2, Object value2) {
485        List<DBSDocumentState> docStates = new LinkedList<DBSDocumentState>();
486        Set<String> seen = new HashSet<String>();
487        // check transient state
488        for (DBSDocumentState docState : transientStates.values()) {
489            seen.add(docState.getId());
490            if (!(value1.equals(docState.get(key1)) && value2.equals(docState.get(key2)))) {
491                continue;
492            }
493            docStates.add(docState);
494        }
495        // fetch from repository
496        List<State> states = repository.queryKeyValue(key1, value1, key2, value2, seen);
497        for (State state : states) {
498            docStates.add(newTransientState(state));
499        }
500        return docStates;
501    }
502
503    /**
504     * Removes a list of documents.
505     * <p>
506     * Called after a {@link #save} has been done.
507     */
508    public void removeStates(Set<String> ids) {
509        if (undoLog != null) {
510            for (String id : ids) {
511                if (undoLog.containsKey(id)) {
512                    // there's already a create or an update in the undo log
513                    State oldUndo = undoLog.get(id);
514                    if (oldUndo == null) {
515                        // create + delete -> forget
516                        undoLog.remove(id);
517                    } else {
518                        // update + delete -> original old state to re-create
519                        oldUndo.put(KEY_UNDOLOG_CREATE, TRUE);
520                    }
521                } else {
522                    // just delete -> store old state to re-create
523                    State oldState = StateHelper.deepCopy(getStateForRead(id));
524                    oldState.put(KEY_UNDOLOG_CREATE, TRUE);
525                    undoLog.put(id, oldState);
526                }
527            }
528        }
529        for (String id : ids) {
530            transientStates.remove(id);
531        }
532        repository.deleteStates(ids);
533    }
534
535    /**
536     * Writes transient state to database.
537     * <p>
538     * An undo log is kept in order to rollback the transaction later if needed.
539     */
540    public void save() {
541        updateProxies();
542        List<Work> works;
543        if (!repository.isFulltextDisabled()) {
544            // TODO getting fulltext already does a getStateChange
545            works = getFulltextWorks();
546        } else {
547            works = Collections.emptyList();
548        }
549        for (String id : transientCreated) { // ordered
550            DBSDocumentState docState = transientStates.get(id);
551            docState.setNotDirty();
552            if (undoLog != null) {
553                undoLog.put(id, null); // marker to denote create
554            }
555            repository.createState(docState.getState());
556        }
557        for (DBSDocumentState docState : transientStates.values()) {
558            String id = docState.getId();
559            if (transientCreated.contains(id)) {
560                continue; // already done
561            }
562            StateDiff diff = docState.getStateChange();
563            if (diff != null) {
564                if (undoLog != null) {
565                    if (!undoLog.containsKey(id)) {
566                        undoLog.put(id, StateHelper.deepCopy(docState.getOriginalState()));
567                    }
568                    // else there's already a create or an update in the undo log so original info is enough
569                }
570                repository.updateState(id, diff);
571            }
572            docState.setNotDirty();
573        }
574        transientCreated.clear();
575        scheduleWork(works);
576    }
577
578    protected void applyUndoLog() {
579        Set<String> deletes = new HashSet<>();
580        for (Entry<String, State> es : undoLog.entrySet()) {
581            String id = es.getKey();
582            State state = es.getValue();
583            if (state == null) {
584                deletes.add(id);
585            } else {
586                boolean recreate = state.remove(KEY_UNDOLOG_CREATE) != null;
587                if (recreate) {
588                    repository.createState(state);
589                } else {
590                    // undo update
591                    State currentState = repository.readState(id);
592                    if (currentState != null) {
593                        StateDiff diff = StateHelper.diff(currentState, state);
594                        if (!diff.isEmpty()) {
595                            repository.updateState(id, diff);
596                        }
597                    }
598                    // else we expected to read a current state but it was concurrently deleted...
599                    // in that case leave it deleted
600                }
601            }
602        }
603        if (!deletes.isEmpty()) {
604            repository.deleteStates(deletes);
605        }
606    }
607
608    /**
609     * Checks if the changed documents are proxy targets, and updates the proxies if that's the case.
610     */
611    protected void updateProxies() {
612        for (String id : transientCreated) { // ordered
613            DBSDocumentState docState = transientStates.get(id);
614            Object[] proxyIds = (Object[]) docState.get(KEY_PROXY_IDS);
615            if (proxyIds != null) {
616                for (Object proxyId : proxyIds) {
617                    updateProxy(docState, (String) proxyId);
618                }
619            }
620        }
621        // copy as we may modify proxies
622        for (String id : transientStates.keySet().toArray(new String[0])) {
623            DBSDocumentState docState = transientStates.get(id);
624            if (transientCreated.contains(id)) {
625                continue; // already done
626            }
627            if (docState.isDirty()) {
628                Object[] proxyIds = (Object[]) docState.get(KEY_PROXY_IDS);
629                if (proxyIds != null) {
630                    for (Object proxyId : proxyIds) {
631                        try {
632                            updateProxy(docState, (String) proxyId);
633                        } catch (ConcurrentUpdateException e) {
634                            e.addInfo("On doc " + docState.getId());
635                            throw e;
636                        }
637                    }
638                }
639            }
640        }
641    }
642
643    /**
644     * Updates the state of a proxy based on its target.
645     */
646    protected void updateProxy(DBSDocumentState target, String proxyId) {
647        DBSDocumentState proxy = getStateForUpdate(proxyId);
648        if (proxy == null) {
649            rollback(); // XXX
650            throw new ConcurrentUpdateException("Proxy " + proxyId + " concurrently deleted");
651        }
652        // clear all proxy data
653        for (String key : proxy.getState().keyArray()) {
654            if (!isProxySpecific(key)) {
655                proxy.put(key, null);
656            }
657        }
658        // copy from target
659        for (Entry<String, Serializable> en : target.getState().entrySet()) {
660            String key = en.getKey();
661            if (!isProxySpecific(key)) {
662                proxy.put(key, StateHelper.deepCopy(en.getValue()));
663            }
664        }
665    }
666
667    /**
668     * Things that we don't touch on a proxy when updating it.
669     */
670    protected boolean isProxySpecific(String key) {
671        switch (key) {
672        // these are placeful stuff
673        case KEY_ID:
674        case KEY_PARENT_ID:
675        case KEY_ANCESTOR_IDS:
676        case KEY_NAME:
677        case KEY_POS:
678        case KEY_ACP:
679        case KEY_READ_ACL:
680            // these are proxy-specific
681        case KEY_IS_PROXY:
682        case KEY_PROXY_TARGET_ID:
683        case KEY_PROXY_VERSION_SERIES_ID:
684        case KEY_IS_VERSION:
685        case KEY_PROXY_IDS:
686            return true;
687        }
688        return false;
689    }
690
691    /**
692     * Called when created in a transaction.
693     *
694     * @since 7.4
695     */
696    public void begin() {
697        undoLog = new HashMap<String, State>();
698    }
699
700    /**
701     * Saves and flushes to database.
702     */
703    public void commit() {
704        save();
705        commitSave();
706    }
707
708    /**
709     * Commits the saved state to the database.
710     */
711    protected void commitSave() {
712        // clear transient, this means that after this references to states will be stale
713        // TODO mark states as invalid
714        clearTransient();
715        // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway
716        undoLog = null;
717    }
718
719    /**
720     * Rolls back the save state by applying the undo log.
721     */
722    public void rollback() {
723        clearTransient();
724        applyUndoLog();
725        // the transaction ended, the proxied DBSSession will disappear and cannot be reused anyway
726        undoLog = null;
727    }
728
729    protected void clearTransient() {
730        transientStates.clear();
731        transientCreated.clear();
732    }
733
734    /**
735     * Gets the fulltext updates to do. Called at save() time.
736     *
737     * @return a list of {@link Work} instances to schedule post-commit.
738     */
739    protected List<Work> getFulltextWorks() {
740        Set<String> docsWithDirtyStrings = new HashSet<String>();
741        Set<String> docsWithDirtyBinaries = new HashSet<String>();
742        findDirtyDocuments(docsWithDirtyStrings, docsWithDirtyBinaries);
743        if (docsWithDirtyStrings.isEmpty() && docsWithDirtyBinaries.isEmpty()) {
744            return Collections.emptyList();
745        }
746        List<Work> works = new LinkedList<Work>();
747        getFulltextSimpleWorks(works, docsWithDirtyStrings);
748        getFulltextBinariesWorks(works, docsWithDirtyBinaries);
749        return works;
750    }
751
752    /**
753     * Finds the documents having dirty text or dirty binaries that have to be reindexed as fulltext.
754     *
755     * @param docsWithDirtyStrings set of ids, updated by this method
756     * @param docWithDirtyBinaries set of ids, updated by this method
757     */
758    protected void findDirtyDocuments(Set<String> docsWithDirtyStrings, Set<String> docWithDirtyBinaries) {
759        for (String id : transientCreated) {
760            docsWithDirtyStrings.add(id);
761            docWithDirtyBinaries.add(id);
762        }
763        for (DBSDocumentState docState : transientStates.values()) {
764            // TODO finer-grained dirty state
765            if (docState.isDirtyIgnoringFulltext()) {
766                String id = docState.getId();
767                docsWithDirtyStrings.add(id);
768                docWithDirtyBinaries.add(id);
769            }
770        }
771    }
772
773    protected void getFulltextSimpleWorks(List<Work> works, Set<String> docsWithDirtyStrings) {
774        // TODO XXX make configurable, see also FulltextExtractorWork
775        FulltextParser fulltextParser = new DefaultFulltextParser();
776        // update simpletext on documents with dirty strings
777        for (String id : docsWithDirtyStrings) {
778            if (id == null) {
779                // cannot happen, but has been observed :(
780                log.error("Got null doc id in fulltext update, cannot happen");
781                continue;
782            }
783            DBSDocumentState docState = getStateForUpdate(id);
784            if (docState == null) {
785                // cannot happen
786                continue;
787            }
788            String documentType = docState.getPrimaryType();
789            // Object[] mixinTypes = (Object[]) docState.get(KEY_MIXIN_TYPES);
790
791            FulltextConfiguration config = getFulltextConfiguration();
792            if (!config.isFulltextIndexable(documentType)) {
793                continue;
794            }
795            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
796            FulltextFinder fulltextFinder = new FulltextFinder(fulltextParser, docState, session);
797            List<IndexAndText> indexesAndText = new LinkedList<IndexAndText>();
798            for (String indexName : config.indexNames) {
799                // TODO paths from config
800                String text = fulltextFinder.findFulltext(indexName);
801                indexesAndText.add(new IndexAndText(indexName, text));
802            }
803            if (!indexesAndText.isEmpty()) {
804                Work work = new FulltextUpdaterWork(repository.getName(), id, true, false, indexesAndText);
805                works.add(work);
806            }
807        }
808    }
809
810    protected void getFulltextBinariesWorks(List<Work> works, Set<String> docWithDirtyBinaries) {
811        if (docWithDirtyBinaries.isEmpty()) {
812            return;
813        }
814
815        // TODO get from extension point, see also FulltextExtractorWork
816        // XXX hardcoded config for now
817        FulltextConfiguration config = getFulltextConfiguration();
818
819        // mark indexing in progress, so that future copies (including versions)
820        // will be indexed as well
821        for (String id : docWithDirtyBinaries) {
822            DBSDocumentState docState = getStateForUpdate(id);
823            if (docState == null) {
824                // cannot happen
825                continue;
826            }
827            if (!config.isFulltextIndexable(docState.getPrimaryType())) {
828                continue;
829            }
830            docState.put(KEY_FULLTEXT_JOBID, docState.getId());
831        }
832
833        // FulltextExtractorWork does fulltext extraction using converters
834        // and then schedules a FulltextUpdaterWork to write the results
835        // single-threaded
836        for (String id : docWithDirtyBinaries) {
837            // don't exclude proxies
838            Work work = new DBSFulltextExtractorWork(repository.getName(), id);
839            works.add(work);
840        }
841    }
842
843    protected static class FulltextFinder {
844
845        protected final FulltextParser fulltextParser;
846
847        protected final DBSDocumentState document;
848
849        protected final DBSSession session;
850
851        protected final String documentType;
852
853        protected final Object[] mixinTypes;
854
855        /**
856         * Prepares parsing for one document.
857         */
858        public FulltextFinder(FulltextParser fulltextParser, DBSDocumentState document, DBSSession session) {
859            this.fulltextParser = fulltextParser;
860            this.document = document;
861            this.session = session;
862            if (document == null) {
863                documentType = null;
864                mixinTypes = null;
865            } else { // null in tests
866                documentType = document.getPrimaryType();
867                mixinTypes = (Object[]) document.get(KEY_MIXIN_TYPES);
868            }
869        }
870
871        /**
872         * Parses the document for one index.
873         */
874        public String findFulltext(String indexName) {
875            // TODO indexName
876            // TODO paths
877            List<String> strings = new ArrayList<String>();
878            findFulltext(indexName, document.getState(), strings);
879            return StringUtils.join(strings, ' ');
880        }
881
882        protected void findFulltext(String indexName, State state, List<String> strings) {
883            for (Entry<String, Serializable> en : state.entrySet()) {
884                String key = en.getKey();
885                if (key.startsWith(KEY_PREFIX)) {
886                    switch (key) {
887                    // allow indexing of this:
888                    case DBSDocument.KEY_NAME:
889                        break;
890                    default:
891                        continue;
892                    }
893                }
894                Serializable value = en.getValue();
895                if (value instanceof State) {
896                    State s = (State) value;
897                    findFulltext(indexName, s, strings);
898                } else if (value instanceof List) {
899                    @SuppressWarnings("unchecked")
900                    List<State> v = (List<State>) value;
901                    for (State s : v) {
902                        findFulltext(indexName, s, strings);
903                    }
904                } else if (value instanceof Object[]) {
905                    Object[] ar = (Object[]) value;
906                    for (Object v : ar) {
907                        if (v instanceof String) {
908                            fulltextParser.parse((String) v, null, strings);
909                        } else {
910                            // arrays are homogeneous, no need to continue
911                            break;
912                        }
913                    }
914                } else {
915                    if (value instanceof String) {
916                        fulltextParser.parse((String) value, null, strings);
917                    }
918                }
919            }
920        }
921    }
922
923    protected void scheduleWork(List<Work> works) {
924        // do async fulltext indexing only if high-level sessions are available
925        RepositoryManager repositoryManager = Framework.getLocalService(RepositoryManager.class);
926        if (repositoryManager != null && !works.isEmpty()) {
927            WorkManager workManager = Framework.getLocalService(WorkManager.class);
928            for (Work work : works) {
929                // schedule work post-commit
930                // in non-tx mode, this may execute it nearly immediately
931                workManager.schedule(work, Scheduling.IF_NOT_SCHEDULED, true);
932            }
933        }
934    }
935
936}