001/*
002 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Delbosc Benoit
018 */
019
020package org.nuxeo.elasticsearch.work;
021
022import org.apache.commons.logging.Log;
023import org.apache.commons.logging.LogFactory;
024import org.nuxeo.ecm.core.api.ScrollResult;
025import org.nuxeo.ecm.core.work.api.Work;
026import org.nuxeo.ecm.core.work.api.WorkManager;
027import org.nuxeo.runtime.api.Framework;
028import org.nuxeo.runtime.transaction.TransactionHelper;
029
030import java.util.Collections;
031import java.util.List;
032
033import static org.nuxeo.elasticsearch.ElasticSearchConstants.REINDEX_BUCKET_READ_PROPERTY;
034
035/**
036 * Worker to reindex a large amount of document
037 *
038 * @since 7.1
039 */
040public class ScrollingIndexingWorker extends BaseIndexingWorker implements Work {
041    private static final Log log = LogFactory.getLog(ScrollingIndexingWorker.class);
042
043    private static final long serialVersionUID = -4507677669419340384L;
044
045    private static final String DEFAULT_BUCKET_SIZE = "500";
046
047    private static final long WARN_DOC_COUNT = 500;
048
049    protected final String nxql;
050
051    protected transient WorkManager workManager;
052
053    protected long documentCount = 0;
054
055    public ScrollingIndexingWorker(String repositoryName, String nxql) {
056        this.repositoryName = repositoryName;
057        this.nxql = nxql;
058    }
059
060    @Override
061    public String getTitle() {
062        return "Elasticsearch scrolling indexer: " + nxql + ", processed " + documentCount;
063    }
064
065    @Override
066    protected void doWork() {
067        String jobName = getSchedulePath().getPath();
068        if (log.isDebugEnabled()) {
069            log.debug(String.format("Re-indexing job: %s started, NXQL: %s on repository: %s", jobName, nxql,
070                    repositoryName));
071        }
072        openSystemSession();
073        int bucketSize = getBucketSize();
074        ScrollResult ret = session.scroll(nxql, bucketSize, 60);
075        int bucketCount = 0;
076        boolean warnAtEnd = false;
077        try {
078            while (ret.hasResults()) {
079                documentCount += ret.getResultIds().size();
080                scheduleBucketWorker(ret.getResultIds(), false);
081                bucketCount += 1;
082                ret = session.scroll(ret.getScrollId());
083                TransactionHelper.commitOrRollbackTransaction();
084                TransactionHelper.startTransaction();
085            }
086            if (documentCount > WARN_DOC_COUNT) {
087                warnAtEnd = true;
088                scheduleBucketWorker(Collections.emptyList(), warnAtEnd);
089            }
090        } finally {
091            if (warnAtEnd || log.isDebugEnabled()) {
092                String message = String.format("Re-indexing job: %s has submited %d documents in %d bucket workers",
093                        jobName, documentCount, bucketCount);
094                if (warnAtEnd) {
095                    log.warn(message);
096                } else {
097                    log.debug(message);
098                }
099            }
100        }
101    }
102
103    protected void scheduleBucketWorker(List<String> bucket, boolean isLast) {
104        if (bucket.isEmpty()) {
105            return;
106        }
107        BucketIndexingWorker subWorker = new BucketIndexingWorker(repositoryName, bucket, isLast);
108        getWorkManager().schedule(subWorker);
109    }
110
111    protected WorkManager getWorkManager() {
112        if (workManager == null) {
113            workManager = Framework.getLocalService(WorkManager.class);
114        }
115        return workManager;
116    }
117
118    protected int getBucketSize() {
119        String value = Framework.getProperty(REINDEX_BUCKET_READ_PROPERTY, DEFAULT_BUCKET_SIZE);
120        return Integer.parseInt(value);
121    }
122
123}