001/* 002 * (C) Copyright 2014 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * bdelbosc 018 */ 019package org.nuxeo.elasticsearch.fetcher; 020 021import static org.nuxeo.elasticsearch.ElasticSearchConstants.HIGHLIGHT_CTX_DATA; 022 023import java.io.Serializable; 024import java.util.ArrayList; 025import java.util.Collections; 026import java.util.Comparator; 027import java.util.HashMap; 028import java.util.List; 029import java.util.Map; 030 031import org.elasticsearch.action.search.SearchResponse; 032import org.elasticsearch.common.text.Text; 033import org.elasticsearch.search.SearchHit; 034import org.elasticsearch.search.highlight.HighlightField; 035import org.nuxeo.ecm.core.api.CoreInstance; 036import org.nuxeo.ecm.core.api.CoreSession; 037import org.nuxeo.ecm.core.api.DocumentModel; 038import org.nuxeo.ecm.core.api.impl.DocumentModelListImpl; 039import org.nuxeo.ecm.core.query.sql.NXQL; 040 041/** 042 * @since 6.0 043 */ 044public class VcsFetcher extends Fetcher { 045 046 private static final int CHUNK_SIZE = 100; 047 048 public VcsFetcher(CoreSession session, SearchResponse response, Map<String, String> repoNames) { 049 super(session, response, repoNames); 050 } 051 052 @Override 053 public DocumentModelListImpl fetchDocuments() { 054 Map<String, List<String>> repoHits = getHitsPerRepository(); 055 List<DocumentModel> docs = new ArrayList<>(); 056 String openSessionRepository = getSession().getRepositoryName(); 057 boolean closeSession; 058 CoreSession session; 059 for (String repo : repoHits.keySet()) { 060 if (openSessionRepository.equals(repo)) { 061 session = getSession(); 062 closeSession = false; 063 } else { 064 session = CoreInstance.openCoreSession(repo); 065 closeSession = true; 066 } 067 try { 068 docs.addAll(fetchFromVcs(repoHits.get(repo), session)); 069 } finally { 070 if (closeSession) { 071 session.close(); 072 } 073 } 074 } 075 sortResults(docs); 076 addHighlights(docs); 077 DocumentModelListImpl ret = new DocumentModelListImpl(docs.size()); 078 if (!docs.isEmpty()) { 079 ret.addAll(docs); 080 } 081 return ret; 082 } 083 084 private Map<String, List<String>> getHitsPerRepository() { 085 Map<String, List<String>> ret = new HashMap<>(); 086 for (SearchHit hit : getResponse().getHits()) { 087 String repoName = getRepoForIndex(hit.getIndex()); 088 List<String> docIds = ret.get(repoName); 089 if (docIds == null) { 090 docIds = new ArrayList<>(); 091 ret.put(repoName, docIds); 092 } 093 docIds.add(hit.getId()); 094 } 095 return ret; 096 } 097 098 private List<DocumentModel> fetchFromVcs(List<String> ids, CoreSession session) { 099 List<DocumentModel> ret = null; 100 int size = ids.size(); 101 int start = 0; 102 int end = Math.min(CHUNK_SIZE, size); 103 boolean done = false; 104 while (!done) { 105 List<DocumentModel> docs = fetchFromVcsChunk(ids.subList(start, end), session); 106 if (ret == null) { 107 ret = docs; 108 } else { 109 ret.addAll(docs); 110 } 111 if (end >= ids.size()) { 112 done = true; 113 } else { 114 start = end; 115 end = Math.min(start + CHUNK_SIZE, size); 116 } 117 } 118 return ret; 119 } 120 121 private List<DocumentModel> fetchFromVcsChunk(final List<String> ids, CoreSession session) 122 { 123 StringBuilder sb = new StringBuilder(); 124 sb.append("SELECT * FROM Document, Relation WHERE ecm:uuid IN ("); 125 for (int i = 0; i < ids.size(); i++) { 126 sb.append(NXQL.escapeString(ids.get(i))); 127 if (i < ids.size() - 1) { 128 sb.append(", "); 129 } 130 } 131 sb.append(")"); 132 return session.query(sb.toString()); 133 } 134 135 private void addHighlights(List<DocumentModel> docs) { 136 for (SearchHit hit : getResponse().getHits()) { 137 for (DocumentModel doc : docs) { 138 String docId = doc.getRepositoryName() + doc.getId(); 139 String hitId = getRepoForIndex(hit.getIndex()) + hit.getId(); 140 if (docId.equals(hitId)) { 141 // Add highlight if it exists 142 Map<String, HighlightField> esHighlights = hit.highlightFields(); 143 if (!esHighlights.isEmpty()) { 144 Map<String, List<String>> fields = new HashMap<>(); 145 for (Map.Entry<String, HighlightField> entry : esHighlights.entrySet()) { 146 String field = entry.getKey(); 147 List<String> list = new ArrayList<>(); 148 for (Text fragment : entry.getValue().getFragments()) { 149 list.add(fragment.toString()); 150 } 151 fields.put(field, list); 152 } 153 doc.putContextData(HIGHLIGHT_CTX_DATA, (Serializable) fields); 154 } 155 break; 156 } 157 } 158 } 159 } 160 161 private void sortResults(List<DocumentModel> docs) { 162 final List<String> ids = new ArrayList<>(); 163 for (SearchHit hit : getResponse().getHits()) { 164 ids.add(getRepoForIndex(hit.getIndex()) + hit.getId()); 165 } 166 167 Collections.sort(docs, new Comparator<DocumentModel>() { 168 @Override 169 public int compare(DocumentModel a, DocumentModel b) { 170 return ids.indexOf(a.getRepositoryName() + a.getId()) - ids.indexOf(b.getRepositoryName() + b.getId()); 171 } 172 }); 173 174 } 175 176}