001/*
002 * (C) Copyright 2016 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 */
017package org.nuxeo.ecm.platform.importer.mqueues.pattern.producer;
018
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.platform.importer.mqueues.pattern.message.DocumentMessage;
import org.nuxeo.ecm.platform.importer.random.HunspellDictionaryHolder;
import org.nuxeo.ecm.platform.importer.random.RandomTextGenerator;

import java.io.Serializable;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
035
036/**
037 * @since 9.1
038 */
039public class RandomDocumentMessageProducer extends AbstractProducer<DocumentMessage> {
040    private static final Log log = LogFactory.getLog(RandomDocumentMessageProducer.class);
041    private final long nbDocuments;
042    private final RandomBlobInfoProvider blobInfoProvider;
043    private boolean countFolderAsDocument = true;
044    private int maxFoldersPerFolder = 50;
045    private int maxDocumentsPerFolder = 500;
046    private int blobSizeKB = 0;
047    private boolean blobOnlyText = false;
048
049    private int documentCount = 0;
050    private int folderCount = 0;
051    private final Random rand;
052    private static RandomTextGenerator gen;
053
054    static protected final String[] DC_NATURE = {"article", "acknowledgement", "assessment", "application", "order",
055            "contract", "quotation", "fax", "worksheet", "letter", "memo", "note", "notification", "procedure",
056            "report", "internshipReport", "pressReview"};
057
058    static protected final String[] DC_SUBJECTS = {"art/architecture", "art/comics", "art/cinema", "art/culture", "art/danse",
059            "art/music", "sciences/astronomy", "sciences/biology", "sciences/chemistry", "sciences/math",
060            "sciences/physic", "society/ecology", "daily life/gastronomy", "daily life/gardening", "daily life/sport",
061            "technology/it"};
062
063    static protected final String[] DC_RIGHTS = {"OpenContentL", "CC-BY-NC", "CC-BY-ND", "FreeArt", "ODbi", "GNUGPL",
064            "FreeBSD", "CC0"};
065
066    static protected final String[] DC_LANGUAGE = {"IT", "DE", "FR", "US", "EN"};
067
068    static protected final String[] DC_SOURCE = {"internal", "external", "unknown"};
069
070    static protected final String[] DC_COVERAGE = {"europe/France", "europe/Germany", "europe/Italy", "europe/Spain",
071            "oceania/Tonga", "africa/Mali", "asia/Japan", "north-america/United_States_of_America"};
072    private int foldersInCurrentFolderLimit;
073    private int documentInCurrentFolderLimit;
074
075    private enum DocType {Root, Folder, Document}
076
077    private DocType currentType = DocType.Root;
078    private int parentIndex = 0;
079    private List<String> parents = new ArrayList<>();
080    private List<String> children = new ArrayList<>();
081    private int documentInCurrentFolderCount = 0;
082
083    public RandomDocumentMessageProducer(int producerId, long nbDocuments, String lang, Path blobInfoDirectory) {
084        super(producerId);
085        this.nbDocuments = nbDocuments;
086        rand = ThreadLocalRandom.current();
087
088        synchronized (RandomDocumentMessageProducer.class) {
089            if (gen == null) {
090                gen = new RandomTextGenerator(new HunspellDictionaryHolder(lang));
091                gen.prefilCache();
092            }
093        }
094        if (blobInfoDirectory != null) {
095            this.blobInfoProvider = new RandomBlobInfoProvider(blobInfoDirectory, producerId);
096        } else {
097            this.blobInfoProvider = null;
098        }
099        log.info("RandomDocumentMessageProducer created, nbDocuments: " + nbDocuments);
100    }
101
102    public RandomDocumentMessageProducer setMaxFoldersPerFolder(int max) {
103        maxFoldersPerFolder = max;
104        return this;
105    }
106
107    public RandomDocumentMessageProducer setMaxDocumentsPerFolder(int max) {
108        maxDocumentsPerFolder = max;
109        return this;
110    }
111
112    public RandomDocumentMessageProducer countFolderAsDocument(boolean value) {
113        countFolderAsDocument = value;
114        return this;
115    }
116
117    public RandomDocumentMessageProducer withBlob(int sizeKB, boolean onlyText) {
118        this.blobSizeKB = sizeKB;
119        this.blobOnlyText = onlyText;
120        return this;
121    }
122
123
124    @Override
125    public int getPartition(DocumentMessage message, int partitions) {
126        return getProducerId() % partitions;
127    }
128
129    @Override
130    public boolean hasNext() {
131        if (countFolderAsDocument) {
132            return (documentCount + folderCount) < nbDocuments;
133        }
134        return documentCount <= nbDocuments;
135    }
136
137    @Override
138    public DocumentMessage next() {
139        DocumentMessage ret;
140        switch (currentType) {
141            case Root:
142                ret = createRoot();
143                parents.add(ret.getId());
144                currentType = DocType.Folder;
145                foldersInCurrentFolderLimit = rand.nextInt(maxFoldersPerFolder) + 1;
146                break;
147            case Folder:
148                ret = createFolder(parents.get(parentIndex));
149                children.add(ret.getId());
150                if (children.size() >= foldersInCurrentFolderLimit) {
151                    currentType = DocType.Document;
152                    documentInCurrentFolderCount = 0;
153                    documentInCurrentFolderLimit = rand.nextInt(maxDocumentsPerFolder);
154                }
155                break;
156            default:
157            case Document:
158                ret = createDocument(parents.get(parentIndex), children);
159                documentInCurrentFolderCount += 1;
160                if (documentInCurrentFolderCount > documentInCurrentFolderLimit) {
161                    parentIndex += 1;
162                    if (parentIndex >= parents.size()) {
163                        parents.clear();
164                        parents = children;
165                        children = new ArrayList<>();
166                        parentIndex = 0;
167                    }
168                    currentType = DocType.Folder;
169                    foldersInCurrentFolderLimit = rand.nextInt(maxFoldersPerFolder) + 1;
170                }
171                break;
172        }
173        // log.debug(ret.getType() + ": " + ret.getId());
174        return ret;
175    }
176
177    private DocumentMessage createRoot() {
178        folderCount++;
179        return getRandomNodeWithPrefix(String.format("%02d-", getProducerId()), "Folder", "");
180    }
181
182
183    private DocumentMessage createFolder(String parentPath) {
184        DocumentMessage node = getRandomNode("Folder", parentPath, false);
185        folderCount++;
186        return node;
187    }
188
189    private DocumentMessage createDocument(String parentPath, List<String> exclude) {
190        DocumentMessage node = getRandomNode("File", parentPath, true);
191        String ret = node.getId();
192        while (exclude.contains(ret)) {
193            log.debug("duplicate found");
194            node = getRandomNode("File", parentPath, true);
195            ret = node.getId();
196        }
197        documentCount++;
198        return node;
199    }
200
201    private DocumentMessage getRandomNode(String type, String parentPath, boolean withBlob) {
202        String title = getTitle();
203        String name = getName(title);
204        HashMap<String, Serializable> props = getRandomProperties(title);
205        DocumentMessage.Builder builder = DocumentMessage.builder(type, parentPath, name).setProperties(props);
206        if (withBlob) {
207            if (blobInfoProvider != null) {
208                builder.setBlobInfo(blobInfoProvider.getBlobInfo(builder));
209            } else {
210                builder.setBlob(getRandomBlob());
211            }
212        }
213        return builder.build();
214    }
215
216    private DocumentMessage getRandomNodeWithPrefix(String prefix, String type, String parentPath) {
217        String title = getTitle();
218        String name = prefix + getName(title);
219        HashMap<String, Serializable> props = getRandomProperties(title);
220        DocumentMessage.Builder builder = DocumentMessage.builder(type, parentPath, name).setProperties(props);
221        if (blobInfoProvider != null) {
222            builder.setBlobInfo(blobInfoProvider.getBlobInfo(builder));
223        } else {
224            builder.setBlob(getRandomBlob());
225        }
226        return builder.build();
227    }
228
229    private Blob getRandomBlob() {
230        if (blobSizeKB == 0) {
231            return null;
232        }
233        String content = gen.getRandomText(blobSizeKB);
234        return Blobs.createBlob(content, getBlobMimeType(), null, getName(getTitle()) + ".txt");
235    }
236
237    private String getBlobMimeType() {
238        if (blobOnlyText) {
239            return "text/plain";
240        } else {
241            return "text/partial";
242        }
243    }
244
245    private String getName(String title) {
246        return title.replaceAll("\\W+", "-").toLowerCase();
247    }
248
249    private String getTitle() {
250        return capitalize(gen.getRandomTitle(rand.nextInt(3) + 1).trim());
251        //  return "f" + folderCount;
252    }
253
254    private String capitalize(final String line) {
255        return Character.toUpperCase(line.charAt(0)) + line.substring(1);
256    }
257
258    protected HashMap<String, Serializable> getRandomProperties(String title) {
259        HashMap<String, Serializable> ret = new HashMap<>();
260        ret.put("dc:title", title);
261        if (rand.nextInt(10) == 1) {
262            String description = gen.getRandomTitle(rand.nextInt(5) + 1);
263            ret.put("dc:description", capitalize(description));
264        }
265        ret.put("dc:nature", getGaussian(DC_NATURE));
266        ret.put("dc:subjects", (Serializable) Collections.singletonList(getGaussian(DC_SUBJECTS)));
267        ret.put("dc:rights", getGaussian(DC_RIGHTS));
268        ret.put("dc:language", getGaussian(DC_LANGUAGE));
269        ret.put("dc:coverage", getGaussian(DC_COVERAGE));
270        ret.put("dc:source", getGaussian(DC_SOURCE));
271        return ret;
272    }
273
274    protected String getGaussian(String[] words) {
275        double g = Math.abs(rand.nextGaussian() / 4);
276        g = Math.min(g, 1);
277        int i = (int) Math.floor(g * (words.length - 1));
278        return words[i];
279    }
280
281}