001/*
002 * (C) Copyright 2006-2008 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 *
019 * $Id$
020 */
021
022package org.nuxeo.ecm.platform.importer.source;
023
024import java.io.Serializable;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.Random;
031
032import org.apache.commons.logging.Log;
033import org.apache.commons.logging.LogFactory;
034import org.nuxeo.ecm.core.api.Blob;
035import org.nuxeo.ecm.core.api.Blobs;
036import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
037import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
038import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolderWithProperties;
039import org.nuxeo.ecm.platform.importer.random.DictionaryHolder;
040import org.nuxeo.ecm.platform.importer.random.HunspellDictionaryHolder;
041import org.nuxeo.ecm.platform.importer.random.RandomTextGenerator;
042
043
044/**
045 * Random {@link SourceNode} to be used for load testing
046 *
047 * @author Thierry Delprat
048 */
049public class RandomTextSourceNode implements SourceNode {
050
051    private static final Log log = LogFactory.getLog(RandomTextSourceNode.class);
052
053    protected static RandomTextGenerator gen;
054
055    protected static int maxNode = 10000;
056
057    /**
058     * Used in {@link #getMaxChildren()} and {@link #getMaxFolderish()}.
059     */
060    protected static boolean nonUniformRepartition = false;
061
062    public static int maxDepth = 8;
063
064    public static int defaultNbDataNodesPerFolder = 100;
065
066    /**
067     * Used to generate a big number of children nodes when {@link #nonUniformRepartition} is {@code true}.
068     */
069    public static int bigNbNodesFactor = 50;
070
071    /**
072     * Used to generate a small number of children nodes when {@link #nonUniformRepartition} is {@code true}.
073     */
074    public static int smallNbNodesDivider = defaultNbDataNodesPerFolder;
075
076    protected static int minGlobalFolders = 0;
077
078    protected static int minFoldersPerNode = 0;
079
080    protected static Integer nbNodes = 0;
081
082    protected static Integer nbFolders = 0;
083
084    protected static Integer nbVisitedFolders = 0;
085
086    protected static Long size;
087
088    protected Random hazard;
089
090    protected String name;
091
092    protected boolean folderish;
093
094    protected int level = 0;
095
096    protected int idx = 0;
097
098    protected static Integer blobSizeInKB;
099
100    protected List<SourceNode> cachedChildren = null;
101
102    public static boolean CACHE_CHILDREN = false;
103
104    protected boolean onlyText = true;
105
106    protected boolean withProperties = false;
107
108    static protected String[] DC_NATURE = { "article", "acknowledgement", "assessment", "application", "order",
109            "contract", "quotation", "fax", "worksheet", "letter", "memo", "note", "notification", "procedure",
110            "report", "internshipReport", "pressReview"};
111
112    static protected String[] DC_SUBJECTS = {"art/architecture", "art/comics", "art/cinema", "art/culture","art/danse",
113            "art/music", "sciences/astronomy", "sciences/biology", "sciences/chemistry", "sciences/math",
114            "sciences/physic", "society/ecology", "daily life/gastronomy", "daily life/gardening", "daily life/sport",
115            "technology/it" };
116
117    static protected String[] DC_RIGHTS = { "OpenContentL", "CC-BY-NC", "CC-BY-ND", "FreeArt", "ODbi", "GNUGPL",
118            "FreeBSD", "CC0"};
119
120    static protected String[] DC_LANGUAGE = { "IT", "DE", "FR", "US", "EN"};
121
122    static protected String[] DC_SOURCE = { "internal", "external", "unknown" };
123
124    static protected String[] DC_COVERAGE = { "europe/France", "europe/Germany", "europe/Italy", "europe/Spain",
125            "oceania/Tonga", "africa/Mali", "asia/Japan", "north-america/United_States_of_America" };
126
127    public RandomTextSourceNode(boolean folderish, int level, int idx, boolean onlyText, boolean withProperties) {
128        this.folderish = folderish;
129        hazard = new Random();
130        this.level = level;
131        this.idx = idx;
132        this.onlyText = onlyText;
133        this.withProperties = withProperties;
134    }
135
136    public RandomTextSourceNode(boolean folderish, int level, int idx, boolean onlyText) {
137        this(folderish, level, idx, onlyText, false);
138    }
139
140    public static RandomTextSourceNode init(int maxSize) {
141        return init(maxSize, null, true);
142    }
143
144    public static RandomTextSourceNode init(int maxSize, Integer blobSizeInKB, boolean onlyText) {
145        return init(maxSize, blobSizeInKB, onlyText, false, false, null);
146    }
147
148    public static RandomTextSourceNode init(int maxSize, Integer blobSizeInKB, boolean onlyText, boolean nonUniform,
149                                            boolean withProperties, String lang) {
150        return init(maxSize, blobSizeInKB, onlyText, new HunspellDictionaryHolder(lang), nonUniform,
151                withProperties);
152    }
153
154    public static RandomTextSourceNode init(int maxSize, Integer blobSizeInKB, boolean onlyText,
155            DictionaryHolder dictionaryHolder, boolean nonUniform, boolean withProperties) {
156        gen = new RandomTextGenerator(dictionaryHolder);
157        gen.prefilCache();
158        maxNode = maxSize;
159        nbNodes = 0;
160        nbFolders = 1;
161        nbVisitedFolders = 0;
162        size = new Long(0);
163        RandomTextSourceNode.blobSizeInKB = blobSizeInKB;
164        minGlobalFolders = maxNode / defaultNbDataNodesPerFolder;
165        minFoldersPerNode = 1 + (int) Math.pow(minGlobalFolders, (1.0 / maxDepth));
166        nonUniformRepartition = nonUniform;
167        return new RandomTextSourceNode(true, 0, 0, onlyText, withProperties);
168    }
169
170    protected String getBlobMimeType() {
171        if (onlyText) {
172            return "text/plain";
173        } else {
174            return "text/partial";
175        }
176    }
177
178    private String capitalize(final String line) {
179        return Character.toUpperCase(line.charAt(0)) + line.substring(1);
180    }
181
182    @Override
183    public BlobHolder getBlobHolder() {
184        String content = null;
185        if (folderish) {
186            if (withProperties) {
187                return new SimpleBlobHolderWithProperties((Blob) null, getRandomProperties(content));
188            }
189            return null;
190        }
191        if (blobSizeInKB == null) {
192            content = gen.getRandomText();
193        } else {
194            content = gen.getRandomText(blobSizeInKB);
195        }
196        synchronized (size) {
197            size += content.length();
198        }
199        Blob blob = Blobs.createBlob(content, getBlobMimeType(), null, getName() + ".txt");
200        if (withProperties) {
201            return new SimpleBlobHolderWithProperties(blob, getRandomProperties(content));
202        }
203        return new SimpleBlobHolder(blob);
204    }
205
206    protected Map<String, Serializable> getRandomProperties(String content) {
207        Map<String, Serializable> ret = new HashMap<>();
208        ret.put("dc:title", capitalize(getName()));
209        if (hazard.nextInt(10) == 1) {
210            String description;
211            if (content != null && ! content.isEmpty()) {
212                description = content.substring(0, content.indexOf(' ', 40));
213            } else {
214                description = gen.getRandomTitle(hazard.nextInt(5)+1);
215            }
216            ret.put("dc:description", capitalize(description));
217        }
218        ret.put("dc:nature", getGaussian(DC_NATURE));
219        ret.put("dc:subjects", (Serializable) Arrays.asList(getGaussian(DC_SUBJECTS)));
220        ret.put("dc:rights", getGaussian(DC_RIGHTS));
221        ret.put("dc:language", getGaussian(DC_LANGUAGE));
222        ret.put("dc:coverage", getGaussian(DC_COVERAGE));
223        ret.put("dc:source", getGaussian(DC_SOURCE));
224        // validation contraint violation
225        // ret.put("dc:creator", String.format("user%03d", hazard.nextInt(500)));
226        return ret;
227    }
228
229    protected String getGaussian(String[] words) {
230        double g = Math.abs(hazard.nextGaussian() / 4);
231        g = Math.min(g, 1);
232        int i = (int) Math.floor(g * (words.length - 1));
233        return words[ i ];
234    }
235
236    protected int getMidRandom(int target) {
237        return 1 + (target / 2) + hazard.nextInt(target);
238    }
239
240    /**
241     * Allows to get a non uniform distribution of the number of nodes per folder. Returns:
242     * <ul>
243     * <li>A small number of nodes 10% of the time, see {@link #smallNbNodesDivider}.</li>
244     * <li>A big number of nodes 10% of the time, see {@link #bigNbNodesFactor}.</li>
245     * <li>A random variation of the target number of nodes 80% of the time.</li>
246     * </ul>
247     */
248    protected int getNonUniform(int target, boolean folderish) {
249        int res;
250        int remainder = nbVisitedFolders % 10;
251        if (remainder == 8) {
252            res = 1 + target / smallNbNodesDivider;
253            if (log.isDebugEnabled()) {
254                String nodeStr;
255                if (folderish) {
256                    nodeStr = "folderish";
257                } else {
258                    nodeStr = "data";
259                }
260                log.debug(String.format("### Small number of %s nodes: %d", nodeStr, res));
261            }
262        } else if (remainder == 9) {
263            int factor;
264            // Big number of folderish nodes is 10 times smaller than the big number of data nodes
265            if (folderish) {
266                factor = bigNbNodesFactor / 10;
267            } else {
268                factor = bigNbNodesFactor;
269            }
270            res = 1 + target * factor;
271            if (log.isDebugEnabled()) {
272                String nodeStr;
273                if (folderish) {
274                    nodeStr = "folderish";
275                } else {
276                    nodeStr = "data";
277                }
278                log.debug(String.format("### Big number of %s nodes: %d", nodeStr, res));
279            }
280        } else {
281            res = getMidRandom(target);
282        }
283        return res;
284    }
285
286    protected int getMaxChildren() {
287        if (maxNode < nbNodes) {
288            return 0;
289        }
290        int targetRemainingFolders = minGlobalFolders - nbFolders;
291        if (targetRemainingFolders <= 0) {
292            return defaultNbDataNodesPerFolder + 1;
293        }
294        int target = ((maxNode - nbNodes) / targetRemainingFolders);
295        if (target <= 0) {
296            return 0;
297        }
298        if (nonUniformRepartition) {
299            return getNonUniform(target, false);
300        } else {
301            return getMidRandom(target);
302        }
303    }
304
305    protected int getMaxFolderish() {
306        if (maxNode <= nbNodes) {
307            return 0;
308        }
309        if (nonUniformRepartition) {
310            return getNonUniform(minFoldersPerNode, true);
311        } else {
312            return getMidRandom(minFoldersPerNode);
313        }
314    }
315
316    @Override
317    public List<SourceNode> getChildren() {
318
319        if (!folderish) {
320            return null;
321        }
322
323        if (cachedChildren != null) {
324            return cachedChildren;
325        }
326
327        List<SourceNode> children = new ArrayList<SourceNode>();
328        if (nbNodes > maxNode) {
329            return children;
330        }
331
332        int nbChildren = getMaxChildren();
333        for (int i = 0; i < nbChildren; i++) {
334            children.add(new RandomTextSourceNode(false, level, i, onlyText, withProperties));
335        }
336        synchronized (nbNodes) {
337            nbNodes = nbNodes + nbChildren;
338            if (log.isDebugEnabled()) {
339                String nodeStr;
340                if (nbChildren > 1) {
341                    nodeStr = "nodes";
342                } else {
343                    nodeStr = "node";
344                }
345                log.debug(String.format("Added %d data %s to %s; data node total count = %d", nbChildren, nodeStr,
346                        getName(), nbNodes));
347            }
348        }
349
350        if (level < maxDepth) {
351            // In the case of a non uniform repartition, don't add folderish nodes if there are no data nodes to not
352            // overload the tree with folderish nodes that would probably be empty
353            if (!nonUniformRepartition || nbChildren > 0) {
354                int nbFolderish = getMaxFolderish();
355                for (int i = 0; i < nbFolderish; i++) {
356                    children.add(new RandomTextSourceNode(true, level + 1, i, onlyText, withProperties));
357                }
358                synchronized (nbFolders) {
359                    nbFolders = nbFolders + nbFolderish;
360                    if (log.isDebugEnabled()) {
361                        String nodeStr;
362                        if (nbFolderish > 1) {
363                            nodeStr = "nodes";
364                        } else {
365                            nodeStr = "node";
366                        }
367                        log.debug(String.format("Added %d folderish %s to %s; folderish node total count = %d",
368                                nbFolderish, nodeStr, getName(), nbFolders));
369                    }
370                }
371            }
372        }
373        if (CACHE_CHILDREN) {
374            cachedChildren = children;
375        }
376
377        synchronized (nbVisitedFolders) {
378            nbVisitedFolders++;
379            if (log.isDebugEnabled()) {
380                String folderStr;
381                if (nbVisitedFolders > 1) {
382                    folderStr = "folders";
383                } else {
384                    folderStr = "folder";
385                }
386                log.debug(String.format("Visited %d %s", nbVisitedFolders, folderStr));
387            }
388        }
389
390        return children;
391    }
392
393    @Override
394    public String getName() {
395        if (name == null) {
396            if (withProperties) {
397                name = gen.getRandomTitle(hazard.nextInt(3)+1);
398            }
399            else {
400                if (folderish) {
401                    name = "folder";
402                } else {
403                    name = "file";
404                }
405                if (level == 0 && folderish) {
406                    name = name + "-" + (System.currentTimeMillis() % 10000) + hazard.nextInt(100);
407                } else {
408                    name = name + "-" + level + "-" + idx;
409                }
410            }
411        }
412        return name;
413    }
414
415    @Override
416    public boolean isFolderish() {
417        return folderish;
418    }
419
420    public static Integer getNbNodes() {
421        return nbNodes;
422    }
423
424    public static Long getSize() {
425        return size;
426    }
427
428    public int getLevel() {
429        return level;
430    }
431
432    @Override
433    public String getSourcePath() {
434        return null;
435    }
436}