001/* 002 * (C) Copyright 2006-2008 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Nuxeo - initial API and implementation 018 * 019 * $Id$ 020 */ 021 022package org.nuxeo.ecm.platform.importer.source; 023 024import java.io.Serializable; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.HashMap; 028import java.util.List; 029import java.util.Map; 030import java.util.Random; 031 032import org.apache.commons.logging.Log; 033import org.apache.commons.logging.LogFactory; 034import org.nuxeo.ecm.core.api.Blob; 035import org.nuxeo.ecm.core.api.Blobs; 036import org.nuxeo.ecm.core.api.blobholder.BlobHolder; 037import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder; 038import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolderWithProperties; 039import org.nuxeo.ecm.platform.importer.random.DictionaryHolder; 040import org.nuxeo.ecm.platform.importer.random.HunspellDictionaryHolder; 041import org.nuxeo.ecm.platform.importer.random.RandomTextGenerator; 042 043 044/** 045 * Random {@link SourceNode} to be used for load testing 046 * 047 * @author Thierry Delprat 048 */ 049public class RandomTextSourceNode implements SourceNode { 050 051 private static final Log log = LogFactory.getLog(RandomTextSourceNode.class); 052 053 protected static RandomTextGenerator gen; 054 055 protected static int maxNode = 10000; 056 057 /** 058 * Used in {@link #getMaxChildren()} and {@link #getMaxFolderish()}. 059 */ 060 protected static boolean nonUniformRepartition = false; 061 062 public static int maxDepth = 8; 063 064 public static int defaultNbDataNodesPerFolder = 100; 065 066 /** 067 * Used to generate a big number of children nodes when {@link #nonUniformRepartition} is {@code true}. 068 */ 069 public static int bigNbNodesFactor = 50; 070 071 /** 072 * Used to generate a small number of children nodes when {@link #nonUniformRepartition} is {@code true}. 073 */ 074 public static int smallNbNodesDivider = defaultNbDataNodesPerFolder; 075 076 protected static int minGlobalFolders = 0; 077 078 protected static int minFoldersPerNode = 0; 079 080 protected static Integer nbNodes = 0; 081 082 protected static Integer nbFolders = 0; 083 084 protected static Integer nbVisitedFolders = 0; 085 086 protected static Long size; 087 088 protected Random hazard; 089 090 protected String name; 091 092 protected boolean folderish; 093 094 protected int level = 0; 095 096 protected int idx = 0; 097 098 protected static Integer blobSizeInKB; 099 100 protected List<SourceNode> cachedChildren = null; 101 102 public static boolean CACHE_CHILDREN = false; 103 104 protected boolean onlyText = true; 105 106 protected boolean withProperties = false; 107 108 static protected String[] DC_NATURE = { "article", "acknowledgement", "assessment", "application", "order", 109 "contract", "quotation", "fax", "worksheet", "letter", "memo", "note", "notification", "procedure", 110 "report", "internshipReport", "pressReview"}; 111 112 static protected String[] DC_SUBJECTS = {"art/architecture", "art/comics", "art/cinema", "art/culture","art/danse", 113 "art/music", "sciences/astronomy", "sciences/biology", "sciences/chemistry", "sciences/math", 114 "sciences/physic", "society/ecology", "daily life/gastronomy", "daily life/gardening", "daily life/sport", 115 "technology/it" }; 116 117 static protected String[] DC_RIGHTS = { "OpenContentL", "CC-BY-NC", "CC-BY-ND", "FreeArt", "ODbi", "GNUGPL", 118 "FreeBSD", "CC0"}; 119 120 static protected String[] DC_LANGUAGE = { "IT", "DE", "FR", "US", "EN"}; 121 122 static protected String[] DC_SOURCE = { "internal", "external", "unknown" }; 123 124 static protected String[] DC_COVERAGE = { "europe/France", "europe/Germany", "europe/Italy", "europe/Spain", 125 "oceania/Tonga", "africa/Mali", "asia/Japan", "north-america/United_States_of_America" }; 126 127 public RandomTextSourceNode(boolean folderish, int level, int idx, boolean onlyText, boolean withProperties) { 128 this.folderish = folderish; 129 hazard = new Random(); 130 this.level = level; 131 this.idx = idx; 132 this.onlyText = onlyText; 133 this.withProperties = withProperties; 134 } 135 136 public RandomTextSourceNode(boolean folderish, int level, int idx, boolean onlyText) { 137 this(folderish, level, idx, onlyText, false); 138 } 139 140 public static RandomTextSourceNode init(int maxSize) { 141 return init(maxSize, null, true); 142 } 143 144 public static RandomTextSourceNode init(int maxSize, Integer blobSizeInKB, boolean onlyText) { 145 return init(maxSize, blobSizeInKB, onlyText, false, false, null); 146 } 147 148 public static RandomTextSourceNode init(int maxSize, Integer blobSizeInKB, boolean onlyText, boolean nonUniform, 149 boolean withProperties, String lang) { 150 return init(maxSize, blobSizeInKB, onlyText, new HunspellDictionaryHolder(lang), nonUniform, 151 withProperties); 152 } 153 154 public static RandomTextSourceNode init(int maxSize, Integer blobSizeInKB, boolean onlyText, 155 DictionaryHolder dictionaryHolder, boolean nonUniform, boolean withProperties) { 156 gen = new RandomTextGenerator(dictionaryHolder); 157 gen.prefilCache(); 158 maxNode = maxSize; 159 nbNodes = 0; 160 nbFolders = 1; 161 nbVisitedFolders = 0; 162 size = new Long(0); 163 RandomTextSourceNode.blobSizeInKB = blobSizeInKB; 164 minGlobalFolders = maxNode / defaultNbDataNodesPerFolder; 165 minFoldersPerNode = 1 + (int) Math.pow(minGlobalFolders, (1.0 / maxDepth)); 166 nonUniformRepartition = nonUniform; 167 return new RandomTextSourceNode(true, 0, 0, onlyText, withProperties); 168 } 169 170 protected String getBlobMimeType() { 171 if (onlyText) { 172 return "text/plain"; 173 } else { 174 return "text/partial"; 175 } 176 } 177 178 private String capitalize(final String line) { 179 return Character.toUpperCase(line.charAt(0)) + line.substring(1); 180 } 181 182 @Override 183 public BlobHolder getBlobHolder() { 184 String content = null; 185 if (folderish) { 186 if (withProperties) { 187 return new SimpleBlobHolderWithProperties((Blob) null, getRandomProperties(content)); 188 } 189 return null; 190 } 191 if (blobSizeInKB == null) { 192 content = gen.getRandomText(); 193 } else { 194 content = gen.getRandomText(blobSizeInKB); 195 } 196 synchronized (size) { 197 size += content.length(); 198 } 199 Blob blob = Blobs.createBlob(content, getBlobMimeType(), null, getName() + ".txt"); 200 if (withProperties) { 201 return new SimpleBlobHolderWithProperties(blob, getRandomProperties(content)); 202 } 203 return new SimpleBlobHolder(blob); 204 } 205 206 protected Map<String, Serializable> getRandomProperties(String content) { 207 Map<String, Serializable> ret = new HashMap<>(); 208 ret.put("dc:title", capitalize(getName())); 209 if (hazard.nextInt(10) == 1) { 210 String description; 211 if (content != null && ! content.isEmpty()) { 212 description = content.substring(0, content.indexOf(' ', 40)); 213 } else { 214 description = gen.getRandomTitle(hazard.nextInt(5)+1); 215 } 216 ret.put("dc:description", capitalize(description)); 217 } 218 ret.put("dc:nature", getGaussian(DC_NATURE)); 219 ret.put("dc:subjects", (Serializable) Arrays.asList(getGaussian(DC_SUBJECTS))); 220 ret.put("dc:rights", getGaussian(DC_RIGHTS)); 221 ret.put("dc:language", getGaussian(DC_LANGUAGE)); 222 ret.put("dc:coverage", getGaussian(DC_COVERAGE)); 223 ret.put("dc:source", getGaussian(DC_SOURCE)); 224 // validation contraint violation 225 // ret.put("dc:creator", String.format("user%03d", hazard.nextInt(500))); 226 return ret; 227 } 228 229 protected String getGaussian(String[] words) { 230 double g = Math.abs(hazard.nextGaussian() / 4); 231 g = Math.min(g, 1); 232 int i = (int) Math.floor(g * (words.length - 1)); 233 return words[ i ]; 234 } 235 236 protected int getMidRandom(int target) { 237 return 1 + (target / 2) + hazard.nextInt(target); 238 } 239 240 /** 241 * Allows to get a non uniform distribution of the number of nodes per folder. Returns: 242 * <ul> 243 * <li>A small number of nodes 10% of the time, see {@link #smallNbNodesDivider}.</li> 244 * <li>A big number of nodes 10% of the time, see {@link #bigNbNodesFactor}.</li> 245 * <li>A random variation of the target number of nodes 80% of the time.</li> 246 * </ul> 247 */ 248 protected int getNonUniform(int target, boolean folderish) { 249 int res; 250 int remainder = nbVisitedFolders % 10; 251 if (remainder == 8) { 252 res = 1 + target / smallNbNodesDivider; 253 if (log.isDebugEnabled()) { 254 String nodeStr; 255 if (folderish) { 256 nodeStr = "folderish"; 257 } else { 258 nodeStr = "data"; 259 } 260 log.debug(String.format("### Small number of %s nodes: %d", nodeStr, res)); 261 } 262 } else if (remainder == 9) { 263 int factor; 264 // Big number of folderish nodes is 10 times smaller than the big number of data nodes 265 if (folderish) { 266 factor = bigNbNodesFactor / 10; 267 } else { 268 factor = bigNbNodesFactor; 269 } 270 res = 1 + target * factor; 271 if (log.isDebugEnabled()) { 272 String nodeStr; 273 if (folderish) { 274 nodeStr = "folderish"; 275 } else { 276 nodeStr = "data"; 277 } 278 log.debug(String.format("### Big number of %s nodes: %d", nodeStr, res)); 279 } 280 } else { 281 res = getMidRandom(target); 282 } 283 return res; 284 } 285 286 protected int getMaxChildren() { 287 if (maxNode < nbNodes) { 288 return 0; 289 } 290 int targetRemainingFolders = minGlobalFolders - nbFolders; 291 if (targetRemainingFolders <= 0) { 292 return defaultNbDataNodesPerFolder + 1; 293 } 294 int target = ((maxNode - nbNodes) / targetRemainingFolders); 295 if (target <= 0) { 296 return 0; 297 } 298 if (nonUniformRepartition) { 299 return getNonUniform(target, false); 300 } else { 301 return getMidRandom(target); 302 } 303 } 304 305 protected int getMaxFolderish() { 306 if (maxNode <= nbNodes) { 307 return 0; 308 } 309 if (nonUniformRepartition) { 310 return getNonUniform(minFoldersPerNode, true); 311 } else { 312 return getMidRandom(minFoldersPerNode); 313 } 314 } 315 316 @Override 317 public List<SourceNode> getChildren() { 318 319 if (!folderish) { 320 return null; 321 } 322 323 if (cachedChildren != null) { 324 return cachedChildren; 325 } 326 327 List<SourceNode> children = new ArrayList<SourceNode>(); 328 if (nbNodes > maxNode) { 329 return children; 330 } 331 332 int nbChildren = getMaxChildren(); 333 for (int i = 0; i < nbChildren; i++) { 334 children.add(new RandomTextSourceNode(false, level, i, onlyText, withProperties)); 335 } 336 synchronized (nbNodes) { 337 nbNodes = nbNodes + nbChildren; 338 if (log.isDebugEnabled()) { 339 String nodeStr; 340 if (nbChildren > 1) { 341 nodeStr = "nodes"; 342 } else { 343 nodeStr = "node"; 344 } 345 log.debug(String.format("Added %d data %s to %s; data node total count = %d", nbChildren, nodeStr, 346 getName(), nbNodes)); 347 } 348 } 349 350 if (level < maxDepth) { 351 // In the case of a non uniform repartition, don't add folderish nodes if there are no data nodes to not 352 // overload the tree with folderish nodes that would probably be empty 353 if (!nonUniformRepartition || nbChildren > 0) { 354 int nbFolderish = getMaxFolderish(); 355 for (int i = 0; i < nbFolderish; i++) { 356 children.add(new RandomTextSourceNode(true, level + 1, i, onlyText, withProperties)); 357 } 358 synchronized (nbFolders) { 359 nbFolders = nbFolders + nbFolderish; 360 if (log.isDebugEnabled()) { 361 String nodeStr; 362 if (nbFolderish > 1) { 363 nodeStr = "nodes"; 364 } else { 365 nodeStr = "node"; 366 } 367 log.debug(String.format("Added %d folderish %s to %s; folderish node total count = %d", 368 nbFolderish, nodeStr, getName(), nbFolders)); 369 } 370 } 371 } 372 } 373 if (CACHE_CHILDREN) { 374 cachedChildren = children; 375 } 376 377 synchronized (nbVisitedFolders) { 378 nbVisitedFolders++; 379 if (log.isDebugEnabled()) { 380 String folderStr; 381 if (nbVisitedFolders > 1) { 382 folderStr = "folders"; 383 } else { 384 folderStr = "folder"; 385 } 386 log.debug(String.format("Visited %d %s", nbVisitedFolders, folderStr)); 387 } 388 } 389 390 return children; 391 } 392 393 @Override 394 public String getName() { 395 if (name == null) { 396 if (withProperties) { 397 name = gen.getRandomTitle(hazard.nextInt(3)+1); 398 } 399 else { 400 if (folderish) { 401 name = "folder"; 402 } else { 403 name = "file"; 404 } 405 if (level == 0 && folderish) { 406 name = name + "-" + (System.currentTimeMillis() % 10000) + hazard.nextInt(100); 407 } else { 408 name = name + "-" + level + "-" + idx; 409 } 410 } 411 } 412 return name; 413 } 414 415 @Override 416 public boolean isFolderish() { 417 return folderish; 418 } 419 420 public static Integer getNbNodes() { 421 return nbNodes; 422 } 423 424 public static Long getSize() { 425 return size; 426 } 427 428 public int getLevel() { 429 return level; 430 } 431 432 @Override 433 public String getSourcePath() { 434 return null; 435 } 436}