001/*
002 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
 *     Michaël Vachette
019 *
020 */
021package org.nuxeo.ecm.platform.importer.random;
022
023import org.apache.commons.logging.Log;
024import org.apache.commons.logging.LogFactory;
025
026import java.io.BufferedReader;
027import java.io.IOException;
028import java.io.InputStream;
029import java.io.InputStreamReader;
030import java.net.URL;
031import java.util.ArrayList;
032import java.util.List;
033import java.util.Random;
034
035/**
036 * @author Thierry Delprat
037 */
038public class HunspellDictionaryHolder implements DictionaryHolder {
039
040    protected static final String DEFAULT_DIC = "fr_FR.dic";
041
042    protected static final int INITIAL_SIZE = 100000;
043
044    protected List<String> words = new ArrayList<>(INITIAL_SIZE);
045
046    protected static final Random RANDOM = new Random(); // NOSONAR (doesn't need cryptographic strength)
047
048    protected int wordCount;
049
050    protected String dicName = DEFAULT_DIC;
051
052    public static final Log log = LogFactory.getLog(HunspellDictionaryHolder.class);
053
054    public HunspellDictionaryHolder(String lang) {
055        if (lang != null) {
056            // sanitize the input so we don't open a security breach.
057            dicName = lang.replaceAll("\\W+", "") + ".dic";
058        }
059    }
060
061    @Override
062    public void init() throws IOException {
063        loadDic();
064        wordCount = words.size();
065    }
066
067    /**
068     * @deprecated since 6.0
069     */
070    @Deprecated
071    protected void loadDic(String dicName) throws IOException {
072        this.dicName = dicName;
073        loadDic();
074    }
075
076    /**
077     * @since 6.0
078     */
079    protected void loadDic() throws IOException {
080        URL url = Thread.currentThread().getContextClassLoader().getResource(dicName);
081        if (url == null) {
082            log.error("not found: " + dicName);
083            return;
084        }
085        try (InputStream in = url.openStream(); BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
086            String line;
087            while ((line = reader.readLine()) != null) {
088                int idx = line.indexOf("/");
089                if (idx > 0) {
090                    String word = line.substring(0, idx);
091                    words.add(word + " ");
092                } else {
093                    words.add(line + " ");
094                }
095            }
096        }
097    }
098
099    @Override
100    public int getWordCount() {
101        return wordCount;
102    }
103
104    @Override
105    public String getRandomWord() {
106        int idx = RANDOM.nextInt(wordCount);
107        return words.get(idx);
108    }
109}