001/*
002 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
 *     Michaël Vachette
019 *
020 */
021package org.nuxeo.ecm.platform.importer.random;
022
023import org.apache.commons.logging.Log;
024import org.apache.commons.logging.LogFactory;
025
026import java.io.BufferedReader;
027import java.io.IOException;
028import java.io.InputStream;
029import java.io.InputStreamReader;
030import java.net.URL;
031import java.util.ArrayList;
032import java.util.List;
033import java.util.Random;
034
035/**
036 * @author Thierry Delprat
037 */
038public class HunspellDictionaryHolder implements DictionaryHolder {
039
040    protected static final String DEFAULT_DIC = "fr_FR.dic";
041
042    protected static final int INITIAL_SIZE = 100000;
043
044    protected List<String> words = new ArrayList<>(INITIAL_SIZE);
045
046    protected Random generator;
047
048    protected int wordCount;
049
050    protected String dicName = DEFAULT_DIC;
051
052    public static final Log log = LogFactory.getLog(HunspellDictionaryHolder.class);
053
054    public HunspellDictionaryHolder(String lang) {
055        generator = new Random(System.currentTimeMillis());
056        if (lang != null) {
057            // sanitize the input so we don't open a security breach.
058            dicName = lang.replaceAll("\\W+", "") + ".dic";
059        }
060    }
061
062    @Override
063    public void init() throws IOException {
064        loadDic();
065        wordCount = words.size();
066    }
067
068    /**
069     * @deprecated since 6.0
070     */
071    @Deprecated
072    protected void loadDic(String dicName) throws IOException {
073        this.dicName = dicName;
074        loadDic();
075    }
076
077    /**
078     * @since 6.0
079     */
080    protected void loadDic() throws IOException {
081        URL url = Thread.currentThread().getContextClassLoader().getResource(dicName);
082        if (url == null) {
083            log.error("not found: " + dicName);
084            return;
085        }
086        try (InputStream in = url.openStream(); BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
087            String line;
088            while ((line = reader.readLine()) != null) {
089                int idx = line.indexOf("/");
090                if (idx > 0) {
091                    String word = line.substring(0, idx);
092                    words.add(word + " ");
093                } else {
094                    words.add(line + " ");
095                }
096            }
097        }
098    }
099
100    @Override
101    public int getWordCount() {
102        return wordCount;
103    }
104
105    @Override
106    public String getRandomWord() {
107        int idx = generator.nextInt(wordCount);
108        return words.get(idx);
109    }
110}