/*
 * (C) Copyright 2012-2014 Nuxeo SA (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * Contributors:
 *     Florent Guillaume
 */
017package org.nuxeo.ecm.core.storage;
018
019import java.util.List;
020
021/**
022 * Parser of strings for fulltext indexing.
023 * <p>
024 * From the strings extracted from the document, decides how they should be parsed, split and normalized for fulltext
025 * indexing by the underlying engine.
026 *
027 * @since 5.9.5
028 */
029public interface FulltextParser {
030
031    /**
032     * Parses one property value to normalize the fulltext for the database.
033     * <p>
034     * The passed {@code path} may be {@code null} if the passed string is not coming from a specific path, for instance
035     * when it was extracted from binary data.
036     *
037     * @param s the string to be parsed and normalized
038     * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
039     *            {@code null}
040     * @return the normalized words as a single space-separated string
041     */
042    String parse(String s, String path);
043
044    /**
045     * Parses one property value to normalize the fulltext for the database.
046     * <p>
047     * Like {@link #parse(String, String)} but uses the passed list to accumulate words.
048     *
049     * @param s the string to be parsed and normalized
050     * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
051     *            {@code null}
052     * @param strings the list into which normalized words should be accumulated
053     */
054    void parse(String s, String path, List<String> strings);
055
056}