/*
 * (C) Copyright 2012-2014 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
019package org.nuxeo.ecm.core.storage;
020
021import java.util.List;
022
/**
 * Parser of strings for fulltext indexing.
 * <p>
 * From the strings extracted from a document, an implementation decides how they should be parsed, split and
 * normalized for fulltext indexing by the underlying engine.
 * <p>
 * Two entry points are offered: {@link #parse(String, String)} returns the normalized words as one string, while
 * {@link #parse(String, String, List)} accumulates them into a list supplied by the caller.
 *
 * @since 5.9.5
 */
public interface FulltextParser {

    /**
     * Parses one property value to normalize the fulltext for the database.
     * <p>
     * The passed {@code path} may be {@code null} if the passed string is not coming from a specific path, for instance
     * when it was extracted from binary data.
     *
     * @param s the string to be parsed and normalized
     * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
     *            {@code null}
     * @return the normalized words as a single space-separated string
     */
    String parse(String s, String path);

    /**
     * Parses one property value to normalize the fulltext for the database, collecting the result into a list.
     * <p>
     * Like {@link #parse(String, String)}, but instead of returning a string this variant uses the passed list to
     * accumulate words. As with the other variant, {@code path} may be {@code null} when the string does not come from
     * a specific path.
     *
     * @param s the string to be parsed and normalized
     * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
     *            {@code null}
     * @param strings the list into which normalized words should be accumulated
     */
    void parse(String s, String path, List<String> strings);

}