/*
 * (C) Copyright 2012-2014 Nuxeo SA (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.storage;

import java.util.List;

/**
 * Strategy for turning document strings into fulltext-indexable words.
 * <p>
 * Given the strings extracted from a document, an implementation decides how they are parsed, split and normalized
 * before being handed to the underlying engine for fulltext indexing.
 *
 * @since 5.9.5
 */
public interface FulltextParser {

    /**
     * Normalizes a single property value for fulltext storage in the database and returns the result as one string.
     * <p>
     * {@code path} is {@code null} when the string does not originate from a specific property path, for example
     * when it was extracted from binary data.
     *
     * @param s the string to parse and normalize
     * @param path the abstracted property path (every complex index replaced by {@code *}), or {@code null}
     * @return the normalized words, joined into a single space-separated string
     */
    String parse(String s, String path);

    /**
     * Normalizes a single property value for fulltext storage in the database, collecting the words into a
     * caller-supplied list.
     * <p>
     * Same as {@link #parse(String, String)}, except that the normalized words are accumulated into {@code strings}
     * instead of being returned.
     *
     * @param s the string to parse and normalize
     * @param path the abstracted property path (every complex index replaced by {@code *}), or {@code null}
     * @param strings the list into which the normalized words are accumulated
     */
    void parse(String s, String path, List<String> strings);

}