001/* 002 * (C) Copyright 2012-2014 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 */ 019package org.nuxeo.ecm.core.storage; 020 021import java.util.List; 022 023/** 024 * Parser of strings for fulltext indexing. 025 * <p> 026 * From the strings extracted from the document, decides how they should be parsed, split and normalized for fulltext 027 * indexing by the underlying engine. 028 * 029 * @since 5.9.5 030 */ 031public interface FulltextParser { 032 033 /** 034 * Parses one property value to normalize the fulltext for the database. 035 * <p> 036 * The passed {@code path} may be {@code null} if the passed string is not coming from a specific path, for instance 037 * when it was extracted from binary data. 038 * 039 * @param s the string to be parsed and normalized 040 * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or 041 * {@code null} 042 * @return the normalized words as a single space-separated string 043 */ 044 String parse(String s, String path); 045 046 /** 047 * Parses one property value to normalize the fulltext for the database. 048 * <p> 049 * Like {@link #parse(String, String)} but uses the passed list to accumulate words. 050 * 051 * @param s the string to be parsed and normalized 052 * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or 053 * {@code null} 054 * @param strings the list into which normalized words should be accumulated 055 */ 056 void parse(String s, String path, List<String> strings); 057 058}