/*
 * (C) Copyright 2012-2014 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.storage;

import org.nuxeo.ecm.core.api.DocumentLocation;

import java.util.List;

/**
 * Parser of strings for fulltext indexing.
 * <p>
 * From the strings extracted from the document, decides how they should be parsed, split and normalized for fulltext
 * indexing by the underlying engine.
 *
 * @since 5.9.5
 */
public interface FulltextParser {

    /**
     * Parses one property value to normalize the fulltext for the database.
     * <p>
     * The passed {@code path} may be {@code null} if the passed string is not coming from a specific path, for instance
     * when it was extracted from binary data.
     *
     * @param s the string to be parsed and normalized
     * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
     *            {@code null}
     * @return the normalized words as a single space-separated string
     */
    String parse(String s, String path);

    /**
     * Parses one property value to normalize the fulltext for the database.
     * <p>
     * Like {@link #parse(String, String)} but uses the passed list to accumulate words instead of returning them.
     *
     * @param s the string to be parsed and normalized
     * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
     *            {@code null}
     * @param strings the list into which normalized words should be accumulated
     */
    void parse(String s, String path, List<String> strings);

    /**
     * Parses one property value to normalize the fulltext for the database, given additional context about where the
     * value comes from.
     * <p>
     * The passed {@code path} may be {@code null} if the passed string is not coming from a specific path, for instance
     * when it was extracted from binary data.
     *
     * @param s the string to be parsed and normalized
     * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
     *            {@code null}
     * @param mimeType the {@code mimeType} of the string to be parsed and normalized. This may be {@code null}
     * @param documentLocation the {@code documentLocation} of the Document from which the property value string was
     *            extracted. This may be {@code null}
     * @return the normalized words as a single space-separated string
     * @since 8.4
     */
    String parse(String s, String path, String mimeType, DocumentLocation documentLocation);

    /**
     * Parses one property value to normalize the fulltext for the database, given additional context about where the
     * value comes from.
     * <p>
     * Like {@link #parse(String, String, String, DocumentLocation)} but uses the passed list to accumulate words
     * instead of returning them.
     *
     * @param s the string to be parsed and normalized
     * @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
     *            {@code null}
     * @param mimeType the {@code mimeType} of the string to be parsed and normalized. This may be {@code null}
     * @param documentLocation the {@code documentLocation} of the Document from which the property value string was
     *            extracted. This may be {@code null}
     * @param strings the list into which normalized words should be accumulated
     * @since 8.4
     */
    void parse(String s, String path, String mimeType, DocumentLocation documentLocation, List<String> strings);

}