/*
 * (C) Copyright 2009 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Olivier Grisel
 */
package org.nuxeo.ecm.platform.categorization.service;

import java.util.List;

/**
 * Common interface for document categorization based on text content.
 *
 * @author ogrisel
 */
public interface Categorizer {

    /**
     * Compute a list of suggested categories, sorted by decreasing confidence based on the textual content of the
     * document.
     *
     * @param textContent the textual content of the document to categorize
     * @param maxSuggestions the maximum number of suggestions requested
     * @return the suggested categories, sorted by decreasing confidence
     */
    List<String> guessCategories(String textContent, int maxSuggestions);

    /**
     * Compute a list of suggested categories, sorted by decreasing confidence based on the textual content of the
     * document.
     *
     * @param textContent the textual content of the document to categorize
     * @param maxSuggestions the maximum number of suggestions requested
     * @param precisionThreshold the precision threshold to apply, or {@code null} to use the default threshold of
     *            the implementation
     * @return the suggested categories, sorted by decreasing confidence
     */
    List<String> guessCategories(String textContent, int maxSuggestions, Double precisionThreshold);

}