001/*
002 * (C) Copyright 2009 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Olivier Grisel
018 */
019package org.nuxeo.ecm.platform.categorization.service;
020
021import java.util.List;
022
/**
 * Contract for components that suggest categories for a document from its text content.
 * <p>
 * Both operations take the raw text of the document and return category suggestions ordered from the most confident
 * to the least confident one.
 *
 * @author ogrisel
 */
public interface Categorizer {

    /**
     * Suggests categories for the given text, ordered by decreasing confidence.
     * <p>
     * NOTE(review): this variant takes no explicit precision threshold; it presumably relies on the default threshold
     * of the implementation, to be confirmed against the implementations.
     *
     * @param textContent the textual content of the document to categorize
     * @param maxSuggestions limit on the number of suggestions (presumably the maximum size of the returned list;
     *            confirm against the implementations)
     * @return the suggested categories, sorted by decreasing confidence
     */
    List<String> guessCategories(String textContent, int maxSuggestions);

    /**
     * Suggests categories for the given text, ordered by decreasing confidence, using an explicit precision
     * threshold.
     *
     * @param textContent the textual content of the document to categorize
     * @param maxSuggestions limit on the number of suggestions (presumably the maximum size of the returned list;
     *            confirm against the implementations)
     * @param precisionThreshold the precision threshold to apply, or {@code null} to use the default threshold of
     *            the implementation
     * @return the suggested categories, sorted by decreasing confidence
     */
    List<String> guessCategories(String textContent, int maxSuggestions, Double precisionThreshold);

}