/*
 * (C) Copyright 2009 Nuxeo SAS (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * Contributors:
 *     Olivier Grisel
 */
package org.nuxeo.ecm.platform.categorization.service;

import java.util.List;

/**
 * Common interface for document categorization based on text content.
 * <p>
 * Both methods take the textual content of a document and return category suggestions ordered by decreasing
 * confidence. How the confidence is computed is left to the implementation.
 *
 * @author ogrisel
 */
public interface Categorizer {

    /**
     * Computes a list of suggested categories from the textual content of the document, sorted by decreasing
     * confidence.
     * <p>
     * This overload takes no precision threshold; which threshold applies, if any, is left to the implementation.
     *
     * @param textContent the textual content of the document to categorize
     * @param maxSuggestions the maximum number of suggested categories to return
     * @return the suggested categories, sorted by decreasing confidence
     */
    List<String> guessCategories(String textContent, int maxSuggestions);

    /**
     * Computes a list of suggested categories from the textual content of the document, sorted by decreasing
     * confidence.
     *
     * @param textContent the textual content of the document to categorize
     * @param maxSuggestions the maximum number of suggested categories to return
     * @param precisionThreshold the precision threshold to apply, or {@code null} to use the default threshold of
     *            the implementation
     * @return the suggested categories, sorted by decreasing confidence
     */
    List<String> guessCategories(String textContent, int maxSuggestions, Double precisionThreshold);

}