Nuxeo Enterprise Platform 5.4

org.nuxeo.ecm.core.convert.plugins.text.extractors
Class HtmlParser

java.lang.Object
  extended by org.apache.xerces.parsers.XMLParser
      extended by org.apache.xerces.parsers.AbstractXMLDocumentParser
          extended by org.apache.xerces.parsers.AbstractSAXParser
              extended by org.nuxeo.ecm.core.convert.plugins.text.extractors.HtmlParser
All Implemented Interfaces:
org.apache.xerces.xni.XMLDocumentHandler, org.apache.xerces.xni.XMLDTDContentModelHandler, org.apache.xerces.xni.XMLDTDHandler, org.apache.xerces.xs.PSVIProvider, org.xml.sax.Parser, org.xml.sax.XMLReader

public class HtmlParser
extends org.apache.xerces.parsers.AbstractSAXParser

Author:
Thomas Roger

Field Summary
 
Fields inherited from interface org.apache.xerces.xni.XMLDTDHandler
CONDITIONAL_IGNORE, CONDITIONAL_INCLUDE
 
Fields inherited from interface org.apache.xerces.xni.XMLDTDContentModelHandler
OCCURS_ONE_OR_MORE, OCCURS_ZERO_OR_MORE, OCCURS_ZERO_OR_ONE, SEPARATOR_CHOICE, SEPARATOR_SEQUENCE
 
Constructor Summary
HtmlParser()
           
HtmlParser(java.lang.String tagFilter)
           
 
Method Summary
 void characters(org.apache.xerces.xni.XMLString xmlString, org.apache.xerces.xni.Augmentations augmentations)
           
 void endElement(org.apache.xerces.xni.QName element, org.apache.xerces.xni.Augmentations augs)
           
 java.lang.String getContents()
           
 void init(java.lang.String tagFilter)
           
 void startDocument(org.apache.xerces.xni.XMLLocator arg0, java.lang.String arg1, org.apache.xerces.xni.NamespaceContext arg2, org.apache.xerces.xni.Augmentations arg3)
           
 void startElement(org.apache.xerces.xni.QName element, org.apache.xerces.xni.XMLAttributes attributes, org.apache.xerces.xni.Augmentations augs)
           
 
Methods inherited from class org.apache.xerces.parsers.AbstractSAXParser
attributeDecl, comment, doctypeDecl, elementDecl, endCDATA, endDocument, endDTD, endExternalSubset, endGeneralEntity, endParameterEntity, externalEntityDecl, getAttributePSVI, getAttributePSVIByName, getContentHandler, getDTDHandler, getElementPSVI, getEntityResolver, getErrorHandler, getFeature, getProperty, ignorableWhitespace, internalEntityDecl, notationDecl, parse, parse, processingInstruction, reset, setContentHandler, setDocumentHandler, setDTDHandler, setEntityResolver, setErrorHandler, setFeature, setLocale, setProperty, startCDATA, startExternalSubset, startGeneralEntity, startParameterEntity, unparsedEntityDecl, xmlDecl
 
Methods inherited from class org.apache.xerces.parsers.AbstractXMLDocumentParser
any, element, empty, emptyElement, endAttlist, endConditional, endContentModel, endGroup, getDocumentSource, getDTDContentModelSource, getDTDSource, ignoredCharacters, occurrence, pcdata, separator, setDocumentSource, setDTDContentModelSource, setDTDSource, startAttlist, startConditional, startContentModel, startDTD, startGroup, textDecl
 
Methods inherited from class org.apache.xerces.parsers.XMLParser
parse
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

HtmlParser

public HtmlParser()

HtmlParser

public HtmlParser(java.lang.String tagFilter)

Method Detail

init

public void init(java.lang.String tagFilter)

startElement

public void startElement(org.apache.xerces.xni.QName element,
                         org.apache.xerces.xni.XMLAttributes attributes,
                         org.apache.xerces.xni.Augmentations augs)
                  throws org.apache.xerces.xni.XNIException
Specified by:
startElement in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides:
startElement in class org.apache.xerces.parsers.AbstractSAXParser
Throws:
org.apache.xerces.xni.XNIException

endElement

public void endElement(org.apache.xerces.xni.QName element,
                       org.apache.xerces.xni.Augmentations augs)
                throws org.apache.xerces.xni.XNIException
Specified by:
endElement in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides:
endElement in class org.apache.xerces.parsers.AbstractSAXParser
Throws:
org.apache.xerces.xni.XNIException

startDocument

public void startDocument(org.apache.xerces.xni.XMLLocator arg0,
                          java.lang.String arg1,
                          org.apache.xerces.xni.NamespaceContext arg2,
                          org.apache.xerces.xni.Augmentations arg3)
                   throws org.apache.xerces.xni.XNIException
Specified by:
startDocument in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides:
startDocument in class org.apache.xerces.parsers.AbstractSAXParser
Throws:
org.apache.xerces.xni.XNIException

characters

public void characters(org.apache.xerces.xni.XMLString xmlString,
                       org.apache.xerces.xni.Augmentations augmentations)
                throws org.apache.xerces.xni.XNIException
Specified by:
characters in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides:
characters in class org.apache.xerces.parsers.AbstractSAXParser
Throws:
org.apache.xerces.xni.XNIException

getContents

public java.lang.String getContents()
Returns:
the parsed content (as a String).

Nuxeo Enterprise Platform 5.4

Copyright © 2010 Nuxeo SAS. All Rights Reserved.