Class PDF2TextConverter.PatchedPDFTextStripper
- java.lang.Object
-
- org.apache.pdfbox.contentstream.PDFStreamEngine
-
- org.apache.pdfbox.text.PDFTextStripper
-
- org.nuxeo.ecm.core.convert.plugins.text.extractors.PDF2TextConverter.PatchedPDFTextStripper
-
- Enclosing class:
- PDF2TextConverter
public static class PDF2TextConverter.PatchedPDFTextStripper extends org.apache.pdfbox.text.PDFTextStripper
-
-
Constructor Summary
Constructors Constructor Description PatchedPDFTextStripper()
-
Method Summary
All Methods Instance Methods Concrete Methods Modifier and Type Method Description protected float
computeFontHeight(org.apache.pdfbox.pdmodel.font.PDFont arg0)
protected void
operatorException(org.apache.pdfbox.contentstream.operator.Operator operator, List<org.apache.pdfbox.cos.COSBase> operands, IOException e)
protected void
showGlyph(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, String arg3, org.apache.pdfbox.util.Vector arg4)
-
Methods inherited from class org.apache.pdfbox.text.PDFTextStripper
endArticle, endDocument, endPage, getAddMoreFormatting, getArticleEnd, getArticleStart, getAverageCharTolerance, getCharactersByArticle, getCurrentPageNo, getDropThreshold, getEndBookmark, getEndPage, getIndentThreshold, getLineSeparator, getListItemPatterns, getOutput, getPageEnd, getPageStart, getParagraphEnd, getParagraphStart, getSeparateByBeads, getSortByPosition, getSpacingTolerance, getStartBookmark, getStartPage, getSuppressDuplicateOverlappingText, getText, getWordSeparator, matchPattern, processPage, processPages, processTextPosition, setAddMoreFormatting, setArticleEnd, setArticleStart, setAverageCharTolerance, setDropThreshold, setEndBookmark, setEndPage, setIndentThreshold, setLineSeparator, setListItemPatterns, setPageEnd, setPageStart, setParagraphEnd, setParagraphStart, setShouldSeparateByBeads, setSortByPosition, setSpacingTolerance, setStartBookmark, setStartPage, setSuppressDuplicateOverlappingText, setWordSeparator, startArticle, startArticle, startDocument, startPage, writeCharacters, writeLineSeparator, writePage, writePageEnd, writePageStart, writeParagraphEnd, writeParagraphSeparator, writeParagraphStart, writeString, writeString, writeText, writeWordSeparator
-
Methods inherited from class org.apache.pdfbox.contentstream.PDFStreamEngine
addOperator, applyTextAdjustment, beginMarkedContentSequence, beginText, decreaseLevel, endMarkedContentSequence, endText, getAppearance, getCurrentPage, getGraphicsStackSize, getGraphicsState, getInitialMatrix, getLevel, getResources, getTextLineMatrix, getTextMatrix, increaseLevel, processAnnotation, processChildStream, processOperator, processOperator, processSoftMask, processTilingPattern, processTilingPattern, processTransparencyGroup, processType3Stream, registerOperatorProcessor, restoreGraphicsStack, restoreGraphicsState, saveGraphicsStack, saveGraphicsState, setLineDashPattern, setTextLineMatrix, setTextMatrix, showAnnotation, showFontGlyph, showFontGlyph, showForm, showGlyph, showText, showTextString, showTextStrings, showTransparencyGroup, showType3Glyph, showType3Glyph, transformedPoint, transformWidth, unsupportedOperator
-
-
-
-
Constructor Detail
-
PatchedPDFTextStripper
public PatchedPDFTextStripper() throws IOException
- Throws:
IOException
-
-
Method Detail
-
operatorException
protected void operatorException(org.apache.pdfbox.contentstream.operator.Operator operator, List<org.apache.pdfbox.cos.COSBase> operands, IOException e) throws IOException
- Overrides:
operatorException
in classorg.apache.pdfbox.contentstream.PDFStreamEngine
- Throws:
IOException
-
showGlyph
protected void showGlyph(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, String arg3, org.apache.pdfbox.util.Vector arg4) throws IOException
- Overrides:
showGlyph
in classorg.apache.pdfbox.contentstream.PDFStreamEngine
- Throws:
IOException
-
computeFontHeight
protected float computeFontHeight(org.apache.pdfbox.pdmodel.font.PDFont arg0) throws IOException
- Throws:
IOException
-
-