Example usage for the org.apache.pdfbox.pdmodel.PDDocument constructor PDDocument(COSDocument)

Introduction

This page collects example usages of the org.apache.pdfbox.pdmodel.PDDocument constructor PDDocument(COSDocument).

Prototype

public PDDocument(COSDocument doc)

Source Link

Document

Constructor that uses an existing document.

Usage

From source file:aplicacion.sistema.indexer.test.PDFTextStripperOrg.java

License:Apache License

/**
 * @deprecated/*from w w w  . j a  va 2s.  com*/
 * @see PDFTextStripper#getText( PDDocument )
 * @param doc The document to extract the text from.
 * @return The document text.
 * @throws IOException If there is an error extracting the text.
 */
public String getText(COSDocument doc) throws IOException {
    return getText(new PDDocument(doc));
}

From source file:aplicacion.sistema.indexer.test.PDFTextStripperOrg.java

License:Apache License

/**
 * Writes the text of a low-level COS document to the given writer. The COS
 * document is first wrapped in a {@link PDDocument}, then handed to the
 * {@code PDDocument} overload of this method.
 *
 * @deprecated
 * @see PDFTextStripper#writeText( PDDocument, Writer )
 * @param doc The document to extract the text from.
 * @param outputStream The writer that receives the extracted text.
 * @throws IOException If there is an error extracting the text.
 */
public void writeText(COSDocument doc, Writer outputStream) throws IOException {
    PDDocument wrapped = new PDDocument(doc);
    writeText(wrapped, outputStream);
}

From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java

License:Apache License

/**
 * Writes the given COS document by wrapping it in a {@link PDDocument} and
 * delegating to the {@code PDDocument} overload of {@code write}.
 *
 * @param doc The document to write.
 *
 * @throws COSVisitorException If an error occurs while generating the data.
 */
public void write(COSDocument doc) throws COSVisitorException {
    write(new PDDocument(doc));
}

From source file:com.amandine.NewEmptyJUnitTest.java

/**
 * Extracts the text of pages 3 through (page count - 1) of the lookbook PDF
 * stored at a fixed local path, asserting that the last extracted page is 41.
 *
 * <p>Any {@link Exception} raised while opening, parsing or extracting is
 * printed to standard output (with its stack trace) and leads to a
 * {@code null} result. Throwables that are not {@code Exception}s are not
 * caught here.
 *
 * @return the extracted text, or {@code null} if an exception was reported
 * @throws IOException if closing the document or the input stream fails
 */
public String pdflookbook() throws IOException {
    String filePath = "C:\\Users\\janitha\\OneDrive\\Documents\\lookbookSS2016.pdf";
    InputStream inputStream = null;
    COSDocument cosDoc = null;
    PDDocument pdDoc = null;
    String statementPDF = null;
    try {
        inputStream = new FileInputStream(filePath);
        PDFParser parser = new PDFParser(inputStream);

        // Parse the stream and populate the COSDocument object.
        parser.parse();
        cosDoc = parser.getDocument();

        // Strips the text out of a document, ignoring formatting.
        PDFTextStripper pdfStripper = new PDFTextStripper();

        // In-memory representation of the PDF document.
        pdDoc = new PDDocument(cosDoc);
        int lastPage = pdDoc.getNumberOfPages() - 1;
        pdfStripper.setStartPage(3);
        pdfStripper.setEndPage(lastPage);
        assertEquals(41, lastPage);

        statementPDF = pdfStripper.getText(pdDoc);
    } catch (Exception e) {
        StringBuilder errorMessage = new StringBuilder("\nUnexpected Exception: ")
                .append(e.getClass())
                .append("\n")
                .append(e.getMessage());
        for (StackTraceElement trace : e.getStackTrace()) {
            errorMessage.append("\n\t").append(trace);
        }
        System.out.println(errorMessage);
    } finally {
        // Release the parsed document as well as the stream; the original
        // only closed the stream and leaked the document's resources.
        try {
            if (pdDoc != null) {
                pdDoc.close();
            } else if (cosDoc != null) {
                // The PDDocument wrapper was never created, so close the COS document directly.
                cosDoc.close();
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }
    return statementPDF;
}

From source file:com.cisco.iwe.services.util.EmailMonitor.java

/**
 * Scans an uploaded expense receipt in PDF format and extracts the text
 * embedded in it.
 *
 * @param fileDir path of the PDF file to scan
 * @return the extracted text of all pages; {@code null} if the path is not a
 *         regular file, the parser cannot be opened, or parsing/extraction
 *         fails (failures are reported on standard error)
 */
public String scanPDF(String fileDir) {
    File file = new File(fileDir);
    if (!file.isFile()) {
        System.err.println("File " + fileDir + " does not exist.");
        return null;
    }

    FileInputStream input = null;
    COSDocument cosDoc = null;
    PDDocument pdDoc = null;
    String parsedText = null;
    try {
        PDFParser parser;
        try {
            input = new FileInputStream(file);
            parser = new PDFParser(input);
        } catch (IOException e) {
            System.err.println("Unable to open PDF Parser. " + e.getMessage());
            return null;
        }

        try {
            parser.parse();
            cosDoc = parser.getDocument();
            PDFTextStripper pdfStripper = new PDFTextStripper();
            pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(1);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());
            parsedText = pdfStripper.getText(pdDoc);
        } catch (Exception e) {
            System.err.println("An exception occured in parsing the PDF Document." + e.getMessage());
        }
    } finally {
        try {
            if (cosDoc != null) {
                cosDoc.close();
            }
            if (pdDoc != null) {
                pdDoc.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        // The stream is owned by this method, so close it on every path,
        // including when the parser could not be constructed.
        if (input != null) {
            try {
                input.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return parsedText;
}

From source file:com.iqtb.validacion.util.LeerPDF.java

/**
 * Extracts the text of a PDF supplied as a byte array.
 *
 * <p>The parser, documents, stripper and extracted text are stored in
 * variables declared outside this method (presumably fields of the enclosing
 * class), so their values remain visible after the call.
 *
 * @param bytesPdf the raw bytes of the PDF document
 * @return the extracted text, or {@code null} if the parser cannot be opened
 *         or an {@link IOException} occurs while parsing or extracting
 */
public String pdftoText(byte[] bytesPdf) {

    InputStream in = new ByteArrayInputStream(bytesPdf);

    // Check that a parser can be opened on the input stream.
    try {
        parser = new PDFParser(in);
    } catch (IOException e) {
        logger.error("No se puede abrir. ERROR " + e);
        return null;
    }

    // Parse the PDF, extract its text, and always release the documents.
    try {
        parser.parse();
        cosDoc = parser.getDocument();
        pdfStripper = new PDFTextStripper();
        pdDoc = new PDDocument(cosDoc);
        parsedText = pdfStripper.getText(pdDoc);
    } catch (IOException e) {
        logger.error("Ocurri un error. ERROR " + e);
        return null;
    } finally {
        // Cleanup lives in finally so it also runs when an unchecked
        // exception escapes; previously it ran only on success or IOException.
        // A failure while closing is logged and does not discard text that
        // was already extracted.
        try {
            if (cosDoc != null) {
                cosDoc.close();
            }
            if (pdDoc != null) {
                pdDoc.close();
            }
        } catch (IOException e1) {
            logger.error("Ocurri un error. ERROR " + e1);
        }
    }

    return parsedText;
}

From source file:com.lanacion.adminsiteln.services.PdfIndexerService.PdfIndexerService.java

/**
 * Metodos privados para la indexacin/*  w  w  w  .j a  v  a2s .c om*/
 */
private String pdftoText(String fileName, int pagina) {

    PDFParser parser;
    String parsedText = null;
    ;
    PDFTextStripper pdfStripper = null;
    //pdfStripper.setStartPage(0);
    //pdfStripper.setEndPage(0);
    PDDocument pdDoc = null;
    COSDocument cosDoc = null;
    File file = new File(fileName);
    if (!file.isFile()) {
        System.err.println("File " + fileName + " does not exist.");
        return null;
    }
    try {
        parser = new PDFParser(new FileInputStream(file));
    } catch (IOException e) {
        System.err.println("Unable to open PDF Parser. " + e.getMessage());
        return null;
    }
    try {
        parser.parse();
        cosDoc = parser.getDocument();
        pdfStripper = new PDFTextStripper();
        pdDoc = new PDDocument(cosDoc);
        pdfStripper.setStartPage(pagina);
        pdfStripper.setEndPage(pagina);
        parsedText = pdfStripper.getText(pdDoc);
    } catch (Exception e) {
        System.err.println("An exception occured in parsing the PDF Document." + e.getMessage());
    } finally {
        try {
            if (cosDoc != null) {
                cosDoc.close();
            }
            if (pdDoc != null) {
                pdDoc.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return parsedText;

}

From source file:com.lanacion.adminsiteln.services.PdfIndexerService.PdfIndexerService.java

/**
 * Counts the pages of the PDF file at the given path.
 *
 * @param fileName path of the PDF file
 * @return the number of pages; {@code 0} if the path is not a regular file,
 *         the parser cannot be opened, or parsing fails (failures are
 *         reported on standard error)
 */
private int pdfgetPages(String fileName) {
    File file = new File(fileName);
    if (!file.isFile()) {
        System.err.println("File " + fileName + " does not exist.");
        return 0;
    }

    FileInputStream input = null;
    COSDocument cosDoc = null;
    PDDocument pdDoc = null;
    int numero_paginas = 0;
    try {
        PDFParser parser;
        try {
            input = new FileInputStream(file);
            parser = new PDFParser(input);
        } catch (IOException e) {
            System.err.println("Unable to open PDF Parser. " + e.getMessage());
            return 0;
        }

        try {
            parser.parse();
            cosDoc = parser.getDocument();
            // No text stripper is needed here: only the page count is read.
            pdDoc = new PDDocument(cosDoc);
            numero_paginas = pdDoc.getNumberOfPages();
        } catch (Exception e) {
            System.err.println("An exception occured in parsing the PDF Document." + e.getMessage());
        }
    } finally {
        try {
            if (cosDoc != null) {
                cosDoc.close();
            }
            if (pdDoc != null) {
                pdDoc.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        // The stream is owned by this method, so close it on every path,
        // including when the parser could not be constructed.
        if (input != null) {
            try {
                input.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return numero_paginas;
}

From source file:com.pluszero.rostertogo.PdfManager.java

/**
 * Parses the given PDF file and stores the text of all its pages (page 1
 * through the document's page count) in {@code text}.
 *
 * <p>The parser, documents, stripper and result are kept in variables
 * declared outside this method (presumably fields of the enclosing class).
 *
 * @param file the PDF file to read
 * @throws IOException if parsing, text extraction or closing the document fails
 */
private void ToText(File file) throws IOException {
    this.pdfStripper = null;
    this.pdDoc = null;
    this.cosDoc = null;

    parser = new PDFParser(file); // for PdfBox 1.8, as 2.0 not yet supported in Android

    try {
        parser.parse();
        cosDoc = parser.getDocument();
        pdfStripper = new PDFTextStripper();
        pdDoc = new PDDocument(cosDoc);
        pdfStripper.setStartPage(1);
        pdfStripper.setEndPage(pdDoc.getNumberOfPages());
        text = pdfStripper.getText(pdDoc);
    } finally {
        // Close the document even when extraction throws; previously it was
        // closed only on the success path.
        if (pdDoc != null) {
            pdDoc.close();
        } else if (cosDoc != null) {
            // The PDDocument wrapper was never created, so close the COS document directly.
            cosDoc.close();
        }
    }
}

From source file:com.pluszero.rostertogo.PdfManager.java

/**
 * Parses a PDF from the given input stream and stores the text of all its
 * pages (page 1 through the document's page count) in {@code text}.
 *
 * <p>The parser, documents, stripper and result are kept in variables
 * declared outside this method (presumably fields of the enclosing class).
 * This method does not close {@code is} itself.
 *
 * @param is the stream to read the PDF from
 * @throws IOException if parsing, text extraction or closing the document fails
 */
private void ToText(InputStream is) throws IOException {
    this.pdfStripper = null;
    this.pdDoc = null;
    this.cosDoc = null;

    parser = new PDFParser(is); // for PdfBox 1.8 as 2.0 not yet supported in Android

    try {
        parser.parse();
        cosDoc = parser.getDocument();
        pdfStripper = new PDFTextStripper();
        pdDoc = new PDDocument(cosDoc);
        pdfStripper.setStartPage(1);
        pdfStripper.setEndPage(pdDoc.getNumberOfPages());
        text = pdfStripper.getText(pdDoc);
    } finally {
        // Close the document even when extraction throws; previously it was
        // closed only on the success path.
        if (pdDoc != null) {
            pdDoc.close();
        } else if (cosDoc != null) {
            // The PDDocument wrapper was never created, so close the COS document directly.
            cosDoc.close();
        }
    }
}