Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.load.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:drakkar.mast.retrieval.parser.PdfParser.java

/**
 * Loads a PDF file and extracts its metadata and text into this parser.
 *
 * <p>The document is only processed when it is not encrypted, its access
 * permissions allow content extraction and it has at least one page. In that
 * case the page count is stored and, if the document has an information
 * dictionary, the title, author, page count, creation/modification dates and
 * the complete text are stored as well.
 *
 * <p>The loaded document is closed exactly once, in the {@code finally}
 * block, whatever path is taken.
 *
 * @param f the PDF file to analyze
 * @return {@code true} if the document passed the checks above and was
 *         processed; {@code false} if it was rejected or an exception occurred
 */
public boolean analyzePdfDocument(File f) {

    try {
        pdoc = PDDocument.load(f);

        if (pdoc.isEncrypted() || !pdoc.getCurrentAccessPermission().canExtractContent()
                || pdoc.getNumberOfPages() == 0) {
            // Note: this message is also emitted when extraction is not
            // permitted or the document has no pages.
            OutputMonitor.printLine("Encrypted document.", OutputMonitor.INFORMATION_MESSAGE);
            return false;
        }

        this.numberPages = pdoc.getNumberOfPages();
        pdfText = new PDFTextStripper();
        swriter = new StringWriter();

        // Document metadata
        pinf = pdoc.getDocumentInformation();
        if (pinf == null) {
            OutputMonitor.printLine("The document does not have available information.",
                    OutputMonitor.INFORMATION_MESSAGE);
        } else {
            setTitle(pinf.getTitle());
            setAuthor(pinf.getAuthor());
            setNumberpages(pdoc.getNumberOfPages());
            setCalCreation(pinf.getCreationDate());
            setCalModification(pinf.getModificationDate());

            // Text is only extracted when metadata is available.
            pdfText.writeText(pdoc, swriter);
            allContent = swriter.getBuffer().toString();
        }

        // No explicit close here: pdoc is closed in the finally block, and a
        // StringWriter holds no system resources.
        return true;

    } catch (Exception ex) {
        OutputMonitor.printStream("", ex);
        return false;
    } finally {
        if (pdoc != null) {
            try {
                pdoc.close();
            } catch (IOException ex) {
                OutputMonitor.printStream("IO", ex);
            }
        }
    }
}

From source file:drakkar.mast.retrieval.parser.PdfParser.java

/**
 * Divide el contenido del pdf de 100 en 100 pginas de acuerdo al nmero
 * total para el motor de bsqueda Minion
 *
 * @param f/*  w  w  w  .  j a v a2  s  .co m*/
 * @param indexer indexador de Minion
 * @throws IOException
 */
public void divideTextforMinion(File f, SimpleIndexer indexer) throws IOException {
    pdoc = PDDocument.load(f);
    this.numberPages = pdoc.getNumberOfPages();

    if (!pdoc.isEncrypted() && pdoc.getCurrentAccessPermission().canExtractContent()
            && pdoc.getNumberOfPages() != 0) {

        String fragment = null;
        int start = 0, end = 0;
        int count = 0;

        if (this.numberPages > 100) {

            for (int i = 0; i < numberPages; i = i + 100) {
                count++; //para el key del document
                swriter = new StringWriter();
                pdfText = new PDFTextStripper();

                start = i;
                end = 99 + i;

                if (end > numberPages) {
                    end = numberPages;
                }

                pdfText.setStartPage(start);
                pdfText.setEndPage(end);
                pdfText.writeText(pdoc, swriter);

                fragment = swriter.getBuffer().toString();

                DocumentMinion docm = new DocumentMinion(indexer, f.getPath() + count);
                docm.addField("filepath", f.getAbsolutePath());
                docm.addField("name", f.getName());
                docm.addField("book", fragment);
                docm.closeDocument();

                swriter.close();
            }

            pdoc.close();

        } else { //si tiene menos de 100 pginas toma todo el texto como est

            swriter = new StringWriter();
            pdfText = new PDFTextStripper();

            pdfText.writeText(pdoc, swriter);
            fragment = swriter.getBuffer().toString();
            DocumentMinion docm = new DocumentMinion(indexer, f.getPath());
            docm.addField("filepath", f.getAbsolutePath());
            docm.addField("name", f.getName());
            docm.addField("book", fragment);
            docm.closeDocument();

            swriter.close();
            pdoc.close();
        }
    } else {
        OutputMonitor.printLine("Encrypted book.", OutputMonitor.INFORMATION_MESSAGE);
    }

    if (pdoc != null) {
        try {
            pdoc.close();
        } catch (IOException ex) {
            OutputMonitor.printStream("", ex);
        }
    }

}

From source file:drakkar.mast.retrieval.parser.PdfParser.java

/**
 * Splits the text of a PDF into chunks of up to 100 pages and adds each chunk
 * to the given Lucene documents.
 *
 * <p>For every chunk, {@code doc} receives the fields {@code filepath},
 * {@code name} and {@code book}; {@code doccs} receives {@code filepathcs},
 * {@code namecs} and {@code bookcs}; and {@code doclsi}, when not
 * {@code null}, receives {@code book}. Nothing is added when the document is
 * encrypted, does not allow content extraction or has no pages.
 *
 * <p>Page numbers passed to {@code PDFTextStripper} are 1-based, so chunks
 * cover pages 1-100, 101-200, and so on; the last chunk ends at the final page.
 *
 * @param f the PDF file to index
 * @param doccs document receiving the {@code *cs} fields
 * @param doc document receiving the {@code filepath}, {@code name} and {@code book} fields
 * @param doclsi optional document receiving only the {@code book} field; may be {@code null}
 * @throws IOException if the file cannot be loaded or its text cannot be extracted
 */
public void divideTextforLucene(File f, DocumentLucene doccs, DocumentLucene doc, DocumentLucene doclsi)
        throws IOException {
    pdoc = PDDocument.load(f);
    try {
        this.numberPages = pdoc.getNumberOfPages();

        if (pdoc.isEncrypted() || !pdoc.getCurrentAccessPermission().canExtractContent()
                || pdoc.getNumberOfPages() == 0) {
            // Note: this message is also emitted when extraction is not
            // permitted or the document has no pages.
            OutputMonitor.printLine("Encrypted book.", OutputMonitor.INFORMATION_MESSAGE);
            return;
        }

        final int pagesPerChunk = 100;

        // A document of up to 100 pages yields exactly one chunk with all its text.
        for (int first = 1; first <= numberPages; first += pagesPerChunk) {
            int last = Math.min(first + pagesPerChunk - 1, numberPages);

            swriter = new StringWriter();
            pdfText = new PDFTextStripper();
            pdfText.setStartPage(first);
            pdfText.setEndPage(last);
            pdfText.writeText(pdoc, swriter);

            String fragment = swriter.getBuffer().toString();

            // NOTE(review): path and name are added again for every chunk, as in
            // the original code - confirm that repeated fields are intended.
            doc.addField("filepath", f.getCanonicalPath());
            doccs.addField("filepathcs", f.getCanonicalPath());
            doc.addField("name", f.getName());
            doccs.addField("namecs", f.getName());
            doc.addField("book", fragment);
            doccs.addField("bookcs", fragment);

            if (doclsi != null) {
                doclsi.addField("book", fragment);
            }
        }
    } finally {
        // Close once, on success and on failure alike.
        try {
            pdoc.close();
        } catch (IOException ex) {
            OutputMonitor.printStream("", ex);
        }
    }
}

From source file:editorframework.PDDocumentAdapter.java

public PDDocumentAdapter(String fileName) {
    try {//from  w w  w .  j  ava 2s.  c  om

        this.pdPanel = new PDFPagePanel();
        pdDocument = PDDocument.load(fileName);

        allPages = pdDocument.getDocumentCatalog().getAllPages();
    } catch (IOException ex) {
        Logger.getLogger(PDDocumentAdapter.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:editorframework.PDDocumentAdapter.java

/**
 * Loads the PDF at the given path into this adapter and caches its page list.
 *
 * <p>The file stream opened for loading is always closed before this method
 * returns. An {@link IOException} is logged at SEVERE level and reported
 * through the return value.
 *
 * @param fileName path of the PDF file to open
 * @return {@code true} if the document was loaded and its pages were read;
 *         {@code false} if an {@link IOException} occurred
 */
public boolean open(String fileName) {

    try {
        FileInputStream input = new FileInputStream(fileName);
        try {
            pdDocument = PDDocument.load(input);
        } finally {
            // Release the file handle here rather than relying on the
            // library to close the stream.
            input.close();
        }

        allPages = pdDocument.getDocumentCatalog().getAllPages();
        return true;
    } catch (IOException ex) {
        Logger.getLogger(PDDocumentAdapter.class.getName()).log(Level.SEVERE, null, ex);
    }
    return false;
}

From source file:editorframework.pdfbox.OpenTextPDF.java

/**
 * Loads the PDF file with the given name.
 *
 * @param pdfFilename path of the PDF file to load
 * @return the loaded document, or {@code null} if an {@link IOException}
 *         occurred (the exception is logged at SEVERE level)
 */
public PDDocument openPDF(String pdfFilename) {
    try {
        return PDDocument.load(new File(pdfFilename));
    } catch (IOException loadFailure) {
        Logger.getLogger(OpenTextPDFAdapter.class.getName()).log(Level.SEVERE, null, loadFailure);
        return null;
    }
}

From source file:editorframework.pdfbox.PDFBoxDocumentAdaptee.java

/**
 * Loads the PDF at the given path and, if it is encrypted, attempts to
 * decrypt it with an empty password.
 *
 * <p>A {@code CryptographyException} raised by the decryption attempt is
 * printed to standard error and otherwise ignored; the document is returned
 * either way.
 *
 * @param filename path of the PDF file to load
 * @return the loaded document
 * @throws IOException if loading or decrypting raises an I/O error
 */
private static PDDocument parseDocument(String filename) throws IOException {
    final PDDocument loaded = PDDocument.load(filename);
    if (!loaded.isEncrypted()) {
        return loaded;
    }

    try {
        loaded.decrypt("");
    } catch (org.apache.pdfbox.exceptions.CryptographyException decryptFailure) {
        decryptFailure.printStackTrace();
    }
    return loaded;
}

From source file:editorframework.pdfbox.testes.MyPDFBox.java

/**
 * Builds a frame, loads {@code ./simple.pdf}, shows one of its pages in this
 * component and registers a listener that closes the document when the
 * window is closing.
 *
 * <p>An {@link IOException} while loading is printed to standard output.
 */
private void init() {
    JFrame frame = new JFrame();
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    try {
        final PDDocument document = PDDocument.load(new File("./simple.pdf"));
        List<PDPage> pages = document.getDocumentCatalog().getAllPages();

        // NOTE(review): index 1 selects the second page of the list, so a
        // one-page PDF fails here - confirm this is intended.
        PDPage shownPage = pages.get(1);
        setPage(shownPage);

        frame.setBackground(Color.DARK_GRAY);
        setLayout(new FlowLayout());
        frame.add(this);
        frame.setBounds(40, 40, getWidth() + 100, getHeight() + 50);
        frame.setVisible(true);

        // Close the document when the user closes the window.
        WindowAdapter closeDocumentOnExit = new WindowAdapter() {
            @Override
            public void windowClosing(WindowEvent event) {
                try {
                    document.close();
                } catch (IOException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        };
        frame.addWindowListener(closeDocumentOnExit);
    } catch (IOException loadFailure) {
        System.out.println(loadFailure.toString());
    }
}

From source file:editorframework.pdfbox.testes.PDFReaderAdaptor.java

License:Apache License

/**
 * Loads a PDF from the given stream and, if it is encrypted, attempts to
 * decrypt it with an empty password.
 *
 * <p>A {@code CryptographyException} raised by the decryption attempt is
 * printed to standard error and otherwise ignored; the document is returned
 * either way.
 *
 * @param input stream to read the PDF from
 * @return the loaded document
 * @throws IOException if loading or decrypting raises an I/O error
 */
private static PDDocument parseDocument(InputStream input) throws IOException {
    final PDDocument loaded = PDDocument.load(input);
    if (!loaded.isEncrypted()) {
        return loaded;
    }

    try {
        loaded.decrypt("");
    } catch (org.apache.pdfbox.exceptions.CryptographyException decryptFailure) {
        decryptFailure.printStackTrace();
    }
    return loaded;
}

From source file:editorframework.pdfbox.testes.TestesComPDFBox.java

/**
 * Renders the first page of {@code ./simple.pdf} to an image and writes it
 * to {@code image.jpg} in JPEG format.
 *
 * <p>The document is closed before the method returns, including when
 * rendering or writing fails.
 *
 * @throws IOException if the PDF cannot be loaded, rendered or written
 */
public static void transformarPDFemImagem() throws IOException {

    // Option 1: convert the PDF page into a BufferedImage.
    PDDocument document = PDDocument.load(new File("./simple.pdf"));
    try {
        List<PDPage> allPages = document.getDocumentCatalog().getAllPages();

        PDPage firstPage = allPages.get(0);
        BufferedImage bi = firstPage.convertToImage();
        File outputfile = new File("image.jpg");
        ImageIO.write(bi, "jpg", outputfile);
    } finally {
        // Release the resources held by the loaded document.
        document.close();
    }

}