Example usage for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument load.

Prototype

public static PDDocument load(byte[] input) throws IOException

Source Link

Document

Parses a PDF.

Usage

From source file:editorframework.pdfbox.testes.TestesComPDFBox.java

/**
 * Opens {@code ./simple.pdf} and shows its first page in a {@link JFrame} through a
 * {@code PDFPagePanel}.
 *
 * @throws IOException if the PDF cannot be loaded or the page panel cannot be created
 */
public static void exibirPDFEmUmJFrame() throws IOException {
    PDDocument pdf = PDDocument.load(new File("./simple.pdf"));
    List<PDPage> pages = pdf.getDocumentCatalog().getAllPages();
    PDPage firstPage = pages.get(0);

    JFrame frame = new JFrame();
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

    PDFPagePanel pagePanel = new PDFPagePanel();
    pagePanel.setPage(firstPage);
    frame.add(pagePanel);

    // Size the frame from the panel's current dimensions.
    int panelWidth = pagePanel.getWidth();
    int panelHeight = pagePanel.getHeight();
    frame.setBounds(40, 40, panelWidth, panelHeight);
    frame.setVisible(true);
    pagePanel.validate();

    // NOTE(review): the document is deliberately left open here, presumably because the
    // panel still renders from it while the frame is showing -- confirm before closing it.
}

From source file:editorframework.pdfbox.testes.TestesComPDFBox.java

/**
 * Renders the first page of {@code ./simple.pdf} to an image and shows that image inside a
 * scrollable {@link JFrame}.
 * <p>
 * The document is closed as soon as the page has been rendered: only the resulting
 * {@link BufferedImage} is used afterwards.
 *
 * @throws IOException if the PDF cannot be loaded, rendered or closed
 */
public static void exibirPDFemImagemNoJComponent() throws IOException {
    BufferedImage pageImage;
    PDDocument document = PDDocument.load(new File("./simple.pdf"));
    try {
        List<PDPage> allPages = document.getDocumentCatalog().getAllPages();
        pageImage = allPages.get(0).convertToImage();
    } finally {
        // Release the document's resources even when rendering fails.
        document.close();
    }

    JFrame testFrame = new JFrame();
    testFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

    JScrollPane scrollPane = new JScrollPane();
    JLabel picLabel = new JLabel(new ImageIcon(pageImage));
    scrollPane.getViewport().add(picLabel);
    testFrame.add(scrollPane);

    // Size the frame to its content before showing it, so it never appears at zero size.
    testFrame.pack();
    testFrame.setVisible(true);
}

From source file:edu.harvard.mcz.precapture.encoder.PrintingUtility.java

License:Open Source License

/**
 * Send the generated PDF file to a printer.  (file to print is from LabelEncoder.getPrintFile().
 * /*from w  w w .j  a v a2  s  .c om*/
 * @param printDefinition Used to find paper size
 * @param paperWidthPoints
 * @param paperHeightPoints
 * @throws PrintFailedException if printing fails for any reason.
 */
public static void sendPDFToPrinter(LabelDefinitionType printDefinition, int paperWidthPoints,
        int paperHeightPoints) throws PrintFailedException {
    try {

        // send generated PDF to printer.

        FileInputStream pdfInputStream = new FileInputStream(PreCaptureSingleton.getInstance().getProperties()
                .getProperties().getProperty(PreCaptureProperties.KEY_LABELPRINTFILE));

        DocFlavor psInFormat = DocFlavor.INPUT_STREAM.PDF;

        // No printers listed... Don't Try autosense instead of PDF
        // DocFlavor psInFormat = DocFlavor.INPUT_STREAM.AUTOSENSE;
        // Ends up listing printers that can't take the PDF,
        // Need instead to fail over to using a pdf printing library 
        // and having the pdf printing library pull up the printer dialog.

        Doc myDoc = new SimpleDoc(pdfInputStream, psInFormat, null);
        PrintRequestAttributeSet atset = new HashPrintRequestAttributeSet();
        atset.add(new Copies(1));
        // Set paper size
        if (paperWidthPoints == 612 && paperHeightPoints == 792) {
            atset.add(MediaSizeName.NA_LETTER);
        } else {
            float x = printDefinition.getPaperWidth();
            float y = printDefinition.getPaperHeight();
            if (printDefinition.getUnits().toString().toLowerCase().equals("inches")) {
                MediaSizeName mediaSizeName = MediaSize.findMedia(x, y, Size2DSyntax.INCH);
                if (mediaSizeName == null) {
                    // TODO: Handle non-standard paper sizes.  The following doesn't provide
                    // what is needed.
                    atset.add(new MediaPrintableArea(0, 0, x, y, MediaPrintableArea.INCH));
                } else {
                    atset.add(mediaSizeName);
                }
            }
            if (printDefinition.getUnits().toString().toLowerCase().equals("cm")) {
                x = x * 10f;
                y = y * 10f;
                atset.add(MediaSize.findMedia(x, y, Size2DSyntax.INCH));
            }
            if (printDefinition.getUnits().toString().toLowerCase().equals("points")) {
                x = x / 72f;
                y = y / 72f;
                atset.add(MediaSize.findMedia(x, y, Size2DSyntax.INCH));
            }
        }
        atset.add(Sides.ONE_SIDED);
        PrintService[] services = PrintServiceLookup.lookupPrintServices(psInFormat, atset);
        log.debug("Number of matching printing services =  " + services.length);
        boolean printed = false;
        if (services.length == 0) {
            log.debug("No PDF printing services found.");
            log.error("Failing over to print using a pdf printing library");

            try {
                pdfInputStream.close();

                pdfInputStream = new FileInputStream(PreCaptureSingleton.getInstance().getProperties()
                        .getProperties().getProperty(PreCaptureProperties.KEY_LABELPRINTFILE));

                // trying pdfbox instead of pdf-renderer
                PDDocument pdfDocument = PDDocument.load(pdfInputStream);
                pdfDocument.print();
                pdfDocument.close();
                printed = true;
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        } else {
            log.debug("Available printing services " + services.length);
            for (int i = 0; i < services.length; i++) {
                log.debug(services[i].getName());
            }
            Object selectedService = JOptionPane.showInputDialog(null, "Send labels to which printer?", "Input",
                    JOptionPane.INFORMATION_MESSAGE, null, services, services[0]);
            if (selectedService != null) {
                DocPrintJob job = ((PrintService) selectedService).createPrintJob();
                log.debug("Printing to " + ((PrintService) selectedService).getName());
                try {
                    job.print(myDoc, atset);
                    printed = true;
                } catch (PrintException pe) {
                    log.error("Printing Error: " + pe.getMessage());
                    if (pe.getClass().getName().equals("sun.print.PrintJobFlavorException")) {

                        log.error("Failing over to print using a pdf printing library");

                        try {
                            pdfInputStream.close();

                            pdfInputStream = new FileInputStream(
                                    PreCaptureSingleton.getInstance().getProperties().getProperties()
                                            .getProperty(PreCaptureProperties.KEY_LABELPRINTFILE));

                            // Send PDF to printer using PDFBox PDF printing support.
                            PDDocument pdfDocument = PDDocument.load(pdfInputStream);
                            pdfDocument.print();
                            pdfDocument.close();
                            printed = true;
                            // Note, can't get pdf-renderer to print without re-scaling and shrinking the document.

                        } catch (Exception e) {
                            log.error(e.getMessage(), e);
                        }
                    }
                }
            }
            pdfInputStream.close();
        }
        if (!printed) {
            log.error("No available printing services");
            throw new PrintFailedException("Unable to locate or use a printer, print the file '"
                    + PreCaptureSingleton.getInstance().getProperties().getProperties()
                            .getProperty(PreCaptureProperties.KEY_LABELPRINTFILE)
                    + "'");
        }
    } catch (FileNotFoundException e) {
        log.error(e.getMessage());
        throw new PrintFailedException("Unable to find PDF file to print " + e.getMessage());
    } catch (Exception e) {
        log.error(e.getMessage());
        if (e != null && e.getCause() != null) {
            log.error(e.getCause().getMessage());
        }
        throw new PrintFailedException("No labels to print." + e.getMessage());
    }
}

From source file:edu.ist.psu.sagnik.research.pdfbox2playground.javatest.DrawPrintTextLocations.java

License:Apache License

/**
 * Loads the PDF found at {@code new DataLocation().pdLoc} and runs a
 * {@code DrawPrintTextLocations} stripper, sorted by position, over every page.
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    PDDocument pdf = PDDocument.load(new File(new DataLocation().pdLoc));
    try {
        DrawPrintTextLocations locationStripper = new DrawPrintTextLocations(pdf, new DataLocation().pdLoc);
        locationStripper.setSortByPosition(true);

        int pageIndex = 0;
        while (pageIndex < pdf.getNumberOfPages()) {
            locationStripper.stripPage(pageIndex);
            pageIndex++;
        }
    } finally {
        pdf.close();
    }
}

From source file:edu.uci.ics.crawler4j.parser.Parser.java

License:Apache License

/**
 * Parses the page's content bytes as a PDF and stores the extracted text on the page as
 * its parse data.
 *
 * @param page the page whose content data holds the PDF bytes
 * @throws IOException if the PDF cannot be loaded, read or closed
 */
private void treatPDFContentType(Page page) throws IOException {
    PDDocument doc = PDDocument.load(new ByteArrayInputStream(page.getContentData()));
    try {
        page.setParseData(new PDFParseData(pdfTextStripper.getText(doc)));
    } finally {
        // Close the document even when text extraction fails.
        doc.close();
    }
}

From source file:edu.umsl.runPDF.java

/**
 * Prompts on standard output for a PDF location, reads one token from {@code sc}, and
 * stores the text extracted from that file in {@code content}.
 *
 * @throws IOException if the PDF cannot be loaded, read or closed
 */
public void readPDF() throws IOException {
    // NOTE(review): the prompt asks the user to omit the extension, but the entered value is
    // used as the file path unchanged -- confirm whether ".pdf" should be appended.
    System.out.println("Please enter PDF file location, omit extension: ");
    String input = sc.next();
    pdfFile = new File(input);
    PDDocument pdDocument = PDDocument.load(pdfFile);
    try {
        PDFTextStripper strip = new PDFTextStripper();
        content = strip.getText(pdDocument);
    } finally {
        // The document was previously never closed; release it once the text is extracted.
        pdDocument.close();
    }
    System.out.println("PDF Read");
}

From source file:edu.ur.ir.index.DefaultPdfTextExtractor.java

License:Apache License

/**
 * Extract text from the PDF document.
 * <p>
 * Returns {@code null} when the file is too large or empty, when the document is
 * encrypted, or when the extracted text is blank.
 *
 * @param f the PDF file to read
 * @return the extracted text, or {@code null} when none is available
 * @throws Exception if loading the document or extracting its text fails; the failure is
 *         logged before being rethrown
 *
 * @see edu.ur.ir.index.FileTextExtractor#getText(java.io.File)
 */
public String getText(File f) throws Exception {
    // don't even try if the file is too large (or has no content)
    if (isFileTooLarge(f) || f.length() <= 0L) {
        return null;
    }

    PDDocument pdDoc = null;
    try {
        pdDoc = PDDocument.load(f);

        // don't do anything with encrypted docs
        if (pdDoc.isEncrypted()) {
            log.error("pdf " + f.getAbsolutePath() + " is encrypted and "
                    + " cannot be decrypted because we don't have a password");
            return null;
        }

        String extracted = new PDFTextStripper().getText(pdDoc);
        boolean hasContent = extracted != null && !extracted.trim().isEmpty();
        return hasContent ? extracted : null;
    } catch (OutOfMemoryError oome) {
        log.error("could not extract text", oome);
        throw oome;
    } catch (Exception e) {
        log.error("could not extract text with other error", e);
        throw e;
    } finally {
        // runs on every path, including the early returns above
        closePDDocument(pdDoc);
    }
}

From source file:edu.uwm.jiaoduan.lab.ExtractTextByArea.java

License:Apache License

/**
 * This will print the document's text in a certain area of its first page.
 * <p>
 * The PDF path is taken from the single command-line argument; when no argument is
 * supplied, {@code test.pdf} is used.
 *
 * @param args The command line arguments.
 *
 * @throws Exception If there is an error parsing the document.
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        // Previously this default unconditionally replaced the arguments, which made the
        // command-line path and the usage() branch unreachable.
        args = new String[] { "test.pdf" };
    }
    if (args.length != 1) {
        usage();
    } else {
        PDDocument document = null;
        try {
            document = PDDocument.load(args[0]);
            if (document.isEncrypted()) {
                try {
                    // Try the empty password before giving up.
                    document.decrypt("");
                } catch (InvalidPasswordException e) {
                    System.err.println("Error: Document is encrypted with a password.");
                    System.exit(1);
                }
            }
            PDFTextStripperByArea stripper = new PDFTextStripperByArea();
            stripper.setSortByPosition(true);

            List<?> allPages = document.getDocumentCatalog().getAllPages();
            PDPage firstPage = (PDPage) allPages.get(0);

            double pageHeight = firstPage.getMediaBox().getHeight();

            // convert xfdf coordinate to rectangle
            // (a previously used sample was 125.824906,672.39502,390.577109,694.679017)
            double[] coords = new double[] { 58.50615, 500.847504, 302.919073, 552.419312 };

            Rectangle2D.Double rect = new Rectangle2D.Double();
            rect.height = coords[3] - coords[1];
            rect.width = coords[2] - coords[0];
            rect.x = coords[0];
            // NOTE(review): presumably flips the y axis from a bottom-left origin to the
            // top-left origin the stripper expects -- confirm against the coordinate source.
            rect.y = pageHeight - coords[1] - rect.height;
            System.out.println(rect);

            stripper.addRegion("class1", rect);
            stripper.extractRegions(firstPage);

            System.out.println("Text in the area:" + rect);
            System.out.println(stripper.getTextForRegion("class1"));

        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}

From source file:edworld.pdfreader4humans.impl.MainPDFComponentLocatorTest.java

License:Apache License

/**
 * Loads the {@code /testcase1/input.pdf} classpath resource, creates the locator under
 * test, and keeps a reference to the document's first page.
 *
 * @throws Exception if the test PDF cannot be loaded
 */
@Before
public void setUp() throws Exception {
    java.net.URL inputPdf = getClass().getResource("/testcase1/input.pdf");
    doc = PDDocument.load(inputPdf);
    locator = new MainPDFComponentLocator();

    Object firstPage = doc.getDocumentCatalog().getAllPages().get(0);
    page1 = (PDPage) firstPage;
}

From source file:edworld.pdfreader4humans.PDFReader.java

License:Apache License

/**
 * Creates a reader for the PDF at the given location: the document is loaded, all of its
 * pages are read, and the document is closed again before the constructor returns.
 * <p>
 * This class is responsible for reading PDF contents in the same order a human would read them.
 *
 * @param url
 *            the PDF's location
 * @param componentLocator
 *            an instance of a PDFComponentLocator subclass such as MainPDFComponentLocator
 * @param boxDetector
 *            an instance of a BoxDetector subclass such as MainBoxDetector
 * @param marginDetector
 *            an instance of a MarginDetector subclass such as MainMarginDetector
 * @throws IOException
 *             if the PDF cannot be loaded, read or closed
 */
public PDFReader(URL url, PDFComponentLocator componentLocator, BoxDetector boxDetector,
        MarginDetector marginDetector) throws IOException {
    this.url = url;
    PDDocument doc = PDDocument.load(url);
    try {
        readAllPages(doc, componentLocator, boxDetector, marginDetector);
    } finally {
        // Close the document whether or not reading succeeded.
        doc.close();
    }
}