Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of the org.apache.pdfbox.pdmodel.PDDocument.load method.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

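Parses a PDF supplied as a byte array and returns the resulting PDDocument. The examples below also call overloads of load that accept a File, an InputStream, a URL, or a file-name String.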

Usage

From source file:editorframework.pdfbox.testes.TestesComPDFBox.java

/**
 * Loads {@code ./simple.pdf} and shows its first page in a {@link JFrame} through a
 * {@code PDFPagePanel}.
 *
 * @throws IOException if the PDF cannot be loaded
 */
public static void exibirPDFEmUmJFrame() throws IOException {
    PDDocument sourceDocument = PDDocument.load(new File("./simple.pdf"));
    List<PDPage> pages = sourceDocument.getDocumentCatalog().getAllPages();
    PDPage firstPage = pages.get(0);

    JFrame window = new JFrame();
    window.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

    PDFPagePanel pagePanel = new PDFPagePanel();
    pagePanel.setPage(firstPage);
    window.add(pagePanel);

    // The frame takes its size from the panel once the page has been assigned to it.
    int frameWidth = pagePanel.getWidth();
    int frameHeight = pagePanel.getHeight();
    window.setBounds(40, 40, frameWidth, frameHeight);
    window.setVisible(true);
    pagePanel.validate();

    // NOTE(review): the document is deliberately not closed here; presumably the panel still
    // draws from it after this method returns - confirm before adding a close() call.
}

From source file:editorframework.pdfbox.testes.TestesComPDFBox.java

/**
 * Renders the first page of {@code ./simple.pdf} to an image and shows that image inside a
 * scrollable {@link JFrame}.
 *
 * @throws IOException if the PDF cannot be loaded or the page cannot be converted to an image
 */
public static void exibirPDFemImagemNoJComponent() throws IOException {
    BufferedImage pageImage;
    PDDocument document = PDDocument.load(new File("./simple.pdf"));
    try {
        List<PDPage> allPages = document.getDocumentCatalog().getAllPages();
        PDPage firstPage = allPages.get(0);
        pageImage = firstPage.convertToImage();
    } finally {
        // Only the rendered image is used from here on, so the document can be released
        // immediately, including when rendering fails.
        document.close();
    }

    JFrame testFrame = new JFrame();
    testFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

    JScrollPane scrollPane = new JScrollPane();
    JLabel picLabel = new JLabel(new ImageIcon(pageImage));
    scrollPane.getViewport().add(picLabel);
    testFrame.add(scrollPane);

    // Size the frame to its content before showing it, so it never appears at zero size.
    testFrame.pack();
    testFrame.setVisible(true);
}

From source file:edu.harvard.mcz.precapture.encoder.PrintingUtility.java

License:Open Source License

/**
 * Send the generated PDF file to a printer.  (file to print is from LabelEncoder.getPrintFile().
 * /*from w  w w .j  a v a2  s  .c om*/
 * @param printDefinition Used to find paper size
 * @param paperWidthPoints
 * @param paperHeightPoints
 * @throws PrintFailedException if printing fails for any reason.
 */
public static void sendPDFToPrinter(LabelDefinitionType printDefinition, int paperWidthPoints,
        int paperHeightPoints) throws PrintFailedException {
    try {

        // send generated PDF to printer.

        FileInputStream pdfInputStream = new FileInputStream(PreCaptureSingleton.getInstance().getProperties()
                .getProperties().getProperty(PreCaptureProperties.KEY_LABELPRINTFILE));

        DocFlavor psInFormat = DocFlavor.INPUT_STREAM.PDF;

        // No printers listed... Don't Try autosense instead of PDF
        // DocFlavor psInFormat = DocFlavor.INPUT_STREAM.AUTOSENSE;
        // Ends up listing printers that can't take the PDF,
        // Need instead to fail over to using a pdf printing library 
        // and having the pdf printing library pull up the printer dialog.

        Doc myDoc = new SimpleDoc(pdfInputStream, psInFormat, null);
        PrintRequestAttributeSet atset = new HashPrintRequestAttributeSet();
        atset.add(new Copies(1));
        // Set paper size
        if (paperWidthPoints == 612 && paperHeightPoints == 792) {
            atset.add(MediaSizeName.NA_LETTER);
        } else {
            float x = printDefinition.getPaperWidth();
            float y = printDefinition.getPaperHeight();
            if (printDefinition.getUnits().toString().toLowerCase().equals("inches")) {
                MediaSizeName mediaSizeName = MediaSize.findMedia(x, y, Size2DSyntax.INCH);
                if (mediaSizeName == null) {
                    // TODO: Handle non-standard paper sizes.  The following doesn't provide
                    // what is needed.
                    atset.add(new MediaPrintableArea(0, 0, x, y, MediaPrintableArea.INCH));
                } else {
                    atset.add(mediaSizeName);
                }
            }
            if (printDefinition.getUnits().toString().toLowerCase().equals("cm")) {
                x = x * 10f;
                y = y * 10f;
                atset.add(MediaSize.findMedia(x, y, Size2DSyntax.INCH));
            }
            if (printDefinition.getUnits().toString().toLowerCase().equals("points")) {
                x = x / 72f;
                y = y / 72f;
                atset.add(MediaSize.findMedia(x, y, Size2DSyntax.INCH));
            }
        }
        atset.add(Sides.ONE_SIDED);
        PrintService[] services = PrintServiceLookup.lookupPrintServices(psInFormat, atset);
        log.debug("Number of matching printing services =  " + services.length);
        boolean printed = false;
        if (services.length == 0) {
            log.debug("No PDF printing services found.");
            log.error("Failing over to print using a pdf printing library");

            try {
                pdfInputStream.close();

                pdfInputStream = new FileInputStream(PreCaptureSingleton.getInstance().getProperties()
                        .getProperties().getProperty(PreCaptureProperties.KEY_LABELPRINTFILE));

                // trying pdfbox instead of pdf-renderer
                PDDocument pdfDocument = PDDocument.load(pdfInputStream);
                pdfDocument.print();
                pdfDocument.close();
                printed = true;
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        } else {
            log.debug("Available printing services " + services.length);
            for (int i = 0; i < services.length; i++) {
                log.debug(services[i].getName());
            }
            Object selectedService = JOptionPane.showInputDialog(null, "Send labels to which printer?", "Input",
                    JOptionPane.INFORMATION_MESSAGE, null, services, services[0]);
            if (selectedService != null) {
                DocPrintJob job = ((PrintService) selectedService).createPrintJob();
                log.debug("Printing to " + ((PrintService) selectedService).getName());
                try {
                    job.print(myDoc, atset);
                    printed = true;
                } catch (PrintException pe) {
                    log.error("Printing Error: " + pe.getMessage());
                    if (pe.getClass().getName().equals("sun.print.PrintJobFlavorException")) {

                        log.error("Failing over to print using a pdf printing library");

                        try {
                            pdfInputStream.close();

                            pdfInputStream = new FileInputStream(
                                    PreCaptureSingleton.getInstance().getProperties().getProperties()
                                            .getProperty(PreCaptureProperties.KEY_LABELPRINTFILE));

                            // Send PDF to printer using PDFBox PDF printing support.
                            PDDocument pdfDocument = PDDocument.load(pdfInputStream);
                            pdfDocument.print();
                            pdfDocument.close();
                            printed = true;
                            // Note, can't get pdf-renderer to print without re-scaling and shrinking the document.

                        } catch (Exception e) {
                            log.error(e.getMessage(), e);
                        }
                    }
                }
            }
            pdfInputStream.close();
        }
        if (!printed) {
            log.error("No available printing services");
            throw new PrintFailedException("Unable to locate or use a printer, print the file '"
                    + PreCaptureSingleton.getInstance().getProperties().getProperties()
                            .getProperty(PreCaptureProperties.KEY_LABELPRINTFILE)
                    + "'");
        }
    } catch (FileNotFoundException e) {
        log.error(e.getMessage());
        throw new PrintFailedException("Unable to find PDF file to print " + e.getMessage());
    } catch (Exception e) {
        log.error(e.getMessage());
        if (e != null && e.getCause() != null) {
            log.error(e.getCause().getMessage());
        }
        throw new PrintFailedException("No labels to print." + e.getMessage());
    }
}

From source file:edu.ist.psu.sagnik.research.pdfbox2playground.javatest.DrawPrintTextLocations.java

License:Apache License

/**
 * Loads the PDF located at {@code DataLocation.pdLoc} and runs
 * {@code DrawPrintTextLocations.stripPage} on every page index of the document.
 *
 * @param args The command line arguments (not read by this method).
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    PDDocument pdf = null;
    try {
        File pdfFile = new File(new DataLocation().pdLoc);
        pdf = PDDocument.load(pdfFile);

        DrawPrintTextLocations locationStripper = new DrawPrintTextLocations(pdf, new DataLocation().pdLoc);
        locationStripper.setSortByPosition(true);

        // The page count is re-read on each pass, exactly as a for-loop condition would.
        int pageIndex = 0;
        while (pageIndex < pdf.getNumberOfPages()) {
            locationStripper.stripPage(pageIndex);
            pageIndex++;
        }
    } finally {
        // pdf is still null when loading itself failed.
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:edu.uci.ics.crawler4j.parser.Parser.java

License:Apache License

/**
 * Parses the page's raw content as a PDF and stores the extracted text on the page as
 * {@code PDFParseData}.
 *
 * @param page the page whose content data is read and whose parse data is set
 * @throws IOException if the content cannot be loaded as a PDF or text extraction fails
 */
private void treatPDFContentType(Page page) throws IOException {
    PDDocument doc = PDDocument.load(new ByteArrayInputStream(page.getContentData()));
    try {
        page.setParseData(new PDFParseData(pdfTextStripper.getText(doc)));
    } finally {
        // Close even when text extraction throws, so the document is not leaked.
        doc.close();
    }
}

From source file:edu.umsl.runPDF.java

/**
 * Prompts on standard output for a PDF location, reads the answer from {@code sc}, and stores
 * the text extracted from that document in {@code content}.
 *
 * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
 */
public void readPDF() throws IOException {
    // NOTE(review): the prompt asks the user to omit the extension, but the input is used
    // verbatim as the file path - confirm which of the two is intended.
    System.out.println("Please enter PDF file location, omit extension: ");
    String input = sc.next();
    pdfFile = new File(input);
    PDDocument pdDocument = PDDocument.load(pdfFile);
    try {
        PDFTextStripper strip = new PDFTextStripper();
        content = strip.getText(pdDocument);
    } finally {
        // The extracted text is all that is kept, so release the document on every path.
        pdDocument.close();
    }
    System.out.println("PDF Read");
}

From source file:edu.ur.ir.index.DefaultPdfTextExtractor.java

License:Apache License

/**
 * Extracts the text of a PDF file.
 * <p>
 * Returns {@code null} without opening the file when it is too large or has no content, and
 * also when the document is encrypted or yields only blank text.
 *
 * @param f the PDF file to read
 * @return the extracted text, or {@code null} when no text is produced
 * @throws Exception if loading or text extraction fails; the failure is logged and rethrown
 *
 * @see edu.ur.ir.index.FileTextExtractor#getText(java.io.File)
 */
public String getText(File f) throws Exception {
    // don't even try if the file is too large (or empty)
    if (isFileTooLarge(f) || f.length() <= 0L) {
        return null;
    }

    PDDocument pdDoc = null;
    try {
        pdDoc = PDDocument.load(f);

        // encrypted documents are skipped: there is no password to open them with
        if (pdDoc.isEncrypted()) {
            log.error("pdf " + f.getAbsolutePath() + " is encrypted and "
                    + " cannot be decrypted because we don't have a password");
            return null;
        }

        String extracted = new PDFTextStripper().getText(pdDoc);
        boolean hasContent = extracted != null && !extracted.trim().equals("");
        return hasContent ? extracted : null;
    } catch (OutOfMemoryError oome) {
        log.error("could not extract text", oome);
        throw oome;
    } catch (Exception e) {
        log.error("could not extract text with other error", e);
        throw e;
    } finally {
        // Runs on every exit from the try block, early returns included.
        closePDDocument(pdDoc);
    }
}

From source file:edu.uwm.jiaoduan.lab.ExtractTextByArea.java

License:Apache License

/**
 * Prints the text found in a fixed rectangular area of the first page of a PDF.
 *
 * @param args The command line arguments: the path of the PDF to read.  When no argument is
 *             given, "test.pdf" is used; more than one argument prints the usage message.
 *
 * @throws Exception If there is an error parsing the document.
 */
public static void main(String[] args) throws Exception {
    // Default to the sample file only when nothing was supplied, so that a path given on the
    // command line is honoured and the usage check below stays reachable.
    if (args.length == 0) {
        args = new String[] { "test.pdf" };
    }
    if (args.length != 1) {
        usage();
    } else {
        PDDocument document = null;
        try {
            document = PDDocument.load(args[0]);
            if (document.isEncrypted()) {
                try {
                    // Try the empty password before giving up.
                    document.decrypt("");
                } catch (InvalidPasswordException e) {
                    System.err.println("Error: Document is encrypted with a password.");
                    System.exit(1);
                }
            }
            PDFTextStripperByArea stripper = new PDFTextStripperByArea();
            stripper.setSortByPosition(true);

            // convert xfdf coordinate to rectangle
            Rectangle2D.Double rect = new Rectangle2D.Double();

            List<?> allPages = document.getDocumentCatalog().getAllPages();
            PDPage firstPage = (PDPage) allPages.get(0);

            double pageHeight = firstPage.getMediaBox().getHeight();

            // presumably {x1, y1, x2, y2} of the source annotation - TODO confirm
            double[] coords = new double[] { 58.50615, 500.847504, 302.919073, 552.419312 };
            rect.height = coords[3] - coords[1];
            rect.width = coords[2] - coords[0];

            rect.x = coords[0];
            // y is re-measured from the opposite horizontal edge of the page.
            rect.y = pageHeight - coords[1] - rect.height;
            System.out.println(rect);

            stripper.addRegion("class1", rect);
            stripper.extractRegions(firstPage);

            System.out.println("Text in the area:" + rect);
            System.out.println(stripper.getTextForRegion("class1"));

        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}

From source file:edworld.pdfreader4humans.impl.MainPDFComponentLocatorTest.java

License:Apache License

/**
 * Test fixture: loads {@code /testcase1/input.pdf} from the classpath, creates the locator
 * under test, and keeps the first page of the document for the test methods.
 *
 * @throws Exception if the PDF resource cannot be loaded
 */
@Before
public void setUp() throws Exception {
    this.doc = PDDocument.load(this.getClass().getResource("/testcase1/input.pdf"));
    this.locator = new MainPDFComponentLocator();
    // getAllPages() yields untyped elements here, hence the cast.
    Object firstPage = this.doc.getDocumentCatalog().getAllPages().get(0);
    this.page1 = (PDPage) firstPage;
}

From source file:edworld.pdfreader4humans.PDFReader.java

License:Apache License

/**
 * Creates a reader for the PDF at the given URL.  The document is loaded, all of its pages are
 * read through {@code readAllPages}, and the document is closed again before the constructor
 * returns.  The class is responsible for reading PDF contents in the same order a human would
 * read them.
 *
 * @param url
 *            the PDF's location
 * @param componentLocator
 *            an instance of a PDFComponentLocator subclass such as MainPDFComponentLocator
 * @param boxDetector
 *            an instance of a BoxDetector subclass such as MainBoxDetector
 * @param marginDetector
 *            an instance of a MarginDetector subclass such as MainMarginDetector
 * @throws IOException
 *             if loading or reading the PDF fails
 */
public PDFReader(URL url, PDFComponentLocator componentLocator, BoxDetector boxDetector,
        MarginDetector marginDetector) throws IOException {
    this.url = url;
    PDDocument doc = PDDocument.load(url);
    try {
        readAllPages(doc, componentLocator, boxDetector, marginDetector);
    } finally {
        // Nothing holds on to the document after the pages are read, so always release it.
        doc.close();
    }
}