List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.load
public static PDDocument load(byte[] input) throws IOException
From source file:editorframework.pdfbox.testes.TestesComPDFBox.java
public static void exibirPDFEmUmJFrame() throws IOException { File PDFPath = new File("./simple.pdf"); PDDocument inputPDF = PDDocument.load(PDFPath); List<PDPage> allPages = inputPDF.getDocumentCatalog().getAllPages(); PDPage testPage = (PDPage) allPages.get(0); JFrame testFrame = new JFrame(); testFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); PDFPagePanel pdfPanel = new PDFPagePanel(); pdfPanel.setPage(testPage);//from www .j av a 2 s . c o m testFrame.add(pdfPanel); testFrame.setBounds(40, 40, pdfPanel.getWidth(), pdfPanel.getHeight()); testFrame.setVisible(true); pdfPanel.validate(); // inputPDF.close(); }
From source file:editorframework.pdfbox.testes.TestesComPDFBox.java
public static void exibirPDFemImagemNoJComponent() throws IOException { PDDocument document = PDDocument.load(new File("./simple.pdf")); List<PDPage> allPages = document.getDocumentCatalog().getAllPages(); PDPage firstPage = allPages.get(0);//from ww w . j a va 2s.c o m BufferedImage bi = firstPage.convertToImage(); JFrame testFrame = new JFrame(); testFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); JScrollPane scrollPane = new JScrollPane(); JLabel picLabel = new JLabel(new ImageIcon(bi)); scrollPane.getViewport().add(picLabel); testFrame.add(scrollPane); testFrame.setVisible(true); testFrame.pack(); }
From source file:edu.harvard.mcz.precapture.encoder.PrintingUtility.java
License:Open Source License
/** * Send the generated PDF file to a printer. (file to print is from LabelEncoder.getPrintFile(). * /*from w w w .j a v a2 s .c om*/ * @param printDefinition Used to find paper size * @param paperWidthPoints * @param paperHeightPoints * @throws PrintFailedException if printing fails for any reason. */ public static void sendPDFToPrinter(LabelDefinitionType printDefinition, int paperWidthPoints, int paperHeightPoints) throws PrintFailedException { try { // send generated PDF to printer. FileInputStream pdfInputStream = new FileInputStream(PreCaptureSingleton.getInstance().getProperties() .getProperties().getProperty(PreCaptureProperties.KEY_LABELPRINTFILE)); DocFlavor psInFormat = DocFlavor.INPUT_STREAM.PDF; // No printers listed... Don't Try autosense instead of PDF // DocFlavor psInFormat = DocFlavor.INPUT_STREAM.AUTOSENSE; // Ends up listing printers that can't take the PDF, // Need instead to fail over to using a pdf printing library // and having the pdf printing library pull up the printer dialog. Doc myDoc = new SimpleDoc(pdfInputStream, psInFormat, null); PrintRequestAttributeSet atset = new HashPrintRequestAttributeSet(); atset.add(new Copies(1)); // Set paper size if (paperWidthPoints == 612 && paperHeightPoints == 792) { atset.add(MediaSizeName.NA_LETTER); } else { float x = printDefinition.getPaperWidth(); float y = printDefinition.getPaperHeight(); if (printDefinition.getUnits().toString().toLowerCase().equals("inches")) { MediaSizeName mediaSizeName = MediaSize.findMedia(x, y, Size2DSyntax.INCH); if (mediaSizeName == null) { // TODO: Handle non-standard paper sizes. The following doesn't provide // what is needed. 
atset.add(new MediaPrintableArea(0, 0, x, y, MediaPrintableArea.INCH)); } else { atset.add(mediaSizeName); } } if (printDefinition.getUnits().toString().toLowerCase().equals("cm")) { x = x * 10f; y = y * 10f; atset.add(MediaSize.findMedia(x, y, Size2DSyntax.INCH)); } if (printDefinition.getUnits().toString().toLowerCase().equals("points")) { x = x / 72f; y = y / 72f; atset.add(MediaSize.findMedia(x, y, Size2DSyntax.INCH)); } } atset.add(Sides.ONE_SIDED); PrintService[] services = PrintServiceLookup.lookupPrintServices(psInFormat, atset); log.debug("Number of matching printing services = " + services.length); boolean printed = false; if (services.length == 0) { log.debug("No PDF printing services found."); log.error("Failing over to print using a pdf printing library"); try { pdfInputStream.close(); pdfInputStream = new FileInputStream(PreCaptureSingleton.getInstance().getProperties() .getProperties().getProperty(PreCaptureProperties.KEY_LABELPRINTFILE)); // trying pdfbox instead of pdf-renderer PDDocument pdfDocument = PDDocument.load(pdfInputStream); pdfDocument.print(); pdfDocument.close(); printed = true; } catch (Exception e) { log.error(e.getMessage(), e); } } else { log.debug("Available printing services " + services.length); for (int i = 0; i < services.length; i++) { log.debug(services[i].getName()); } Object selectedService = JOptionPane.showInputDialog(null, "Send labels to which printer?", "Input", JOptionPane.INFORMATION_MESSAGE, null, services, services[0]); if (selectedService != null) { DocPrintJob job = ((PrintService) selectedService).createPrintJob(); log.debug("Printing to " + ((PrintService) selectedService).getName()); try { job.print(myDoc, atset); printed = true; } catch (PrintException pe) { log.error("Printing Error: " + pe.getMessage()); if (pe.getClass().getName().equals("sun.print.PrintJobFlavorException")) { log.error("Failing over to print using a pdf printing library"); try { pdfInputStream.close(); pdfInputStream = new 
FileInputStream( PreCaptureSingleton.getInstance().getProperties().getProperties() .getProperty(PreCaptureProperties.KEY_LABELPRINTFILE)); // Send PDF to printer using PDFBox PDF printing support. PDDocument pdfDocument = PDDocument.load(pdfInputStream); pdfDocument.print(); pdfDocument.close(); printed = true; // Note, can't get pdf-renderer to print without re-scaling and shrinking the document. } catch (Exception e) { log.error(e.getMessage(), e); } } } } pdfInputStream.close(); } if (!printed) { log.error("No available printing services"); throw new PrintFailedException("Unable to locate or use a printer, print the file '" + PreCaptureSingleton.getInstance().getProperties().getProperties() .getProperty(PreCaptureProperties.KEY_LABELPRINTFILE) + "'"); } } catch (FileNotFoundException e) { log.error(e.getMessage()); throw new PrintFailedException("Unable to find PDF file to print " + e.getMessage()); } catch (Exception e) { log.error(e.getMessage()); if (e != null && e.getCause() != null) { log.error(e.getCause().getMessage()); } throw new PrintFailedException("No labels to print." + e.getMessage()); } }
From source file:edu.ist.psu.sagnik.research.pdfbox2playground.javatest.DrawPrintTextLocations.java
License:Apache License
/**
 * Loads the PDF named by {@code DataLocation.pdLoc} and runs a
 * {@code DrawPrintTextLocations} stripper over every page, sorted by position.
 *
 * @param args The command line arguments (not used).
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    // If loading fails there is nothing to close, so the load sits outside the try block.
    final PDDocument pdf = PDDocument.load(new File(new DataLocation().pdLoc));
    try {
        DrawPrintTextLocations locator = new DrawPrintTextLocations(pdf, new DataLocation().pdLoc);
        locator.setSortByPosition(true);

        int pageIndex = 0;
        while (pageIndex < pdf.getNumberOfPages()) {
            locator.stripPage(pageIndex);
            pageIndex++;
        }
    } finally {
        pdf.close();
    }
}
From source file:edu.uci.ics.crawler4j.parser.Parser.java
License:Apache License
private void treatPDFContentType(Page page) throws IOException { PDDocument doc = PDDocument.load(new ByteArrayInputStream(page.getContentData())); page.setParseData(new PDFParseData(pdfTextStripper.getText(doc))); doc.close();//w ww . j a v a 2 s. co m }
From source file:edu.umsl.runPDF.java
public void readPDF() throws IOException { System.out.println("Please enter PDF file location, omit extension: "); String input = sc.next();/*from w ww . j a va 2 s . c o m*/ pdfFile = new File(input); PDDocument pdDocument = PDDocument.load(pdfFile); PDFTextStripper strip = new PDFTextStripper(); // strip.setStartPage(1); // strip.setEndPage(1); content = strip.getText(pdDocument); System.out.println("PDF Read"); // System.out.println(content); // FileOutputStream outStream; // strip.writeText(txtFile, outStream); }
From source file:edu.ur.ir.index.DefaultPdfTextExtractor.java
License:Apache License
/** * Extract text from the PDF document//from w ww . ja v a 2 s . c o m * @throws Exception * * @see edu.ur.ir.index.FileTextExtractor#getText(java.io.File) */ public String getText(File f) throws Exception { String text = null; // don't even try if the file is too large if (isFileTooLarge(f) || f.length() <= 0l) { return text; } PDDocument pdDoc = null; try { pdDoc = PDDocument.load(f); // don't do anything with decripted docs if (!pdDoc.isEncrypted()) { PDFTextStripper stripper = new PDFTextStripper(); String myText = stripper.getText(pdDoc); if (myText != null && !myText.trim().equals("")) { text = myText; } } else { log.error("pdf " + f.getAbsolutePath() + " is encrypted and " + " cannot be decrypted because we don't have a password"); } } catch (OutOfMemoryError oome) { text = null; log.error("could not extract text", oome); throw (oome); } catch (Exception e) { log.error("could not extract text with other error", e); text = null; throw (e); } finally { closePDDocument(pdDoc); pdDoc = null; } return text; }
From source file:edu.uwm.jiaoduan.lab.ExtractTextByArea.java
License:Apache License
/** * This will print the documents text in a certain area. * * @param args The command line arguments. * * @throws Exception If there is an error parsing the document. *///ww w . ja va2 s . co m public static void main(String[] args) throws Exception { args = new String[] { "test.pdf" }; if (args.length != 1) { usage(); } else { PDDocument document = null; try { document = PDDocument.load(args[0]); if (document.isEncrypted()) { try { document.decrypt(""); } catch (InvalidPasswordException e) { System.err.println("Error: Document is encrypted with a password."); System.exit(1); } } PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.setSortByPosition(true); //Rectangle rect = new Rectangle( 99,219,80,15 ); //convert xfdf coordinate to rectangle Rectangle2D.Double rect = new Rectangle2D.Double(); List allPages = document.getDocumentCatalog().getAllPages(); PDPage firstPage = (PDPage) allPages.get(0); double pageHeight = firstPage.getMediaBox().getHeight(); //125.824906,672.39502,390.577109,694.679017 double[] coords = new double[] { 58.50615, 500.847504, 302.919073, 552.419312 }; //rect.height = 694.679017 - 672.39502; rect.height = coords[3] - coords[1]; //rect.width = 390.577109 - 125.824906; rect.width = coords[2] - coords[0]; ; //rect.x = 125.824906; rect.x = coords[0]; //rect.y = pageHeight -672.39502 - rect.height; rect.y = pageHeight - coords[1] - rect.height; System.out.println(rect); stripper.addRegion("class1", rect); stripper.extractRegions(firstPage); System.out.println("Text in the area:" + rect); System.out.println(stripper.getTextForRegion("class1")); } finally { if (document != null) { document.close(); } } } }
From source file:edworld.pdfreader4humans.impl.MainPDFComponentLocatorTest.java
License:Apache License
/**
 * Test fixture: loads the {@code /testcase1/input.pdf} classpath resource, creates a
 * fresh {@code MainPDFComponentLocator} and keeps a reference to the document's first page.
 *
 * @throws Exception if the document cannot be loaded
 */
@Before
public void setUp() throws Exception {
    this.doc = PDDocument.load(this.getClass().getResource("/testcase1/input.pdf"));
    this.locator = new MainPDFComponentLocator();

    Object firstPage = this.doc.getDocumentCatalog().getAllPages().get(0);
    this.page1 = (PDPage) firstPage;
}
From source file:edworld.pdfreader4humans.PDFReader.java
License:Apache License
/** * Class responsible for reading PDF contents in the same order a human would read them. * /*from w w w. ja v a2s.co m*/ * @param url * the PDF's location * @param componentLocator * an instance of a PDFComponentLocator subclass such as MainPDFComponentLocator * @param boxDetector * an instance of a BoxDetector subclass such as MainBoxDetector * @param marginDetector * an instance of a MarginDetector subclass such as MainMarginDetector * @throws IOException */ public PDFReader(URL url, PDFComponentLocator componentLocator, BoxDetector boxDetector, MarginDetector marginDetector) throws IOException { this.url = url; PDDocument doc = PDDocument.load(url); try { readAllPages(doc, componentLocator, boxDetector, marginDetector); } finally { doc.close(); } }