List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.load
public static PDDocument load(byte[] input) throws IOException
From source file:name.marcelomorales.siqisiqi.pdfbox.CoordinatesGenerator.java
License:Apache License
public void generarPdf(OutputStream os, String template, Map<String, Object> m, String path, String coordenates, float fontSize, float ancho) throws IOException { long t = System.currentTimeMillis(); PDDocument doc = null;//www . j ava2 s . com try { doc = PDDocument.load(new File(path)); List pages = doc.getDocumentCatalog().getAllPages(); PDPage sourcePage = (PDPage) pages.get(0); boolean append = sourcePage.getContents() != null; PDPageContentStream contentStream = new PDPageContentStream(doc, sourcePage, append, true); StringReader fileReader = null; try { fileReader = new StringReader(template); List<String> list = CharStreams.readLines(fileReader); boolean textHasBegun = false; float currentOffset = 0f; for (String line : list) { if (line == null) { continue; } if (line.startsWith("#")) { continue; } final Iterable<String> str = Splitter.on(',').omitEmptyStrings().trimResults().split(line); final String[] split = Iterables.toArray(str, String.class); if (split == null || split.length < 4) { continue; } if (Character.isDigit(split[0].charAt(0))) { if (textHasBegun) { contentStream.endText(); } contentStream.beginText(); textHasBegun = true; contentStream.moveTextPositionByAmount(parseFloat(split[0]), parseFloat(split[1])); } else { contentStream.moveTextPositionByAmount(currentOffset, 0); } if (!textHasBegun) { LOGGER.warn("Hay un posible mal uso de un .ree", new Throwable()); contentStream.beginText(); textHasBegun = true; } PDType1Font font; if ("b".equals(split[2])) { font = HELVETICA_BOLD; } else { font = HELVETICA; } contentStream.setFont(font, fontSize); Object text = null; if (split[3].startsWith("\"")) { // TODO: text = substring(split[3], 1, -1); } else { // TODO: text = new PropertyModel(m, split[3]).getObject(); } if (text == null) { LOGGER.warn("Propiedad {} no se encuentra", split[3]); //contentStream.drawString("ERROR: propiedad no encontrada"); contentStream.drawString(" "); } else { String string = text.toString(); currentOffset = 
font.getStringWidth(string) * ancho; contentStream.drawString(string); } } if (textHasBegun) { contentStream.endText(); } } finally { Closeables.closeQuietly(fileReader); } contentStream.close(); try { doc.save(os); } catch (COSVisitorException e) { throw new IOException("Ha ocurrido un error al escribir en el Os", e); } } finally { if (doc != null) { doc.close(); } LOGGER.info("Me ha tomado {} milisegundos hacer el pdf", System.currentTimeMillis() - t); } }
From source file:net.anthonypoon.billscrapper.JavaBillScrapper.java
/**
 * Loads the given PDF, extracts its text, splits it into lines and stores the parsed
 * result in {@code billObj}.
 *
 * @param pdfFile the PDF file to read
 * @throws IOException if the file cannot be loaded or its text cannot be extracted
 */
public JavaBillScrapper(File pdfFile) throws IOException {
    PDDocument doc = PDDocument.load(pdfFile);
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        String rawText = stripper.getText(doc);
        // Split on any run of CR/LF so blank lines never produce empty entries.
        String[] textArray = rawText.split("[\\r\\n]+");
        this.billObj = parsePdf(textArray);
    } finally {
        // Release the document even when extraction or parsing fails.
        doc.close();
    }
}
From source file:net.anthonypoon.billscrapper.JavaBillScrapper.java
public static void main(String[] args) { // TODO code application logic here try {//w w w .ja v a2s .com for (String arg : args) { if (!arg.startsWith("-")) { filePaths.add(arg); } else { try { options.add(Flags.fromString(arg)); } catch (IllegalArgumentException ex) { System.err.println("Illegal options: " + arg); } } } Collections.sort(filePaths); for (String filePath : filePaths) { System.out.println("Loading: " + filePath); PDDocument doc = PDDocument.load(new File(filePath)); PDFTextStripper stripper = new PDFTextStripper(); String rawText = stripper.getText(doc); String[] textArray = rawText.split("[\\r\\n]+"); Bill bill = parsePdf(textArray); if (options.contains(Flags.INSERT_INTO_DB)) { DatabaseConnector db = new DatabaseConnector(); DbWriter writer = new DbWriter(db.getConnection()); boolean isInserted = writer.insertDetail(bill.getBillSummary(), bill.getPhoneSummaryData(), bill.getPhoneDetail()); writer.commit(); doc.close(); if (!isInserted) { System.out.println(filePath + " was not inserted into database."); } } } } catch (Exception ex) { ex.printStackTrace(System.out); } }
From source file:net.awl.edoc.pdfa.PdfBoxIsartorValidate.java
License:Apache License
public static void coin(File f) { nbFile++;//from w w w.j av a 2 s . c o m // PDFBox try { PDDocument document = PDDocument.load(f); COSDocument cDocument = document.getDocument(); boolean result = PDFParser.parse(new FileInputStream(f)); if (result) { nbOk++; } else { nbBad++; } ; document.close(); } catch (IOException e) { System.err.println("Failed for : " + f.getAbsolutePath()); // } catch (ParseException e) { // nbBad++; } catch (Throwable e) { nbBad++; } }
From source file:net.betzel.fop.pdf.viewer.FXMLController.java
License:Apache License
/**
 * Renders the XML/XSL pair to PDF with FOP on the background executor and converts every
 * PDF page into a 150 DPI RGB image. Does nothing unless called on the JavaFX application
 * thread. On success {@code images} is replaced with the rendered pages; on failure the
 * progress dialog is closed, an error line is logged, {@code reentrantLock} is released
 * and {@code images} is cleared.
 *
 * @param fileStreamSources supplies the XSL and XML sources for the transformation
 */
private void createImages(FileStreamSources fileStreamSources) {
    if (!Platform.isFxApplicationThread()) {
        return;
    }
    final Task<List<BufferedImage>> createImagesTask = new Task<List<BufferedImage>>() {
        @Override
        protected List<BufferedImage> call() throws Exception {
            ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
            List<BufferedImage> bufferedImages = new ArrayList<BufferedImage>();
            FOUserAgent userAgent = fopFactory.newFOUserAgent();
            userAgent.getEventBroadcaster().addEventListener(fopEventListener);
            Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, userAgent, byteArrayOutputStream);
            Transformer transformer = transformerFactory.newTransformer(fileStreamSources.getXslSource());
            transformer.setErrorListener(xmlTransformErrorListener);
            Result result = new SAXResult(fop.getDefaultHandler());
            transformer.transform(fileStreamSources.getXmlSource(), result);
            FormattingResults foResults = fop.getResults();
            for (Object element : foResults.getPageSequences()) {
                PageSequenceResults pageSequenceResults = (PageSequenceResults) element;
                String id = String.valueOf(pageSequenceResults.getID());
                final String message = "PageSequence " + (id.length() > 0 ? id : "<no id>") + " generated "
                        + pageSequenceResults.getPageCount() + " pages.\n";
                // call() runs on the background executor; hand the UI update to the FX thread.
                // NOTE(review): assumes 'logging' is a JavaFX text control - confirm.
                Platform.runLater(() -> logging.appendText(message));
            }
            try (PDDocument pdDocument = PDDocument
                    .load(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()))) {
                PDFRenderer pdfRenderer = new PDFRenderer(pdDocument);
                int pageCount = pdDocument.getNumberOfPages();
                for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                    bufferedImages.add(pdfRenderer.renderImageWithDPI(pageIndex, 150, ImageType.RGB));
                }
            }
            return bufferedImages;
        }
    };
    createImagesTask.setOnSucceeded((WorkerStateEvent event) -> {
        Platform.runLater(() -> {
            images.clear();
            images.addAll(createImagesTask.getValue());
        });
    });
    createImagesTask.setOnFailed((WorkerStateEvent event) -> {
        Platform.runLater(() -> {
            scanProgressDialog.close();
            logging.appendText("Error creating images from PDF\n");
            reentrantLock.unlock();
            images.clear();
        });
    });
    backgoundExecutor.submit(createImagesTask);
}
From source file:net.bookinaction.ExtractAnnotations.java
License:Apache License
public void doJob(String job, Float[] pA) throws IOException { PDDocument document = null;/* w w w . j a va 2 s . co m*/ Stamper s = new Stamper(); // utility class final String job_file = job + ".pdf"; final String dic_file = job + "-dict.txt"; final String new_job = job + "-new.pdf"; PrintWriter writer = new PrintWriter(dic_file); ImageLocationListener imageLocationsListener = new ImageLocationListener(); AnnotationMaker annotMaker = new AnnotationMaker(); try { document = PDDocument.load(new File(job_file)); int pageNum = 0; for (PDPage page : document.getPages()) { pageNum++; PDRectangle cropBox = page.getCropBox(); List<PDAnnotation> annotations = page.getAnnotations(); // extract image locations List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>(); imageLocationsListener.setImageRects(imageRects); imageLocationsListener.processPage(page); int im = 0; for (Rectangle2D pdImageRect : imageRects) { s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect); annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect, "[im" + im + "]")); im++; } PDFTextStripperByArea stripper = new PDFTextStripperByArea(); int j = 0; List<PDAnnotation> viableAnnots = new ArrayList(); for (PDAnnotation annot : annotations) { if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) { stripper.addRegion(Integer.toString(j++), s.getAwtRect( s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox)); viableAnnots.add(annot); } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) { viableAnnots.add(annot); } } stripper.extractRegions(page); List<PDRectangle> rects = new ArrayList<PDRectangle>(); List<String> comments = new ArrayList<String>(); List<String> highlightTexts = new ArrayList<String>(); j = 0; for (PDAnnotation viableAnnot : viableAnnots) { if (viableAnnot instanceof PDAnnotationTextMarkup) { String highlightText = 
stripper.getTextForRegion(Integer.toString(j++)); String withoutCR = highlightText.replace((char) 0x0A, '^'); String comment = viableAnnot.getContents(); String colorString = String.format("%06x", viableAnnot.getColor().toRGB()); PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]); rects.add(aRect); comments.add(comment); highlightTexts.add(highlightText); s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString); } else if (viableAnnot instanceof PDAnnotationText) { String comment = viableAnnot.getContents(); String colorString = String.format("%06x", viableAnnot.getColor().toRGB()); for (Rectangle2D pdImageRect : imageRects) { if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(), viableAnnot.getRectangle().getLowerLeftY())) { s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect, colorString); annotations.add(annotMaker.squareAnnotation(Color.GREEN, (Rectangle2D.Float) pdImageRect, comment)); } ; } } } PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true); int i = 0; for (PDRectangle pdRect : rects) { String comment = comments.get(i); String highlightText = highlightTexts.get(i); //annotations.add(linkAnnotation(pdRect, comment, highlightText)); //annotations.add(annotationSquareCircle(pdRect, BLUE)); s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(), pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE); i++; } canvas.close(); } writer.close(); document.save(new_job); } finally { if (document != null) { document.close(); } } }
From source file:net.bookinaction.TextInfoExtractor.java
License:Apache License
public void doTextPosition(String source, String coord_text, StripperParam stripperParam) throws IOException { String source_pdf = source;//ww w . j ava 2 s .c om String new_file = source.split("\\.")[0] + "-new.pdf"; PDDocument document = PDDocument.load(new File(source_pdf)); PrintWriter writer = new PrintWriter(new File(coord_text)); //s.recordHeader(writer, source_pdf, document.getNumberOfPages(), sParam); for (int i = 0; i < document.getNumberOfPages(); i++) { getTextPositionFromPage(document, stripperParam, i + 1, writer, true); } if (document != null) { document.save(new_file); document.close(); } if (writer != null) writer.close(); }
From source file:net.ontopia.topicmaps.classify.PDFFormatModule.java
License:Apache License
public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) { try {//from w ww. j a v a 2s . c o m PDDocument pdoc = PDDocument.load(new BufferedInputStream(new ByteArrayInputStream(cc.getContent()))); PDFTextStripper stripper = new PDFTextStripper(); String s = stripper.getText(pdoc); pdoc.close(); char[] c = s.toCharArray(); handler.startRegion("document"); handler.text(c, 0, c.length); handler.endRegion(); } catch (Exception e) { throw new OntopiaRuntimeException(e); } }
From source file:net.padaf.preflight.PdfA1bValidator.java
License:Apache License
public synchronized ValidationResult validate(DataSource source) throws ValidationException { DocumentHandler handler = createDocumentHandler(source); try {// w w w .ja v a 2 s . c o m // syntax (javacc) validation try { PDFParser parser = new PDFParser(source.getInputStream()); parser.PDF(); handler.setParser(parser); } catch (IOException e) { throw new ValidationException("Failed to parse datasource due to : " + e.getMessage(), e); } catch (ParseException e) { return createErrorResult(e); } // if here is reached, validate with helpers // init PDF Box document PDDocument document = null; try { document = PDDocument.load(handler.getSource().getInputStream()); handler.setDocument(document); } catch (IOException e) { throw new ValidationException("PDFBox failed to parse datasource", e); } // init PDF Extractor try { SimpleCharStream scs = new SimpleCharStream(source.getInputStream()); ExtractorTokenManager extractor = new ExtractorTokenManager(scs); extractor.parse(); handler.setPdfExtractor(extractor); } catch (IOException e) { throw new ValidationException("PDF ExtractorTokenMng failed to parse datasource", e); } // call all helpers ArrayList<ValidationError> allErrors = new ArrayList<ValidationError>(); // Execute priority helpers. for (AbstractValidationHelper helper : priorHelpers) { runValidation(handler, helper, allErrors); } // Execute other helpers. for (AbstractValidationHelper helper : standHelpers) { runValidation(handler, helper, allErrors); } // check result ValidationResult valRes = null; if (allErrors.size() == 0) { valRes = new ValidationResult(true); } else { // there are some errors valRes = new ValidationResult(allErrors); } // addition of the some objects to avoid a second file parsing valRes.setPdf(document); valRes.setXmpMetaData(handler.getMetadata()); return valRes; } catch (ValidationException e) { // ---- Close all open resources if an error occurs. handler.close(); throw e; } }
From source file:net.sf.jabref.gui.PdfPreviewPanel.java
License:Open Source License
private void renderPDFFile(File file) { try (InputStream input = new FileInputStream(file); PDDocument document = PDDocument.load(input)) { List<PDPage> pages = document.getDocumentCatalog().getAllPages(); PDPage page = pages.get(0);/*www . j av a2 s .co m*/ BufferedImage image; try { image = page.convertToImage(); } catch (Exception e1) { // silently ignores all rendering exceptions image = null; } if (image != null) { int width = this.getParent().getWidth(); int height = this.getParent().getHeight(); BufferedImage resImage = resizeImage(image, width, height, BufferedImage.TYPE_INT_RGB); ImageIcon icon = new ImageIcon(resImage); picLabel.setText(null); picLabel.setIcon(icon); } else { clearPreview(); } } catch (IOException e) { LOGGER.warn("Cannot open file/PDF document", e); } }