Example usage for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of the load method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public static PDDocument load(byte[] input) throws IOException

Source Link

Document

Parses a PDF.

Usage

From source file:name.marcelomorales.siqisiqi.pdfbox.CoordinatesGenerator.java

License:Apache License

/**
 * Stamps the entries of a coordinates template onto the first page of an existing PDF and
 * writes the resulting document to {@code os}.
 *
 * <p>Each template line is split on commas (empty fields dropped, fields trimmed). Lines starting
 * with {@code #} and lines with fewer than four fields are skipped. When the first field starts
 * with a digit, fields 0 and 1 are parsed as a position and a new text object is begun there;
 * otherwise the cursor is moved right by the offset computed from the previously drawn string.
 * Field 2 equal to {@code "b"} selects the bold font. Field 3 names the text to draw.
 *
 * <p>NOTE: both branches that resolve field 3 into a value are still TODOs, so {@code text} is
 * always {@code null} here and every entry currently draws a single space and logs a warning.
 *
 * @param os          stream the modified document is saved to
 * @param template    the coordinates template, one entry per line
 * @param m           model for property lookups; unused until the TODO below is implemented
 * @param path        file system path of the PDF to load
 * @param coordenates not read by this method
 * @param fontSize    font size applied to every entry
 * @param ancho       factor applied to the drawn string's width to get the next horizontal offset
 * @throws IOException if the PDF cannot be read, modified or written
 */
public void generarPdf(OutputStream os, String template, Map<String, Object> m, String path, String coordenates,
        float fontSize, float ancho) throws IOException {
    long t = System.currentTimeMillis();
    PDDocument doc = null;
    try {
        doc = PDDocument.load(new File(path));

        List pages = doc.getDocumentCatalog().getAllPages();

        PDPage sourcePage = (PDPage) pages.get(0);

        // Append to the existing content if the page already has some; otherwise start fresh.
        boolean append = sourcePage.getContents() != null;
        PDPageContentStream contentStream = new PDPageContentStream(doc, sourcePage, append, true);
        try {
            StringReader fileReader = null;
            try {

                fileReader = new StringReader(template);
                List<String> list = CharStreams.readLines(fileReader);
                boolean textHasBegun = false;
                float currentOffset = 0f;
                for (String line : list) {

                    if (line == null || line.startsWith("#")) {
                        continue;
                    }

                    final Iterable<String> str = Splitter.on(',').omitEmptyStrings().trimResults().split(line);
                    final String[] split = Iterables.toArray(str, String.class);
                    if (split == null || split.length < 4) {
                        continue;
                    }

                    if (Character.isDigit(split[0].charAt(0))) {
                        // Absolute entry: close any open text object and start a new one at (x, y).
                        if (textHasBegun) {
                            contentStream.endText();
                        }
                        contentStream.beginText();
                        textHasBegun = true;
                        contentStream.moveTextPositionByAmount(parseFloat(split[0]), parseFloat(split[1]));
                    } else {
                        // Relative entry: continue to the right of the previously drawn string.
                        contentStream.moveTextPositionByAmount(currentOffset, 0);
                    }

                    if (!textHasBegun) {
                        LOGGER.warn("Hay un posible mal uso de un .ree", new Throwable());
                        contentStream.beginText();
                        textHasBegun = true;
                    }

                    PDType1Font font;
                    if ("b".equals(split[2])) {
                        font = HELVETICA_BOLD;
                    } else {
                        font = HELVETICA;
                    }
                    contentStream.setFont(font, fontSize);

                    Object text = null;
                    if (split[3].startsWith("\"")) {
                        // TODO: text = substring(split[3], 1, -1);
                    } else {
                        // TODO: text = new PropertyModel(m, split[3]).getObject();
                    }

                    if (text == null) {
                        LOGGER.warn("Propiedad {} no se encuentra", split[3]);
                        contentStream.drawString(" ");
                    } else {
                        String string = text.toString();
                        currentOffset = font.getStringWidth(string) * ancho;
                        contentStream.drawString(string);
                    }
                }

                if (textHasBegun) {
                    contentStream.endText();
                }
            } finally {
                Closeables.closeQuietly(fileReader);
            }
        } finally {
            // Always release the content stream, even when writing the text failed part-way.
            contentStream.close();
        }

        try {
            doc.save(os);
        } catch (COSVisitorException e) {
            throw new IOException("Ha ocurrido un error al escribir en el Os", e);
        }
    } finally {
        if (doc != null) {
            doc.close();
        }
        LOGGER.info("Me ha tomado {} milisegundos hacer el pdf", System.currentTimeMillis() - t);
    }
}

From source file:net.anthonypoon.billscrapper.JavaBillScrapper.java

/**
 * Loads the given PDF, extracts its text, splits it into non-empty lines and stores the result
 * of {@code parsePdf} in {@code billObj}.
 *
 * @param pdfFile the PDF file to read
 * @throws IOException if the file cannot be loaded or its text cannot be extracted
 */
public JavaBillScrapper(File pdfFile) throws IOException {
    PDDocument doc = PDDocument.load(pdfFile);
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        String rawText = stripper.getText(doc);
        // Split on any run of CR/LF so blank lines do not produce empty entries.
        String[] textArray = rawText.split("[\\r\\n]+");
        this.billObj = parsePdf(textArray);
    } finally {
        // Release the document even when extraction or parsing fails.
        doc.close();
    }
}

From source file:net.anthonypoon.billscrapper.JavaBillScrapper.java

/**
 * Command-line entry point. Arguments starting with {@code -} are parsed as {@code Flags};
 * every other argument is treated as the path of a PDF to load. Paths are processed in sorted
 * order; each PDF is parsed into a {@code Bill} and, when {@code Flags.INSERT_INTO_DB} was given,
 * written to the database.
 *
 * <p>Any exception aborts the remaining files and its stack trace is printed to standard output.
 *
 * @param args option flags and PDF file paths, in any order
 */
public static void main(String[] args) {
    try {
        for (String arg : args) {
            if (!arg.startsWith("-")) {
                filePaths.add(arg);
            } else {
                try {
                    options.add(Flags.fromString(arg));
                } catch (IllegalArgumentException ex) {
                    // Unknown flags are reported and otherwise ignored.
                    System.err.println("Illegal options: " + arg);
                }
            }
        }
        Collections.sort(filePaths);
        for (String filePath : filePaths) {
            System.out.println("Loading: " + filePath);
            PDDocument doc = PDDocument.load(new File(filePath));
            try {
                PDFTextStripper stripper = new PDFTextStripper();
                String rawText = stripper.getText(doc);
                String[] textArray = rawText.split("[\\r\\n]+");
                Bill bill = parsePdf(textArray);
                if (options.contains(Flags.INSERT_INTO_DB)) {
                    // NOTE(review): the connection/writer are never closed here; confirm whether
                    // DatabaseConnector or DbWriter expose a close method that should be called.
                    DatabaseConnector db = new DatabaseConnector();
                    DbWriter writer = new DbWriter(db.getConnection());
                    boolean isInserted = writer.insertDetail(bill.getBillSummary(), bill.getPhoneSummaryData(),
                            bill.getPhoneDetail());
                    writer.commit();
                    if (!isInserted) {
                        System.out.println(filePath + " was not inserted into database.");
                    }
                }
            } finally {
                // Close every loaded document, not only those that were inserted into the database.
                doc.close();
            }
        }

    } catch (Exception ex) {
        ex.printStackTrace(System.out);
    }
}

From source file:net.awl.edoc.pdfa.PdfBoxIsartorValidate.java

License:Apache License

/**
 * Checks one file: loads it with PDFBox, then runs {@code PDFParser.parse} on its content and
 * updates the counters. {@code nbFile} is always incremented; {@code nbOk} is incremented when
 * the parser returns {@code true}; {@code nbBad} is incremented when it returns {@code false} or
 * when anything other than an {@link IOException} is thrown. An {@link IOException} is only
 * reported on standard error and changes neither {@code nbOk} nor {@code nbBad}.
 *
 * @param f the file to check
 */
public static void coin(File f) {
    nbFile++;
    try {
        // Loading with PDFBox first: a file PDFBox cannot open fails before the parser runs.
        PDDocument document = PDDocument.load(f);
        try {
            boolean result;
            FileInputStream in = new FileInputStream(f);
            try {
                result = PDFParser.parse(in);
            } finally {
                // The parser does not own the stream, so close it here.
                in.close();
            }
            if (result) {
                nbOk++;
            } else {
                nbBad++;
            }
        } finally {
            document.close();
        }
    } catch (IOException e) {
        System.err.println("Failed for : " + f.getAbsolutePath());
    } catch (Throwable e) {
        nbBad++;
    }

}

From source file:net.betzel.fop.pdf.viewer.FXMLController.java

License:Apache License

/**
 * Renders the XML/XSL pair to PDF with FOP in memory, rasterises every page of that PDF at
 * 150 DPI and publishes the images to {@code images}. The work is submitted to
 * {@code backgoundExecutor}; nothing happens when this method is called from a thread other
 * than the JavaFX application thread.
 *
 * <p>On success {@code images} is replaced with the rendered pages. On failure the progress
 * dialog is closed, an error line is logged, {@code reentrantLock} is released and
 * {@code images} is cleared.
 *
 * @param fileStreamSources supplies the XSL stylesheet and the XML input for the transformation
 */
private void createImages(FileStreamSources fileStreamSources) {
    if (!Platform.isFxApplicationThread()) {
        return;
    }
    final Task<List<BufferedImage>> createImagesTask = new Task<List<BufferedImage>>() {
        @Override
        protected List<BufferedImage> call() throws Exception {

            ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
            List<BufferedImage> bufferedImages = new ArrayList<>();
            FOUserAgent userAgent = fopFactory.newFOUserAgent();
            userAgent.getEventBroadcaster().addEventListener(fopEventListener);
            Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, userAgent, byteArrayOutputStream);
            Transformer transformer = transformerFactory.newTransformer(fileStreamSources.getXslSource());
            transformer.setErrorListener(xmlTransformErrorListener);
            Result result = new SAXResult(fop.getDefaultHandler());
            transformer.transform(fileStreamSources.getXmlSource(), result);
            FormattingResults foResults = fop.getResults();
            List pageSequences = foResults.getPageSequences();
            // NOTE(review): this runs on the executor thread; confirm that logging.appendText
            // is safe to call off the JavaFX application thread.
            for (Object entry : pageSequences) {
                PageSequenceResults pageSequenceResults = (PageSequenceResults) entry;
                logging.appendText("PageSequence "
                        + (String.valueOf(pageSequenceResults.getID()).length() > 0
                                ? pageSequenceResults.getID()
                                : "<no id>")
                        + " generated " + pageSequenceResults.getPageCount() + " pages.\n");
            }
            try (PDDocument pdDocument = PDDocument
                    .load(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()))) {
                PDFRenderer pdfRenderer = new PDFRenderer(pdDocument);
                // The renderer addresses pages by zero-based index, so iterate by index directly.
                int pageCount = pdDocument.getNumberOfPages();
                for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                    bufferedImages.add(pdfRenderer.renderImageWithDPI(pageIndex, 150, ImageType.RGB));
                }
            }
            return bufferedImages;
        }
    };
    createImagesTask.setOnSucceeded((WorkerStateEvent event) -> {
        Platform.runLater(() -> {
            images.clear();
            images.addAll(createImagesTask.getValue());
        });
    });
    createImagesTask.setOnFailed((WorkerStateEvent event) -> {
        Platform.runLater(() -> {
            scanProgressDialog.close();
            logging.appendText("Error creating images from PDF\n");
            reentrantLock.unlock();
            images.clear();
        });
    });
    backgoundExecutor.submit(createImagesTask);
}

From source file:net.bookinaction.ExtractAnnotations.java

License:Apache License

/**
 * Processes {@code <job>.pdf}: records image locations and annotation texts of every page in
 * {@code <job>-dict.txt}, adds square annotations around images, draws a blue box for every
 * text-markup annotation and saves the result as {@code <job>-new.pdf}.
 *
 * <p>For each page:
 * <ul>
 *   <li>every image rectangle is recorded and gets a yellow square annotation;</li>
 *   <li>text-markup and link annotations are registered as extraction regions (rectangle
 *       adjusted by {@code pA[0..3]});</li>
 *   <li>each text-markup annotation is recorded with its extracted text, comment and colour
 *       (rectangle adjusted by {@code pA[4..7]});</li>
 *   <li>each text annotation whose lower-left corner lies inside an image is recorded against
 *       that image, which also gets a green square annotation.</li>
 * </ul>
 *
 * @param job base name (without extension) of the input PDF and of the generated files
 * @param pA  eight adjustment values; indices 0-3 are applied to extraction regions, 4-7 to the
 *            rectangles that are recorded and drawn
 * @throws IOException if the PDF or the dictionary file cannot be read or written
 */
public void doJob(String job, Float[] pA) throws IOException {

    PDDocument document = null;

    Stamper s = new Stamper(); // utility class

    final String job_file = job + ".pdf";
    final String dic_file = job + "-dict.txt";
    final String new_job = job + "-new.pdf";

    ImageLocationListener imageLocationsListener = new ImageLocationListener();
    AnnotationMaker annotMaker = new AnnotationMaker();

    PrintWriter writer = new PrintWriter(dic_file);

    try {
        document = PDDocument.load(new File(job_file));

        int pageNum = 0;
        for (PDPage page : document.getPages()) {
            pageNum++;

            PDRectangle cropBox = page.getCropBox();

            List<PDAnnotation> annotations = page.getAnnotations();

            // extract image locations
            List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>();
            imageLocationsListener.setImageRects(imageRects);
            imageLocationsListener.processPage(page);

            int im = 0;
            for (Rectangle2D pdImageRect : imageRects) {
                s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect);
                annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect,
                        "[im" + im + "]"));
                im++;
            }

            PDFTextStripperByArea stripper = new PDFTextStripperByArea();

            // Region names are consecutive integers handed out to markup AND link annotations.
            int j = 0;
            List<PDAnnotation> viableAnnots = new ArrayList<PDAnnotation>();

            for (PDAnnotation annot : annotations) {
                if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) {

                    stripper.addRegion(Integer.toString(j++), s.getAwtRect(
                            s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox));
                    viableAnnots.add(annot);

                } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) {
                    viableAnnots.add(annot);

                }
            }

            stripper.extractRegions(page);

            List<PDRectangle> rects = new ArrayList<PDRectangle>();

            j = 0;
            for (PDAnnotation viableAnnot : viableAnnots) {

                if (viableAnnot instanceof PDAnnotationTextMarkup) {
                    String highlightText = stripper.getTextForRegion(Integer.toString(j++));
                    String withoutCR = highlightText.replace((char) 0x0A, '^');

                    String comment = viableAnnot.getContents();

                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]);
                    rects.add(aRect);

                    s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString);

                } else if (viableAnnot instanceof PDAnnotationLink) {
                    // A link consumed a region name above; skip it so that the markups that
                    // follow still read the text of their own region.
                    j++;

                } else if (viableAnnot instanceof PDAnnotationText) {
                    String comment = viableAnnot.getContents();
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    for (Rectangle2D pdImageRect : imageRects) {
                        if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(),
                                viableAnnot.getRectangle().getLowerLeftY())) {
                            s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect,
                                    colorString);
                            annotations.add(annotMaker.squareAnnotation(Color.GREEN,
                                    (Rectangle2D.Float) pdImageRect, comment));
                        }
                    }
                }
            }

            PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true);
            try {
                for (PDRectangle pdRect : rects) {
                    s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(),
                            pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE);
                }
            } finally {
                // Release the page stream even when drawing a box fails.
                canvas.close();
            }
        }
        document.save(new_job);

    } finally {
        // Close the dictionary file first: PrintWriter.close() never throws, so it cannot be
        // skipped by a failure while closing the document.
        writer.close();
        if (document != null) {
            document.close();
        }

    }

}

From source file:net.bookinaction.TextInfoExtractor.java

License:Apache License

/**
 * Runs {@code getTextPositionFromPage} for every page of the source PDF, writing to the
 * coordinates text file, and then saves the document under a new name.
 *
 * <p>The output name is everything before the first {@code '.'} in {@code source} followed by
 * {@code "-new.pdf"}.
 * NOTE(review): a path with a dot before the extension (for example {@code ./in.pdf} or
 * {@code a.b.pdf}) is truncated at that first dot; confirm whether that is intended.
 *
 * @param source        path of the PDF to read
 * @param coord_text    path of the text file that receives the output
 * @param stripperParam parameters passed through to {@code getTextPositionFromPage}
 * @throws IOException if the PDF cannot be read or either output file cannot be written
 */
public void doTextPosition(String source, String coord_text, StripperParam stripperParam) throws IOException {

    String new_file = source.split("\\.")[0] + "-new.pdf";

    PDDocument document = PDDocument.load(new File(source));
    try {
        PrintWriter writer = new PrintWriter(new File(coord_text));
        try {
            // Pages are passed as 1-based numbers.
            for (int i = 0; i < document.getNumberOfPages(); i++) {
                getTextPositionFromPage(document, stripperParam, i + 1, writer, true);
            }
        } finally {
            writer.close();
        }

        document.save(new_file);
    } finally {
        // Release the document even when a page fails or the output cannot be written.
        document.close();
    }

}

From source file:net.ontopia.topicmaps.classify.PDFFormatModule.java

License:Apache License

/**
 * Extracts the text of the PDF held in {@code cc} and passes it to {@code handler} as a single
 * region named {@code "document"}.
 *
 * @param cc      the content whose bytes are parsed as a PDF
 * @param handler receives the extracted text
 * @throws OntopiaRuntimeException wrapping any exception raised while reading or handling the text
 */
public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) {
    try {
        PDDocument pdoc = PDDocument.load(new BufferedInputStream(new ByteArrayInputStream(cc.getContent())));
        String s;
        try {
            PDFTextStripper stripper = new PDFTextStripper();
            s = stripper.getText(pdoc);
        } finally {
            // Release the document even when text extraction fails.
            pdoc.close();
        }
        char[] c = s.toCharArray();
        handler.startRegion("document");
        handler.text(c, 0, c.length);
        handler.endRegion();
    } catch (Exception e) {
        throw new OntopiaRuntimeException(e);
    }
}

From source file:net.padaf.preflight.PdfA1bValidator.java

License:Apache License

/**
 * Validates the given data source in four steps: a syntax parse, loading the document with
 * PDFBox, running the token extractor, and finally running every priority helper followed by
 * every standard helper.
 *
 * <p>A {@code ParseException} in the syntax step is returned as an error result rather than
 * thrown. The loaded document and the handler's metadata are attached to the returned result.
 * The handler is closed only when a {@code ValidationException} leaves this method.
 *
 * @param source the data source to validate
 * @return the validation outcome: valid when no helper reported an error, otherwise the errors
 * @throws ValidationException if the source cannot be read by the parser, PDFBox or the extractor
 */
public synchronized ValidationResult validate(DataSource source) throws ValidationException {
    DocumentHandler handler = createDocumentHandler(source);
    try {
        // Step 1: syntax (javacc) validation.
        try {
            PDFParser syntaxParser = new PDFParser(source.getInputStream());
            syntaxParser.PDF();
            handler.setParser(syntaxParser);
        } catch (IOException ioFailure) {
            throw new ValidationException("Failed to parse datasource due to : " + ioFailure.getMessage(),
                    ioFailure);
        } catch (ParseException syntaxFailure) {
            return createErrorResult(syntaxFailure);
        }

        // Step 2: the syntax is acceptable, so load the PDFBox document for the helpers.
        PDDocument pdfBoxDocument;
        try {
            pdfBoxDocument = PDDocument.load(handler.getSource().getInputStream());
            handler.setDocument(pdfBoxDocument);
        } catch (IOException ioFailure) {
            throw new ValidationException("PDFBox failed to parse datasource", ioFailure);
        }

        // Step 3: run the token extractor over the same source.
        try {
            ExtractorTokenManager tokenExtractor = new ExtractorTokenManager(
                    new SimpleCharStream(source.getInputStream()));
            tokenExtractor.parse();
            handler.setPdfExtractor(tokenExtractor);
        } catch (IOException ioFailure) {
            throw new ValidationException("PDF ExtractorTokenMng failed to parse datasource", ioFailure);
        }

        // Step 4: collect the errors of all helpers, priority helpers first.
        ArrayList<ValidationError> collectedErrors = new ArrayList<ValidationError>();
        for (AbstractValidationHelper priorityHelper : priorHelpers) {
            runValidation(handler, priorityHelper, collectedErrors);
        }
        for (AbstractValidationHelper standardHelper : standHelpers) {
            runValidation(handler, standardHelper, collectedErrors);
        }

        ValidationResult outcome = collectedErrors.isEmpty()
                ? new ValidationResult(true)
                : new ValidationResult(collectedErrors);

        // Attach the parsed objects so that callers do not have to parse the file a second time.
        outcome.setPdf(pdfBoxDocument);
        outcome.setXmpMetaData(handler.getMetadata());
        return outcome;
    } catch (ValidationException validationFailure) {
        // Release everything the handler holds before reporting the failure.
        handler.close();
        throw validationFailure;
    }
}

From source file:net.sf.jabref.gui.PdfPreviewPanel.java

License:Open Source License

/**
 * Renders the first page of the given PDF, scales it to the size of the parent component and
 * shows it in {@code picLabel}. The preview is cleared when the document has no pages or the
 * page cannot be rendered. Failure to open the file is logged as a warning.
 *
 * @param file the PDF file to preview
 */
private void renderPDFFile(File file) {

    try (InputStream input = new FileInputStream(file); PDDocument document = PDDocument.load(input)) {
        List<PDPage> pages = document.getDocumentCatalog().getAllPages();

        // A document without pages has nothing to preview; avoid an IndexOutOfBoundsException.
        if (pages.isEmpty()) {
            clearPreview();
            return;
        }

        PDPage page = pages.get(0);
        BufferedImage image;
        try {
            image = page.convertToImage();
        } catch (Exception e1) {
            // Rendering problems are deliberately not reported; the preview is cleared below.
            image = null;
        }

        if (image != null) {
            int width = this.getParent().getWidth();
            int height = this.getParent().getHeight();
            BufferedImage resImage = resizeImage(image, width, height, BufferedImage.TYPE_INT_RGB);
            ImageIcon icon = new ImageIcon(resImage);
            picLabel.setText(null);
            picLabel.setIcon(icon);
        } else {
            clearPreview();
        }

    } catch (IOException e) {
        LOGGER.warn("Cannot open file/PDF document", e);
    }
}