Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of the org.apache.pdfbox.pdmodel.PDDocument.load method.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:name.marcelomorales.siqisiqi.pdfbox.CoordinatesGenerator.java

License:Apache License

/**
 * Stamps text onto the first page of the PDF found at {@code path} and saves the result to {@code os}.
 *
 * <p>{@code template} is read line by line. Lines starting with {@code #} and lines that yield fewer than
 * four non-empty comma-separated fields are skipped. For every remaining line:
 * <ul>
 *   <li>if field 0 starts with a digit, a new text block is begun and fields 0 and 1 are parsed as the
 *       amounts handed to {@code moveTextPositionByAmount}; otherwise the position is moved horizontally by
 *       the offset computed from the last string that was drawn;</li>
 *   <li>field 2 equal to {@code "b"} selects the bold font, anything else the regular one;</li>
 *   <li>field 3 names what to print. NOTE: resolving that value is still a TODO below, so at present every
 *       line logs a warning and draws a single space.</li>
 * </ul>
 *
 * @param os          stream the modified document is saved to
 * @param template    placement instructions in the format described above
 * @param m           values intended for property lookup; currently unused (see the TODO in the body)
 * @param path        file system path of the PDF to load
 * @param coordenates currently unused
 * @param fontSize    font size passed to {@code setFont}
 * @param ancho       factor applied to the measured string width to obtain the next horizontal offset
 * @throws IOException if loading, drawing or saving the document fails
 */
public void generarPdf(OutputStream os, String template, Map<String, Object> m, String path, String coordenates,
        float fontSize, float ancho) throws IOException {
    long t = System.currentTimeMillis();
    PDDocument doc = null;
    try {
        doc = PDDocument.load(new File(path));

        List pages = doc.getDocumentCatalog().getAllPages();

        PDPage sourcePage = (PDPage) pages.get(0);

        boolean append = sourcePage.getContents() != null;
        PDPageContentStream contentStream = new PDPageContentStream(doc, sourcePage, append, true);

        StringReader fileReader = null;
        try {

            fileReader = new StringReader(template);
            List<String> list = CharStreams.readLines(fileReader);
            boolean textHasBegun = false;
            float currentOffset = 0f;
            for (String line : list) {

                if (line == null) {
                    continue;
                }

                if (line.startsWith("#")) {
                    continue;
                }

                final Iterable<String> str = Splitter.on(',').omitEmptyStrings().trimResults().split(line);
                final String[] split = Iterables.toArray(str, String.class);
                if (split == null || split.length < 4) {
                    continue;
                }

                if (Character.isDigit(split[0].charAt(0))) {
                    if (textHasBegun) {
                        contentStream.endText();
                    }
                    contentStream.beginText();
                    textHasBegun = true;
                    contentStream.moveTextPositionByAmount(parseFloat(split[0]), parseFloat(split[1]));
                } else {
                    // A relative line before any absolute one: open the text block first so the move
                    // below is issued inside it rather than before beginText().
                    if (!textHasBegun) {
                        LOGGER.warn("Hay un posible mal uso de un .ree", new Throwable());
                        contentStream.beginText();
                        textHasBegun = true;
                    }
                    contentStream.moveTextPositionByAmount(currentOffset, 0);
                }

                PDType1Font font;
                if ("b".equals(split[2])) {
                    font = HELVETICA_BOLD;
                } else {
                    font = HELVETICA;
                }
                contentStream.setFont(font, fontSize);

                Object text = null;
                if (split[3].startsWith("\"")) {
                    // TODO: text = substring(split[3], 1, -1);
                } else {
                    // TODO: text = new PropertyModel(m, split[3]).getObject();
                }

                if (text == null) {
                    LOGGER.warn("Propiedad {} no se encuentra", split[3]);
                    contentStream.drawString(" ");
                } else {
                    String string = text.toString();
                    currentOffset = font.getStringWidth(string) * ancho;
                    contentStream.drawString(string);
                }
            }

            if (textHasBegun) {
                contentStream.endText();
            }
        } finally {
            Closeables.closeQuietly(fileReader);
            // Close the content stream on every path (it used to leak when drawing failed);
            // it is still closed before the document is saved.
            contentStream.close();
        }

        try {
            doc.save(os);
        } catch (COSVisitorException e) {
            throw new IOException("Ha ocurrido un error al escribir en el Os", e);
        }
    } finally {
        if (doc != null) {
            doc.close();
        }
        LOGGER.info("Me ha tomado {} milisegundos hacer el pdf", System.currentTimeMillis() - t);
    }
}

From source file:net.anthonypoon.billscrapper.JavaBillScrapper.java

/**
 * Loads the given PDF, extracts its text, splits it on runs of line breaks and stores the result of
 * {@code parsePdf} in {@code billObj}.
 *
 * @param pdfFile the PDF file to read
 * @throws IOException if the file cannot be loaded or its text cannot be extracted
 */
public JavaBillScrapper(File pdfFile) throws IOException {
    PDDocument doc = PDDocument.load(pdfFile);
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        String rawText = stripper.getText(doc);
        String[] textArray = rawText.split("[\\r\\n]+");
        this.billObj = parsePdf(textArray);
    } finally {
        // Release the document even when extraction or parsing fails.
        doc.close();
    }
}

From source file:net.anthonypoon.billscrapper.JavaBillScrapper.java

/**
 * Command-line entry point. Arguments starting with {@code -} are parsed as {@code Flags}; all other
 * arguments are treated as PDF file paths. Paths are processed in sorted order: each file's text is
 * extracted and parsed into a {@code Bill}, which is written to the database when
 * {@code Flags.INSERT_INTO_DB} was given.
 *
 * <p>Any exception aborts processing of the remaining files and is printed to standard output.
 *
 * @param args flags and PDF file paths
 */
public static void main(String[] args) {
    try {
        for (String arg : args) {
            if (!arg.startsWith("-")) {
                filePaths.add(arg);
            } else {
                try {
                    options.add(Flags.fromString(arg));
                } catch (IllegalArgumentException ex) {
                    System.err.println("Illegal options: " + arg);
                }
            }
        }
        Collections.sort(filePaths);
        for (String filePath : filePaths) {
            System.out.println("Loading: " + filePath);
            String rawText;
            PDDocument doc = PDDocument.load(new File(filePath));
            try {
                PDFTextStripper stripper = new PDFTextStripper();
                rawText = stripper.getText(doc);
            } finally {
                // Previously the document was only closed when INSERT_INTO_DB was set, and never on
                // failure; only the extracted text is needed from here on.
                doc.close();
            }
            String[] textArray = rawText.split("[\\r\\n]+");
            Bill bill = parsePdf(textArray);
            if (options.contains(Flags.INSERT_INTO_DB)) {
                DatabaseConnector db = new DatabaseConnector();
                DbWriter writer = new DbWriter(db.getConnection());
                boolean isInserted = writer.insertDetail(bill.getBillSummary(), bill.getPhoneSummaryData(),
                        bill.getPhoneDetail());
                writer.commit();
                if (!isInserted) {
                    System.out.println(filePath + " was not inserted into database.");
                }
            }
        }

    } catch (Exception ex) {
        ex.printStackTrace(System.out);
    }
}

From source file:net.awl.edoc.pdfa.PdfBoxIsartorValidate.java

License:Apache License

/**
 * Checks one file: it must load with PDFBox, and it is then run through {@code PDFParser.parse}. Updates
 * the counters {@code nbFile}, {@code nbOk} and {@code nbBad} accordingly.
 *
 * <p>An {@link IOException} is reported on standard error without touching {@code nbOk}/{@code nbBad};
 * any other throwable counts the file as bad.
 *
 * @param f the file to check
 */
public static void coin(File f) {
    nbFile++;
    // PDFBox
    try {
        PDDocument document = PDDocument.load(f);
        try {
            FileInputStream in = new FileInputStream(f);
            try {
                if (PDFParser.parse(in)) {
                    nbOk++;
                } else {
                    nbBad++;
                }
            } finally {
                // The stream was previously never closed.
                in.close();
            }
        } finally {
            // Close the document on failure as well as on success.
            document.close();
        }
    } catch (IOException e) {
        System.err.println("Failed for : " + f.getAbsolutePath());
    } catch (Throwable e) {
        // NOTE(review): Throwable is kept on purpose - presumably the parser can signal
        // failures with Error subclasses; confirm before narrowing this catch.
        nbBad++;
    }

}

From source file:net.betzel.fop.pdf.viewer.FXMLController.java

License:Apache License

/**
 * Renders the XML/XSL pair in {@code fileStreamSources} to PDF with FOP on the background executor,
 * then rasterises every page of that PDF at 150 DPI (RGB).
 *
 * <p>On success the rendered pages replace the contents of {@code images}; on failure the progress
 * dialog is closed, an error line is appended to the log, the lock is released and {@code images} is
 * emptied. Does nothing when called from a thread other than the JavaFX application thread.
 *
 * @param fileStreamSources supplies the XML and XSL sources for the transformation
 */
private void createImages(FileStreamSources fileStreamSources) {
    if (!Platform.isFxApplicationThread()) {
        return;
    }

    final Task<List<BufferedImage>> renderTask = new Task<List<BufferedImage>>() {
        @Override
        protected List<BufferedImage> call() throws Exception {
            // 1. Run the XSL transformation through FOP, collecting the PDF in memory.
            ByteArrayOutputStream pdfBytes = new ByteArrayOutputStream();
            FOUserAgent agent = fopFactory.newFOUserAgent();
            agent.getEventBroadcaster().addEventListener(fopEventListener);
            Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, agent, pdfBytes);
            Transformer xslt = transformerFactory.newTransformer(fileStreamSources.getXslSource());
            xslt.setErrorListener(xmlTransformErrorListener);
            xslt.transform(fileStreamSources.getXmlSource(), new SAXResult(fop.getDefaultHandler()));

            // 2. Report how many pages each page sequence produced.
            for (Object entry : fop.getResults().getPageSequences()) {
                PageSequenceResults sequence = (PageSequenceResults) entry;
                logging.appendText("PageSequence "
                        + (String.valueOf(sequence.getID()).length() > 0 ? sequence.getID() : "<no id>")
                        + " generated " + sequence.getPageCount() + " pages.\n");
            }

            // 3. Re-open the generated PDF and render one image per page.
            List<BufferedImage> rendered = new ArrayList<>();
            try (PDDocument pdf = PDDocument.load(new ByteArrayInputStream(pdfBytes.toByteArray()))) {
                PDFRenderer renderer = new PDFRenderer(pdf);
                int pageIndex = 0;
                for (PDPage ignoredPage : pdf.getPages()) {
                    rendered.add(renderer.renderImageWithDPI(pageIndex, 150, ImageType.RGB));
                    pageIndex++;
                }
            }
            return rendered;
        }
    };

    renderTask.setOnSucceeded(event -> Platform.runLater(() -> {
        images.clear();
        images.addAll(renderTask.getValue());
    }));

    renderTask.setOnFailed(event -> Platform.runLater(() -> {
        scanProgressDialog.close();
        logging.appendText("Error creating images from PDF\n");
        reentrantLock.unlock();
        images.clear();
    }));

    backgoundExecutor.submit(renderTask);
}

From source file:net.bookinaction.ExtractAnnotations.java

License:Apache License

/**
 * Processes {@code <job>.pdf}: for every page it records image locations and annotation data to
 * {@code <job>-dict.txt}, adds square annotations around images, draws a blue box for each text-markup
 * annotation, and finally saves the document as {@code <job>-new.pdf}.
 *
 * @param job base name (without extension) of the input PDF; also used to derive the output names
 * @param pA  eight adjustment values: indices 0-3 are passed to {@code adjustedRect} for the text
 *            extraction regions, indices 4-7 for the rectangles that are recorded and drawn
 * @throws IOException if the PDF cannot be read, processed or saved, or the dictionary file cannot be
 *                     created
 */
public void doJob(String job, Float[] pA) throws IOException {

    PDDocument document = null;

    Stamper s = new Stamper(); // utility class

    final String job_file = job + ".pdf";
    final String dic_file = job + "-dict.txt";
    final String new_job = job + "-new.pdf";

    PrintWriter writer = new PrintWriter(dic_file);

    try {
        ImageLocationListener imageLocationsListener = new ImageLocationListener();
        AnnotationMaker annotMaker = new AnnotationMaker();

        document = PDDocument.load(new File(job_file));

        int pageNum = 0;
        for (PDPage page : document.getPages()) {
            pageNum++;

            PDRectangle cropBox = page.getCropBox();

            List<PDAnnotation> annotations = page.getAnnotations();

            // extract image locations
            List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>();
            imageLocationsListener.setImageRects(imageRects);
            imageLocationsListener.processPage(page);

            int im = 0;
            for (Rectangle2D pdImageRect : imageRects) {
                s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect);
                annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect,
                        "[im" + im + "]"));
                im++;
            }

            PDFTextStripperByArea stripper = new PDFTextStripperByArea();

            // First pass: register one extraction region per text-markup or link annotation, named by
            // a running index, and collect the annotations handled in the second pass.
            int j = 0;
            List<PDAnnotation> viableAnnots = new ArrayList<PDAnnotation>();

            for (PDAnnotation annot : annotations) {
                if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) {

                    stripper.addRegion(Integer.toString(j++), s.getAwtRect(
                            s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox));
                    viableAnnots.add(annot);

                } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) {
                    viableAnnots.add(annot);

                }
            }

            stripper.extractRegions(page);

            List<PDRectangle> rects = new ArrayList<PDRectangle>();

            List<String> comments = new ArrayList<String>();
            List<String> highlightTexts = new ArrayList<String>();

            // Second pass: the region index must advance exactly as it did in the first pass.
            j = 0;
            for (PDAnnotation viableAnnot : viableAnnots) {

                if (viableAnnot instanceof PDAnnotationTextMarkup) {
                    String highlightText = stripper.getTextForRegion(Integer.toString(j++));
                    String withoutCR = highlightText.replace((char) 0x0A, '^');

                    String comment = viableAnnot.getContents();

                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]);
                    rects.add(aRect);
                    comments.add(comment);
                    highlightTexts.add(highlightText);

                    s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString);

                } else if (viableAnnot instanceof PDAnnotationText) {
                    String comment = viableAnnot.getContents();
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    for (Rectangle2D pdImageRect : imageRects) {
                        if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(),
                                viableAnnot.getRectangle().getLowerLeftY())) {
                            s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect,
                                    colorString);
                            annotations.add(annotMaker.squareAnnotation(Color.GREEN,
                                    (Rectangle2D.Float) pdImageRect, comment));
                        }
                    }
                } else if (viableAnnot instanceof PDAnnotationLink) {
                    // A region was registered for this link in the first pass; skip its index so the
                    // following text-markup annotations read their own region, not the link's.
                    j++;
                }
            }

            PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true);
            try {
                for (PDRectangle pdRect : rects) {
                    s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(),
                            pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE);
                }
            } finally {
                // Close the page stream even when drawing fails.
                canvas.close();
            }
        }
        document.save(new_job);

    } finally {
        // The dictionary writer used to stay open when processing failed.
        writer.close();
        if (document != null) {
            document.close();
        }

    }

}

From source file:net.bookinaction.TextInfoExtractor.java

License:Apache License

/**
 * Calls {@code getTextPositionFromPage} for every page of the PDF at {@code source} (1-based page
 * numbers), passing a writer on {@code coord_text}, and then saves the document next to the source with
 * the extension replaced by {@code -new.pdf}.
 *
 * @param source        path of the PDF to process
 * @param coord_text    path of the text file handed to {@code getTextPositionFromPage} via a writer
 * @param stripperParam parameters forwarded unchanged to {@code getTextPositionFromPage}
 * @throws IOException if the PDF cannot be loaded or saved, or the text file cannot be created
 */
public void doTextPosition(String source, String coord_text, StripperParam stripperParam) throws IOException {

    // Strip only the final extension. Splitting on the first '.' mangled names such as
    // "./dir/report.pdf" (empty base) or "report.v2.pdf" (base "report").
    int lastSeparator = Math.max(source.lastIndexOf('/'), source.lastIndexOf('\\'));
    int lastDot = source.lastIndexOf('.');
    String baseName = lastDot > lastSeparator ? source.substring(0, lastDot) : source;
    String new_file = baseName + "-new.pdf";

    PDDocument document = PDDocument.load(new File(source));
    try {
        PrintWriter writer = new PrintWriter(new File(coord_text));
        try {
            for (int i = 0; i < document.getNumberOfPages(); i++) {
                getTextPositionFromPage(document, stripperParam, i + 1, writer, true);
            }
        } finally {
            // Flush and release the coordinates file even if a page fails.
            writer.close();
        }

        document.save(new_file);
    } finally {
        document.close();
    }

}

From source file:net.ontopia.topicmaps.classify.PDFFormatModule.java

License:Apache License

/**
 * Extracts the text of the PDF held in {@code cc} and feeds it to {@code handler} as a single region
 * named {@code "document"}.
 *
 * @param cc      supplies the raw PDF bytes via {@code getContent()}
 * @param handler receives the extracted text
 * @throws OntopiaRuntimeException wrapping any exception raised while reading or handling the content
 */
public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) {
    try {
        String s;
        PDDocument pdoc = PDDocument.load(new BufferedInputStream(new ByteArrayInputStream(cc.getContent())));
        try {
            PDFTextStripper stripper = new PDFTextStripper();
            s = stripper.getText(pdoc);
        } finally {
            // Close the document even when text extraction fails.
            pdoc.close();
        }
        char[] c = s.toCharArray();
        handler.startRegion("document");
        handler.text(c, 0, c.length);
        handler.endRegion();
    } catch (Exception e) {
        throw new OntopiaRuntimeException(e);
    }
}

From source file:net.padaf.preflight.PdfA1bValidator.java

License:Apache License

/**
 * Validates the PDF provided by {@code source}.
 *
 * <p>The source is read three times: by the javacc {@code PDFParser} (syntax check), by PDFBox (document
 * model) and by the {@code ExtractorTokenManager}. All priority helpers and then all standard helpers
 * are run against the resulting handler, and their errors are gathered into the result.
 *
 * <p>The loaded {@code PDDocument} and the handler's metadata are attached to the returned result. The
 * handler is closed here only when a {@code ValidationException} is thrown.
 *
 * @param source the data source containing the PDF
 * @return a valid result when no helper reported an error, a result carrying the collected errors
 *         otherwise, or the result of {@code createErrorResult} when the syntax check raises a
 *         {@code ParseException}
 * @throws ValidationException if any of the three parsing steps fails with an {@code IOException}
 */
public synchronized ValidationResult validate(DataSource source) throws ValidationException {
    DocumentHandler handler = createDocumentHandler(source);
    try {
        // Step 1: syntax (javacc) validation.
        try {
            PDFParser syntaxParser = new PDFParser(source.getInputStream());
            syntaxParser.PDF();
            handler.setParser(syntaxParser);
        } catch (IOException e) {
            throw new ValidationException("Failed to parse datasource due to : " + e.getMessage(), e);
        } catch (ParseException e) {
            return createErrorResult(e);
        }

        // Step 2: build the PDFBox document used by the helpers.
        PDDocument document;
        try {
            document = PDDocument.load(handler.getSource().getInputStream());
            handler.setDocument(document);
        } catch (IOException e) {
            throw new ValidationException("PDFBox failed to parse datasource", e);
        }

        // Step 3: run the PDF extractor over the raw stream.
        try {
            ExtractorTokenManager extractor = new ExtractorTokenManager(
                    new SimpleCharStream(source.getInputStream()));
            extractor.parse();
            handler.setPdfExtractor(extractor);
        } catch (IOException e) {
            throw new ValidationException("PDF ExtractorTokenMng failed to parse datasource", e);
        }

        // Step 4: priority helpers first, then the standard ones, all feeding one error list.
        ArrayList<ValidationError> collected = new ArrayList<ValidationError>();
        for (AbstractValidationHelper priority : priorHelpers) {
            runValidation(handler, priority, collected);
        }
        for (AbstractValidationHelper standard : standHelpers) {
            runValidation(handler, standard, collected);
        }

        // Step 5: build the result and attach the parsed objects so callers need not parse again.
        ValidationResult outcome = collected.isEmpty()
                ? new ValidationResult(true)
                : new ValidationResult(collected);
        outcome.setPdf(document);
        outcome.setXmpMetaData(handler.getMetadata());
        return outcome;
    } catch (ValidationException e) {
        // Release everything the handler holds before propagating the failure.
        handler.close();
        throw e;
    }
}

From source file:net.sf.jabref.gui.PdfPreviewPanel.java

License:Open Source License

/**
 * Renders the first page of the given PDF, scales it to the size of this component's parent and shows
 * it in {@code picLabel}. The preview is cleared when the document has no pages or the page cannot be
 * rendered; failure to open the file is logged as a warning.
 *
 * @param file the PDF file to preview
 */
private void renderPDFFile(File file) {

    try (InputStream input = new FileInputStream(file); PDDocument document = PDDocument.load(input)) {
        List<PDPage> pages = document.getDocumentCatalog().getAllPages();

        // A document without pages used to fail with an uncaught IndexOutOfBoundsException.
        if (pages.isEmpty()) {
            clearPreview();
            return;
        }

        PDPage page = pages.get(0);
        BufferedImage image;
        try {
            image = page.convertToImage();
        } catch (Exception ignored) {
            // Rendering problems are deliberately not reported; the preview is simply cleared.
            image = null;
        }

        if (image != null) {
            int width = this.getParent().getWidth();
            int height = this.getParent().getHeight();
            BufferedImage resImage = resizeImage(image, width, height, BufferedImage.TYPE_INT_RGB);
            ImageIcon icon = new ImageIcon(resImage);
            picLabel.setText(null);
            picLabel.setIcon(icon);
        } else {
            clearPreview();
        }

    } catch (IOException e) {
        LOGGER.warn("Cannot open file/PDF document", e);
    }
}