Example usage for org.apache.pdfbox.pdmodel PDDocument getNumberOfPages

List of usage examples for org.apache.pdfbox.pdmodel PDDocument getNumberOfPages

Introduction

This page collects usage examples for the getNumberOfPages method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public int getNumberOfPages() 

Source Link

Document

This will return the total page count of the PDF document.

Usage

From source file:eu.transkribus.languageresources.extractor.pdf.PDFExtractor.java

/**
 * Extracts the text of a PDF file page by page.
 *
 * <p>I/O failures while opening, parsing or closing the file are logged at
 * SEVERE level and not rethrown; in that case the list contains whatever
 * pages were extracted before the failure (possibly none).
 *
 * @param pathToFile path of the PDF file to read
 * @return one entry per extracted page, in page order; never {@code null}
 */
@Override
public List<String> extractTextFromDocumentPagewise(String pathToFile) {
    List<String> pageWiseText = new LinkedList<>();

    // try-with-resources releases the file handle even if the parser
    // constructor or parse() throws before a document exists.
    try (FileInputStream input = new FileInputStream(new File(pathToFile))) {
        COSDocument cosDoc = null;
        PDDocument pdDoc = null;
        try {
            PDFParser parser = new PDFParser(input);
            parser.parse();
            cosDoc = parser.getDocument();
            pdDoc = new PDDocument(cosDoc);

            int pageCount = pdDoc.getNumberOfPages();
            for (int pageId = 0; pageId < pageCount; pageId++) {
                pageWiseText.add(extractTextFromPage(pdDoc, pageId));
            }
        } finally {
            // Documents are closed before the underlying stream is released.
            if (pdDoc != null) {
                try {
                    pdDoc.close();
                } catch (IOException ex) {
                    Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
            if (cosDoc != null) {
                try {
                    cosDoc.close();
                } catch (IOException ex) {
                    Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
    }

    return pageWiseText;
}

From source file:eu.transkribus.languageresources.extractor.pdf.PDFExtractor.java

/**
 * Extracts the text of a single page of an open document.
 *
 * @param pdDoc the document to read from
 * @param page  zero-based page index; it is shifted by one before being
 *              handed to the text stripper
 * @return the text of the requested page
 * @throws RuntimeException if the text stripper fails with an
 *         {@link IOException}; the original exception is attached as the cause
 */
private String extractTextFromPage(PDDocument pdDoc, int page) {
    try {
        PDFTextStripper pdfStripper = new PDFTextStripper();
        // Restrict the stripper to exactly one page (start == end).
        pdfStripper.setStartPage(page + 1);
        pdfStripper.setEndPage(page + 1);

        return pdfStripper.getText(pdDoc);
    } catch (IOException ex) {
        Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
        // Keep the cause so callers can see the original stack trace.
        throw new RuntimeException(ex.getMessage(), ex);
    }
}

From source file:FileIOAux.PrintAux.java

/**
 * @see/*from  ww  w.  j  a v  a 2s. c o  m*/
 * http://stackoverflow.com/questions/23326562/apache-pdfbox-convert-pdf-to-images
 * @param fil
 * @return
 */
public static BufferedImage[] pdfToImage(String fil) {
    BufferedImage[] bim = null;
    try {
        PDDocument document = PDDocument.load(new File(fil));
        if (document != null) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            bim = new BufferedImage[document.getNumberOfPages()];
            for (int i = 0; i < document.getNumberOfPages(); i++) {
                bim[i] = pdfRenderer.renderImage(i);
            }
            document.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(PrintAux.class.getName()).log(Level.SEVERE, null, ex);
    }
    return bim;
}

From source file:FileIOAux.PrintAux.java

/**
 * Counts the pages selected by the given page ranges, capped at the number
 * of pages of the PDF loaded from the {@code file} field.
 *
 * <p>Each range member contributes one page, plus {@code range[1] - range[0]}
 * when the member has two bounds.
 *
 * @see <a href="http://stackoverflow.com/questions/3701644/how-can-i-get-the-total-number-of-pages-to-be-printed">
 *      How can I get the total number of pages to be printed</a>
 * @param pageRanges the requested page ranges
 * @return the number of pages to print; {@code 0} when the {@code doc} field
 *         is {@code null} or the PDF cannot be loaded
 */
int getNumberOfPages(PageRanges pageRanges) {
    if (doc == null) {
        return 0;
    }

    int pages = 0;
    // The document is only needed for its page count, so close it right away.
    try (PDDocument doco = PDDocument.load(file)) {
        int requested = 0;
        for (int[] range : pageRanges.getMembers()) {
            requested += 1;
            if (range.length == 2) {
                requested += range[1] - range[0];
            }
        }
        pages = Math.min(requested, doco.getNumberOfPages());
    } catch (IOException ex) {
        // A load failure leaves pages at 0; a close failure keeps the computed value.
        Logger.getLogger(PrintAux.class.getName()).log(Level.SEVERE, null, ex);
    }
    return pages;
}

From source file:fr.acxio.tools.agia.file.pdf.PageSplittingPDDocumentFactory.java

License:Apache License

/**
 * Configures a {@code Splitter} from the optional page parameters, splits the
 * document and wraps the source together with its parts in a container.
 *
 * <p>Split-size selection: an explicit {@code sSplitAtPage} always wins.
 * Without it, a start page sets the split size to the document's page count,
 * an end page then sets it to {@code sEndPage}, and when neither start nor
 * end is given the split size is 1.
 *
 * @param sDocument    the document to split
 * @param sStartPage   first page handed to the splitter, or {@code null}
 * @param sEndPage     last page handed to the splitter, or {@code null}
 * @param sSplitAtPage explicit split size, or {@code null}
 * @return a container built from the source document and the split parts
 * @throws IOException declared for failures while splitting
 */
private PDDocumentContainer splitDocument(PDDocument sDocument, Integer sStartPage, Integer sEndPage,
        Integer sSplitAtPage) throws IOException {
    final Splitter aPageSplitter = new Splitter();
    final int aPageCount = sDocument.getNumberOfPages();

    final boolean aHasStart = (sStartPage != null);
    final boolean aHasEnd = (sEndPage != null);
    final boolean aHasExplicitSplit = (sSplitAtPage != null);

    if (aHasStart) {
        aPageSplitter.setStartPage(sStartPage);
        if (!aHasExplicitSplit) {
            aPageSplitter.setSplitAtPage(aPageCount);
        }
    }

    if (aHasEnd) {
        aPageSplitter.setEndPage(sEndPage);
        if (!aHasExplicitSplit) {
            aPageSplitter.setSplitAtPage(sEndPage);
        }
    }

    if (aHasExplicitSplit) {
        aPageSplitter.setSplitAtPage(sSplitAtPage);
    } else if (!(aHasStart || aHasEnd)) {
        aPageSplitter.setSplitAtPage(1);
    }

    return new BasicPDDocumentContainer(sDocument, aPageSplitter.split(sDocument));
}

From source file:GUI.Helper.PDFIOHelper.java

/**
 * Draws the header and footer onto the pages of a report.
 *
 * <p>The header carries the current date and the project name; the footer
 * carries the logo, the tool version, "Page x of y" and the analyst/agency
 * string. I/O failures are reported on standard output and not rethrown.
 *
 * @param report            the document whose pages are decorated
 * @param proj              the project supplying name, analyst and agency
 * @param headerOnFirstPage {@code true} to start at the first page,
 *                          {@code false} to skip it
 */
private static void drawReportHeaderFooter(PDDocument report, Project proj, boolean headerOnFirstPage) {

    int pageIdx = headerOnFirstPage ? 0 : 1;
    int marginOffset = 10;
    int pageCount = report.getNumberOfPages();
    if (pageIdx >= pageCount) {
        // No page to decorate, so skip loading the logo as well.
        return;
    }
    try {
        // Everything below is identical for all pages, so compute it once.
        BufferedImage logoWZITS = ImageIO.read(WZITS_FX.class.getResource("/GUI/Icon/wzits_icon_64.png"));
        String dateString = DateFormat.getDateInstance(DateFormat.MEDIUM)
                .format(Calendar.getInstance().getTime());
        String projectString = "WZ ITS Tool Report: " + proj.getName();
        String analystAgencyStr = (proj.getAnalyst() != null ? proj.getAnalyst() : "")
                + (proj.getAnalyst() != null && proj.getAgency() != null ? " / " : "")
                + (proj.getAgency() != null ? proj.getAgency() : "");

        for (int p = pageIdx; p < pageCount; p++) {
            // try-with-resources closes the content stream even if drawing fails.
            try (PDPageContentStream cs = new PDPageContentStream(report, report.getPage(p), true, false)) {
                cs.setFont(PDType1Font.TIMES_ROMAN, 11);
                cs.setNonStrokingColor(Color.BLACK);
                cs.beginText();

                // Header, left: date.
                cs.setTextMatrix(new Matrix(1, 0, 0, 1, MARGIN_LEFT_X, MARGIN_TOP_Y + marginOffset));
                cs.showText(dateString);

                // Header, right-aligned: project name (string width is scaled to the 11pt font).
                cs.setTextMatrix(new Matrix(1, 0, 0, 1,
                        MARGIN_RIGHT_X - (PDType1Font.TIMES_ROMAN.getStringWidth(projectString) / 1000 * 11),
                        MARGIN_TOP_Y + marginOffset));
                cs.showText(projectString);

                // Footer, centered: page counter.
                String pageNumString = "Page " + String.valueOf(p + 1) + " of "
                        + String.valueOf(pageCount);
                cs.setTextMatrix(new Matrix(1, 0, 0, 1,
                        MARGIN_LEFT_X + (MARGIN_RIGHT_X - MARGIN_LEFT_X) / 2.0f
                                - (PDType1Font.TIMES_ROMAN.getStringWidth(pageNumString) / 1000 * 11) / 2.0f,
                        MARGIN_BOTTOM_Y - marginOffset));
                cs.showText(pageNumString);

                // Footer, left (after the logo): tool version.
                cs.setTextMatrix(new Matrix(1, 0, 0, 1, MARGIN_LEFT_X + 20, MARGIN_BOTTOM_Y - marginOffset));
                cs.showText("WZ ITS Tool V" + WZITS_FX.VERSION);

                // Footer, right-aligned: analyst / agency.
                cs.setTextMatrix(new Matrix(1, 0, 0, 1,
                        MARGIN_RIGHT_X - (PDType1Font.TIMES_ROMAN.getStringWidth(analystAgencyStr) / 1000 * 11),
                        MARGIN_BOTTOM_Y - marginOffset));
                cs.showText(analystAgencyStr);
                cs.endText();

                cs.drawImage(LosslessFactory.createFromImage(report, logoWZITS), MARGIN_LEFT_X,
                        MARGIN_BOTTOM_Y - marginOffset - 3, 16, 16);
            }
        }
    } catch (IOException e) {
        // Report the actual cause instead of hiding it.
        System.out.println("Something went wrong: " + e);
    }

}

From source file:hrpod.tools.PDFTools.java

/**
 * Extracts the text of all pages of a PDF read from a stream.
 *
 * <p>Any failure is logged and not rethrown.
 *
 * @param inputStream the stream handed to {@code getParser}; it is not
 *                    closed by this method
 * @return the extracted text, or {@code null} if extraction failed
 */
public String getStringFromPDF(InputStream inputStream) {

    String text = null;
    PDDocument pdDoc = null;

    try {
        COSDocument cosDoc = getParser(inputStream).getDocument();
        PDFTextStripper pdfStripper = new PDFTextStripper();
        pdDoc = new PDDocument(cosDoc);
        pdfStripper.setStartPage(1);
        pdfStripper.setEndPage(pdDoc.getNumberOfPages());

        text = pdfStripper.getText(pdDoc);

    } catch (Exception ex) {
        logger.error("ERROR", ex);
    } finally {
        // Release the document's resources once the text has been extracted.
        if (pdDoc != null) {
            try {
                pdDoc.close();
            } catch (IOException ex) {
                logger.error("ERROR", ex);
            }
        }
    }

    return text;
}

From source file:hrpod.tools.PDFTools.java

/**
 * Extracts the text of a PDF read from a stream, one array entry per page.
 *
 * <p>Failures are logged and not rethrown. If extraction fails part-way,
 * the entries for the remaining pages stay {@code null}.
 *
 * @param inputStream the stream handed to {@code getParser}; it is not
 *                    closed by this method
 * @return the text of each page in page order, or {@code null} if the
 *         document could not be opened
 */
public String[] getPagesFromPDF(InputStream inputStream) {
    String[] pages = null;
    PDDocument pdDoc = null;
    try {
        COSDocument cosDoc = getParser(inputStream).getDocument();
        PDFTextStripper pdfStripper = new PDFTextStripper();
        pdDoc = new PDDocument(cosDoc);
        int pagesCount = pdDoc.getNumberOfPages();
        pages = new String[pagesCount];

        // Start and end are set to the same page so each entry holds exactly
        // one page, and the loop includes the last page.
        for (int p = 1; p <= pagesCount; p++) {
            pdfStripper.setStartPage(p);
            pdfStripper.setEndPage(p);
            pages[p - 1] = pdfStripper.getText(pdDoc);
        }

    } catch (IOException e) {
        logger.error("IO ERROR", e);
    } catch (Exception ex) {
        logger.error("ERROR", ex);
    } finally {
        // Release the document's resources once extraction is finished.
        if (pdDoc != null) {
            try {
                pdDoc.close();
            } catch (IOException e) {
                logger.error("IO ERROR", e);
            }
        }
    }
    return pages;
}

From source file:idp.pdf_converter.java

/**
 * Renders every page of a PDF file to a 300 DPI grayscale PNG.
 *
 * <p>Images are written to {@code src/main/temp/images} below the current
 * working directory and named {@code <file name>-<page number>.png}, with
 * page numbers starting at 1.
 *
 * @param file the PDF file to convert
 * @throws IOException if the file cannot be loaded or a page cannot be
 *         rendered or written
 */
public static void pdf_converter(File file) throws IOException {
    // File.separator gives the same path as before on Windows and a valid one elsewhere.
    String path = System.getProperty("user.dir") + File.separator + "src" + File.separator + "main"
            + File.separator + "temp" + File.separator + "images" + File.separator;
    // try-with-resources closes the document even when rendering or writing fails.
    try (PDDocument document = PDDocument.load(file)) {
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int pageCount = document.getNumberOfPages();
        for (int page = 0; page < pageCount; ++page) {
            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.GRAY);
            ImageIOUtil.writeImage(bim, path + file.getName() + "-" + (page + 1) + ".png", 300);
        }
    }
}

From source file:indexer.Indexer.java

/**
 * Returns the number of pages of a PDF file.
 *
 * @param fileLoc path of the PDF file
 * @return the page count reported by the document
 * @throws IOException if the file cannot be loaded or closed
 */
public static int getPDFPages(String fileLoc) throws IOException {
    // The document is only needed for its page count, so close it right away.
    try (PDDocument doc = PDDocument.load(new File(fileLoc))) {
        return doc.getNumberOfPages();
    }
}