Example usage for org.apache.pdfbox.pdmodel.PDDocument.getNumberOfPages()

List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.getNumberOfPages()

Introduction

On this page you can find usage examples for org.apache.pdfbox.pdmodel.PDDocument.getNumberOfPages().

Prototype

public int getNumberOfPages() 

Source Link

Document

This will return the total page count of the PDF document.

Usage

From source file:com.fcore.base.fileSystem.utils.FileUtil.java

/**
 * PDF to Image(png)/*from  w  ww  . ja  v  a 2s .com*/
 * @param pdfPath
 * @param imagePath
 */
public static void pdf2png(String pdfPath, String imagePath) {
    long old = System.currentTimeMillis();
    // pdf ??
    File file = new File(pdfPath);
    try {
        PDDocument doc = PDDocument.load(file);
        PDFRenderer renderer = new PDFRenderer(doc);
        int pageCount = doc.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            BufferedImage image = renderer.renderImageWithDPI(i, 250); // Windows
            //BufferedImage srcImage = resize(image, 240, 240);// 
            ImageIO.write(image, "PNG", new File(imagePath + i + ".png"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    long now = System.currentTimeMillis();
    System.out.println("" + ((now - old) / 1000.0) + "\n\n" + "?:" + imagePath);
}

From source file:com.formkiq.core.service.conversion.PdfToPngFormatConverter.java

License:Apache License

/**
 * Renders every page of the given document to an image, merges the page images into one
 * {@link ConversionResult} and attaches the signature fields found in the document.
 *
 * @param data the document to convert; cast to {@link PDDocument}
 * @param inputType not read by this implementation
 * @param outputType not read by this implementation
 * @return the merged result with its fields set
 * @throws IOException if rendering a page, merging, or locating the fields fails
 */
@Override
public ConversionResult convert(final Object data, final WorkflowOutputDocumentType inputType,
        final WorkflowOutputDocumentType outputType) throws IOException {

    final PDDocument document = (PDDocument) data;
    final PDFRenderer renderer = new PDFRenderer(document);

    // One slot per page; the array length doubles as the loop bound.
    final BufferedImage[] pageImages = new BufferedImage[document.getNumberOfPages()];
    for (int index = 0; index < pageImages.length; index++) {
        pageImages[index] = renderer.renderImage(index, SCALE);
    }

    final ConversionResult merged = merge(pageImages);
    merged.setFields(findSigningButtons(document, merged));
    return merged;
}

From source file:com.formkiq.core.service.conversion.PdfToPngFormatConverter.java

License:Apache License

/**
 * Builds one {@link ConversionField} for every {@link PDSignatureField} in the document.
 *
 * <p>The vertical position of a field is computed from the result's data height divided by
 * the number of pages, offset by the index of the page holding the field's first widget.
 *
 * @param doc {@link PDDocument}
 * @param result {@link ConversionResult}
 * @return {@link List} of {@link ConversionField}
 * @throws IOException IOException
 */
private List<ConversionField> findSigningButtons(final PDDocument doc, final ConversionResult result)
        throws IOException {

    final List<ConversionField> located = new ArrayList<>();

    for (PDSignatureField signature : doc.getSignatureFields()) {

        final PDRectangle bounds = PDRectangleUtil.calculateWidget(signature.getWidgets());

        // The page is taken from the first widget only.
        // NOTE(review): if that widget has no page set, indexOf presumably yields -1 - confirm.
        final PDPage owningPage = signature.getWidgets().get(0).getPage();
        final int pageIndex = doc.getPages().indexOf(owningPage);

        // NOTE(review): whether this division is integral depends on getDataheight()'s type.
        final float heightPerPage = result.getDataheight() / doc.getNumberOfPages();
        final float left = bounds.getLowerLeftX();
        final float top = (heightPerPage - bounds.getUpperRightY()) + (heightPerPage * pageIndex);

        final ConversionField field = new ConversionField();
        field.setDocumentfieldname(signature.getFullyQualifiedName());
        field.setX(left);
        field.setY(top);
        field.setHeight(bounds.getHeight());
        located.add(field);
    }

    return located;
}

From source file:com.jaeksoft.searchlib.parser.PdfParser.java

License:Open Source License

/**
 * Copies document-level metadata of a PDF into the parser result.
 *
 * <p>Title, subject, author, producer and keywords are added when the document has an
 * information dictionary; the creation and modification dates are added only when
 * {@code getDate} returns a non-null value. The page count is always added, and the language
 * is added when a document catalog is available.
 *
 * @param result the item receiving the fields
 * @param pdf the document to read the metadata from
 * @throws IOException declared by the signature; raised by the helpers it calls
 */
private void extractMetaData(ParserResultItem result, PDDocument pdf) throws IOException {
    final PDDocumentInformation information = pdf.getDocumentInformation();
    if (information != null) {
        result.addField(ParserFieldEnum.title, information.getTitle());
        result.addField(ParserFieldEnum.subject, information.getSubject());
        result.addField(ParserFieldEnum.author, information.getAuthor());
        result.addField(ParserFieldEnum.producer, information.getProducer());
        result.addField(ParserFieldEnum.keywords, information.getKeywords());

        final String created = getDate(getCreationDate(information));
        if (created != null) {
            result.addField(ParserFieldEnum.creation_date, created);
        }

        final String modified = getDate(getModificationDate(information));
        if (modified != null) {
            result.addField(ParserFieldEnum.modification_date, modified);
        }
    }

    result.addField(ParserFieldEnum.number_of_pages, pdf.getNumberOfPages());

    final PDDocumentCatalog documentCatalog = pdf.getDocumentCatalog();
    if (documentCatalog == null) {
        return;
    }
    result.addField(ParserFieldEnum.language, documentCatalog.getLanguage());
}

From source file:com.jaeksoft.searchlib.web.controller.ViewerController.java

License:Open Source License

/**
 * Opens {@code tempFile} with PDFBox, highlights the collected boxes on {@code currentImage}
 * and stores the document's page count in {@code numberOfPages}.
 *
 * <p>An encrypted document is decrypted with the empty password, and that same empty password
 * is handed to {@code loadGS}; for an unencrypted document {@code loadGS} receives
 * {@code null}. The document is closed before the method returns, whether or not a step
 * failed.
 *
 * @throws IOException if the file cannot be loaded or processed
 * @throws CryptographyException if decrypting with the empty password fails
 * @throws SearchLibException declared by the signature; raised by the helpers it calls
 * @throws InterruptedException declared by the signature; raised by the helpers it calls
 */
private void loadPdfBox() throws IOException, CryptographyException, SearchLibException, InterruptedException {
    PDDocument pdf = null;
    try {
        pdf = PDDocument.loadNonSeq(tempFile, null);

        // Encrypted files are attempted with the empty password.
        final String password;
        if (pdf.isEncrypted()) {
            pdf.decrypt("");
            password = "";
        } else {
            password = null;
        }
        loadGS(password);

        final List<Rectangle> highlights = new ArrayList<Rectangle>(0);
        checkPdfBoxHighlight(pdf, highlights);
        checkHocrHighlight(currentImage.getWidth(), currentImage.getHeight(), highlights);
        ImageUtils.yellowHighlight(currentImage, highlights, 0.1F);

        numberOfPages = pdf.getNumberOfPages();
    } finally {
        if (pdf != null) {
            IOUtils.close(pdf);
        }
    }
}

From source file:com.joowon.returnA.classifier.export.PdfImageExport.java

License:Open Source License

/**
 * Renders each page of a document to a PNG file at 300 DPI.
 *
 * <p>Page {@code n} (one-based) is written to {@code filePath/fileName_n.png}, and a progress
 * line is printed for every page. When an {@link IOException} occurs it is reported on
 * standard error and rendering stops; the array is still returned, with {@code null} in the
 * slots of pages that were not exported.
 *
 * @param document the document to render
 * @param filePath directory prefix of the output files
 * @param fileName base name of the output files
 * @return one entry per page, in page order
 */
public static File[] export(PDDocument document, String filePath, String fileName) {
    final int total = document.getNumberOfPages();
    final File[] exported = new File[total];
    try {
        final PDFRenderer pageRenderer = new PDFRenderer(document);
        for (int number = 1; number <= total; number++) {
            final int index = number - 1;
            final BufferedImage rendered = pageRenderer.renderImageWithDPI(index, 300, ImageType.RGB);

            final String target = filePath + "/" + fileName + "_" + number + ".png";
            ImageIOUtil.writeImage(rendered, target, 300);
            exported[index] = new File(target);

            System.out.println("Export image file from PDF : " + target + " [" + number + "/"
                    + total + "]");
        }
    } catch (IOException failure) {
        failure.printStackTrace();
        System.err.println("IOException occurred\nCheck file path.");
    }
    return exported;
}

From source file:com.joowon.returnA.classifier.extractor.PdfTextExtractor.java

License:Open Source License

/**
 * Demo entry point: extracts text regions from a hard-coded sample PDF and prints them.
 *
 * <p>The top quarter of the first page is extracted first, then the left and right halves of
 * every page in order. Region sizes are derived from the media box of the first page.
 *
 * @param args not used
 * @throws IOException if the document cannot be loaded, read or closed
 * @throws PrinterException declared by the signature; raised by the extractor calls
 */
public static void main(String[] args) throws IOException, PrinterException {
    // Target PDF document; try-with-resources closes it once extraction is done or has failed.
    try (PDDocument document = PDDocument
            .load(new File("/Users/Joowon/Documents/Github/ReturnA/data/tests/YAPNXRPm_eng1_mun.pdf"))) {

        List<String> pdfTextList = new ArrayList<>();
        final int width = (int) document.getPage(0).getMediaBox().getWidth();
        final int height = (int) document.getPage(0).getMediaBox().getHeight();

        // Extract test information (the info area at the top of the first page)
        pdfTextList.addAll(new PdfTextExtractor(document.getPage(0)).addRegion(0, 0, width, height / 4).extract());

        for (int i = 0; i < document.getNumberOfPages(); ++i) {
            // Left side
            pdfTextList
                    .addAll(new PdfTextExtractor(document.getPage(i)).addRegion(0, 0, width / 2, height).extract());

            // Right side
            pdfTextList.addAll(
                    new PdfTextExtractor(document.getPage(i)).addRegion(width / 2, 0, width / 2, height).extract());
        }
        System.out.println(pdfTextList.toString());
    }
}

From source file:com.lanacion.adminsiteln.services.PdfIndexerService.PdfIndexerService.java

/**
 * Returns the number of pages of the PDF stored at the given path.
 *
 * <p>Failures are reported on standard error instead of being thrown: the method returns
 * {@code 0} when the path is not a regular file, when the parser cannot be opened, or when
 * parsing fails before the page count has been read.
 *
 * @param fileName path of the PDF file
 * @return the page count, or {@code 0} on any failure
 */
private int pdfgetPages(String fileName) {

    File file = new File(fileName);
    if (!file.isFile()) {
        System.err.println("File " + fileName + " does not exist.");
        return 0;
    }

    int numero_paginas = 0;
    FileInputStream input = null;
    COSDocument cosDoc = null;
    PDDocument pdDoc = null;
    try {
        PDFParser parser;
        try {
            input = new FileInputStream(file);
            parser = new PDFParser(input);
        } catch (IOException e) {
            System.err.println("Unable to open PDF Parser. " + e.getMessage());
            return 0;
        }
        try {
            parser.parse();
            cosDoc = parser.getDocument();
            pdDoc = new PDDocument(cosDoc);
            numero_paginas = pdDoc.getNumberOfPages();
        } catch (Exception e) {
            System.err.println("An exception occured in parsing the PDF Document." + e.getMessage());
        }
    } finally {
        try {
            if (cosDoc != null) {
                cosDoc.close();
            }
            if (pdDoc != null) {
                pdDoc.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Close the stream this method opened, including on the early-return path above.
        try {
            if (input != null) {
                input.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return numero_paginas;
}

From source file:com.openkm.util.metadata.MetadataExtractor.java

License:Open Source License

/**
 * Extracts metadata from a PDF.
 *
 * <p>Reads the page count and the entries of the document information (title, author,
 * subject, keywords, creator, producer, trapped flag, creation and modification dates) into a
 * new {@link PdfMetadata}. The loaded document is closed before returning; the input stream
 * itself is not closed by this method.
 *
 * @param is stream to read the PDF from
 * @return the extracted metadata
 * @throws IOException if the document cannot be loaded, read or closed
 */
public static PdfMetadata pdfExtractor(InputStream is) throws IOException {
    PDDocument doc = PDDocument.load(is);
    PdfMetadata md = new PdfMetadata();

    try {
        PDDocumentInformation info = doc.getDocumentInformation();

        md.setNumberOfPages(doc.getNumberOfPages());
        md.setTitle(info.getTitle());
        md.setAuthor(info.getAuthor());
        md.setSubject(info.getSubject());
        md.setKeywords(info.getKeywords());
        md.setCreator(info.getCreator());
        md.setProducer(info.getProducer());
        md.setTrapped(info.getTrapped());
        md.setCreationDate(info.getCreationDate());
        md.setModificationDate(info.getModificationDate());
    } finally {
        // Release the document's resources even when reading the metadata fails.
        doc.close();
    }

    log.info("pdfExtractor: {}", md);
    return md;
}

From source file:com.opensearchserver.extractor.parser.PdfBox.java

License:Apache License

/**
 * Records document-level metadata of a PDF in {@code metas}.
 *
 * <p>Title, subject, author, producer, keywords and both dates are added when the document
 * has an information dictionary. The page count is always added, and the language is added
 * when a document catalog is available.
 *
 * @param pdf the document to read the metadata from
 * @throws IOException declared by the signature; raised by the helpers it calls
 */
private void extractMetaData(PDDocument pdf) throws IOException {
    final PDDocumentInformation information = pdf.getDocumentInformation();
    if (information != null) {
        metas.add(TITLE, information.getTitle());
        metas.add(SUBJECT, information.getSubject());
        metas.add(AUTHOR, information.getAuthor());
        metas.add(PRODUCER, information.getProducer());
        metas.add(KEYWORDS, information.getKeywords());
        metas.add(CREATION_DATE, getDate(getCreationDate(information)));
        // NOTE(review): unlike the creation date, this value is not passed through getDate -
        // confirm whether that is intentional.
        metas.add(MODIFICATION_DATE, getModificationDate(information));
    }

    metas.add(NUMBER_OF_PAGES, pdf.getNumberOfPages());

    final PDDocumentCatalog documentCatalog = pdf.getDocumentCatalog();
    if (documentCatalog == null) {
        return;
    }
    metas.add(LANGUAGE, documentCatalog.getLanguage());
}