Example usage for org.apache.pdfbox.pdmodel PDDocument getNumberOfPages

Introduction

On this page you can find example usages of the method org.apache.pdfbox.pdmodel.PDDocument.getNumberOfPages().

Prototype

public int getNumberOfPages()

Source Link

Document

This will return the total page count of the PDF document.

Usage

From source file:cz.mzk.editor.server.newObject.CreateObject.java

License:Open Source License

/**
 * Builds the FOXML for {@code node} (and, recursively, for its not yet existing children),
 * ingests it and copies the related image / audio files to their target location.
 * <p>
 * Outline of the steps, in order:
 * <ol>
 * <li>fail when {@code attempt} is 0;</li>
 * <li>on the first attempt of a PDF node, open the PDF and validate the thumbnail page index;</li>
 * <li>reuse the uuid of an already processed page / track with the same path;</li>
 * <li>for an existing node only ingest and append its missing children, then return;</li>
 * <li>otherwise configure the builder, ingest the children, ingest the node itself,
 * copy the files and add the OCR / ALTO datastreams;</li>
 * <li>when the ingest fails, retry with {@code attempt - 1}.</li>
 * </ol>
 *
 * @param node    the digital object to be ingested
 * @param mods    the MODS document handed to the builder
 * @param dc      the DC document handed to the builder
 * @param attempt the number of remaining attempts; 0 means no attempt is left
 * @return the uuid of {@code node}
 * @throws CreateObjectException when no attempt is left, the PDF cannot be read or closed,
 *                               the thumbnail page index exceeds the page count, the node
 *                               type has no builder, or a file / database operation fails
 */
private String insertFOXML(NewDigitalObject node, Document mods, Document dc, int attempt)
        throws CreateObjectException {
    if (attempt == 0) {
        throw new CreateObjectException("max number of attempts has been reached");
    }

    // A top-level typed node without children but with a file path is handled as a PDF.
    boolean isPdf = node.getModel().getTopLevelType() != null
            && (node.getChildren() == null || node.getChildren().size() == 0) && node.getPath() != null;

    // The PDF is validated only once, on the very first attempt.
    if (isPdf && attempt == Constants.MAX_NUMBER_OF_INGEST_ATTEMPTS) {
        PDDocument document = null;
        String newPdfPath = null;

        try {
            newPdfPath = imageResolverDAO.getNewImageFilePath(node.getPath());
            if (!newPdfPath.endsWith(Constants.PDF_EXTENSION)) {
                newPdfPath = newPdfPath.concat(Constants.PDF_EXTENSION);
            }
            document = PDDocument.load(new File(newPdfPath));
            int numberOfPages = document.getNumberOfPages();
            // Warn only when the document really has no pages (the warning used to be
            // logged unconditionally, even for documents with pages).
            if (numberOfPages == 0) {
                LOGGER.warn(newPdfPath + ": Count of pages is 0");
            }
            if (numberOfPages > 0 && node.getPageIndex() > numberOfPages) {
                throw new CreateObjectException("The number of page: " + node.getPageIndex()
                        + " to be used for thumbnail is bigger than count of pages in the file: "
                        + numberOfPages);
            }

        } catch (IOException e) {
            LOGGER.error(e.getMessage());
            e.printStackTrace();
            // keep the cause so the real I/O problem is not lost
            throw new CreateObjectException("Unable to read the pdf file: " + newPdfPath, e);
        } catch (DatabaseException e) {
            LOGGER.error(e.getMessage());
            e.printStackTrace();
            throw new CreateObjectException(e.getMessage(), e);
        } finally {
            if (document != null) {
                try {
                    document.close();
                } catch (IOException e) {
                    LOGGER.error(e.getMessage());
                    e.printStackTrace();
                    throw new CreateObjectException("Unable to close the pdf file: " + newPdfPath, e);
                }
            }
        }
    }

    // A page / track with the same path that was already processed is reused.
    if (processedPages.containsKey(node.getPath())) {
        node.setExist(true);
        node.setUuid(processedPages.get(node.getPath()));
    }
    if (processedTracks.containsKey(node.getPath())) {
        node.setExist(true);
        node.setUuid(processedTracks.get(node.getPath()));
    }
    if (node.getExist()) {
        // do not create, but append only
        List<NewDigitalObject> childrenToAdd = node.getChildren();
        if (childrenToAdd != null && !childrenToAdd.isEmpty()) {
            for (NewDigitalObject child : childrenToAdd) {
                if (!child.getExist()) {
                    String uuid = insertFOXML(child, mods, dc);
                    child.setUuid(Constants.FEDORA_UUID_PREFIX + uuid);
                    append(node, child);
                } else {
                    insertFOXML(child, mods, dc);
                }
            }
        }
        return node.getUuid();
    }
    FoxmlBuilder builder = FOXMLBuilderMapping.getBuilder(node);
    if (builder == null) {
        throw new CreateObjectException("unknown type " + node.getModel());
    }

    // A fresh uuid is generated when the node has none yet, and also on every retry.
    if (node.getUuid() == null || attempt != Constants.MAX_NUMBER_OF_INGEST_ATTEMPTS) {

        node.setUuid(FoxmlUtils.getRandomUuid());

        // The first node that gets a uuid becomes the top-level object and is stored in the DB.
        if (topLevelUuid == null) {
            topLevelUuid = node.getUuid();
            try {
                digitalObjectDAO.insertNewDigitalObject(node.getUuid(), node.getModel().getValue(),
                        node.getName(), inputDirPath, node.getUuid(), false, userId);
            } catch (DatabaseException e) {
                // the failure is logged only; the ingest continues
                LOGGER.error("DB ERROR!!!: " + e.getMessage() + ": " + e);
                e.printStackTrace();
            }
        }
    }
    boolean isPage = node.getModel() == DigitalObjectModel.PAGE;
    boolean isTrack = node.getModel() == DigitalObjectModel.TRACK;
    boolean isSoundUnit = node.getModel() == DigitalObjectModel.SOUND_UNIT;

    builder.setSignature(node.getSignature());
    builder.setBase(base);
    builder.setUuid(node.getUuid());
    builder.setDcXmlContent(dc);
    builder.setModsXmlContent(mods);
    builder.setBundle(node.getBundle());
    builder.setType(node.getType());
    builder.setPolicy(node.getVisible() ? Policy.PUBLIC : Policy.PRIVATE);
    builder.setDateOrIntPartName(node.getDateOrIntPartName());
    builder.setNoteOrIntSubtitle(node.getNoteOrIntSubtitle());
    // For pages these two values are used as ALTO / OCR paths further below instead.
    if (!isPage) {
        builder.setPartNumber(node.getPartNumberOrAlto());
        builder.setAditionalInfo(node.getAditionalInfoOrOcr());
    }
    if (node.getModel() == DigitalObjectModel.PAGE) {
        builder.setPageIndex(node.getPageIndex());
    }

    // Ingest the missing children first, then register every child as a relation.
    List<NewDigitalObject> childrenToAdd = node.getChildren();
    if (childrenToAdd != null && !childrenToAdd.isEmpty()) {
        List<RelsExtRelation> relations = builder.getChildren();
        for (NewDigitalObject child : childrenToAdd) {
            if (!child.getExist()) {
                String uuid = insertFOXML(child, mods, dc);
                child.setUuid(uuid);
            }
            relations.add(new RelsExtRelation(child.getUuid(),
                    NamedGraphModel.getRelationship(node.getModel(), child.getModel()), child.getName()));
        }
    }
    String imageUrl = null;
    String newFilePath = null;

    if (isPage || isSoundUnit) {
        String url = config.getImageServerUrl();
        url = addSlash(url);
        // the stored URL always uses the http scheme
        if (!url.startsWith("http://")) {
            if (url.startsWith("https://")) {
                url = url.substring(8);
            }
            url = "http://" + url;
        }
        if (!isSysno(sysno)) {
            imageUrl = url + "meditor" + getPathFromNonSysno(sysno) + (node.getUuid());
            newFilePath = addSlash(config.getImageServerUnknown()) + getPathFromNonSysno(sysno)
                    + node.getUuid();
        } else {
            String basePath = "";
            if (base != null && !"".equals(base)) {
                basePath = base.toLowerCase() + "/";
            }
            imageUrl = url + basePath + getSysnoPath(sysno) + (node.getUuid());
            newFilePath = addSlash(config.getImageServerKnown()) + basePath + getSysnoPath(sysno)
                    + node.getUuid();
        }

        builder.setImageUrl(imageUrl);
    } else if (isTrack) {
        String url = config.getRecordingServerUrl();
        url = addSlash(url);
        // the stored URL always uses the http scheme
        if (!url.startsWith("http://")) {
            if (url.startsWith("https://")) {
                url = url.substring(8);
            }
            url = "http://" + url;
        }
        String soundUrl;

        if (!isSysno(sysno)) {
            soundUrl = url + "meditor" + getPathFromNonSysno(sysno) + (node.getUuid());
            newFilePath = addSlash(config.getRecordingServerUnknown()) + getPathFromNonSysno(sysno)
                    + node.getUuid();
        } else {
            String basePath = "";
            if (base != null && !"".equals(base)) {
                basePath = base.toLowerCase() + "/";
            }
            newFilePath = addSlash(config.getRecordingServerKnown()) + basePath + getSysnoPath(sysno)
                    + node.getUuid();
            soundUrl = url + basePath + getSysnoPath(sysno) + (node.getUuid());
        }

        //No lossless audio on the input queue
        String soundPath = null;
        try {
            soundPath = imageResolverDAO.getNewImageFilePath(node.getPath());
        } catch (DatabaseException e) {
            LOGGER.error(e.getMessage());
            e.printStackTrace();
            throw new CreateObjectException(e.getMessage(), e);
        }
        if (builder instanceof TrackBuilder) {
            if (new File(soundPath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension()).exists()) {
                ((TrackBuilder) builder).wavProvided(true);
            }
        }
        builder.setImageUrl(soundUrl);

    }

    builder.createDocument();

    String foxmlRepresentation = builder.getDocument(false);
    boolean success = IngestUtils.ingest(foxmlRepresentation, node.getName(), node.getUuid(),
            node.getModel().getValue(), topLevelUuid, inputDirPath);

    if (success) {
        ingestedObjects.add(node.getUuid());
    }

    // Copy the JPEG 2000 image of a successfully ingested page / sound unit.
    if ((isPage || isSoundUnit) && success) {
        // TODO: StringBuffer
        boolean copySuccess;
        String newImagePath = null;
        try {
            newImagePath = imageResolverDAO.getNewImageFilePath(node.getPath());
            if (newImagePath == null) {
                throw new CreateObjectException("Unkown file path for " + node.getPath());
            } else if (!newImagePath.endsWith(Constants.JPEG_2000_EXTENSION)) {
                newImagePath = newImagePath.concat(Constants.JPEG_2000_EXTENSION);
            }

            copySuccess = IOUtils.copyFile(newImagePath, newFilePath + Constants.JPEG_2000_EXTENSION);

            if (copySuccess && LOGGER.isInfoEnabled()) {
                LOGGER.info("image " + newImagePath + "  was copied to  " + newFilePath
                        + Constants.JPEG_2000_EXTENSION);
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage());
            e.printStackTrace();
            throw new CreateObjectException(e.getMessage(), e);
        } catch (DatabaseException e) {
            LOGGER.error(e.getMessage());
            e.printStackTrace();
            throw new CreateObjectException(e.getMessage(), e);
        }
    }

    // For pages the "additional info" / "part number" slots carry the OCR / ALTO file paths.
    if (isPage) {
        String ocrPath = node.getAditionalInfoOrOcr();
        if (ocrPath != null && !"".equals(ocrPath)) {
            insertManagedDatastream(DATASTREAM_ID.TEXT_OCR, node.getUuid(), ocrPath, true, "text/plain");
        }

        String altoPath = node.getPartNumberOrAlto();
        if (altoPath != null && !"".equals(altoPath)) {
            insertManagedDatastream(DATASTREAM_ID.ALTO, node.getUuid(), altoPath, true, "text/xml");
        }
    }

    // Copy the audio files of a successfully ingested track.
    if (isTrack && success) {
        boolean copySuccessWav;
        boolean copySuccessMp3;
        boolean copySuccessOgg;
        String soundPath;
        try {
            soundPath = imageResolverDAO.getNewImageFilePath(node.getPath());
            // drops the last four characters (presumably the ".ext" suffix - TODO confirm)
            soundPath = soundPath.substring(0, soundPath.length() - 4);

            // the WAV file is optional, MP3 and OGG are always copied
            if (new File(soundPath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension()).exists()) {
                copySuccessWav = IOUtils.copyFile(
                        soundPath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension(),
                        newFilePath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension());
            }

            copySuccessMp3 = IOUtils.copyFile(soundPath + Constants.AUDIO_MIMETYPES.MP3_MIMETYPE.getExtension(),
                    newFilePath + Constants.AUDIO_MIMETYPES.MP3_MIMETYPE.getExtension());
            copySuccessOgg = IOUtils.copyFile(soundPath + Constants.AUDIO_MIMETYPES.OGG_MIMETYPE.getExtension(),
                    newFilePath + Constants.AUDIO_MIMETYPES.OGG_MIMETYPE.getExtension());

        } catch (DatabaseException e) {
            LOGGER.error(e.getMessage());
            e.printStackTrace();
            throw new CreateObjectException(e.getMessage(), e);
        } catch (IOException e) {
            LOGGER.error(e.getMessage());
            e.printStackTrace();
            throw new CreateObjectException(e.getMessage(), e);
        }
    }

    // A failed ingest is retried with one attempt less; a successful PDF gets its extra handling.
    if (!success) {
        insertFOXML(node, mods, dc, attempt - 1);
    } else if (isPdf) {
        handlePdf(node);
    }
    // Remember the uuid so another node with the same path can reuse it.
    if (node.getModel() == DigitalObjectModel.PAGE) {
        processedPages.put(node.getPath(), node.getUuid());
    }
    if (node.getModel() == DigitalObjectModel.TRACK) {
        processedTracks.put(node.getPath(), node.getUuid());
    }

    return node.getUuid();
}

From source file:ddf.catalog.transformer.input.pdf.PdfThumbnailGeneratorImpl.java

License:Open Source License

/**
 * Renders the first page of the given PDF and scales it down to a thumbnail.
 * <p>
 * The page is rendered at {@code RESOLUTION_DPI}, scaled so that its larger side becomes
 * {@code IMAGE_HEIGHTWIDTH} pixels and written in the {@code FORMAT_NAME} image format.
 *
 * @param pdfDocument the document to create the thumbnail for
 * @return the encoded thumbnail bytes, or an empty {@link Optional} when the document has no pages
 * @throws IOException when the page cannot be rendered or the image cannot be written
 */
@Override
public Optional<byte[]> apply(PDDocument pdfDocument) throws IOException {
    // Nothing to render: bail out before any renderer is created.
    if (pdfDocument.getNumberOfPages() < 1) {
        return Optional.empty();
    }

    PDFRenderer pdfRenderer = new PDFRenderer(pdfDocument);
    BufferedImage image = pdfRenderer.renderImageWithDPI(0, RESOLUTION_DPI, ImageType.RGB);

    int largestDimension = Math.max(image.getHeight(), image.getWidth());
    // The explicit cast guarantees floating point division whatever the constant's numeric type is.
    float scalingFactor = (float) IMAGE_HEIGHTWIDTH / largestDimension;
    // A very elongated page could truncate to 0, which BufferedImage rejects; keep at least 1 pixel.
    int scaledHeight = Math.max(1, (int) (image.getHeight() * scalingFactor));
    int scaledWidth = Math.max(1, (int) (image.getWidth() * scalingFactor));

    BufferedImage scaledImage = new BufferedImage(scaledWidth, scaledHeight, BufferedImage.TYPE_INT_RGB);
    Graphics2D graphics = scaledImage.createGraphics();
    graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
    graphics.drawImage(image, 0, 0, scaledWidth, scaledHeight, null);
    graphics.dispose();

    try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
        ImageIOUtil.writeImage(scaledImage, FORMAT_NAME, outputStream, RESOLUTION_DPI, IMAGE_QUALITY);
        return Optional.of(outputStream.toByteArray());
    }
}

From source file:de.haber.pdfbox.CountPages.java

License:Apache License

/**
 * Counts the number of pages from a given <b>input</b> file.
 * <p>
 * The document is closed again even when reading the page count fails.
 *
 * @param input
 *            input pdf file that has to exist and must be a file.
 * @return number of pages from the given pdf file.
 * @throws IOException
 *             If there is an error reading from the given file.
 * @throws IllegalArgumentException
 *             If the <b>file</b> does not exist or is not a file.
 */
public int count(File input) throws IOException {
    checkArgument(input.exists() && input.isFile(), "The input pdf has to exist and must be a file.");
    PDDocument doc = PDDocument.load(input);
    try {
        return doc.getNumberOfPages();
    } finally {
        // release the file handle on every path, not only on success
        doc.close();
    }
}

From source file:de.redsix.pdfcompare.PdfComparator.java

License:Apache License

/**
 * Compares the pages both documents have in common and then adds the surplus pages of the
 * longer document.
 * <p>
 * Both documents get a {@code ResourceCacheWithLimitedImages} and their own renderer.
 * {@code drawImage} is called once per shared page index with a latch sized to the number of
 * shared pages; the method then waits on that latch and shuts the three executors down
 * before the surplus pages are added.
 *
 * @param expectedDocument the document holding the expected content
 * @param actualDocument   the document holding the actual content
 * @throws IOException when a called step fails with an I/O error
 */
private void compare(final PDDocument expectedDocument, final PDDocument actualDocument) throws IOException {
    expectedDocument.setResourceCache(new ResourceCacheWithLimitedImages());
    final PDFRenderer rendererForExpected = new PDFRenderer(expectedDocument);

    actualDocument.setResourceCache(new ResourceCacheWithLimitedImages());
    final PDFRenderer rendererForActual = new PDFRenderer(actualDocument);

    final int sharedPages = Math.min(expectedDocument.getNumberOfPages(), actualDocument.getNumberOfPages());
    final CountDownLatch pendingPages = new CountDownLatch(sharedPages);
    int page = 0;
    while (page < sharedPages) {
        drawImage(pendingPages, page, expectedDocument, actualDocument, rendererForExpected, rendererForActual);
        page++;
    }

    Utilities.await(pendingPages, "FullCompare");
    Utilities.shutdownAndAwaitTermination(drawExecutor, "Draw");
    Utilities.shutdownAndAwaitTermination(parrallelDrawExecutor, "Parallel Draw");
    Utilities.shutdownAndAwaitTermination(diffExecutor, "Diff");

    // Pages only the expected document has are marked as missing ...
    if (expectedDocument.getNumberOfPages() > sharedPages) {
        addExtraPages(expectedDocument, rendererForExpected, sharedPages, MISSING_RGB, true);
        return;
    }
    // ... pages only the actual document has are marked as extra.
    if (actualDocument.getNumberOfPages() > sharedPages) {
        addExtraPages(actualDocument, rendererForActual, sharedPages, EXTRA_RGB, false);
    }
}

From source file:de.redsix.pdfcompare.PdfComparator.java

License:Apache License

/**
 * Adds every page from index {@code minPageCount} onwards to the compare result.
 * <p>
 * Each page is rendered, then a marker in the given colour is painted into its data buffer:
 * first the leading {@code width * MARKER_WIDTH} elements, then the first
 * {@code MARKER_WIDTH} elements of every row. The page is finally added to the result
 * next to a blank image.
 *
 * @param document     the document that has the surplus pages
 * @param pdfRenderer  the renderer belonging to {@code document}
 * @param minPageCount the index of the first surplus page
 * @param color        the value written into the data buffer for the marker
 * @param expected     {@code true} when the pages come from the expected document
 * @throws IOException when rendering a page fails
 */
private void addExtraPages(final PDDocument document, final PDFRenderer pdfRenderer, final int minPageCount,
        final int color, final boolean expected) throws IOException {
    int pageIndex = minPageCount;
    while (pageIndex < document.getNumberOfPages()) {
        final ImageWithDimension image = renderPageAsImage(document, pdfRenderer, pageIndex);
        final DataBuffer pixels = image.bufferedImage.getRaster().getDataBuffer();

        // marker along the top of the page
        for (int offset = 0; offset < image.bufferedImage.getWidth() * MARKER_WIDTH; offset++) {
            pixels.setElem(offset, color);
        }
        // marker along the left side of the page
        for (int row = 0; row < image.bufferedImage.getHeight(); row++) {
            for (int column = 0; column < MARKER_WIDTH; column++) {
                pixels.setElem(row * image.bufferedImage.getWidth() + column, color);
            }
        }

        if (!expected) {
            compareResult.addPage(new PageDiffCalculator(true, false), pageIndex, blank(image), image, image);
        } else {
            compareResult.addPage(new PageDiffCalculator(true, false), pageIndex, image, blank(image), image);
        }
        pageIndex++;
    }
}

From source file:diagramextractor.DiagramExtractor.java

/**
 * Collects the pages containing the requested diagrams from all reports of a directory
 * into one output PDF.
 * <p>
 * Every regular file of the input directory is loaded as a PDF and searched page by page.
 * A page that mentions one of the diagram names, and is neither a table of contents nor a
 * table of figures, is extracted, stamped with the report's file name and appended to the
 * output document, which is saved at the end.
 *
 * @param args the command line arguments; {@code args[0]} is the input directory and
 *             {@code args[1]} the diagram name used in the "not found" message; the whole
 *             array is also passed to {@code parseOptions}
 * @throws IOException         when a report cannot be read or the output cannot be written
 * @throws COSVisitorException when the output document cannot be saved
 */
public static void main(String[] args) throws IOException, COSVisitorException {

    if (args.length < 2) {
        showHelp();
        System.exit(-1);
    }

    List<Integer> diagramOptionsList = parseOptions(args);
    List<String> diagramNameList = getDiagramNames(diagramOptionsList);

    File inputDir = new File(args[0]);
    File[] reports = inputDir.listFiles();
    // listFiles() returns null when the path is not a directory or cannot be read
    if (reports == null) {
        System.err.println("Cannot list the input directory: " + inputDir);
        System.exit(-1);
        return;
    }
    String diagramName = args[1];

    PDDocument outputDocument = new PDDocument();

    PDFMergerUtility merger = new PDFMergerUtility();
    merger.setDestinationFileName("output.pdf");

    for (File report : reports) {
        // sub-directories and other non-regular files cannot be loaded as a PDF
        if (!report.isFile()) {
            continue;
        }

        PDDocument doc = PDDocument.load(report);
        try {
            System.out.println("LOADED FILE: " + report.getName());

            // stays 0 when no page of this report matched
            int pageNumber = 0;

            int pageCount = doc.getNumberOfPages();
            System.out.println("NUMBER OF PAGES: " + pageCount);

            PDFTextStripper stripper = new PDFTextStripper();

            // PDFTextStripper and PageExtractor both use 1-based page numbers
            for (int i = 1; i <= pageCount; i++) {
                stripper.setStartPage(i);
                stripper.setEndPage(i);

                String contents = stripper.getText(doc);

                boolean containsDiagram = false;

                for (String diagram : diagramNameList) {
                    if (contents.contains(diagram)) {
                        containsDiagram = true;
                        break;
                    }
                }

                if (containsDiagram && !contents.contains("Table of Contents")
                        && !contents.contains("Table of Figures") && !contents.contains("Obsah")
                        && !contents.contains("Tabulka ?sel")) {
                    pageNumber = i;
                    System.out.println("Diagram found on page: " + pageNumber);

                    PageExtractor extractor = new PageExtractor(doc, pageNumber, pageNumber);
                    PDDocument extractedPage = extractor.extract();

                    // stamp the name of the source report onto the extracted page
                    PDPage page = (PDPage) extractedPage.getDocumentCatalog().getAllPages().get(0);
                    PDPageContentStream contentStream = new PDPageContentStream(extractedPage, page, true, true,
                            true);
                    contentStream.beginText();
                    contentStream.setFont(PDType1Font.HELVETICA_BOLD, 12);
                    contentStream.moveTextPositionByAmount(100, 50);
                    contentStream.drawString(report.getName());
                    contentStream.endText();
                    contentStream.close();

                    merger.appendDocument(outputDocument, extractedPage);

                }

            }

            if (pageNumber == 0) {
                System.out.println("The diagram " + diagramName + " was not found in file " + report.getName());
            }
        } finally {
            // close the report even when reading or extracting fails
            doc.close();
        }
    }

    merger.mergeDocuments();

    System.out.println();
    System.out.println("Diagrams have been merged.");

    String outputFileName = generateFilename(inputDir.getCanonicalPath(), "output.pdf");
    outputDocument.save(outputFileName);
    outputDocument.close();

    System.out.println("Output file saved as: " + outputFileName);
}

From source file:dk.defxws.fedoragsearch.server.TransformerToText.java

License:Open Source License

/**
 * Returns the number of pages of a PDF given as raw bytes.
 * <p>
 * The document is loaded with an empty password and is always handed to
 * {@code closePDDocument} afterwards, also when loading failed.
 *
 * @param doc the bytes of the PDF document
 * @return the page count reported by the loaded document
 * @throws Exception with the message "Cannot parse PDF document" wrapping any failure
 */
public int getPdfPagesCount_(byte[] doc) throws Exception {
    final String emptyPassword = "";
    PDDocument loaded = null;
    try {
        loaded = PDDocument.load(new ByteArrayInputStream(doc), emptyPassword);
        final int pageCount = loaded.getNumberOfPages();
        return pageCount;
    } catch (Exception e) {
        throw new Exception("Cannot parse PDF document", e);
    } finally {
        closePDDocument(loaded);
    }
}

From source file:edu.ist.psu.sagnik.research.pdfbox2playground.javatest.DrawPrintTextLocations.java

License:Apache License

/**
 * Loads the PDF named by {@code DataLocation.pdLoc} and strips it page by page.
 * <p>
 * A {@code DrawPrintTextLocations} stripper sorted by position is created for the document
 * and {@code stripPage} is called for every page index; the document is closed afterwards.
 *
 * @param args The command line arguments (not used).
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {

    // When loading fails there is nothing to close, so the load sits outside the try.
    final PDDocument pdf = PDDocument.load(new File(new DataLocation().pdLoc));
    try {
        final DrawPrintTextLocations locator = new DrawPrintTextLocations(pdf, new DataLocation().pdLoc);
        locator.setSortByPosition(true);

        int pageIndex = 0;
        while (pageIndex < pdf.getNumberOfPages()) {
            locator.stripPage(pageIndex);
            pageIndex++;
        }
    } finally {
        pdf.close();
    }

}

From source file:edu.ist.psu.sagnik.research.pdfbox2playground.javatest.ExtractImages.java

License:Apache License

/**
 * Runs an {@code ImageGraphicsEngine} over every page of the given PDF file.
 * <p>
 * The document is opened with the given password and closed again afterwards.
 *
 * @param pdfFile  the path of the PDF file
 * @param password the password used to open the document
 * @throws IOException when the document cannot be read or its access permission does not
 *                     allow content extraction
 */
private void extract(String pdfFile, String password) throws IOException {
    // When loading fails there is nothing to close, so the load sits outside the try.
    final PDDocument pdf = PDDocument.load(new File(pdfFile), password);
    try {
        final AccessPermission permission = pdf.getCurrentAccessPermission();
        final boolean extractionAllowed = permission.canExtractContent();
        if (!extractionAllowed) {
            throw new IOException("You do not have permission to extract images");
        }

        // todo: ITERATOR would be much better
        int pageIndex = 0;
        while (pageIndex < pdf.getNumberOfPages()) {
            final PDPage currentPage = pdf.getPage(pageIndex);
            final ImageGraphicsEngine engine = new ImageGraphicsEngine(currentPage);
            engine.run();
            pageIndex++;
        }
    } finally {
        pdf.close();
    }
}

From source file:es.rickyepoderi.pdfimages.Converter.java

License:Open Source License

/**
 * Method that converts a PDF file in a series of images. 
 * /*from   w w w  .j  a v  a2s .  com*/
 * @param pdfFile The PDF file to read
 * @param prefix The prefix of the images to write
 * @param imgFormat The image format ("jpg", "png",...) used by ImageIO
 * @param dpi The DPI of the images to render pages
 * @param type The type of the image (RGB, GREY,...)
 * @throws IOException Some error generating the images
 */
public void pdf2Images(File pdfFile, String prefix, String suffix, int dpi, ImageType type) throws IOException {
    PDDocument document = PDDocument.load(pdfFile);
    PDFRenderer pdfRenderer = new PDFRenderer(document);
    int pad = (int) Math.ceil(Math.log10(document.getNumberOfPages()));
    if (pad == 0) {
        pad = 1;
    }
    String format = String.format("%s.%%0%dd.%s", prefix, pad, suffix);
    for (int i = 0; i < document.getNumberOfPages(); i++) {
        BufferedImage image = pdfRenderer.renderImageWithDPI(i, dpi, type);
        ImageIO.write(image, suffix, new File(String.format(format, i)));
    }
}