Example usage for org.apache.pdfbox.pdmodel.PDDocument.getNumberOfPages()

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.getNumberOfPages().

Prototype

public int getNumberOfPages()

Source Link

Document

This will return the total page count of the PDF document.

Usage

From source file:org.pdfsam.pdf.DefaultPDFBoxLoader.java

License:Open Source License

/**
 * Copies basic facts about a loaded PDF into its descriptor: page count,
 * version, the standard metadata entries and, when the document carries a
 * creation date, a formatted rendering of that date.
 *
 * @param document   the loaded PDF to read from
 * @param descriptor the descriptor that receives the extracted values
 */
public void accept(PDDocument document, PdfDocumentDescriptor descriptor) {
    int pageCount = document.getNumberOfPages();
    descriptor.pages(pageCount);

    String rawVersion = Float.toString(document.getVersion());
    descriptor.setVersion(getVersion(rawVersion));

    PDDocumentInformation metadata = document.getDocumentInformation();
    descriptor.putInformation(PdfMetadataKey.TITLE.getKey(), metadata.getTitle());
    descriptor.putInformation(PdfMetadataKey.AUTHOR.getKey(), metadata.getAuthor());
    descriptor.putInformation(PdfMetadataKey.CREATOR.getKey(), metadata.getCreator());
    descriptor.putInformation(PdfMetadataKey.SUBJECT.getKey(), metadata.getSubject());
    descriptor.putInformation(PdfMetadataKey.KEYWORDS.getKey(), metadata.getKeywords());
    descriptor.putInformation("Producer", metadata.getProducer());

    // The formatted creation date is only stored when the document has one.
    Optional.ofNullable(metadata.getCreationDate())
            .map(created -> FORMATTER.format(created))
            .ifPresent(formatted -> descriptor.putInformation("FormattedCreationDate", formatted));
}

From source file:org.pdfsam.pdfbox.component.PdfRotatorTest.java

License:Open Source License

/**
 * Rotating pages 1 and 2 of a two-page document by 270 degrees must update
 * each page relative to its current rotation.
 */
@Test
public void multiplePages() {
    // First page starts at 180 degrees, second page at 90 degrees.
    PDPage firstPage = mock(PDPage.class);
    when(firstPage.getRotation()).thenReturn(180);
    PDPage secondPage = mock(PDPage.class);
    when(secondPage.getRotation()).thenReturn(90);

    PDDocument document = mock(PDDocument.class);
    when(document.getPage(0)).thenReturn(firstPage);
    when(document.getPage(1)).thenReturn(secondPage);
    when(document.getNumberOfPages()).thenReturn(2);

    HashSet<Integer> selectedPages = new HashSet<>(Arrays.asList(1, 2));
    applyRotation(Rotation.DEGREES_270, selectedPages).to(document);

    // Expected values are consistent with adding 270 degrees modulo 360.
    verify(firstPage).setRotation(90);
    verify(secondPage).setRotation(0);
}

From source file:org.pdfsam.pdfbox.component.split.AbstractPdfSplitter.java

License:Open Source License

/**
 * Creates a splitter bound to the given document. The document's page count
 * is read once here and kept in {@code totalPages}.
 *
 * @param document   the document whose pages will be read
 * @param parameters the parameters driving the split
 */
public AbstractPdfSplitter(PDDocument document, T parameters) {
    this.parameters = parameters;
    this.document = document;
    this.totalPages = document.getNumberOfPages();
}

From source file:org.pdfsam.pdfbox.component.split.PagesPdfSplitter.java

License:Open Source License

public PagesPdfSplitter(PDDocument document, T parameters) {
    super(document, parameters);
    this.splitPages = new SplitPages(parameters.getPages(document.getNumberOfPages()));
}

From source file:org.pennyledger.docstore.parser.impl.PDFImageExtractor.java

License:Apache License

/**
 * Entry point for the application.
 *
 * @param args
 *          The command-line arguments.
 * @throws IOException
 *           if there is an error reading the file or extracting the images.
 */
//  public static void main(String[] args) throws IOException {
//    // suppress the Dock icon on OS X
//    System.setProperty("apple.awt.UIElement", "true");
//
//    IImageParser imageParser = new TesseractImageOCR();
//    PDFImageExtractor extractor = new PDFImageExtractor(imageParser, 150);
//    extractor.run(args);
//  }

//  private void run(String[] args) throws IOException {
//    //String pdfFile = args[0];
//    String pdfFile = "c:/PennyLedger/1d51-9fe1b211e8039458b2ac4dbbfbf1.pdf";
//    if (pdfFile.length() <= 4) {
//      throw new IllegalArgumentException("Invalid file name: not PDF");
//    }
//    String password = "";
//    Path pdfPath = Paths.get(pdfFile);
//    PDDocument document = PDDocument.load(pdfPath.toFile(), password);
//    IDocumentContents docContents = new DocumentContents();
//    extract(document, id, pdfPath, docContents);
//  }

/**
 * Runs an {@code ImageGraphicsEngine} over every page of the document and
 * merges each page's contents into the supplied contents object.
 *
 * @param document    the PDF to process
 * @param id          identifier handed to each page's graphics engine
 * @param docContents the contents to merge the page results into
 * @return the result of merging all page contents into {@code docContents}
 * @throws IOException if the document does not permit content extraction,
 *                     or if processing a page fails
 */
IDocumentContents extract(PDDocument document, String id, IDocumentContents docContents) throws IOException {
    if (!document.getCurrentAccessPermission().canExtractContent()) {
        throw new IOException("You do not have permission to extract images");
    }

    IDocumentContents merged = docContents;
    for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) {
        ImageGraphicsEngine engine = new ImageGraphicsEngine(document.getPage(pageIndex), pageIndex, id);
        engine.run();
        merged = merged.merge(engine.getPageContents());
    }
    return merged;
}

From source file:org.quelea.data.pdf.PDFPresentation.java

License:Open Source License

/**
 * Make the slides that go in this PDF. This is what takes time and should
 * only be done once.
 *
 * @return all the slides, one per page, numbered from 1.
 * @throws IOException if the PDF cannot be loaded or the temporary
 *                     directory cannot be created.
 */
private PdfSlide[] makeSlides() throws IOException {
    File pdf = new File(file);
    // try-with-resources closes the document even when slide creation fails;
    // previously an exception before close() leaked the open document.
    try (PDDocument document = PDDocument.load(pdf.getAbsoluteFile())) {
        // NOTE(review): this temporary directory is not used anywhere in this
        // method; it is kept so the method's file-system side effect is
        // unchanged. Confirm whether it can be removed.
        Path f = Files.createTempDirectory(null);
        f.toFile().deleteOnExit();

        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int totalPages = document.getNumberOfPages();
        ArrayList<PdfSlide> ret = new ArrayList<>(totalPages);
        for (int i = 0; i < totalPages; i++) {
            // Slide numbers are 1-based while the loop index is 0-based.
            ret.add(new PdfSlide(i + 1, pdfRenderer));
        }
        return ret.toArray(new PdfSlide[0]);
    }
}

From source file:org.quelea.services.importexport.SurvivorSongbookParser.java

License:Open Source License

/**
 * Get all the songs in the PDF document.
 * <p>
 * Pages are read in order. Blank pages are skipped. A page containing
 * "(1 of", or one followed by a page containing "(2 of", is treated as the
 * start of a multi-page song and is buffered together with the following
 * page(s) before being handed to {@code processSong}.
 *
 * @param location    the PDF file to read.
 * @param statusPanel not used by this method.
 * @return a list of all the songs; never null, possibly empty.
 * @throws IOException if something went wrong.
 */
@Override
public List<SongDisplayable> getSongs(File location, StatusPanel statusPanel) throws IOException {
    List<SongDisplayable> pdfSongs = new ArrayList<>();
    // try-with-resources closes the document even if text extraction throws.
    try (PDDocument document = PDDocument.load(location)) {
        PDFTextStripper stripper = new PDFTextStripper();
        List<String> songParts = new ArrayList<>();
        int pageCount = document.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            String pageText = getPageText(document, stripper, i);
            if (pageText.trim().isEmpty()) {
                continue;
            }
            songParts.add(pageText);
            boolean twoPart = pageText.contains("(1 of");
            if (i < pageCount - 1) { //This section in case the original (1 of x) is missed out
                String nextPageText = getPageText(document, stripper, i + 1);
                if (nextPageText.contains("(2 of")) {
                    twoPart = true;
                }
            }
            if (!twoPart) {
                SongDisplayable song = processSong(songParts.toArray(new String[0]));
                if (song != null) {
                    pdfSongs.add(song);
                }
                songParts.clear();
            }
        }
        // NOTE(review): parts still buffered here (document ends on a page
        // flagged as multi-part) are discarded, as before - confirm intended.
    }
    return pdfSongs;
}

From source file:org.springframework.restdocs.asciidoctor.OperationBlockMacroTests.java

License:Apache License

/**
 * Loads the given PDF, asserts that it has exactly one page and returns the
 * strings that a {@code StringExtractor} collects from that page.
 *
 * @param pdfFile the PDF file to read
 * @return the strings extracted from the first page
 * @throws IOException if the file cannot be loaded or processed
 */
private List<String> extractStrings(File pdfFile) throws IOException {
    // try-with-resources: the document was previously never closed, and
    // would also leak when the page-count assertion failed.
    try (PDDocument pdf = PDDocument.load(pdfFile)) {
        assertThat(pdf.getNumberOfPages()).isEqualTo(1);
        StringExtractor stringExtractor = new StringExtractor();
        stringExtractor.processPage(pdf.getPage(0));
        return stringExtractor.getStrings();
    }
}

From source file:org.wandora.application.tools.extractors.files.SimplePDFExtractor.java

License:Open Source License

/**
 * Loads the PDF identified by {@code locator} and records its metadata and
 * text content as occurrences and associations of {@code pdfTopic}.
 * <p>
 * The document is loaded from the locator (as a URL when it starts with
 * "http://", otherwise as a local file path); {@code inputStream} is not
 * read by this method. Producer, modification date, creator, creation date,
 * author, subject, title and keywords are stored when present. Text is
 * stored either per page (when {@code makePageTopics} is set) or as a single
 * occurrence on {@code pdfTopic}.
 * <p>
 * Any exception is printed to standard error and otherwise swallowed; the
 * document is always closed.
 *
 * @param locator     URL or file path of the PDF
 * @param inputStream unused
 * @param topicMap    the topic map receiving the created topics
 * @param pdfTopic    the topic representing the PDF document
 */
public void _extractTopicsFromStream(String locator, InputStream inputStream, TopicMap topicMap,
        Topic pdfTopic) {
    PDDocument doc = null;
    try {
        if (locator.startsWith("http://")) {
            doc = PDDocument.load(new URL(locator));
        } else {
            doc = PDDocument.load(new File(locator));
        }
        PDDocumentInformation info = doc.getDocumentInformation();
        DateFormat dateFormatter = new SimpleDateFormat(DEFAULT_DATE_FORMAT);

        // --- PDF PRODUCER ---
        String producer = info.getProducer();
        if (producer != null && producer.length() > 0) {
            Topic producerType = createTopic(topicMap, "pdf-producer");
            setData(pdfTopic, producerType, defaultLang, producer.trim());
        }

        // --- PDF MODIFICATION DATE ---
        Calendar mCal = info.getModificationDate();
        if (mCal != null) {
            String mdate = dateFormatter.format(mCal.getTime());
            if (mdate != null && mdate.length() > 0) {
                Topic modificationDateType = createTopic(topicMap, "pdf-modification-date");
                setData(pdfTopic, modificationDateType, defaultLang, mdate.trim());
            }
        }

        // --- PDF CREATOR ---
        String creator = info.getCreator();
        if (creator != null && creator.length() > 0) {
            Topic creatorType = createTopic(topicMap, "pdf-creator");
            setData(pdfTopic, creatorType, defaultLang, creator.trim());
        }

        // --- PDF CREATION DATE ---
        Calendar cCal = info.getCreationDate();
        if (cCal != null) {
            String cdate = dateFormatter.format(cCal.getTime());
            if (cdate != null && cdate.length() > 0) {
                Topic creationDateType = createTopic(topicMap, "pdf-creation-date");
                setData(pdfTopic, creationDateType, defaultLang, cdate.trim());
            }
        }

        // --- PDF AUTHOR ---
        String author = info.getAuthor();
        if (author != null && author.length() > 0) {
            Topic authorType = createTopic(topicMap, "pdf-author");
            setData(pdfTopic, authorType, defaultLang, author.trim());
        }

        // --- PDF SUBJECT ---
        String subject = info.getSubject();
        if (subject != null && subject.length() > 0) {
            Topic subjectType = createTopic(topicMap, "pdf-subject");
            setData(pdfTopic, subjectType, defaultLang, subject.trim());
        }

        // --- PDF TITLE ---
        // Read the title entry; the previous code read the subject here, so
        // the subject was stored a second time as the title.
        String title = info.getTitle();
        if (title != null && title.length() > 0) {
            if (makeVariantFromTitle) {
                pdfTopic.setDisplayName(defaultLang, title);
            } else {
                Topic titleType = createTopic(topicMap, "pdf-title");
                setData(pdfTopic, titleType, defaultLang, title.trim());
            }
        }

        // --- PDF KEYWORDS (SEPARATED WITH SEMICOLON) ---
        String keywords = info.getKeywords();
        if (keywords != null && keywords.length() > 0) {
            Topic keywordType = createTopic(topicMap, "pdf-keyword");
            for (String rawKeyword : keywords.split(";")) {
                String keyword = Textbox.trimExtraSpaces(rawKeyword);
                if (keyword != null && keyword.length() > 0) {
                    Topic keywordTopic = createTopic(topicMap, keyword, keywordType);
                    createAssociation(topicMap, keywordType, new Topic[] { pdfTopic, keywordTopic });
                }
            }
        }

        // --- PDF TEXT CONTENT ---
        PDFTextStripper stripper = new PDFTextStripper();

        if (makePageTopics) {
            int pages = doc.getNumberOfPages();
            // PDFTextStripper page numbers are 1-based. The previous loop ran
            // 0..pages-1, which produced an empty "page 0" topic and never
            // extracted the last page.
            for (int pageNumber = 1; pageNumber <= pages; pageNumber++) {
                stripper.setStartPage(pageNumber);
                stripper.setEndPage(pageNumber);
                String pageContent = stripper.getText(doc);
                Topic pageType = createTopic(topicMap, "pdf-page");
                Topic pageTopic = createTopic(topicMap,
                        pdfTopic.getBaseName() + " (page " + pageNumber + ")", pageType);
                Topic orderType = createTopic(topicMap, "order");
                Topic orderTopic = createTopic(topicMap, pageNumber + ".", orderType);
                Topic contentType = createTopic(topicMap, "pdf-text");
                setData(pageTopic, contentType, defaultLang, pageContent.trim());
                createAssociation(topicMap, pageType, new Topic[] { pdfTopic, pageTopic, orderTopic });
            }
        } else {
            String content = stripper.getText(doc);
            if (content != null && content.length() > 0) {
                Topic contentType = createTopic(topicMap, "pdf-text");
                setData(pdfTopic, contentType, defaultLang, content.trim());
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close exactly once, on both the success and the failure path.
        if (doc != null) {
            try {
                doc.close();
            } catch (Exception ix) {
                // Report the close failure itself (previously the outer
                // exception was printed a second time instead).
                ix.printStackTrace();
            }
        }
    }
}

From source file:PDF.PDFEditor.java

/**
 * Splits the pages of {@code pdf} into two new documents and saves them:
 * pages whose entry in the {@code GlobalVar.VOID_BUTTON_INDEX} row of
 * {@code statusArray} is true go to the reject file, all others go to the
 * audit file.
 *
 * @param pdf               the source document
 * @param auditPdfFileName  path the non-rejected pages are saved to
 * @param rejectPdfFileName path the rejected pages are saved to
 * @param statusArray       per-page status flags, indexed [status][page]
 * @throws COSVisitorException if saving a document fails
 * @throws IOException         if a document cannot be created, saved or closed
 */
private void extractGoodBadPdf(PDDocument pdf, String auditPdfFileName, String rejectPdfFileName,
        Boolean[][] statusArray) throws COSVisitorException, IOException {
    PDDocument rejectPdf = new PDDocument();
    try {
        PDDocument auditPdf = new PDDocument();
        try {
            // Fetch the page list once instead of rebuilding it per iteration.
            java.util.List<?> allPages = pdf.getDocumentCatalog().getAllPages();
            int pageNum = pdf.getNumberOfPages();

            // route each page to the reject or the audit document
            for (int i = 0; i < pageNum; i++) {
                PDPage page = (PDPage) allPages.get(i);
                if (statusArray[GlobalVar.VOID_BUTTON_INDEX][i]) {
                    rejectPdf.addPage(page);
                } else {
                    auditPdf.addPage(page);
                }
            }

            rejectPdf.save(rejectPdfFileName);
            auditPdf.save(auditPdfFileName);
        } finally {
            // Closed even when the loop or a save throws.
            auditPdf.close();
        }
    } finally {
        rejectPdf.close();
    }
}