List of usage examples for org.apache.pdfbox.pdmodel.PDDocument#getNumberOfPages()
public int getNumberOfPages()
From source file:org.pdfsam.pdf.DefaultPDFBoxLoader.java
License:Open Source License
/**
 * Copies the page count, the PDF version and the document information entries
 * of the given document into the given descriptor.
 *
 * @param document   the document the values are read from
 * @param descriptor the descriptor the values are written to
 */
public void accept(PDDocument document, PdfDocumentDescriptor descriptor) {
    int pageCount = document.getNumberOfPages();
    descriptor.pages(pageCount);

    String versionText = Float.toString(document.getVersion());
    descriptor.setVersion(getVersion(versionText));

    PDDocumentInformation metadata = document.getDocumentInformation();
    descriptor.putInformation(PdfMetadataKey.TITLE.getKey(), metadata.getTitle());
    descriptor.putInformation(PdfMetadataKey.AUTHOR.getKey(), metadata.getAuthor());
    descriptor.putInformation(PdfMetadataKey.CREATOR.getKey(), metadata.getCreator());
    descriptor.putInformation(PdfMetadataKey.SUBJECT.getKey(), metadata.getSubject());
    descriptor.putInformation(PdfMetadataKey.KEYWORDS.getKey(), metadata.getKeywords());
    descriptor.putInformation("Producer", metadata.getProducer());

    // The formatted creation date is only stored when the document has one.
    Optional.ofNullable(metadata.getCreationDate())
            .map(FORMATTER::format)
            .ifPresent(formatted -> descriptor.putInformation("FormattedCreationDate", formatted));
}
From source file:org.pdfsam.pdfbox.component.PdfRotatorTest.java
License:Open Source License
/**
 * Applies a 270 degree rotation to pages 1 and 2 of a mocked two-page document
 * and expects the page reporting 180 to be set to 90 and the page reporting 90
 * to be set to 0.
 */
@Test
public void multiplePages() {
    // given: two pages with an existing rotation
    PDPage firstPage = mock(PDPage.class);
    when(firstPage.getRotation()).thenReturn(180);
    PDPage secondPage = mock(PDPage.class);
    when(secondPage.getRotation()).thenReturn(90);

    PDDocument document = mock(PDDocument.class);
    when(document.getNumberOfPages()).thenReturn(2);
    when(document.getPage(0)).thenReturn(firstPage);
    when(document.getPage(1)).thenReturn(secondPage);

    // when
    HashSet<Integer> pagesToRotate = new HashSet<>(Arrays.asList(1, 2));
    applyRotation(Rotation.DEGREES_270, pagesToRotate).to(document);

    // then
    verify(firstPage).setRotation(90);
    verify(secondPage).setRotation(0);
}
From source file:org.pdfsam.pdfbox.component.split.AbstractPdfSplitter.java
License:Open Source License
/**
 * Builds a splitter over the given document and records the document's page
 * count at construction time.
 *
 * @param document   the document whose pages are read by this splitter
 * @param parameters the parameters kept by this splitter
 */
public AbstractPdfSplitter(PDDocument document, T parameters) {
    int pageCount = document.getNumberOfPages();
    this.parameters = parameters;
    this.document = document;
    this.totalPages = pageCount;
}
From source file:org.pdfsam.pdfbox.component.split.PagesPdfSplitter.java
License:Open Source License
public PagesPdfSplitter(PDDocument document, T parameters) { super(document, parameters); this.splitPages = new SplitPages(parameters.getPages(document.getNumberOfPages())); }
From source file:org.pennyledger.docstore.parser.impl.PDFImageExtractor.java
License:Apache License
/** * Entry point for the application./*from w w w.j a v a2s . c o m*/ * * @param args * The command-line arguments. * @throws IOException * if there is an error reading the file or extracting the images. */ // public static void main(String[] args) throws IOException { // // suppress the Dock icon on OS X // System.setProperty("apple.awt.UIElement", "true"); // // IImageParser imageParser = new TesseractImageOCR(); // PDFImageExtractor extractor = new PDFImageExtractor(imageParser, 150); // extractor.run(args); // } // private void run(String[] args) throws IOException { // //String pdfFile = args[0]; // String pdfFile = "c:/PennyLedger/1d51-9fe1b211e8039458b2ac4dbbfbf1.pdf"; // if (pdfFile.length() <= 4) { // throw new IllegalArgumentException("Invalid file name: not PDF"); // } // String password = ""; // Path pdfPath = Paths.get(pdfFile); // PDDocument document = PDDocument.load(pdfPath.toFile(), password); // IDocumentContents docContents = new DocumentContents(); // extract(document, id, pdfPath, docContents); // } IDocumentContents extract(PDDocument document, String id, IDocumentContents docContents) throws IOException { AccessPermission ap = document.getCurrentAccessPermission(); if (!ap.canExtractContent()) { throw new IOException("You do not have permission to extract images"); } for (int i = 0; i < document.getNumberOfPages(); i++) { PDPage page = document.getPage(i); ImageGraphicsEngine extractor = new ImageGraphicsEngine(page, i, id); extractor.run(); IDocumentContents pageContents = extractor.getPageContents(); docContents = docContents.merge(pageContents); } return docContents; }
From source file:org.quelea.data.pdf.PDFPresentation.java
License:Open Source License
/**
 * Make the slides that go in this PDF, this is what takes time and should
 * only be done once.
 *
 * @return all the slides, one per page of the document, numbered from 1.
 * @throws IOException if the PDF cannot be loaded or closed.
 */
private PdfSlide[] makeSlides() throws IOException {
    File pdf = new File(file);
    // try-with-resources closes the document even when creating a slide fails.
    try (PDDocument document = PDDocument.load(pdf.getAbsoluteFile())) {
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int totalPages = document.getNumberOfPages();
        PdfSlide[] slides = new PdfSlide[totalPages];
        for (int i = 0; i < totalPages; i++) {
            // Slide numbers are 1-based while the array index is 0-based.
            slides[i] = new PdfSlide(i + 1, pdfRenderer);
        }
        return slides;
    }
}
From source file:org.quelea.services.importexport.SurvivorSongbookParser.java
License:Open Source License
/** * Get all the songs in the PDF document. * @return a list of all the songs.//from www. ja v a2 s.com * @throws IOException if something went wrong. */ @Override public List<SongDisplayable> getSongs(File location, StatusPanel statusPanel) throws IOException { PDDocument document = PDDocument.load(location); List<SongDisplayable> pdfSongs = new ArrayList<>(); PDFTextStripper stripper = new PDFTextStripper(); List<String> songParts = new ArrayList<>(); for (int i = 0; i < document.getNumberOfPages(); i++) { String pageText = getPageText(document, stripper, i); if (pageText.trim().isEmpty()) { continue; } songParts.add(pageText); boolean twoPart = pageText.contains("(1 of"); if (i < document.getNumberOfPages() - 1) { //This section in case the original (1 of x) is missed out String nextPageText = getPageText(document, stripper, i + 1); if (nextPageText.contains("(2 of")) { twoPart = true; } } if (!twoPart) { SongDisplayable song = processSong(songParts.toArray(new String[songParts.size()])); if (song != null) { pdfSongs.add(song); } songParts.clear(); } } document.close(); if (pdfSongs == null) { return new ArrayList<>(); } else { return pdfSongs; } }
From source file:org.springframework.restdocs.asciidoctor.OperationBlockMacroTests.java
License:Apache License
/**
 * Loads the given PDF, asserts that it has exactly one page and returns the
 * strings a {@code StringExtractor} collects from that page.
 *
 * @param pdfFile the PDF file to read
 * @return the strings extracted from the first page
 * @throws IOException if the PDF cannot be loaded, processed or closed
 */
private List<String> extractStrings(File pdfFile) throws IOException {
    // try-with-resources releases the document even when the assertion fails.
    try (PDDocument pdf = PDDocument.load(pdfFile)) {
        assertThat(pdf.getNumberOfPages()).isEqualTo(1);
        StringExtractor stringExtractor = new StringExtractor();
        stringExtractor.processPage(pdf.getPage(0));
        return stringExtractor.getStrings();
    }
}
From source file:org.wandora.application.tools.extractors.files.SimplePDFExtractor.java
License:Open Source License
public void _extractTopicsFromStream(String locator, InputStream inputStream, TopicMap topicMap, Topic pdfTopic) {//from w w w . java2 s. c o m PDDocument doc = null; try { if (locator.startsWith("http://")) { doc = PDDocument.load(new URL(locator)); } else { doc = PDDocument.load(new File(locator)); } PDDocumentInformation info = doc.getDocumentInformation(); DateFormat dateFormatter = new SimpleDateFormat(DEFAULT_DATE_FORMAT); // --- PDF PRODUCER --- String producer = info.getProducer(); if (producer != null && producer.length() > 0) { Topic producerType = createTopic(topicMap, "pdf-producer"); setData(pdfTopic, producerType, defaultLang, producer.trim()); } // --- PDF MODIFICATION DATE --- Calendar mCal = info.getModificationDate(); if (mCal != null) { String mdate = dateFormatter.format(mCal.getTime()); if (mdate != null && mdate.length() > 0) { Topic modificationDateType = createTopic(topicMap, "pdf-modification-date"); setData(pdfTopic, modificationDateType, defaultLang, mdate.trim()); } } // --- PDF CREATOR --- String creator = info.getCreator(); if (creator != null && creator.length() > 0) { Topic creatorType = createTopic(topicMap, "pdf-creator"); setData(pdfTopic, creatorType, defaultLang, creator.trim()); } // --- PDF CREATION DATE --- Calendar cCal = info.getCreationDate(); if (cCal != null) { String cdate = dateFormatter.format(cCal.getTime()); if (cdate != null && cdate.length() > 0) { Topic creationDateType = createTopic(topicMap, "pdf-creation-date"); setData(pdfTopic, creationDateType, defaultLang, cdate.trim()); } } // --- PDF AUTHOR --- String author = info.getAuthor(); if (author != null && author.length() > 0) { Topic authorType = createTopic(topicMap, "pdf-author"); setData(pdfTopic, authorType, defaultLang, author.trim()); } // --- PDF SUBJECT --- String subject = info.getSubject(); if (subject != null && subject.length() > 0) { Topic subjectType = createTopic(topicMap, "pdf-subject"); setData(pdfTopic, subjectType, defaultLang, 
subject.trim()); } // --- PDF TITLE --- String title = info.getSubject(); if (title != null && title.length() > 0) { if (makeVariantFromTitle) { pdfTopic.setDisplayName(defaultLang, title); } else { Topic titleType = createTopic(topicMap, "pdf-title"); setData(pdfTopic, titleType, defaultLang, title.trim()); } } // --- PDF KEYWORDS (SEPARATED WITH SEMICOLON) --- String keywords = info.getKeywords(); if (keywords != null && keywords.length() > 0) { Topic keywordType = createTopic(topicMap, "pdf-keyword"); String[] keywordArray = keywords.split(";"); String keyword = null; for (int i = 0; i < keywordArray.length; i++) { keyword = Textbox.trimExtraSpaces(keywordArray[i]); if (keyword != null && keyword.length() > 0) { Topic keywordTopic = createTopic(topicMap, keyword, keywordType); createAssociation(topicMap, keywordType, new Topic[] { pdfTopic, keywordTopic }); } } } // --- PDF TEXT CONTENT --- PDFTextStripper stripper = new PDFTextStripper(); String content = new String(); if (makePageTopics) { int pages = doc.getNumberOfPages(); String pageContent = null; for (int i = 0; i < pages; i++) { stripper.setStartPage(i); stripper.setEndPage(i); pageContent = stripper.getText(doc); Topic pageType = createTopic(topicMap, "pdf-page"); Topic pageTopic = createTopic(topicMap, pdfTopic.getBaseName() + " (page " + i + ")", pageType); Topic orderType = createTopic(topicMap, "order"); Topic orderTopic = createTopic(topicMap, i + ".", orderType); Topic contentType = createTopic(topicMap, "pdf-text"); setData(pageTopic, contentType, defaultLang, pageContent.trim()); createAssociation(topicMap, pageType, new Topic[] { pdfTopic, pageTopic, orderTopic }); } } else { content = stripper.getText(doc); } if (!makePageTopics && content != null && content.length() > 0) { Topic contentType = createTopic(topicMap, "pdf-text"); setData(pdfTopic, contentType, defaultLang, content.trim()); } doc.close(); } catch (Exception e) { e.printStackTrace(); try { if (doc != null) doc.close(); } catch 
(Exception ix) { e.printStackTrace(); } } }
From source file:PDF.PDFEditor.java
private void extractGoodBadPdf(PDDocument pdf, String auditPdfFileName, String rejectPdfFileName, Boolean[][] statusArray) throws COSVisitorException, IOException { PDDocument rejectPdf = new PDDocument(); PDDocument auditPdf = new PDDocument(); int pageNum = pdf.getNumberOfPages(); // add reject page into rejectPdf for (int i = 0; i < pageNum; i++) { PDPage page = (PDPage) pdf.getDocumentCatalog().getAllPages().get(i); if (statusArray[GlobalVar.VOID_BUTTON_INDEX][i]) { rejectPdf.addPage(page);//w ww .j av a 2 s . c om } else { auditPdf.addPage(page); } } rejectPdf.save(rejectPdfFileName); rejectPdf.close(); auditPdf.save(auditPdfFileName); auditPdf.close(); }