List of usage examples for org.apache.pdfbox.pdmodel PDDocument getNumberOfPages
public int getNumberOfPages()
From source file:cz.mzk.editor.server.newObject.CreateObject.java
License:Open Source License
/** * Insert foxml.//from w w w . j a va2s .c om * * @param node the node * @param mods the mods * @param dc the dc * @param attempt the attempt * @return the string * @throws CreateObjectException the create object exception */ private String insertFOXML(NewDigitalObject node, Document mods, Document dc, int attempt) throws CreateObjectException { if (attempt == 0) { throw new CreateObjectException("max number of attempts has been reached"); } boolean isPdf = node.getModel().getTopLevelType() != null && (node.getChildren() == null || node.getChildren().size() == 0) && node.getPath() != null; if (isPdf && attempt == Constants.MAX_NUMBER_OF_INGEST_ATTEMPTS) { PDDocument document = null; String newPdfPath = null; try { newPdfPath = imageResolverDAO.getNewImageFilePath(node.getPath()); if (!newPdfPath.endsWith(Constants.PDF_EXTENSION)) { newPdfPath = newPdfPath.concat(Constants.PDF_EXTENSION); } document = PDDocument.load(new File(newPdfPath)); int numberOfPages = document.getNumberOfPages(); LOGGER.warn(newPdfPath + ": Count of pages is 0"); if (numberOfPages > 0 && node.getPageIndex() > numberOfPages) throw new CreateObjectException("The number of page: " + node.getPageIndex() + " to be used for thumbnail is bigger than count of pages in the file: " + numberOfPages); } catch (IOException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException("Unable to read the pdf file: " + newPdfPath); } catch (DatabaseException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } finally { if (document != null) try { document.close(); } catch (IOException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException("Unable to close the pdf file: " + newPdfPath); } } } if (processedPages.containsKey(node.getPath())) { node.setExist(true); node.setUuid(processedPages.get(node.getPath())); } if (processedTracks.containsKey(node.getPath())) { node.setExist(true); 
node.setUuid(processedTracks.get(node.getPath())); } if (node.getExist()) { // do not create, but append only List<NewDigitalObject> childrenToAdd = node.getChildren(); if (childrenToAdd != null && !childrenToAdd.isEmpty()) { for (NewDigitalObject child : childrenToAdd) { if (!child.getExist()) { String uuid = insertFOXML(child, mods, dc); child.setUuid(Constants.FEDORA_UUID_PREFIX + uuid); append(node, child); } else { insertFOXML(child, mods, dc); } } } return node.getUuid(); } FoxmlBuilder builder = FOXMLBuilderMapping.getBuilder(node); if (builder == null) { throw new CreateObjectException("unknown type " + node.getModel()); } if (node.getUuid() == null || attempt != Constants.MAX_NUMBER_OF_INGEST_ATTEMPTS) { node.setUuid(FoxmlUtils.getRandomUuid()); if (topLevelUuid == null) { topLevelUuid = node.getUuid(); try { digitalObjectDAO.insertNewDigitalObject(node.getUuid(), node.getModel().getValue(), node.getName(), inputDirPath, node.getUuid(), false, userId); } catch (DatabaseException e) { LOGGER.error("DB ERROR!!!: " + e.getMessage() + ": " + e); e.printStackTrace(); } } } boolean isPage = node.getModel() == DigitalObjectModel.PAGE; boolean isTrack = node.getModel() == DigitalObjectModel.TRACK; boolean isSoundUnit = node.getModel() == DigitalObjectModel.SOUND_UNIT; builder.setSignature(node.getSignature()); builder.setBase(base); builder.setUuid(node.getUuid()); builder.setDcXmlContent(dc); builder.setModsXmlContent(mods); builder.setBundle(node.getBundle()); builder.setType(node.getType()); builder.setPolicy(node.getVisible() ? 
Policy.PUBLIC : Policy.PRIVATE); builder.setDateOrIntPartName(node.getDateOrIntPartName()); builder.setNoteOrIntSubtitle(node.getNoteOrIntSubtitle()); if (!isPage) { builder.setPartNumber(node.getPartNumberOrAlto()); builder.setAditionalInfo(node.getAditionalInfoOrOcr()); } if (node.getModel() == DigitalObjectModel.PAGE) { builder.setPageIndex(node.getPageIndex()); } List<NewDigitalObject> childrenToAdd = node.getChildren(); if (childrenToAdd != null && !childrenToAdd.isEmpty()) { List<RelsExtRelation> relations = builder.getChildren(); for (NewDigitalObject child : childrenToAdd) { if (!child.getExist()) { String uuid = insertFOXML(child, mods, dc); child.setUuid(uuid); } relations.add(new RelsExtRelation(child.getUuid(), NamedGraphModel.getRelationship(node.getModel(), child.getModel()), child.getName())); } } String imageUrl = null; String newFilePath = null; if (isPage || isSoundUnit) { String url = config.getImageServerUrl(); url = addSlash(url); if (!url.startsWith("http://")) { if (url.startsWith("https://")) { url = url.substring(8); } url = "http://" + url; } if (!isSysno(sysno)) { imageUrl = url + "meditor" + getPathFromNonSysno(sysno) + (node.getUuid()); newFilePath = addSlash(config.getImageServerUnknown()) + getPathFromNonSysno(sysno) + node.getUuid(); } else { String basePath = ""; if (base != null && !"".equals(base)) { basePath = base.toLowerCase() + "/"; } imageUrl = url + basePath + getSysnoPath(sysno) + (node.getUuid()); newFilePath = addSlash(config.getImageServerKnown()) + basePath + getSysnoPath(sysno) + node.getUuid(); } builder.setImageUrl(imageUrl); } else if (isTrack) { String url = config.getRecordingServerUrl(); url = addSlash(url); if (!url.startsWith("http://")) { if (url.startsWith("https://")) { url = url.substring(8); } url = "http://" + url; } String soundUrl; if (!isSysno(sysno)) { soundUrl = url + "meditor" + getPathFromNonSysno(sysno) + (node.getUuid()); newFilePath = addSlash(config.getRecordingServerUnknown()) + 
getPathFromNonSysno(sysno) + node.getUuid(); } else { String basePath = ""; if (base != null && !"".equals(base)) { basePath = base.toLowerCase() + "/"; } newFilePath = addSlash(config.getRecordingServerKnown()) + basePath + getSysnoPath(sysno) + node.getUuid(); soundUrl = url + basePath + getSysnoPath(sysno) + (node.getUuid()); } //No lossless audio on the input queue String soundPath = null; try { soundPath = imageResolverDAO.getNewImageFilePath(node.getPath()); } catch (DatabaseException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } if (builder instanceof TrackBuilder) { if (new File(soundPath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension()).exists()) { ((TrackBuilder) builder).wavProvided(true); } } builder.setImageUrl(soundUrl); } builder.createDocument(); String foxmlRepresentation = builder.getDocument(false); boolean success = IngestUtils.ingest(foxmlRepresentation, node.getName(), node.getUuid(), node.getModel().getValue(), topLevelUuid, inputDirPath); if (success) ingestedObjects.add(node.getUuid()); if ((isPage || isSoundUnit) && success) { // TODO: StringBuffer boolean copySuccess; String newImagePath = null; try { newImagePath = imageResolverDAO.getNewImageFilePath(node.getPath()); if (newImagePath == null) { throw new CreateObjectException("Unkown file path for " + node.getPath()); } else if (!newImagePath.endsWith(Constants.JPEG_2000_EXTENSION)) { newImagePath = newImagePath.concat(Constants.JPEG_2000_EXTENSION); } copySuccess = IOUtils.copyFile(newImagePath, newFilePath + Constants.JPEG_2000_EXTENSION); if (copySuccess && LOGGER.isInfoEnabled()) { LOGGER.info("image " + newImagePath + " was copied to " + newFilePath + Constants.JPEG_2000_EXTENSION); } } catch (IOException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } catch (DatabaseException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new 
CreateObjectException(e.getMessage(), e); } } if (isPage) { String ocrPath = node.getAditionalInfoOrOcr(); if (ocrPath != null && !"".equals(ocrPath)) { insertManagedDatastream(DATASTREAM_ID.TEXT_OCR, node.getUuid(), ocrPath, true, "text/plain"); } String altoPath = node.getPartNumberOrAlto(); if (altoPath != null && !"".equals(altoPath)) { insertManagedDatastream(DATASTREAM_ID.ALTO, node.getUuid(), altoPath, true, "text/xml"); } } if (isTrack && success) { boolean copySuccessWav; boolean copySuccessMp3; boolean copySuccessOgg; String soundPath; try { soundPath = imageResolverDAO.getNewImageFilePath(node.getPath()); soundPath = soundPath.substring(0, soundPath.length() - 4); if (new File(soundPath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension()).exists()) { copySuccessWav = IOUtils.copyFile( soundPath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension(), newFilePath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension()); } copySuccessMp3 = IOUtils.copyFile(soundPath + Constants.AUDIO_MIMETYPES.MP3_MIMETYPE.getExtension(), newFilePath + Constants.AUDIO_MIMETYPES.MP3_MIMETYPE.getExtension()); copySuccessOgg = IOUtils.copyFile(soundPath + Constants.AUDIO_MIMETYPES.OGG_MIMETYPE.getExtension(), newFilePath + Constants.AUDIO_MIMETYPES.OGG_MIMETYPE.getExtension()); } catch (DatabaseException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } catch (IOException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } } if (!success) { insertFOXML(node, mods, dc, attempt - 1); } else if (isPdf) { handlePdf(node); } if (node.getModel() == DigitalObjectModel.PAGE) processedPages.put(node.getPath(), node.getUuid()); if (node.getModel() == DigitalObjectModel.TRACK) processedTracks.put(node.getPath(), node.getUuid()); return node.getUuid(); }
From source file:ddf.catalog.transformer.input.pdf.PdfThumbnailGeneratorImpl.java
License:Open Source License
@Override public Optional<byte[]> apply(PDDocument pdfDocument) throws IOException { PDFRenderer pdfRenderer = new PDFRenderer(pdfDocument); if (pdfDocument.getNumberOfPages() < 1) { return Optional.empty(); }//from ww w . ja v a 2 s . c om BufferedImage image = pdfRenderer.renderImageWithDPI(0, RESOLUTION_DPI, ImageType.RGB); int largestDimension = Math.max(image.getHeight(), image.getWidth()); float scalingFactor = IMAGE_HEIGHTWIDTH / largestDimension; int scaledHeight = (int) (image.getHeight() * scalingFactor); int scaledWidth = (int) (image.getWidth() * scalingFactor); BufferedImage scaledImage = new BufferedImage(scaledWidth, scaledHeight, BufferedImage.TYPE_INT_RGB); Graphics2D graphics = scaledImage.createGraphics(); graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR); graphics.drawImage(image, 0, 0, scaledWidth, scaledHeight, null); graphics.dispose(); try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { ImageIOUtil.writeImage(scaledImage, FORMAT_NAME, outputStream, RESOLUTION_DPI, IMAGE_QUALITY); return Optional.of(outputStream.toByteArray()); } }
From source file:de.haber.pdfbox.CountPages.java
License:Apache License
/**
 * Counts the number of pages of the given <b>input</b> PDF file.
 *
 * @param input
 *            input pdf file that has to exist and must be a file.
 * @return number of pages of the given pdf file.
 * @throws IOException
 *             If there is an error reading from the given file.
 * @throws IllegalArgumentException
 *             If the <b>input</b> does not exist or is not a file.
 */
public int count(File input) throws IOException {
    checkArgument(input.exists() && input.isFile(), "The input pdf has to exist and must be a file.");
    // try-with-resources closes the document even if reading the page count fails.
    try (PDDocument doc = PDDocument.load(input)) {
        return doc.getNumberOfPages();
    }
}
From source file:de.redsix.pdfcompare.PdfComparator.java
License:Apache License
private void compare(final PDDocument expectedDocument, final PDDocument actualDocument) throws IOException { expectedDocument.setResourceCache(new ResourceCacheWithLimitedImages()); PDFRenderer expectedPdfRenderer = new PDFRenderer(expectedDocument); actualDocument.setResourceCache(new ResourceCacheWithLimitedImages()); PDFRenderer actualPdfRenderer = new PDFRenderer(actualDocument); final int minPageCount = Math.min(expectedDocument.getNumberOfPages(), actualDocument.getNumberOfPages()); CountDownLatch latch = new CountDownLatch(minPageCount); for (int pageIndex = 0; pageIndex < minPageCount; pageIndex++) { drawImage(latch, pageIndex, expectedDocument, actualDocument, expectedPdfRenderer, actualPdfRenderer); }//w ww . jav a 2 s . c o m Utilities.await(latch, "FullCompare"); Utilities.shutdownAndAwaitTermination(drawExecutor, "Draw"); Utilities.shutdownAndAwaitTermination(parrallelDrawExecutor, "Parallel Draw"); Utilities.shutdownAndAwaitTermination(diffExecutor, "Diff"); if (expectedDocument.getNumberOfPages() > minPageCount) { addExtraPages(expectedDocument, expectedPdfRenderer, minPageCount, MISSING_RGB, true); } else if (actualDocument.getNumberOfPages() > minPageCount) { addExtraPages(actualDocument, actualPdfRenderer, minPageCount, EXTRA_RGB, false); } }
From source file:de.redsix.pdfcompare.PdfComparator.java
License:Apache License
private void addExtraPages(final PDDocument document, final PDFRenderer pdfRenderer, final int minPageCount, final int color, final boolean expected) throws IOException { for (int pageIndex = minPageCount; pageIndex < document.getNumberOfPages(); pageIndex++) { ImageWithDimension image = renderPageAsImage(document, pdfRenderer, pageIndex); final DataBuffer dataBuffer = image.bufferedImage.getRaster().getDataBuffer(); for (int i = 0; i < image.bufferedImage.getWidth() * MARKER_WIDTH; i++) { dataBuffer.setElem(i, color); }//from ww w . ja v a 2s . c om for (int i = 0; i < image.bufferedImage.getHeight(); i++) { for (int j = 0; j < MARKER_WIDTH; j++) { dataBuffer.setElem(i * image.bufferedImage.getWidth() + j, color); } } if (expected) { compareResult.addPage(new PageDiffCalculator(true, false), pageIndex, image, blank(image), image); } else { compareResult.addPage(new PageDiffCalculator(true, false), pageIndex, blank(image), image, image); } } }
From source file:diagramextractor.DiagramExtractor.java
/**
 * Scans every file in the input directory for pages mentioning one of the selected
 * diagram names, stamps each matching page with the name of the report it came from,
 * and saves all matching pages into a single output PDF.
 *
 * @param args the command line arguments; {@code args[0]} is the input directory and
 *        {@code args[1]} the diagram name used in the "not found" message
 * @throws IOException if a report cannot be read or the output cannot be written
 * @throws COSVisitorException if the output document cannot be saved
 */
public static void main(String[] args) throws IOException, COSVisitorException {
    if (args.length < 2) {
        showHelp();
        System.exit(-1);
    }

    List<Integer> diagramOptionsList = parseOptions(args);
    List<String> diagramNameList = getDiagramNames(diagramOptionsList);

    File inputDir = new File(args[0]);
    File[] reports = inputDir.listFiles();
    if (reports == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        System.err.println("Cannot list files in input directory: " + inputDir);
        System.exit(-1);
        return;
    }
    String diagramName = args[1];

    PDDocument outputDocument = new PDDocument();

    PDFMergerUtility merger = new PDFMergerUtility();
    merger.setDestinationFileName("output.pdf");

    for (File report : reports) {
        PDDocument doc = PDDocument.load(report);
        try {
            System.out.println("LOADED FILE: " + report.getName());

            // 0 means "no diagram page found"; real page numbers are 1-based.
            int pageNumber = 0;
            int numberOfPages = doc.getNumberOfPages();
            System.out.println("NUMBER OF PAGES: " + numberOfPages);

            // PDFTextStripper and PageExtractor both use 1-based page numbers.
            for (int i = 1; i <= numberOfPages; i++) {
                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setStartPage(i);
                stripper.setEndPage(i);
                String contents = stripper.getText(doc);

                boolean containsDiagram = false;
                for (String diagram : diagramNameList) {
                    if (contents.contains(diagram)) {
                        containsDiagram = true;
                        break;
                    }
                }

                // Skip table-of-contents / table-of-figures pages that merely list the diagram.
                if (containsDiagram && !contents.contains("Table of Contents")
                        && !contents.contains("Table of Figures") && !contents.contains("Obsah")
                        && !contents.contains("Tabulka ?sel")) {
                    pageNumber = i;
                    System.out.println("Diagram found on page: " + pageNumber);

                    PageExtractor extractor = new PageExtractor(doc, pageNumber, pageNumber);
                    PDDocument extractedPage = extractor.extract();

                    PDPage page = (PDPage) extractedPage.getDocumentCatalog().getAllPages().get(0);
                    PDPageContentStream contentStream =
                            new PDPageContentStream(extractedPage, page, true, true, true);
                    contentStream.beginText();
                    contentStream.setFont(PDType1Font.HELVETICA_BOLD, 12);
                    contentStream.moveTextPositionByAmount(100, 50);
                    contentStream.drawString(report.getName());
                    contentStream.endText();
                    contentStream.close();

                    merger.appendDocument(outputDocument, extractedPage);
                }
            }

            if (pageNumber == 0) {
                System.out.println(
                        "The diagram " + diagramName + " was not found in file " + report.getName());
            }
        } finally {
            doc.close();
        }
    }

    merger.mergeDocuments();

    System.out.println();
    System.out.println("Diagrams have been merged.");

    String outputFileName = generateFilename(inputDir.getCanonicalPath(), "output.pdf");
    outputDocument.save(outputFileName);
    outputDocument.close();

    System.out.println("Output file saved as: " + outputFileName);
}
From source file:dk.defxws.fedoragsearch.server.TransformerToText.java
License:Open Source License
public int getPdfPagesCount_(byte[] doc) throws Exception { String password = ""; PDDocument pdDoc = null; // extract PDF document's textual content try {/*w w w . jav a2 s . co m*/ pdDoc = PDDocument.load(new ByteArrayInputStream(doc), password); return pdDoc.getNumberOfPages(); } catch (Exception e) { throw new Exception("Cannot parse PDF document", e); } finally { closePDDocument(pdDoc); } }
From source file:edu.ist.psu.sagnik.research.pdfbox2playground.javatest.DrawPrintTextLocations.java
License:Apache License
/**
 * Loads the PDF found at {@code new DataLocation().pdLoc} and runs
 * {@code stripPage} on each of its pages, sorted by position.
 *
 * @param args The command line arguments (not used).
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    // try-with-resources closes the document and keeps the primary exception
    // if both processing and close() fail.
    try (PDDocument document = PDDocument.load(new File(new DataLocation().pdLoc))) {
        DrawPrintTextLocations stripper = new DrawPrintTextLocations(document, new DataLocation().pdLoc);
        stripper.setSortByPosition(true);

        for (int page = 0; page < document.getNumberOfPages(); ++page) {
            stripper.stripPage(page);
        }
    }
}
From source file:edu.ist.psu.sagnik.research.pdfbox2playground.javatest.ExtractImages.java
License:Apache License
private void extract(String pdfFile, String password) throws IOException { PDDocument document = null; try {/* w w w .ja va 2 s .c om*/ document = PDDocument.load(new File(pdfFile), password); AccessPermission ap = document.getCurrentAccessPermission(); if (!ap.canExtractContent()) { throw new IOException("You do not have permission to extract images"); } for (int i = 0; i < document.getNumberOfPages(); i++) // todo: ITERATOR would be much better { PDPage page = document.getPage(i); ImageGraphicsEngine extractor = new ImageGraphicsEngine(page); extractor.run(); } } finally { if (document != null) { document.close(); } } }
From source file:es.rickyepoderi.pdfimages.Converter.java
License:Open Source License
/** * Method that converts a PDF file in a series of images. * /*from w w w .j a v a2s . com*/ * @param pdfFile The PDF file to read * @param prefix The prefix of the images to write * @param imgFormat The image format ("jpg", "png",...) used by ImageIO * @param dpi The DPI of the images to render pages * @param type The type of the image (RGB, GREY,...) * @throws IOException Some error generating the images */ public void pdf2Images(File pdfFile, String prefix, String suffix, int dpi, ImageType type) throws IOException { PDDocument document = PDDocument.load(pdfFile); PDFRenderer pdfRenderer = new PDFRenderer(document); int pad = (int) Math.ceil(Math.log10(document.getNumberOfPages())); if (pad == 0) { pad = 1; } String format = String.format("%s.%%0%dd.%s", prefix, pad, suffix); for (int i = 0; i < document.getNumberOfPages(); i++) { BufferedImage image = pdfRenderer.renderImageWithDPI(i, dpi, type); ImageIO.write(image, suffix, new File(String.format(format, i))); } }