Example usage for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog

List of usage examples for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog

Introduction

On this page you can find example usages of the getDocumentCatalog() method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public PDDocumentCatalog getDocumentCatalog() 

Source Link

Document

Returns the document catalog.

Usage

From source file:Project.data.preparation.ImageExtraction.java

/**
 * Writes the image XObjects found on each page of a PDF to disk and passes the
 * per-page image names, locations and sizes on to {@code DetectFigureName}.
 *
 * <p>The existence check and the output-name prefix come from the {@code oldFile}
 * field, while the document itself is loaded from {@code sourceDir}.
 * NOTE(review): presumably both denote the same PDF - confirm against the caller.
 *
 * @param sourceDir      path handed to {@code PDDocument.load}
 * @param destinationDir prefix prepended verbatim to every image name (no separator is added)
 * @throws IOException           if loading, reading or closing the document fails
 * @throws CryptographyException if the document cannot be decrypted with the empty password
 * @throws COSVisitorException   declared for callers; kept for interface compatibility
 */
public void extractImages(String sourceDir, String destinationDir)
        throws IOException, CryptographyException, COSVisitorException {
    if (!oldFile.exists()) {
        System.err.println("File not exists");
        return;
    }

    PDDocument document = PDDocument.load(sourceDir);
    try {
        if (document.isEncrypted()) {
            document.decrypt("");
        }
        List<PDPage> list = document.getDocumentCatalog().getAllPages();

        String fileName_img = oldFile.getName().replace(".pdf", "_cover");
        int pageNum = 0;
        int totalImages = 1; // running counter across all pages, used as file-name suffix
        System.out.println("\n" + filename);

        for (PDPage page : list) {
            // The per-page collections are fields; they are reset for every page.
            original_imgName = new ArrayList<String>();
            location_xy = new ArrayList<double[]>();
            size_xy_ordered = new ArrayList<double[]>();
            size_xy_tmp = new ArrayList<double[]>();
            PDResources pdResources = page.getResources();
            Map pageImages = pdResources.getXObjects();
            pageNum++;
            if (pageImages == null || pageImages.isEmpty()) {
                continue;
            }

            for (Object xObject : pageImages.values()) {
                // getXObjects() is not limited to images; skip anything that is not
                // an image instead of failing with a ClassCastException.
                if (!(xObject instanceof PDXObjectImage)) {
                    continue;
                }
                PDXObjectImage pdxObjectImage = (PDXObjectImage) xObject;
                String imgName = fileName_img + "_" + totalImages;
                System.out.println("Page Number : " + pageNum + "\t" + imgName);
                pdxObjectImage.write2file(destinationDir + imgName);

                original_imgName.add(imgName + "." + pdxObjectImage.getSuffix());
                double[] size = new double[] { pdxObjectImage.getWidth(), pdxObjectImage.getHeight() };
                size_xy_ordered.add(size);
                totalImages++;
            }

            // Figure-name detection used for renaming the images of this page.
            PrintImageLocation printer = new PrintImageLocation(page);
            location_xy = printer.getLocation_xy();
            size_xy_tmp = printer.getSize_xy();
            RearrangeImageOrder(location_xy, size_xy_tmp, size_xy_ordered);
            // NOTE(review): the instance is never used afterwards, so the constructor
            // presumably performs the detection as a side effect - confirm.
            new DetectFigureName(original_imgName, filename, pageNum, page,
                    location_ordered, size_xy_ordered);
        }
    } finally {
        // Release the document even when decryption or image extraction throws.
        document.close();
    }
}

From source file:richtercloud.document.scanner.gui.MainPanel.java

License:Open Source License

/**
 * Uses a modal dialog in order to display the progress of the retrieval and
 * make the operation cancelable./*from   w  ww.  ja  va 2  s . co  m*/
 * @param documentFile
 * @return the retrieved images or {@code null} if the retrieval has been
 * canceled (in dialog)
 * @throws DocumentAddException
 * @throws InterruptedException
 * @throws ExecutionException
 */
/*
internal implementation notes:
- can't use ProgressMonitor without blocking EVT instead of a model dialog
when using SwingWorker.get
*/
public List<BufferedImage> retrieveImages(final File documentFile)
        throws DocumentAddException, InterruptedException, ExecutionException {
    if (documentFile == null) {
        throw new IllegalArgumentException("documentFile mustn't be null");
    }
    final SwingWorkerGetWaitDialog dialog = new SwingWorkerGetWaitDialog(SwingUtilities.getWindowAncestor(this), //owner
            DocumentScanner.generateApplicationWindowTitle("Wait", APP_NAME, APP_VERSION), //dialogTitle
            "Retrieving image data", //labelText
            null //progressBarText
    );
    final SwingWorker<List<BufferedImage>, Void> worker = new SwingWorker<List<BufferedImage>, Void>() {
        @Override
        protected List<BufferedImage> doInBackground() throws Exception {
            List<BufferedImage> retValue = new LinkedList<>();
            try {
                InputStream pdfInputStream = new FileInputStream(documentFile);
                PDDocument document = PDDocument.load(pdfInputStream);
                @SuppressWarnings("unchecked")
                List<PDPage> pages = document.getDocumentCatalog().getAllPages();
                for (PDPage page : pages) {
                    if (dialog.isCanceled()) {
                        document.close();
                        MainPanel.LOGGER.debug("tab generation aborted");
                        return null;
                    }
                    BufferedImage image = page.convertToImage();
                    retValue.add(image);
                }
                document.close();
            } catch (IOException ex) {
                throw new DocumentAddException(ex);
            }
            return retValue;
        }

        @Override
        protected void done() {
        }
    };
    worker.addPropertyChangeListener(new SwingWorkerCompletionWaiter(dialog));
    worker.execute();
    //the dialog will be visible until the SwingWorker is done
    dialog.setVisible(true);
    List<BufferedImage> retValue = worker.get();
    return retValue;
}

From source file:se.streamsource.streamflow.web.application.pdf.Underlay.java

License:Apache License

/**
 * Draws the first page of {@code overlay} onto every page of {@code document}
 * using an extended graphics state whose blend mode ("BM") is "Darken".
 *
 * @param document the document whose pages receive the overlay
 * @param overlay  the document whose first page is imported as a form XObject
 * @throws IOException if importing the page or writing a content stream fails
 */
private void overlayWithDarkenBlendMode(PDDocument document, PDDocument overlay) throws IOException {
    PDXObjectForm xobject = importAsXObject(document,
            (PDPage) overlay.getDocumentCatalog().getAllPages().get(0));
    PDExtendedGraphicsState darken = new PDExtendedGraphicsState();
    darken.getCOSDictionary().setName("BM", "Darken");

    List<PDPage> pages = document.getDocumentCatalog().getAllPages();

    for (PDPage page : pages) {
        // Register the graphics state under a key that is not yet used on this page.
        Map<String, PDExtendedGraphicsState> states = page.getResources().getGraphicsStates();
        if (states == null) {
            states = new HashMap<>();
        }
        String darkenKey = MapUtil.getNextUniqueKey(states, "Dkn");
        states.put(darkenKey, darken);
        page.getResources().setGraphicsStates(states);

        PDPageContentStream stream = new PDPageContentStream(document, page, true, false, true);
        try {
            // Select the darken graphics state, then paint the imported page.
            stream.appendRawCommands(String.format("/%s gs ", darkenKey));
            stream.drawXObject(xobject, 0, 0, 1, 1);
        } finally {
            // Close the stream even when one of the drawing calls throws.
            stream.close();
        }
    }
}

From source file:tesreateimage.Tesreateimage.java

/**
 * Renders every page of a PDF as an RGB image at 300 dpi and writes each one
 * as PNG via {@code ImageIOUtil.writeImage}, using
 * {@code pdfFilename + "-" + pageNumber} (1-based) as the target name.
 *
 * @param pdfFilename path of the PDF to convert; also the prefix of the output names
 * @throws IOException if the document cannot be read or an image cannot be written
 */
public static void convertPDFToImage(String pdfFilename) throws IOException {
    PDDocument document = PDDocument.loadNonSeq(new File(pdfFilename), null);
    try {
        List<PDPage> pdPages = document.getDocumentCatalog().getAllPages();
        int page = 0;
        for (PDPage pdPage : pdPages) {
            ++page;
            BufferedImage bim = pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, 300);
            ImageIOUtil.writeImage(bim, "png", pdfFilename + "-" + page, 300);
        }
    } finally {
        // Release the document even when rendering or writing a page fails.
        document.close();
    }
}

From source file:Tools.PostProcessing.java

/**
 * Splits a pre-processed PDF into an "accepted" document and a "rejected" document.
 *
 * <p>Each page's text is extracted. A page containing
 * {@code GlobalVar.PRE_PROC_KEY_SYMBOL} is accepted when its SSN / control number
 * pair is present in {@code LEGIT_LV_MAP_FOR_COLOR_LV_LOG} and the stored sequence
 * number equals the next expected one; a sequence mismatch is only reported on
 * stdout and the page goes to neither output. A page whose pair is unknown is
 * rejected and annotated through {@code drawComments}. Pages without the key
 * symbol follow whichever output the last classified page went to (rejected by
 * default).
 *
 * @param preProcPdfFileName path of the pre-processed PDF; must contain ".pdf" or ".PDF"
 * @return the name of the written accepted-pages file, or {@code null} when no such
 *         file was written (no accepted pages, or an invalid input file name)
 * @throws IOException         if reading, saving or closing a document fails
 * @throws COSVisitorException if saving a document fails
 */
private String generateProcessedAndRejectPDFs(String preProcPdfFileName)
        throws IOException, COSVisitorException {
    PDDocument pdf = PDDocument.load(preProcPdfFileName);
    PDDocument rejectPdf = null;
    PDDocument cedmsPdf = null;
    try {
        rejectPdf = new PDDocument();
        cedmsPdf = new PDDocument();

        int pageNum = pdf.getNumberOfPages(); // total number of pages in the pdf file
        // Fetch the page list once instead of rebuilding it for every page.
        List allPages = pdf.getDocumentCatalog().getAllPages();
        PDFTextStripper pdfStripper = new PDFTextStripper();
        int seqNumber = 1;
        boolean isLastReject = true; // destination of the last classified page

        for (int i = 0; i < pageNum; i++) {
            PDPage page = (PDPage) allPages.get(i);
            int pageIndex = i + 1; // the text stripper uses 1-based page numbers
            pdfStripper.setStartPage(pageIndex);
            pdfStripper.setEndPage(pageIndex);
            String res = pdfStripper.getText(pdf);

            if (!res.contains(GlobalVar.PRE_PROC_KEY_SYMBOL)) {
                // Supporting document: goes wherever the last classified page went.
                if (isLastReject) {
                    rejectPdf.addPage(page);
                } else {
                    cedmsPdf.addPage(page);
                }
                continue;
            }

            String[] data = GlobalVar.getCtrlNumAndfullSSN(res);
            String ctrlNum = data[0];
            String fullSSN = data[1];
            if (LEGIT_LV_MAP_FOR_COLOR_LV_LOG.containsKey(fullSSN)
                    && LEGIT_LV_MAP_FOR_COLOR_LV_LOG.get(fullSSN).containsKey(ctrlNum)) {
                int thisSeqNumber = Integer.parseInt(LEGIT_LV_MAP_FOR_COLOR_LV_LOG.get(fullSSN).get(ctrlNum));
                if (thisSeqNumber == seqNumber) {
                    cedmsPdf.addPage(page);
                    isLastReject = false;
                    seqNumber++;
                } else {
                    String msg = "Order might be incorrect or there is a duplicate! ssn: " + fullSSN
                            + " ctrl num: " + ctrlNum + ". Seq number is: " + seqNumber;
                    System.out.println(msg);
                }
            } else {
                rejectPdf.addPage(page);
                drawComments(rejectPdf, page, fullSSN, ctrlNum);
                isLastReject = true;
            }
        }

        String cedmsPdfFileName;
        String rejectPdfFileName;
        if (preProcPdfFileName.contains(".pdf")) {
            cedmsPdfFileName = preProcPdfFileName.replace(".pdf", GlobalVar.PRE_CEDMS_PDF);
            rejectPdfFileName = preProcPdfFileName.replace(".pdf", "_forReject.pdf");
        } else if (preProcPdfFileName.contains(".PDF")) {
            cedmsPdfFileName = preProcPdfFileName.replace(".PDF", GlobalVar.PRE_CEDMS_PDF);
            rejectPdfFileName = preProcPdfFileName.replace(".PDF", "_forReject.pdf");
        } else {
            JOptionPane.showMessageDialog(null, "Invalid pre-processing file.");
            // No usable output names: stop here instead of calling save(null).
            return null;
        }

        boolean hasCedmsPages = cedmsPdf.getNumberOfPages() > 0;
        if (hasCedmsPages) {
            cedmsPdf.save(cedmsPdfFileName);
        }
        if (rejectPdf.getNumberOfPages() > 0) {
            rejectPdf.save(rejectPdfFileName);
        }
        // Only report a file name when that file was actually written.
        return hasCedmsPages ? cedmsPdfFileName : null;
    } finally {
        // Nested finally blocks: a failing close() must not skip the remaining ones.
        try {
            if (rejectPdf != null) {
                rejectPdf.close();
            }
        } finally {
            try {
                if (cedmsPdf != null) {
                    cedmsPdf.close();
                }
            } finally {
                pdf.close();
            }
        }
    }
}

From source file:Tools.PostProcessing.java

/**
 * Stamps a running sequence number onto every page of a pre-processed PDF that
 * contains {@code GlobalVar.PRE_PROC_KEY_SYMBOL} and saves the result under a new name.
 *
 * <p>If the document text already contains "/0" it is treated as numbered and
 * {@code GlobalVar.updateSeqNum} is called instead. A document without the key
 * symbol is saved unchanged.
 *
 * @param processedPdfFileName path of the PDF to number
 * @return the name of the saved file: the input name with ".pdf" replaced by
 *         {@code GlobalVar.NUMBERED_PDF}
 * @throws IOException         if reading, writing or closing the document fails
 * @throws COSVisitorException if saving the document fails
 */
private String pdfNumbering(String processedPdfFileName) throws IOException, COSVisitorException {
    PDDocument cedmsPdf = PDDocument.load(processedPdfFileName);
    try {
        List pages = cedmsPdf.getDocumentCatalog().getAllPages();
        PDFTextStripper pdfStripper = new PDFTextStripper();
        String res = pdfStripper.getText(cedmsPdf);
        boolean isPreProcessed = res.contains(GlobalVar.PRE_PROC_KEY_SYMBOL); // check if the file is pre-processed.
        boolean isNumbered = res.contains("/0");

        if (isPreProcessed && isNumbered) {
            GlobalVar.updateSeqNum(cedmsPdf, CYCLE); // update the sequence number
        } else if (isPreProcessed) { // first time
            int sequenceNum = 1;
            int pageNumber = 1; // the text stripper uses 1-based page numbers

            for (Object pageObject : pages) {
                PDPage page = (PDPage) pageObject;

                pdfStripper.setStartPage(pageNumber);
                pdfStripper.setEndPage(pageNumber);
                res = pdfStripper.getText(cedmsPdf);

                if (res.contains(GlobalVar.PRE_PROC_KEY_SYMBOL)) {
                    if (VERIFY_WITH_LISTING) { // verify the sequence number with the number on 80/80 listing
                        // Extract the identifiers the same way generateProcessedAndRejectPDFs
                        // does; the former substring(0, -MAX_CTRL_NUM_LEN) used a negative
                        // end index, which String.substring rejects.
                        String[] data = GlobalVar.getCtrlNumAndfullSSN(res);
                        String ctrlNum = data[0];
                        String ssn = data[1];
                        // NOTE(review): assumes the pair is present in the map - confirm.
                        String seqNum = LEGIT_LV_MAP_FOR_COLOR_LV_LOG.get(ssn).get(ctrlNum);
                        int seqNumberFrom8080Listing = Integer.parseInt(seqNum);
                        if (seqNumberFrom8080Listing != sequenceNum) {
                            JOptionPane.showMessageDialog(null,
                                    ssn + " " + ctrlNum + " seq num: " + seqNum + " do not match 80/80 listing");
                        }
                    }

                    PDPageContentStream stream = new PDPageContentStream(cedmsPdf, page, true, false);
                    try {
                        stream.beginText();
                        stream.setFont(PDType1Font.HELVETICA, GlobalVar.SEQ_NUM_FONT_SIZE);
                        stream.moveTextPositionByAmount(GlobalVar.SEQ_NUM_TEXT_X_POSITION,
                                GlobalVar.SEQ_NUM_TEXT_Y_POSITION);
                        // rotate the text by 90 degrees around the text position
                        stream.setTextRotation(Math.PI / 2, GlobalVar.SEQ_NUM_TEXT_X_POSITION,
                                GlobalVar.SEQ_NUM_TEXT_Y_POSITION);
                        stream.drawString(CYCLE + "/" + GlobalVar.globalCountGenerator5Digit(sequenceNum));
                        stream.endText();
                    } finally {
                        // Close the stream even when a drawing call throws.
                        stream.close();
                    }
                    sequenceNum++;
                }
                pageNumber++;
            }
        }

        // Literal replacement: replaceAll would treat "." as a regex wildcard.
        String fileName = processedPdfFileName.replace(".pdf", GlobalVar.NUMBERED_PDF);
        cedmsPdf.save(fileName);
        return fileName;
    } finally {
        cedmsPdf.close();
    }
}

From source file:Tools.PreProcessing.java

/**
 * Writes one text entry read from an XLSX file onto every selected page of a PDF
 * and saves the result next to the input with a "__pre_processed.pdf" suffix.
 *
 * <p>A page is selected when
 * {@code statusArray[GlobalVar.SELECT_BUTTON_INDEX][pageIndex]} is true. Entries are
 * consumed in order; processing stops with a dialog when the entries run out before
 * the selected pages do, and a dialog warns when entries are left over.
 *
 * @param pdfFileName  path of the PDF to annotate
 * @param xlsxFileName path of the spreadsheet passed to {@code readXlsxFile}
 * @param statusArray  per-page flags, indexed by button index and then 0-based page index
 * @throws IOException         if reading, writing or closing the document fails
 * @throws COSVisitorException if saving the document fails
 */
private void generatePDFFile(String pdfFileName, String xlsxFileName, Boolean[][] statusArray)
        throws IOException, COSVisitorException {

    List<String> textList = readXlsxFile(xlsxFileName);
    PDDocument pdf = PDDocument.load(pdfFileName);
    try {
        List pages = pdf.getDocumentCatalog().getAllPages();

        int pageNum = 0; // 0 based
        int index = 0; // next unused entry of textList

        for (Object pageObject : pages) {
            PDPage page = (PDPage) pageObject;
            PDPageContentStream stream = new PDPageContentStream(pdf, page, true, false);
            try {
                if (statusArray[GlobalVar.SELECT_BUTTON_INDEX][pageNum]) {
                    if (index >= textList.size()) {
                        JOptionPane.showMessageDialog(null,
                                "Preprocessing is inaccurate. XLSX list is shorter than the pdf file.");
                        break; // the finally block still closes this page's stream
                    }
                    pageWrite(stream, textList.get(index), index);
                    index++;
                }
            } finally {
                stream.close();
            }
            pageNum++;
        }

        // Entries are left over when fewer pages were selected than the list holds.
        // The former check (index > size) could never be true.
        if (index < textList.size()) {
            JOptionPane.showMessageDialog(null,
                    "Preproc might be inaccurate. XLSX list is longer than the pdf file.");
        }

        // NOTE(review): the suffix starts with two underscores, as in the original
        // "_" + "_pre_processed.pdf" - confirm whether that is intended.
        String outputFileName = pdfFileName.replace(".pdf", "__pre_processed.pdf");
        pdf.save(outputFileName);
    } finally {
        pdf.close();
    }
}

From source file:transeditor.TransEditorFXMLController.java

/**
 * Renders one page of a PDF at 300 dpi, shows it in {@code imageview} and
 * recomputes the zoom from the displayed width.
 *
 * <p>Failures are reported on {@code System.err}; the method never throws.
 *
 * @param file the PDF file to load
 * @param page index into the document's page list of the page to show
 */
private void importPDF(File file, Integer page) {
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(file);
        List<PDPage> list = pdf.getDocumentCatalog().getAllPages();
        //ToDO: Nicer selection window
        BufferedImage bfimage = list.get(page).convertToImage(BufferedImage.TYPE_INT_RGB, 300);
        Image image = SwingFXUtils.toFXImage(bfimage, null);
        imageview.setImage(image);
        width = imageview.getBoundsInParent().getWidth();
        getZoom(width);
    } catch (Exception e) {
        // Still best effort, but no longer silent: say what went wrong.
        System.err.println("Could not import page " + page + " of " + file + ": " + e);
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (Exception closeFailure) {
                System.err.println("Could not close " + file + ": " + closeFailure);
            }
        }
    }
}

From source file:uk.ac.ebi.biostudies.file.thumbnails.PDFThumbnail.java

License:Apache License

/**
 * Renders the first page of a PDF at 96 dpi, scales it to {@code THUMBNAIL_WIDTH}
 * while keeping the aspect ratio, and stores the result as a PNG.
 *
 * @param sourceFilePath path of the PDF to read
 * @param thumbnailFile  file the PNG thumbnail is written to
 * @throws IOException if the PDF cannot be read or the thumbnail cannot be written
 */
@Override
public void generateThumbnail(String sourceFilePath, File thumbnailFile) throws IOException {
    PDDocument document = null;
    try {
        document = PDDocument.load(sourceFilePath);
        PDPage firstPage = (PDPage) document.getDocumentCatalog().getAllPages().get(0);
        BufferedImage rendered = firstPage.convertToImage(BufferedImage.TYPE_INT_RGB, 96);

        // height / width, so that multiplying by the target width yields the target height
        float heightPerWidth = ((float) rendered.getHeight()) / rendered.getWidth();
        BufferedImageOp scaler = new ResampleOp(THUMBNAIL_WIDTH,
                Math.round(heightPerWidth * THUMBNAIL_WIDTH), ResampleOp.FILTER_LANCZOS);

        BufferedImage thumbnail = scaler.filter(rendered, null);
        ImageIO.write(thumbnail, "png", thumbnailFile);
    } finally {
        if (document != null) {
            document.close();
        }
    }
}

From source file:uk.ac.ebi.tools.PDFExtractor.java

License:Open Source License

/**
 * Extracts the images of a PDF file into the user's home directory and returns
 * where they were written.
 *
 * <p>Each image is written via {@code write2file} under
 * {@code <user.home>/<unique name>}. An I/O failure is logged and the images
 * written so far are returned; a missing file is reported on {@code System.err}.
 *
 * @param filePath path of the PDF file to read
 * @return map from the path passed to {@code write2file} to the image suffix
 *         reported by {@code getSuffix()}; empty if nothing was extracted.
 *         NOTE(review): write2file presumably appends the suffix to that path - confirm.
 * @throws java.io.IOException declared for callers; I/O failures are caught and
 *         logged inside this method
 */
public HashMap<String, String> getImages(String filePath) throws IOException {

    HashMap<String, String> imagePaths = new HashMap<>();
    if (!new File(filePath).exists()) {
        System.err.println("File not exists");
        return imagePaths;
    }

    try {
        PDDocument document = PDDocument.load(filePath);
        try {
            List<PDPage> list = document.getDocumentCatalog().getAllPages();
            String targetDir = System.getProperty("user.home") + "/";

            for (PDPage page : list) {
                PDResources pdResources = page.getResources();
                Map pageImages = pdResources.getImages();

                for (Object value : pageImages.values()) {
                    PDXObjectImage pdxObjectImage = (PDXObjectImage) value;
                    String targetPath = targetDir + PDFDoc.generateUniqueName();
                    String imageFormat = pdxObjectImage.getSuffix();

                    pdxObjectImage.write2file(targetPath);
                    // Record the path only once the image has actually been written.
                    imagePaths.put(targetPath, imageFormat);
                }
            }
        } finally {
            // Release the document even when extraction fails part-way.
            document.close();
        }
    } catch (IOException ex) {
        // Pass the exception itself so that the cause and stack trace are logged.
        PDFlogger.log(Level.SEVERE, "Error while extracting: Please check the input.", ex);
    }

    return imagePaths;
}