Example usage for org.apache.pdfbox.pdmodel PDDocument close

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument close.

Prototype

@Override
public void close() throws IOException

Source Link

Document

This will close the underlying COSDocument object.

Usage

From source file:de.uni_koeln.ub.drc.reader.PdfContentExtractor.java

License:Open Source License

/**
 * Loads a PDF file and extracts its page content.
 *
 * @param pdfName
 *            The full path to the PDF file to extract content from
 * @return The PageInfo object for the PDF, or {@code null} if an
 *         {@link IOException} was raised while loading, extracting or
 *         closing the document (the stack trace is printed in that case)
 */
public static PageInfo extractContentFromPdf(String pdfName) {
    try {
        location = pdfName;
        PDDocument document = PDDocument.load(new File(pdfName));
        try {
            PdfContentExtractor x = initExtractor(document);
            return x.toPageInfo();
        } finally {
            // Always release the document, even when extraction throws;
            // previously it stayed open on any failure after load().
            document.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}

From source file:de.uni_siegen.wineme.come_in.thumbnailer.thumbnailers.PDFBoxThumbnailer.java

License:Open Source License

/**
 * Creates a thumbnail of the first page of a PDF file.
 *
 * Any existing output file is deleted first. The first page is rendered via
 * {@code writeImageForPage}; if the rendered width already equals
 * {@code thumbWidth} the image is written directly, otherwise it is passed
 * through {@code ResizeImage} using {@code NO_RESIZE_ONLY_CROP}.
 *
 * @param input  the PDF file to read
 * @param output the file the thumbnail is written to
 * @throws IOException          if rendering or writing the image fails
 * @throws ThumbnailerException if the PDF cannot be loaded
 */
@Override
public void generateThumbnail(final File input, final File output) throws IOException, ThumbnailerException {

    FileUtils.deleteQuietly(output);

    PDDocument pdf = null;
    try {
        try {
            pdf = PDDocument.load(input);
        } catch (final IOException loadFailure) {
            throw new ThumbnailerException("Could not load PDF File", loadFailure);
        }

        final List<?> pageList = pdf.getDocumentCatalog().getAllPages();
        final PDPage firstPage = (PDPage) pageList.get(0);
        final BufferedImage rendered = this.writeImageForPage(pdf, firstPage, BufferedImage.TYPE_INT_RGB);

        if (rendered.getWidth() != this.thumbWidth) {
            final ResizeImage cropper = new ResizeImage(this.thumbWidth, this.thumbHeight);
            cropper.resizeMethod = ResizeImage.NO_RESIZE_ONLY_CROP;
            cropper.setInputImage(rendered);
            cropper.writeOutput(output);
        } else {
            ImageIO.write(rendered, PDFBoxThumbnailer.OUTPUT_FORMAT, output);
        }
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (final IOException ignored) {
                // Best-effort close: a failure here is deliberately not reported.
            }
        }
    }
}

From source file:de.uni_siegen.wineme.come_in.thumbnailer.thumbnailers.PDFBoxThumbnailer.java

License:Open Source License

/**
 * Creates one thumbnail per page of a PDF file.
 *
 * Each page is rendered via {@code writeImageForPage}. The target file for a
 * page is obtained from {@code ThumbnailNamer.getFile} using the zero-based
 * page index. Images whose width already equals {@code thumbWidth} are
 * written directly; all others go through {@code ResizeImage} using
 * {@code RESIZE_FIT_BOTH_DIMENSIONS}.
 *
 * @param input        the PDF file to read
 * @param outputFolder the folder handed to {@code ThumbnailNamer.getFile}
 * @throws IOException          if rendering or writing an image fails
 * @throws ThumbnailerException if the PDF cannot be loaded
 */
@SuppressWarnings("unchecked")
@Override
public void generateThumbnails(final File input, final File outputFolder)
        throws IOException, ThumbnailerException {
    PDDocument pdf = null;
    try {
        try {
            pdf = PDDocument.load(input);
        } catch (final IOException loadFailure) {
            throw new ThumbnailerException("Could not load PDF File", loadFailure);
        }

        final List<PDPage> pdfPages = pdf.getDocumentCatalog().getAllPages();
        int pageIndex = 0;
        for (final PDPage currentPage : pdfPages) {
            final BufferedImage rendered = this.writeImageForPage(pdf, currentPage,
                    BufferedImage.TYPE_INT_RGB);
            final File target = ThumbnailNamer.getFile(outputFolder, pageIndex);

            if (rendered.getWidth() != this.thumbWidth) {
                final ResizeImage scaler = new ResizeImage(this.thumbWidth, this.thumbHeight);
                scaler.resizeMethod = ResizeImage.RESIZE_FIT_BOTH_DIMENSIONS;
                scaler.setInputImage(rendered);
                scaler.writeOutput(target);
            } else {
                ImageIO.write(rendered, PDFBoxThumbnailer.OUTPUT_FORMAT, target);
            }

            pageIndex += 1;
        }
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (final IOException ignored) {
                // swallow exception on closing.
            }
        }
    }
}

From source file:de.uzk.hki.da.convert.PdfService.java

License:Open Source License

/**
 * Generates a new PDF which only contains certainPages of the original document.
 * Users can choose if they wish to reduce to a number of pages (beginning from the first page)
 * or to a certain set of pages. Both options can be used at the same time. 
 * By setting one of the parameters to either "" or null any of the options the according 
 * option will not be used./*from   w  ww  . j av a2  s  .  c om*/
 *
 * @param numberOfPagesText null or empty if unused or "n" (e.g. "2" for two pages from the beginning of the document).
 * @param certainPagesText white space separated list of numbers that mark pages which should be part of the target document.
 * 
 * @throws IOException 
 * @author Jens Peters
 * @author Sebastian Cuy
 * @author Daniel M. de Oliveira
 */
public void reduceToCertainPages(String numberOfPagesText, String certainPagesText) throws IOException {

    PDDocument srcPdf = null;
    PDDocument targetPdf = null;
    if (srcPdfFile == null)
        throw new IllegalStateException("srcFile not set");

    srcPdf = PDDocument.load(srcPdfFile);
    targetPdf = new PDDocument();

    @SuppressWarnings("rawtypes")
    List srcPages = srcPdf.getDocumentCatalog().getAllPages();

    int numberOfPages = 0;

    if (numberOfPagesText != null && !numberOfPagesText.isEmpty()) {
        numberOfPages = Integer.parseInt(numberOfPagesText);
        for (int i = 0; i < Math.min(numberOfPages, srcPages.size()); i++)
            targetPdf.addPage((PDPage) srcPages.get(i));
    }

    if (StringUtilities.isNotSet(numberOfPagesText) && StringUtilities.isNotSet(certainPagesText)) {
        for (int i = 0; i < srcPages.size(); i++)
            targetPdf.addPage((PDPage) srcPages.get(i));
    }

    if (certainPagesText != null && !certainPagesText.isEmpty()) {
        String[] certainPagesTexts = certainPagesText.split(" ");
        int[] certainPages = new int[certainPagesTexts.length];
        for (int i = 0; i < certainPagesTexts.length; i++) {
            certainPages[i] = Integer.parseInt(certainPagesTexts[i]);
        }
        Arrays.sort(certainPages);
        for (int i = 0; i < certainPages.length; i++) {
            if (certainPages[i] > numberOfPages && srcPages.size() > certainPages[i] - 1)
                targetPdf.addPage((PDPage) srcPages.get(certainPages[i] - 1));
        }
    }

    try {
        targetPdf.save(targetPdfFile);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create PDF!", e);
    } finally {
        targetPdf.close();
        srcPdf.close();
    }

}

From source file:de.uzk.hki.da.format.PdfService.java

License:Open Source License

/**
 * Generates a new PDF which only contains certainPages of the original document.
 * Users can choose if they wish to reduce to a number of pages (beginning from the first page)
 * or to a certain set of pages. Both options can be used at the same time. 
 * By setting one of the parameters to either "" or null any of the options the according 
 * option will not be used./*  www  . ja v a  2s .c om*/
 *
 * @param numberOfPagesText null or empty if unused or "n" (e.g. "2" for two pages from the beginning of the document).
 * @param certainPagesText white space separated list of numbers that mark pages which should be part of the target document.
 * 
 * @throws IOException 
 * @author Jens Peters
 * @author Sebastian Cuy
 * @author Daniel M. de Oliveira
 */
public void reduceToCertainPages(String numberOfPagesText, String certainPagesText) throws IOException {

    PDDocument srcPdf = null;
    PDDocument targetPdf = null;
    if (srcPdfFile == null)
        throw new IllegalStateException("srcFile not set");

    srcPdf = PDDocument.load(srcPdfFile);
    targetPdf = new PDDocument();

    @SuppressWarnings("rawtypes")
    List srcPages = srcPdf.getDocumentCatalog().getAllPages();

    int numberOfPages = 0;
    if (numberOfPagesText != null && !numberOfPagesText.isEmpty()) {
        numberOfPages = Integer.parseInt(numberOfPagesText);
        for (int i = 0; i < Math.min(numberOfPages, srcPages.size()); i++)
            targetPdf.addPage((PDPage) srcPages.get(i));
    }

    if (certainPagesText != null && !certainPagesText.isEmpty()) {
        String[] certainPagesTexts = certainPagesText.split(" ");
        int[] certainPages = new int[certainPagesTexts.length];
        for (int i = 0; i < certainPagesTexts.length; i++) {
            certainPages[i] = Integer.parseInt(certainPagesTexts[i]);
        }
        Arrays.sort(certainPages);
        for (int i = 0; i < certainPages.length; i++) {
            if (certainPages[i] > numberOfPages && srcPages.size() > certainPages[i] - 1)
                targetPdf.addPage((PDPage) srcPages.get(certainPages[i] - 1));
        }
    }

    try {
        targetPdf.save(targetPdfFile);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create PDF!", e);
    } finally {
        targetPdf.close();
    }

}

From source file:dev.ztgnrw.ExtractEmbeddedFiles.java

License:Apache License

/**
 * Extracts the files embedded in a PDF document.
 *
 * Embedded files are looked up in the document's embedded-files name tree
 * (its own names, or else the names of its kids) and in the file-attachment
 * annotations of every page. Each one is handed to the {@code extractFiles} /
 * {@code extractFile} helpers together with the PDF's parent directory as
 * path prefix.
 *
 * @param file path of the PDF file to read.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void extractEmbeddedFiles(String file) throws IOException {

    PDDocument document = null;
    try {
        File pdfFile = new File(file);
        // getParent() is null for a bare file name; fall back to an empty prefix
        // (i.e. the working directory) instead of producing the literal "null/".
        String parentDir = pdfFile.getParent();
        String filePath = parentDir == null ? "" : parentDir + System.getProperty("file.separator");
        document = PDDocument.load(pdfFile);
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
        PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
        if (efTree != null) {
            Map<String, PDComplexFileSpecification> names = efTree.getNames();
            if (names != null) {
                extractFiles(names, filePath);
            } else {
                List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
                // NOTE(review): a tree node may presumably have neither names nor
                // kids, and a kid may have no names; both are skipped defensively.
                if (kids != null) {
                    for (PDNameTreeNode<PDComplexFileSpecification> node : kids) {
                        names = node.getNames();
                        if (names != null) {
                            extractFiles(names, filePath);
                        }
                    }
                }
            }
        }

        // extract files from annotations
        for (PDPage page : document.getPages()) {
            for (PDAnnotation annotation : page.getAnnotations()) {
                if (annotation instanceof PDAnnotationFileAttachment) {
                    PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                    PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment
                            .getFile();
                    PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
                    extractFile(filePath, fileSpec.getFilename(), embeddedFile);
                }
            }
        }

    } finally {
        if (document != null) {
            document.close();
        }
    }

}

From source file:diagramextractor.DiagramExtractor.java

/**
 * Collects diagram pages from every PDF in a directory into one output PDF.
 *
 * For each file in the input directory, the text of every page is searched
 * for one of the diagram names derived from the command line options. Pages
 * that match (and are not a table of contents / table of figures) are
 * extracted, stamped with the report's file name and appended to the output
 * document, which is finally saved under a name produced by
 * {@code generateFilename}.
 *
 * @param args the command line arguments; {@code args[0]} is the input
 *             directory and {@code args[1]} the diagram name used in the
 *             "not found" message
 * @throws IOException if the input directory cannot be listed or a PDF cannot
 *                     be read or written
 * @throws COSVisitorException declared by the PDFBox merge/save calls
 */
public static void main(String[] args) throws IOException, COSVisitorException {

    if (args.length < 2) {
        showHelp();
        System.exit(-1);
    }

    List<Integer> diagramOptionsList = parseOptions(args);
    List<String> diagramNameList = getDiagramNames(diagramOptionsList);

    File inputDir = new File(args[0]);
    File[] reports = inputDir.listFiles();
    if (reports == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IOException("Cannot list files in input directory: " + inputDir);
    }
    String diagramName = args[1];

    PDDocument outputDocument = new PDDocument();
    // Extracted single-page documents stay open until the output has been saved
    // and are closed afterwards; previously they were never closed.
    List<PDDocument> extractedPages = new LinkedList<>();

    try {
        PDFMergerUtility merger = new PDFMergerUtility();
        merger.setDestinationFileName("output.pdf");

        for (File report : reports) {

            PDDocument doc = PDDocument.load(report);
            try {
                System.out.println("LOADED FILE: " + report.getName());

                int pageNumber = 0;

                System.out.println("NUMBER OF PAGES: " + doc.getNumberOfPages());

                // PDFTextStripper page numbers are 1-based, so start at 1.
                for (int i = 1; i <= doc.getNumberOfPages(); i++) {
                    PDFTextStripper stripper = new PDFTextStripper();
                    stripper.setStartPage(i);
                    stripper.setEndPage(i);

                    String contents = stripper.getText(doc);

                    boolean containsDiagram = false;

                    for (String diagram : diagramNameList) {
                        if (contents.contains(diagram)) {
                            containsDiagram = true;
                            break;
                        }
                    }

                    if (containsDiagram && !contents.contains("Table of Contents")
                            && !contents.contains("Table of Figures") && !contents.contains("Obsah")
                            && !contents.contains("Tabulka ?sel")) {
                        pageNumber = i;
                        System.out.println("Diagram found on page: " + pageNumber);

                        PageExtractor extractor = new PageExtractor(doc, pageNumber, pageNumber);
                        PDDocument extractedPage = extractor.extract();
                        extractedPages.add(extractedPage);

                        PDPage page = (PDPage) extractedPage.getDocumentCatalog().getAllPages().get(0);
                        PDPageContentStream contentStream = new PDPageContentStream(extractedPage, page, true,
                                true, true);
                        try {
                            // Stamp the originating report's file name onto the page.
                            contentStream.beginText();
                            contentStream.setFont(PDType1Font.HELVETICA_BOLD, 12);
                            contentStream.moveTextPositionByAmount(100, 50);
                            contentStream.drawString(report.getName());
                            contentStream.endText();
                        } finally {
                            contentStream.close();
                        }

                        merger.appendDocument(outputDocument, extractedPage);

                    }

                }

                if (pageNumber == 0) {
                    System.out.println(
                            "The diagram " + diagramName + " was not found in file " + report.getName());
                }
            } finally {
                // Close the report even when text extraction or page extraction fails.
                doc.close();
            }
        }

        merger.mergeDocuments();

        System.out.println();
        System.out.println("Diagrams have been merged.");

        String outputFileName = generateFilename(inputDir.getCanonicalPath(), "output.pdf");
        outputDocument.save(outputFileName);

        System.out.println("Output file saved as: " + outputFileName);
    } finally {
        try {
            outputDocument.close();
        } finally {
            for (PDDocument extracted : extractedPages) {
                extracted.close();
            }
        }
    }
}

From source file:dk.defxws.fedoragsearch.server.TransformerToText.java

License:Open Source License

/**
 * Closes the given document if it is not null, ignoring any
 * {@link IOException} raised by the close.
 *
 * @param pdDoc the document to close; may be null
 */
private void closePDDocument(PDDocument pdDoc) {
    if (pdDoc == null) {
        return;
    }
    try {
        pdDoc.close();
    } catch (IOException ignored) {
        // Best-effort close: a failure here is deliberately not reported.
    }
}

From source file:dk.dma.msinm.legacy.nm.ActiveTempPrelimNmPdfExtractor.java

License:Open Source License

/**
 * Main method for extracting active list of NtM's.
 *
 * The text of the PDF read from {@code inputStream} is scanned line by line;
 * every trimmed line matching {@code ACTIVE_NM_LINE} yields a
 * {@code SeriesIdentifier} of main type NM whose number is the first capture
 * group. The input stream is always closed afterwards.
 *
 * @param noticeIds the list of notices to update
 * @throws Exception if the PDF cannot be read (an IOException is logged and
 *                   rethrown) or a matched number cannot be parsed
 */
public void extractActiveNoticeIds(List<SeriesIdentifier> noticeIds) throws Exception {
    PDDocument document = null;
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        document = PDDocument.load(inputStream);
        String text = stripper.getText(document);

        // Read the text line by line
        Pattern p = Pattern.compile(ACTIVE_NM_LINE);
        BufferedReader br = new BufferedReader(new StringReader(text));
        String line;
        while ((line = br.readLine()) != null) {
            Matcher m = p.matcher(line.trim());
            if (m.matches()) {
                SeriesIdentifier id = new SeriesIdentifier();
                id.setMainType(SeriesIdType.NM);
                id.setYear(year);
                id.setNumber(Integer.valueOf(m.group(1)));
                id.setAuthority(organization);
                noticeIds.add(id);
            }
        }

    } catch (IOException e) {
        log.error("Error extracting notices from file " + fileName, e);
        throw e;
    } finally {
        // Nested finally: the input stream is closed even if closing the document throws.
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            try {
                inputStream.close();
            } catch (Exception ignored) {
                // Best-effort close of the input stream.
            }
        }
    }
}

From source file:dk.dma.msinm.legacy.nm.NmPdfExtractor.java

License:Open Source License

/**
 * Main method for extracting the NtM's.
 *
 * The text of the PDF read from {@code inputStream} is extracted starting at
 * page 3, split into notice text blocks via {@code extractNoticeTextBlocks}
 * and passed on to the two-argument {@code extractNotices}. The input stream
 * is always closed afterwards.
 *
 * @param notices the list of notices to update
 * @throws Exception if the PDF cannot be read (an IOException is logged and
 *                   rethrown) or the notice extraction fails
 */
public void extractNotices(List<Message> notices) throws Exception {
    PDDocument document = null;
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        document = PDDocument.load(inputStream);
        stripper.setStartPage(3);
        String text = stripper.getText(document);

        List<String> textBlocks = extractNoticeTextBlocks(text);

        extractNotices(notices, textBlocks);

    } catch (IOException e) {
        log.error("Error extracting notivces from file " + fileName, e);
        throw e;
    } finally {
        // Nested finally: the input stream is closed even if closing the document throws.
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            try {
                inputStream.close();
            } catch (Exception ignored) {
                // Best-effort close of the input stream.
            }
        }
    }
}