Example usage for org.apache.pdfbox.pdmodel PDDocument close

List of usage examples for org.apache.pdfbox.pdmodel PDDocument close

Introduction

On this page you can find example usages of the close method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

This will close the underlying COSDocument object.

Usage

From source file:com.qwazr.library.pdfbox.PdfBoxParser.java

License:Apache License

/**
 * Extracts the metadata and the text content of a PDF using PDFBox, then
 * closes the document.
 *
 * @param pdf           the document to read; it is closed before this method
 *                      returns, whether or not extraction succeeded
 * @param resultBuilder its {@code metas()} are passed to the metadata
 *                      extraction, and it backs the text stripper
 * @throws Exception if metadata extraction, text stripping or closing fails
 */
private void parseContent(final PDDocument pdf, final ParserResultBuilder resultBuilder) throws Exception {
    try {
        extractMetaData(pdf, resultBuilder.metas());
        new Stripper(resultBuilder).getText(pdf);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:com.santaanna.friendlyreader.pdfstod.pdfstod3.ReplaceStringStreamEngine.java

License:Apache License

/**
 * Saves the document to the given file name and then closes it.
 *
 * The document is closed even when saving fails, so a failed save does not
 * leak the document. Failures are reported through {@code SkrivUt} and are
 * not propagated to the caller.
 *
 * @param filnamn name of the file to save to
 * @param utfil   the document to save and close; nothing happens when it is null
 */
private void saveAndClose(String filnamn, PDDocument utfil) {
    if (utfil == null) {
        return;
    }
    try {
        SkrivUt(3, "saveAndClose");
        utfil.save(filnamn);
    } catch (java.io.IOException jioio) {
        SkrivUt(7, "IO Fel i saveAndClose.");
    } catch (COSVisitorException cosv) {
        SkrivUt(7, "CosVis Fel i saveAndClose.");
    } finally {
        // Close in finally so the document is released even if save() failed.
        try {
            utfil.close();
        } catch (java.io.IOException closeFailure) {
            SkrivUt(7, "IO Fel i saveAndClose.");
        }
    }
}

From source file:com.sinefine.util.pdf.Pdfs.java

License:Apache License

/**
 * Closes the given {@linkplain PDDocument} without letting an
 * {@link IOException} escape. A failure to close is only reported at debug
 * level.
 *
 * @param pdDocument the document to close; a null value is ignored.
 */
private static void closeQuietly(final PDDocument pdDocument) {
    if (pdDocument == null) {
        return;
    }
    try {
        pdDocument.close();
    } catch (IOException ioe) {
        // Deliberately not rethrown: the caller asked for a quiet close.
        if (!LOGGER.isDebugEnabled()) {
            return;
        }
        LOGGER.debug("An exception occured whilst attempting "
                + "to close an instance of the PDDocument class.  "
                + "Although this exception will not cause the "
                + "application to fail, it should be investigated.", ioe);
    }
}

From source file:com.stimulus.archiva.extraction.PDFExtractor.java

License:Open Source License

/**
 * Extracts the text of a PDF into a temporary UTF-8 file and returns a
 * reader over that file.
 *
 * The temporary file is registered with {@code indexInfo.addDeleteFile} and
 * the returned reader with {@code indexInfo.addReader}. The parsed document
 * and the file writer are always closed before the file is read back.
 *
 * @param is        stream holding the PDF data; it is not closed here
 * @param charset   not used by this implementation
 * @param indexInfo receives the temporary file and the returned reader
 * @return a reader over the extracted text
 * @throws ExtractionException if parsing, decryption, text extraction or
 *                             re-opening the extracted text fails
 */
public Reader getText(InputStream is, Charset charset, IndexInfo indexInfo) throws ExtractionException {
    logger.debug("extracting pdf file");
    File file = null;
    PDDocument document = null;
    Writer output = null;
    try {
        PDFParser parser = new PDFParser(is);
        parser.parse();
        document = parser.getPDDocument();
        if (document.isEncrypted()) {
            DocumentEncryption decryptor = new DocumentEncryption(document);
            if (logger.isDebugEnabled()) {
                logger.debug("pdf document appears to be encrypted (will attempt decryption)");
            }
            // Try the empty password.
            decryptor.decryptDocument("");
        }
        file = File.createTempFile("extract_pdf", ".tmp");
        indexInfo.addDeleteFile(file);
        output = new OutputStreamWriter(new FileOutputStream(file), "UTF-8");
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.writeText(document, output);
    } catch (Throwable e) {
        throw new ExtractionException("failed to extract pdf (probable password protected document)", e, logger,
                ChainedException.Level.DEBUG);
    } finally {
        // Close each resource on its own so a failure closing the document
        // cannot prevent the writer from being closed (and flushed).
        if (document != null) {
            try {
                document.close();
            } catch (IOException io) {
                logger.debug("failed to close pdf document: " + io.getMessage());
            }
        }
        if (output != null) {
            try {
                output.close();
            } catch (IOException io) {
                logger.debug("failed to close extracted pdf text file: " + io.getMessage());
            }
        }
    }
    try {
        logger.debug("returning extracted PDF data");
        // The file was written as UTF-8, so it must be decoded as UTF-8 too;
        // FileReader would use the platform default charset instead.
        Reader outReader = new java.io.InputStreamReader(new java.io.FileInputStream(file), "UTF-8");
        indexInfo.addReader(outReader);
        return outReader;
    } catch (Exception ex) {
        throw new ExtractionException("failed to read extracted pdf text", ex, logger,
                ChainedException.Level.DEBUG);
    }
}

From source file:com.sustainalytics.crawlerfilter.PDFTitleGeneration.java

License:Apache License

/**
 * Extracts the creation date of a PDF file, falling back to the custom
 * metadata entry {@code customdate} (format {@code MM/dd/yyyy}) when the
 * document has no creation date.
 *
 * Encrypted documents are decrypted with the empty password first; if that
 * fails, the metadata is still read as far as possible. The document is
 * closed whenever it was loaded, including after a failure.
 *
 * @param file is a File object
 * @return the date as {@code year-month-day} (no zero padding), or an empty
 *         String when the file cannot be read or holds no usable date
 */
public static String extractDate(File file) {
    PDDocument document = null;
    String creationDateMetaData = "";
    try {
        document = PDDocument.load(file.toString());

        if (document.isEncrypted()) {
            logger.info("File " + file.getName() + "is encrypted. Trying to decrypt...");
            try {
                document.decrypt("");
                document.setAllSecurityToBeRemoved(true);
                logger.info("File " + file.getName() + "successfully decrypted!");
            } catch (CryptographyException e) {
                logger.info("Error decrypting file " + file.getName());
            }
        }

        PDDocumentInformation info = document.getDocumentInformation();

        // Prefer the creation date; otherwise try the custom date entry.
        Calendar calendar = info.getCreationDate();
        if (calendar == null) {
            String customValue = info.getCustomMetadataValue("customdate");
            // A missing entry used to reach parse(null), and an unparseable
            // one used to reach setTime(null); both threw an NPE.
            if (customValue != null) {
                try {
                    Date customDate = new SimpleDateFormat("MM/dd/yyyy").parse(customValue);
                    calendar = Calendar.getInstance();
                    calendar.setTime(customDate);
                } catch (ParseException e) {
                    logger.info("Error parsing date from custom date");
                }
            }
        }

        if (calendar != null) {
            // Calendar.MONTH is zero-based, hence the + 1.
            creationDateMetaData = calendar.get(Calendar.YEAR) + "-" + (calendar.get(Calendar.MONTH) + 1) + "-"
                    + calendar.get(Calendar.DATE);
        }
    } catch (IOException e) {
        logger.info("Error processing file " + file.getName());
    } finally {
        // Close whenever the document was actually loaded, so it is released
        // after decryption or read failures as well.
        if (document != null) {
            try {
                document.close();
                logger.info("File " + file.getName() + " is closed successfully!");
            } catch (IOException e) {
                logger.info("Error closing file " + file.getName());
            }
        }
    }
    return creationDateMetaData;
}

From source file:com.testautomationguru.utility.PDFUtil.java

License:Apache License

/**
* Get the page count of the document.
*
* The document is closed before returning, also when reading the page
* count fails.
*
* @param file Absolute file path
* @return int No of pages in the document.
* @throws java.io.IOException when the file cannot be loaded or closed.
*/
public int getPageCount(String file) throws IOException {
    logger.info("file :" + file);
    PDDocument doc = PDDocument.load(new File(file));
    try {
        int pageCount = doc.getNumberOfPages();
        logger.info("pageCount :" + pageCount);
        return pageCount;
    } finally {
        // Release the document even if something above threw.
        doc.close();
    }
}

From source file:com.testautomationguru.utility.PDFUtil.java

License:Apache License

/**
* This method returns the text content of the document.
*
* Uses {@code this.stripper} when one is set, otherwise a new
* {@code PDFTextStripper}. The page range applied to the stripper is
* {@code this.startPage}/{@code this.endPage} as they stand after
* {@code updateStartAndEndPages} has been called. When
* {@code this.bTrimWhiteSpace} is set, runs of whitespace are collapsed to
* single spaces and the text is trimmed. The document is always closed.
*
* @param file path of the PDF file
* @param startPage first page requested by the caller
* @param endPage last page requested by the caller
* @return the extracted text
* @throws IOException when the file cannot be loaded, read or closed
*/
private String getPDFText(String file, int startPage, int endPage) throws IOException {

    logger.info("file : " + file);
    logger.info("startPage : " + startPage);
    logger.info("endPage : " + endPage);

    PDDocument doc = PDDocument.load(new File(file));
    try {
        // Only build a default stripper when no custom one is configured.
        PDFTextStripper localStripper = this.stripper;
        if (null == localStripper) {
            localStripper = new PDFTextStripper();
        }

        this.updateStartAndEndPages(file, startPage, endPage);
        localStripper.setStartPage(this.startPage);
        localStripper.setEndPage(this.endPage);

        String txt = localStripper.getText(doc);
        logger.info("PDF Text before trimming : " + txt);
        if (this.bTrimWhiteSpace) {
            txt = txt.trim().replaceAll("\\s+", " ").trim();
            logger.info("PDF Text after  trimming : " + txt);
        }
        return txt;
    } finally {
        // Release the document even if text extraction failed.
        doc.close();
    }
}

From source file:com.testautomationguru.utility.PDFUtil.java

License:Apache License

/**
* This method saves each page of the pdf as a PNG image.
*
* Pages {@code this.startPage}..{@code this.endPage} (as they stand after
* {@code updateStartAndEndPages}) are rendered at 300 DPI. Any exception is
* printed and swallowed; the names collected so far are still returned. The
* document is always closed.
*
* @param file path of the PDF file
* @param startPage first page requested by the caller
* @param endPage last page requested by the caller
* @return names of the image files that were written
* @throws IOException declared by the signature; failures inside the method
*         are caught and printed
*/
private List<String> saveAsImage(String file, int startPage, int endPage) throws IOException {

    logger.info("file : " + file);
    logger.info("startPage : " + startPage);
    logger.info("endPage : " + endPage);

    List<String> imgNames = new ArrayList<String>();

    PDDocument document = null;
    try {
        File sourceFile = new File(file);
        this.createImageDestinationDirectory(file);
        this.updateStartAndEndPages(file, startPage, endPage);

        String fileName = sourceFile.getName().replace(".pdf", "");

        document = PDDocument.load(sourceFile);
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        for (int iPage = this.startPage - 1; iPage < this.endPage; iPage++) {
            logger.info("Page No : " + (iPage + 1));
            // NOTE(review): no separator is inserted between the destination
            // path and the file name here - presumably imageDestinationPath
            // ends with one; confirm.
            String fname = this.imageDestinationPath + fileName + "_" + (iPage + 1) + ".png";
            BufferedImage image = pdfRenderer.renderImageWithDPI(iPage, 300, ImageType.RGB);
            ImageIOUtil.writeImage(image, fname, 300);
            imgNames.add(fname);
            logger.info("PDf Page saved as image : " + fname);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close here so the document is released when rendering or writing
        // fails; a close failure is reported the same way as other failures.
        if (document != null) {
            try {
                document.close();
            } catch (Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }
    return imgNames;
}

From source file:com.testautomationguru.utility.PDFUtil.java

License:Apache License

/**
* Renders the given page range of both documents at 300 DPI and compares the
* pages image by image.
*
* Comparison stops at the first differing page unless
* {@code this.bCompareAllPages} is set. If the comparison cannot be completed
* because of an exception, the exception is printed and {@code false} is
* returned, so an aborted comparison is never reported as a match. Both
* documents are closed when they were loaded.
*
* @param file1 path of the first PDF file
* @param file2 path of the second PDF file
* @param startPage first page to compare (1-based)
* @param endPage last page to compare (inclusive)
* @return true only when every compared page matched
* @throws IOException when closing one of the documents fails
*/
private boolean convertToImageAndCompare(String file1, String file2, int startPage, int endPage)
        throws IOException {

    boolean result = true;

    PDDocument doc1 = null;
    PDDocument doc2 = null;

    try {

        doc1 = PDDocument.load(new File(file1));
        doc2 = PDDocument.load(new File(file2));

        PDFRenderer pdfRenderer1 = new PDFRenderer(doc1);
        PDFRenderer pdfRenderer2 = new PDFRenderer(doc2);

        for (int iPage = startPage - 1; iPage < endPage; iPage++) {
            String fileName = new File(file1).getName().replace(".pdf", "_") + (iPage + 1);
            fileName = this.getImageDestinationPath() + "/" + fileName + "_diff.png";

            logger.info("Comparing Page No : " + (iPage + 1));
            BufferedImage image1 = pdfRenderer1.renderImageWithDPI(iPage, 300, ImageType.RGB);
            BufferedImage image2 = pdfRenderer2.renderImageWithDPI(iPage, 300, ImageType.RGB);
            result = ImageUtil.compareAndHighlight(image1, image2, fileName, this.bHighlightPdfDifference,
                    this.imgColor.getRGB()) && result;
            if (!this.bCompareAllPages && !result) {
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        // The comparison did not complete, so it must not count as a match.
        result = false;
    } finally {
        // Either document may be null if loading failed; the nested finally
        // makes sure doc2 is closed even if closing doc1 throws.
        try {
            if (doc1 != null) {
                doc1.close();
            }
        } finally {
            if (doc2 != null) {
                doc2.close();
            }
        }
    }
    return result;
}

From source file:com.testautomationguru.utility.PDFUtil.java

License:Apache License

/**
* This method extracts all the embedded images of the pdf document.
*
* For pages {@code this.startPage}..{@code this.endPage} (as they stand after
* {@code updateStartAndEndPages}), every image XObject found in the page
* resources is written as a PNG file. Any exception is printed and
* swallowed; the names collected so far are still returned. The document is
* always closed.
*
* @param file path of the PDF file
* @param startPage first page requested by the caller
* @param endPage last page requested by the caller
* @return names of the image files that were written
*/
private List<String> extractimages(String file, int startPage, int endPage) {

    logger.info("file : " + file);
    logger.info("startPage : " + startPage);
    logger.info("endPage : " + endPage);

    List<String> imgNames = new ArrayList<String>();
    boolean bImageFound = false;
    PDDocument document = null;
    try {

        this.createImageDestinationDirectory(file);
        String fileName = this.getFileName(file).replace(".pdf", "_resource");

        document = PDDocument.load(new File(file));
        PDPageTree list = document.getPages();

        this.updateStartAndEndPages(file, startPage, endPage);

        int totalImages = 1;
        for (int iPage = this.startPage - 1; iPage < this.endPage; iPage++) {
            logger.info("Page No : " + (iPage + 1));
            PDResources pdResources = list.get(iPage).getResources();
            for (COSName c : pdResources.getXObjectNames()) {
                PDXObject o = pdResources.getXObject(c);
                if (o instanceof org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) {
                    bImageFound = true;
                    String fname = this.imageDestinationPath + "/" + fileName + "_" + totalImages + ".png";
                    ImageIO.write(((org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) o).getImage(),
                            "png", new File(fname));
                    imgNames.add(fname);
                    totalImages++;
                }
            }
        }
        if (bImageFound) {
            logger.info("Images are saved @ " + this.imageDestinationPath);
        } else {
            logger.info("No images were found in the PDF");
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close here so the document is released when extraction fails; a
        // close failure is reported the same way as other failures.
        if (document != null) {
            try {
                document.close();
            } catch (Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }
    return imgNames;
}