Example usage for org.apache.pdfbox.pdmodel PDDocument close

List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.close()

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.close().

Prototype

@Override
public void close() throws IOException 

Source Link

Document

This will close the underlying COSDocument object.

Usage

From source file:org.quelea.data.pdf.PDFPresentation.java

License:Open Source License

/**
 * Make the slides that go in this PDF, this is what takes time and should
 * only be done once.
 * <p>
 * The PDF is opened only for the duration of this call; it is closed before
 * returning, including when creating a slide fails.
 *
 * @return all the slides, one per page, numbered from 1.
 * @throws IOException if the PDF cannot be loaded or closed.
 */
private PdfSlide[] makeSlides() throws IOException {
    File pdf = new File(file);
    // try-with-resources guarantees the document is released even if a
    // PdfSlide constructor throws part-way through the loop.
    try (PDDocument document = PDDocument.load(pdf.getAbsoluteFile())) {
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int totalPages = document.getNumberOfPages();
        PdfSlide[] slides = new PdfSlide[totalPages];
        for (int i = 0; i < totalPages; i++) {
            // NOTE(review): the renderer is only usable while the document is
            // open; presumably PdfSlide renders eagerly in its constructor - confirm.
            slides[i] = new PdfSlide(i + 1, pdfRenderer);
        }
        return slides;
    }
}

From source file:org.quelea.services.importexport.SurvivorSongbookParser.java

License:Open Source License

/**
 * Get all the songs in the PDF document.
 * <p>
 * Pages are read in order. Blank pages are skipped. A page is treated as part
 * of a multi-page song when it contains "(1 of" or when the following page
 * contains "(2 of"; in that case its text is accumulated and processed
 * together with the following page(s).
 *
 * @param location the PDF file to read.
 * @param statusPanel not used by this implementation.
 * @return a list of all the songs; never null, possibly empty.
 * @throws IOException if something went wrong.
 */
@Override
public List<SongDisplayable> getSongs(File location, StatusPanel statusPanel) throws IOException {
    List<SongDisplayable> pdfSongs = new ArrayList<>();
    // try-with-resources closes the document even when text extraction throws.
    try (PDDocument document = PDDocument.load(location)) {
        PDFTextStripper stripper = new PDFTextStripper();
        List<String> songParts = new ArrayList<>();
        for (int i = 0; i < document.getNumberOfPages(); i++) {
            String pageText = getPageText(document, stripper, i);
            if (pageText.trim().isEmpty()) {
                continue;
            }
            songParts.add(pageText);
            boolean twoPart = pageText.contains("(1 of");
            if (i < document.getNumberOfPages() - 1) { //This section in case the original (1 of x) is missed out
                String nextPageText = getPageText(document, stripper, i + 1);
                if (nextPageText.contains("(2 of")) {
                    twoPart = true;
                }
            }
            if (!twoPart) {
                SongDisplayable song = processSong(songParts.toArray(new String[songParts.size()]));
                if (song != null) {
                    pdfSongs.add(song);
                }
                songParts.clear();
            }
        }
    }
    return pdfSongs;
}

From source file:org.sakaiproject.search.component.adapter.contenthosting.PDFContentDigester.java

License:Educational Community License

/**
 * Extracts the text of a PDF content resource for indexing.
 * <p>
 * The resource stream is parsed with PDFParser, the text is stripped using
 * "\n" as line separator and then passed through
 * SearchUtils.appendCleanString. Both the parsed document and the resource
 * stream are closed before returning; failures while closing are only logged
 * at debug level.
 *
 * @param contentResource the resource to digest; must not be null.
 * @return the cleaned text, or null when the parser yields no document.
 * @throws RuntimeException if contentResource is null, or wrapping the
 *         ServerOverloadException / IOException raised while reading.
 */
public String getContent(ContentResource contentResource) {
    if (contentResource == null) {
        throw new RuntimeException("Null contentResource passed to getContent");
    }

    InputStream resourceStream = null;
    PDDocument pdf = null;
    try {
        resourceStream = contentResource.streamContent();
        PDFParser pdfParser = new PDFParser(new BufferedInputStream(resourceStream));
        pdfParser.parse();
        pdf = pdfParser.getPDDocument();
        if (pdf == null) {
            return null;
        }
        PDFTextStripper textStripper = new PDFTextStripper();
        textStripper.setLineSeparator("\n");
        CharArrayWriter buffer = new CharArrayWriter();
        textStripper.writeText(pdf, buffer);
        return SearchUtils.appendCleanString(buffer.toCharArray(), null).toString();
    } catch (ServerOverloadException overload) {
        String detail = overload.getMessage();
        // Fall back to toString() so the wrapped message is never "null".
        detail = (detail != null) ? detail : overload.toString();
        throw new RuntimeException(
                "Failed to get content for indexing: cause: ServerOverloadException: " + detail, overload);
    } catch (IOException ioFailure) {
        String detail = ioFailure.getMessage();
        detail = (detail != null) ? detail : ioFailure.toString();
        throw new RuntimeException("Failed to get content for indexing: cause: IOException:  " + detail,
                ioFailure);
    } finally {
        // Close the document first, then the stream it was parsed from.
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException closeFailure) {
                log.debug(closeFailure);
            }
        }
        if (resourceStream != null) {
            try {
                resourceStream.close();
            } catch (IOException closeFailure) {
                log.debug(closeFailure);
            }
        }
    }
}

From source file:org.seasar.robot.extractor.impl.PdfExtractor.java

License:Apache License

/**
 * Extracts the text and metadata of a PDF read from the given stream.
 * <p>
 * All work happens while holding {@code pdfBoxLockObj}. If the document is
 * encrypted and params are supplied, a password is taken from
 * {@code ExtractData.PDF_PASSWORD} or looked up via {@code getPassword}; when
 * one is found the document is opened with it and extraction permission is
 * checked. Text stripping runs on a daemon thread that is joined with
 * {@code timeout}; if it has not finished by then it is interrupted
 * repeatedly (up to 100 times, 50 ms apart) and the call fails.
 * The document is always closed before returning.
 *
 * @param in the PDF input stream; must not be null.
 * @param params optional extraction parameters (password, URL, resource name); may be null.
 * @return the extracted text together with the document metadata.
 * @throws RobotSystemException if in is null.
 * @throws ExtractException wrapping any failure, including timeout and interruption.
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }
    synchronized (pdfBoxLockObj) {
        PDDocument document = null;
        // Remembered so the interrupt flag can be restored after the document is closed.
        boolean interrupted = false;
        try {
            document = PDDocument.load(in, null, force);
            if (document.isEncrypted() && params != null) {
                String password = params.get(ExtractData.PDF_PASSWORD);
                if (password == null) {
                    password = getPassword(params.get(ExtractData.URL),
                            params.get(TikaMetadataKeys.RESOURCE_NAME_KEY));
                }
                if (password != null) {
                    final StandardDecryptionMaterial sdm = new StandardDecryptionMaterial(password);
                    document.openProtection(sdm);
                    final AccessPermission ap = document.getCurrentAccessPermission();

                    if (!ap.canExtractContent()) {
                        throw new IOException("You do not have permission to extract text.");
                    }
                }
            }

            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            final Writer output = new OutputStreamWriter(baos, encoding);
            final PDFTextStripper stripper = new PDFTextStripper(encoding);
            stripper.setForceParsing(force);
            final AtomicBoolean done = new AtomicBoolean(false);
            final PDDocument doc = document;
            final Set<Exception> exceptionSet = new HashSet<>();
            Thread task = new Thread(new Runnable() {
                @Override
                public void run() {
                    try {
                        stripper.writeText(doc, output);
                    } catch (Exception e) {
                        exceptionSet.add(e);
                    } finally {
                        done.set(true);
                    }
                }
            });
            task.setDaemon(true);
            task.start();
            task.join(timeout);
            if (!done.get()) {
                // Keep nudging the worker so it can stop; give up after ~5 seconds.
                for (int i = 0; i < 100 && !done.get(); i++) {
                    task.interrupt();
                    Thread.sleep(50);
                }
                // NOTE(review): Thread.join(long) takes milliseconds, yet this message
                // says "sec." - confirm the intended unit of timeout.
                throw new ExtractException("PDFBox process cannot finish in " + timeout + " sec.");
            } else if (!exceptionSet.isEmpty()) {
                throw exceptionSet.iterator().next();
            }
            output.flush();
            final ExtractData extractData = new ExtractData(baos.toString(encoding));
            extractMetadata(document, extractData);
            return extractData;
        } catch (final InterruptedException e) {
            // join()/sleep() cleared the flag; restore it in finally so callers can observe it.
            interrupted = true;
            throw new ExtractException(e);
        } catch (final Exception e) {
            throw new ExtractException(e);
        } finally {
            if (document != null) {
                try {
                    document.close();
                } catch (final IOException e) {
                    // NOP: closing is best effort, the extraction result is already decided.
                }
            }
            if (interrupted) {
                // Restored only after close() so the pending interrupt cannot disturb the close.
                Thread.currentThread().interrupt();
            }
        }
    }
}

From source file:org.sencko.nalb.parser.Game.java

License:Open Source License

/**
 * Builds a game from a PDF read from the given stream.
 * <p>
 * The PDF text is stripped into {@code writer} (which is reset first), the
 * {@code reader} field is pointed at the writer's buffer, and the extract
 * steps are run in order. On success {@code rowCache} and
 * {@code currentMatcher} are cleared; on failure their current values are
 * printed to standard output before the exception is rethrown.
 * The PDF document is closed in every case.
 *
 * @param stream the PDF input; it is not closed by this constructor.
 * @param writer reusable buffer that receives the stripped text.
 * @param stripper the text stripper used to extract the text.
 * @throws Exception if loading, stripping, extracting or closing fails.
 */
public Game(InputStream stream, OptimizedCharArrayWriter writer, PDFTextStripper stripper) throws Exception {
    PDDocument document = PDDocument.load(stream);
    try {
        writer.reset();

        stripper.writeText(document, writer);

        reader = new BufferedReader(new CharArrayReader(writer.getBuffer(), 0, writer.size()));
        try {
            extractTeamNames();
            extractGameNumber();
            extractMatchTime();
            extract5minPeriodScore(scoring5minHome, homeTeam);
            extract5minPeriodScore(scoring5minAway, awayTeam);
            extractPlayerStats(homeTeam);
            extractPlayerStats(awayTeam);

            rowCache = null;
            currentMatcher = null;
        } catch (Exception ex) {
            // Dump the parsing state that was current when the failure happened.
            System.out.println(rowCache);
            System.out.println(currentMatcher);
            throw ex;
        }
    } finally {
        // Previously the document was only closed on the success path.
        document.close();
    }
}

From source file:org.silverpeas.core.index.indexing.parser.pdfParser.PdfParser2.java

License:Open Source License

/**
 * Returns a reader over the text extracted from the PDF file at the given path.
 * <p>
 * Any failure while opening or reading the file is traced and results in a
 * null return value. The document and the file stream are always closed.
 *
 * @param path the path of the PDF file.
 * @param encoding not used by this implementation.
 * @return a reader over the extracted text, or null if extraction failed.
 */
@Override
public Reader getReader(String path, String encoding) {
    Reader reader = null;
    InputStream file = null;
    PDDocument document = null;
    try {
        file = new FileInputStream(path);
        document = PDDocument.load(file);
        PDFTextStripper extractor = new PDFTextStripper();
        String text = extractor.getText(document);
        reader = new StringReader(text);
    } catch (Exception e) {
        SilverTrace.error("indexing", "PdfParser2", "indexing.MSG_IO_ERROR_WHILE_READING", path, e);
    } finally {
        try {
            if (document != null) {
                document.close();
            }
        } catch (IOException ioe) {
            SilverTrace.error("indexing", "PdfParser2.getReader()", "indexing.MSG_IO_ERROR_WHILE_CLOSING", path,
                    ioe);
        } finally {
            // Runs even when document.close() throws, so the file stream never leaks.
            IOUtils.closeQuietly(file);
        }
    }
    return reader;
}

From source file:org.silverpeas.search.indexEngine.parser.pdfParser.PdfParser2.java

License:Open Source License

/**
 * Returns a reader over the text extracted from the PDF file at the given path.
 * <p>
 * Any failure while opening or reading the file is traced and results in a
 * null return value. The document and the file stream are always closed.
 *
 * @param path the path of the PDF file.
 * @param encoding not used by this implementation.
 * @return a reader over the extracted text, or null if extraction failed.
 */
@Override
public Reader getReader(String path, String encoding) {
    Reader reader = null;
    InputStream file = null;
    PDDocument document = null;
    try {
        file = new FileInputStream(path);
        document = PDDocument.load(file);
        PDFTextStripper extractor = new PDFTextStripper();
        String text = extractor.getText(document);
        reader = new StringReader(text);
    } catch (Exception e) {
        SilverTrace.error("indexEngine", "PdfParser2", "indexEngine.MSG_IO_ERROR_WHILE_READING", path, e);
    } finally {
        try {
            if (document != null) {
                document.close();
            }
        } catch (IOException ioe) {
            SilverTrace.error("indexEngine", "PdfParser2.getReader()", "indexEngine.MSG_IO_ERROR_WHILE_CLOSING",
                    path, ioe);
        } finally {
            // Runs even when document.close() throws, so the file stream never leaks.
            IOUtils.closeQuietly(file);
        }
    }
    return reader;
}

From source file:org.socialbiz.cog.util.PDFUtil.java

License:Apache License

/**
 * Manual test entry point: writes a one-page A4 PDF containing two lines of
 * Helvetica 12pt text to the path given as the first argument.
 * <p>
 * Failures are reported by printing the stack trace; the content stream and
 * the document are closed whether or not writing succeeds.
 *
 * @param args args[0] is the output file path.
 */
public static void main(String[] args) { //For test
    if (args.length < 1) {
        // Previously this surfaced as an ArrayIndexOutOfBoundsException stack trace.
        System.err.println("Usage: PDFUtil <output-pdf-path>");
        return;
    }
    try {
        String path = args[0];
        PDDocument document = new PDDocument();
        try {
            PDPage page = new PDPage();
            page.setMediaBox(PDPage.PAGE_SIZE_A4);
            document.addPage(page);
            PDFont font = PDType1Font.HELVETICA;

            PDPageContentStream contentStream = new PDPageContentStream(document, page, false, false);
            try {
                contentStream.beginText();
                contentStream.setFont(font, 12);
                contentStream.moveTextPositionByAmount(100, 800);
                String x = "hello world";
                contentStream.drawString(x);
                // Move relative to the previous text position for the second line.
                contentStream.moveTextPositionByAmount(-90, -15);
                contentStream.setFont(font, 12);
                contentStream.drawString("Hello World3");
                contentStream.endText();
            } finally {
                contentStream.close();
            }
            document.save(path);
        } finally {
            document.close();
        }
        System.out.println("DONE..");

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.swiftexplorer.gui.preview.PdfPanel.java

License:Apache License

/**
 * Replaces the previewed page images with those rendered from the given
 * document, then closes the document.
 * <p>
 * The current images are always cleared first. A null document leaves the
 * preview empty. An encrypted document is logged and skipped. In both of
 * those cases no repaint is requested. Otherwise pages are converted to
 * images in order until {@code maxPageToPreview} images have been collected;
 * the limit is checked after each page is added.
 *
 * @param pdf the document to preview; may be null. It is closed by this method.
 */
public synchronized void setPdf(PDDocument pdf) {
    listImagePages.clear();
    if (pdf == null) {
        return;
    }
    try {
        if (pdf.isEncrypted()) {
            logger.info("Failed attempt at previewing an encrypted PDF");
            return;
        }
        @SuppressWarnings("unchecked")
        List<PDPage> allPages = pdf.getDocumentCatalog().getAllPages();
        if (allPages != null) {
            for (PDPage current : allPages) {
                listImagePages.add(current.convertToImage());
                if (listImagePages.size() >= maxPageToPreview) {
                    break;
                }
            }
        }
    } catch (IOException e) {
        logger.error("Error occurred while opening the pdf document", e);
    } finally {
        // pdf is known to be non-null here because of the guard above.
        try {
            pdf.close();
        } catch (IOException ex) {
            logger.error("Error occurred while closing the pdf document", ex);
        }
    }
    repaint();
}

From source file:org.terrier.indexing.PDFDocument.java

License:Mozilla Public License

/**
 * Returns the reader of text, which is suitable for parsing terms out of,
 * and which is created by converting the PDF read from the parameter is.
 * The stream is loaded with PDDocument and its text is extracted with
 * PDFTextStripper.
 * <p>
 * Documents whose file size exceeds 300Mb are skipped and yield an empty
 * reader. Encrypted documents are decrypted with the empty password.
 * The "title" property is set from the PDF metadata when USE_PDF_TITLE is
 * enabled and document information is present, otherwise from the file name.
 * Any failure is rethrown as a RuntimeException carrying the original cause;
 * the PDF document is closed in every case.
 *
 * @param is the input stream that represents the document's file.
 * @return Reader a reader that is fed to an indexer.
 * @throws RuntimeException if the document cannot be decrypted or extracted.
 */
protected Reader getReader(InputStream is) {

    if ((Files.length(filename) / 1048576) > 300) {
        logger.info("Skipping document " + filename + " because it's size exceeds 300Mb");
        return new StringReader("");
    }

    PDDocument pdfDocument = null;
    Reader rtr = null;
    try {
        pdfDocument = PDDocument.load(is);

        if (pdfDocument.isEncrypted()) {
            //Just try using the default password and move on
            pdfDocument.decrypt("");
        }

        //create a writer where to append the text content.
        StringWriter writer = new StringWriter();
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.writeText(pdfDocument, writer);

        String contents = writer.getBuffer().toString();
        int spaceCount = StringUtils.countMatches(contents, " ");
        // A character occurring more than half as often as the space is treated as a
        // stand-in for the space and replaced; the space count grows accordingly.
        for (char badChar : new char[] { '\u00A0', '\u2029', '#' }) {
            final int count = StringUtils.countMatches(contents, "" + badChar);
            if (count > spaceCount / 2) {
                contents = contents.replace(badChar, ' ');
                spaceCount += count;
            }
        }
        rtr = new StringReader(contents);

        PDDocumentInformation info = pdfDocument.getDocumentInformation();
        if (info != null && USE_PDF_TITLE) {
            setProperty("title", info.getTitle());
        } else {
            setProperty("title", new java.io.File(super.filename).getName());
        }
    } catch (CryptographyException e) {
        // Keep the cause so the stack trace of the decryption failure is not lost.
        throw new RuntimeException("Error decrypting PDF document: " + e, e);
    } catch (InvalidPasswordException e) {
        //they didn't supply a password and the default of "" was wrong.
        throw new RuntimeException("Error: The PDF document is encrypted and will not be indexed.", e);
    } catch (Exception e) {
        throw new RuntimeException("Error extracting PDF document", e);
    } finally {
        if (pdfDocument != null) {
            try {
                pdfDocument.close();
            } catch (IOException ioe) {
                // Best effort: a failed close must not hide the extraction result or error.
                logger.warn("Failed to close PDF document " + filename, ioe);
            }
        }
    }
    return rtr;
}