List of usage examples for org.apache.pdfbox.pdmodel PDDocument close
@Override public void close() throws IOException
From source file:org.quelea.data.pdf.PDFPresentation.java
License:Open Source License
/**
 * Make the slides that go in this PDF. This is what takes time and should
 * only be done once.
 *
 * @return all the slides, one per page, numbered from 1.
 * @throws IOException if the PDF cannot be loaded, the temporary directory
 * cannot be created, or the document fails to close.
 */
private PdfSlide[] makeSlides() throws IOException {
    File pdf = new File(file);
    // try-with-resources guarantees the document is closed even when
    // something below throws part-way through.
    try (PDDocument document = PDDocument.load(pdf.getAbsoluteFile())) {
        // NOTE(review): this temp directory is created but not referenced
        // again in this method - kept to preserve the existing side effect.
        Path f = Files.createTempDirectory(null);
        f.toFile().deleteOnExit();
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int totalPages = document.getNumberOfPages();
        ArrayList<PdfSlide> ret = new ArrayList<>(totalPages);
        for (int i = 0; i < totalPages; i++) {
            // Slides are numbered from 1, pages are indexed from 0.
            ret.add(new PdfSlide(i + 1, pdfRenderer));
        }
        return ret.toArray(new PdfSlide[0]);
    }
}
From source file:org.quelea.services.importexport.SurvivorSongbookParser.java
License:Open Source License
/** * Get all the songs in the PDF document. * @return a list of all the songs.//ww w .j a va 2s.com * @throws IOException if something went wrong. */ @Override public List<SongDisplayable> getSongs(File location, StatusPanel statusPanel) throws IOException { PDDocument document = PDDocument.load(location); List<SongDisplayable> pdfSongs = new ArrayList<>(); PDFTextStripper stripper = new PDFTextStripper(); List<String> songParts = new ArrayList<>(); for (int i = 0; i < document.getNumberOfPages(); i++) { String pageText = getPageText(document, stripper, i); if (pageText.trim().isEmpty()) { continue; } songParts.add(pageText); boolean twoPart = pageText.contains("(1 of"); if (i < document.getNumberOfPages() - 1) { //This section in case the original (1 of x) is missed out String nextPageText = getPageText(document, stripper, i + 1); if (nextPageText.contains("(2 of")) { twoPart = true; } } if (!twoPart) { SongDisplayable song = processSong(songParts.toArray(new String[songParts.size()])); if (song != null) { pdfSongs.add(song); } songParts.clear(); } } document.close(); if (pdfSongs == null) { return new ArrayList<>(); } else { return pdfSongs; } }
From source file:org.sakaiproject.search.component.adapter.contenthosting.PDFContentDigester.java
License:Educational Community License
public String getContent(ContentResource contentResource) { if (contentResource == null) { throw new RuntimeException("Null contentResource passed to getContent"); }//w w w . j ava 2 s . c om InputStream contentStream = null; PDFParser parser = null; PDDocument pddoc = null; try { contentStream = contentResource.streamContent(); parser = new PDFParser(new BufferedInputStream(contentStream)); parser.parse(); pddoc = parser.getPDDocument(); if (pddoc != null) { PDFTextStripper stripper = new PDFTextStripper(); stripper.setLineSeparator("\n"); CharArrayWriter cw = new CharArrayWriter(); stripper.writeText(pddoc, cw); return SearchUtils.appendCleanString(cw.toCharArray(), null).toString(); } } catch (ServerOverloadException e) { String eMessage = e.getMessage(); if (eMessage == null) { eMessage = e.toString(); } throw new RuntimeException( "Failed to get content for indexing: cause: ServerOverloadException: " + eMessage, e); } catch (IOException e) { String eMessage = e.getMessage(); if (eMessage == null) { eMessage = e.toString(); } throw new RuntimeException("Failed to get content for indexing: cause: IOException: " + eMessage, e); } finally { if (pddoc != null) { try { pddoc.close(); } catch (IOException e) { log.debug(e); } } if (contentStream != null) { try { contentStream.close(); } catch (IOException e) { log.debug(e); } } } return null; }
From source file:org.seasar.robot.extractor.impl.PdfExtractor.java
License:Apache License
@Override public ExtractData getText(final InputStream in, final Map<String, String> params) { if (in == null) { throw new RobotSystemException("The inputstream is null."); }/*from w w w . j a v a2 s .c o m*/ synchronized (pdfBoxLockObj) { PDDocument document = null; try { document = PDDocument.load(in, null, force); if (document.isEncrypted() && params != null) { String password = params.get(ExtractData.PDF_PASSWORD); if (password == null) { password = getPassword(params.get(ExtractData.URL), params.get(TikaMetadataKeys.RESOURCE_NAME_KEY)); } if (password != null) { final StandardDecryptionMaterial sdm = new StandardDecryptionMaterial(password); document.openProtection(sdm); final AccessPermission ap = document.getCurrentAccessPermission(); if (!ap.canExtractContent()) { throw new IOException("You do not have permission to extract text."); } } } final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final Writer output = new OutputStreamWriter(baos, encoding); final PDFTextStripper stripper = new PDFTextStripper(encoding); stripper.setForceParsing(force); final AtomicBoolean done = new AtomicBoolean(false); final PDDocument doc = document; final Set<Exception> exceptionSet = new HashSet<>(); Thread task = new Thread(new Runnable() { @Override public void run() { try { stripper.writeText(doc, output); } catch (Exception e) { exceptionSet.add(e); } finally { done.set(true); } } }); task.setDaemon(true); task.start(); task.join(timeout); if (!done.get()) { for (int i = 0; i < 100 && !done.get(); i++) { task.interrupt(); Thread.sleep(50); } throw new ExtractException("PDFBox process cannot finish in " + timeout + " sec."); } else if (!exceptionSet.isEmpty()) { throw exceptionSet.iterator().next(); } output.flush(); final ExtractData extractData = new ExtractData(baos.toString(encoding)); extractMetadata(document, extractData); return extractData; } catch (final Exception e) { throw new ExtractException(e); } finally { if (document != null) { try { 
document.close(); } catch (final IOException e) { // NOP } } } } }
From source file:org.sencko.nalb.parser.Game.java
License:Open Source License
/**
 * Builds a game from a PDF stream: the document text is written into the
 * supplied writer, a reader is opened over that buffer, and the extract*
 * steps are run against it.
 *
 * @param stream the PDF input stream.
 * @param writer reusable character buffer; it is reset before use.
 * @param stripper the text stripper used to write the document text.
 * @throws Exception if loading, stripping or any extraction step fails; on an
 * extraction failure rowCache and currentMatcher are printed first.
 */
public Game(InputStream stream, OptimizedCharArrayWriter writer, PDFTextStripper stripper) throws Exception {
    PDDocument document = PDDocument.load(stream);
    try {
        writer.reset();
        stripper.writeText(document, writer);
        reader = new BufferedReader(new CharArrayReader(writer.getBuffer(), 0, writer.size()));
        try {
            extractTeamNames();
            extractGameNumber();
            extractMatchTime();
            extract5minPeriodScore(scoring5minHome, homeTeam);
            extract5minPeriodScore(scoring5minAway, awayTeam);
            extractPlayerStats(homeTeam);
            extractPlayerStats(awayTeam);
            rowCache = null;
            currentMatcher = null;
        } catch (Exception ex) {
            // Dump the parsing state at the point of failure before rethrowing.
            System.out.println(rowCache);
            System.out.println(currentMatcher);
            throw ex;
        }
    } finally {
        // Close on every path; previously a failure leaked the document.
        document.close();
    }
}
From source file:org.silverpeas.core.index.indexing.parser.pdfParser.PdfParser2.java
License:Open Source License
/**
 * Opens the PDF at the given path and returns a reader over its extracted text.
 *
 * @param path the path of the PDF file to read.
 * @param encoding not used by this parser.
 * @return a reader over the document text, or null if the file could not be
 * read (the failure is traced, not thrown).
 */
@Override
public Reader getReader(String path, String encoding) {
    Reader reader = null;
    InputStream file = null;
    PDDocument document = null;
    try {
        file = new FileInputStream(path);
        document = PDDocument.load(file);
        PDFTextStripper extractor = new PDFTextStripper();
        String text = extractor.getText(document);
        reader = new StringReader(text);
    } catch (Exception e) {
        SilverTrace.error("indexing", "PdfParser2", "indexing.MSG_IO_ERROR_WHILE_READING", path, e);
    } finally {
        try {
            if (document != null) {
                document.close();
            }
        } catch (IOException ioe) {
            SilverTrace.error("indexing", "PdfParser2.getReader()", "indexing.MSG_IO_ERROR_WHILE_CLOSING", path,
                    ioe);
        } finally {
            // Always release the file stream, even when closing the document failed.
            IOUtils.closeQuietly(file);
        }
    }
    return reader;
}
From source file:org.silverpeas.search.indexEngine.parser.pdfParser.PdfParser2.java
License:Open Source License
@Override public Reader getReader(String path, String encoding) { Reader reader = null;//from www . jav a 2s.co m InputStream file = null; PDDocument document = null; try { file = new FileInputStream(path); document = PDDocument.load(file); PDFTextStripper extractor = new PDFTextStripper(); String text = extractor.getText(document); reader = new StringReader(text); } catch (Exception e) { SilverTrace.error("indexEngine", "PdfParser2", "indexEngine.MSG_IO_ERROR_WHILE_READING", path, e); } finally { try { if (document != null) { document.close(); } IOUtils.closeQuietly(file); } catch (IOException ioe) { SilverTrace.error("indexEngine", "PdfParser2.getReader()", "indexEngine.MSG_IO_ERROR_WHILE_CLOSING", path, ioe); } } return reader; }
From source file:org.socialbiz.cog.util.PDFUtil.java
License:Apache License
public static void main(String[] args) { //For test try {//from www .j a v a2 s . c o m String path = args[0]; PDDocument document = new PDDocument(); PDPage page = new PDPage(); page.setMediaBox(PDPage.PAGE_SIZE_A4); document.addPage(page); PDFont font = PDType1Font.HELVETICA; PDPageContentStream contentStream = new PDPageContentStream(document, page, false, false); contentStream.beginText(); contentStream.setFont(font, 12); contentStream.moveTextPositionByAmount(100, 800); String x = "hello world"; contentStream.drawString(x); contentStream.moveTextPositionByAmount(-90, -15); contentStream.setFont(font, 12); contentStream.drawString("Hello World3"); contentStream.endText(); contentStream.close(); document.save(path); document.close(); System.out.println("DONE.."); } catch (Exception e) { e.printStackTrace(); } }
From source file:org.swiftexplorer.gui.preview.PdfPanel.java
License:Apache License
/**
 * Replaces the previewed pages with images rendered from the given document,
 * then closes the document.
 *
 * The current page images are always cleared. A null document returns
 * immediately; an encrypted document is logged and skipped. In both of those
 * cases no repaint is requested. At most maxPageToPreview pages are rendered.
 *
 * @param pdf the document to preview; it is closed by this method when non-null.
 */
public synchronized void setPdf(PDDocument pdf) {
    listImagePages.clear();
    if (pdf == null) {
        return;
    }

    try {
        if (pdf.isEncrypted()) {
            logger.info("Failed attempt at previewing an encrypted PDF");
            return;
        }

        @SuppressWarnings("unchecked")
        List<PDPage> allPages = pdf.getDocumentCatalog().getAllPages();
        if (allPages != null) {
            for (PDPage current : allPages) {
                listImagePages.add(current.convertToImage());
                // Stop once the preview limit has been reached.
                if (listImagePages.size() >= maxPageToPreview) {
                    break;
                }
            }
        }
    } catch (IOException e) {
        logger.error("Error occurred while opening the pdf document", e);
    } finally {
        // pdf is known to be non-null here because of the guard above.
        try {
            pdf.close();
        } catch (IOException ex) {
            logger.error("Error occurred while closing the pdf document", ex);
        }
    }
    repaint();
}
From source file:org.terrier.indexing.PDFDocument.java
License:Mozilla Public License
/** * Returns the reader of text, which is suitable for parsing terms out of, * and which is created by converting the file represented by * parameter docStream. This method involves running the stream * through the PDFParser etc provided in the org.pdfbox library. * On error, it returns null, and sets EOD to true, so no terms * can be read from this document.//from w w w . j a va2 s. c o m * @param is the input stream that represents the document's file. * @return Reader a reader that is fed to an indexer. */ protected Reader getReader(InputStream is) { if ((Files.length(filename) / 1048576) > 300) { logger.info("Skipping document " + filename + " because it's size exceeds 300Mb"); return new StringReader(""); } PDDocument pdfDocument = null; Reader rtr = null; try { pdfDocument = PDDocument.load(is); if (pdfDocument.isEncrypted()) { //Just try using the default password and move on pdfDocument.decrypt(""); } //create a writer where to append the text content. StringWriter writer = new StringWriter(); PDFTextStripper stripper = new PDFTextStripper(); stripper.writeText(pdfDocument, writer); String contents = writer.getBuffer().toString(); int spaceCount = StringUtils.countMatches(contents, " "); for (char badChar : new char[] { '\u00A0', '\u2029', '#' }) { final int count = StringUtils.countMatches(contents, "" + badChar); if (count > spaceCount / 2) { contents = contents.replace(badChar, ' '); spaceCount += count; } } rtr = new StringReader(contents); PDDocumentInformation info = pdfDocument.getDocumentInformation(); if (info != null && USE_PDF_TITLE) { setProperty("title", info.getTitle()); } else { setProperty("title", new java.io.File(super.filename).getName()); } } catch (CryptographyException e) { throw new RuntimeException("Error decrypting PDF document: " + e); } catch (InvalidPasswordException e) { //they didn't suppply a password and the default of "" was wrong. 
throw new RuntimeException("Error: The PDF document is encrypted and will not be indexed."); } catch (Exception e) { throw new RuntimeException("Error extracting PDF document", e); } finally { if (pdfDocument != null) { try { pdfDocument.close(); } catch (IOException ioe) { } } } return rtr; }