List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.load
public static PDDocument load(byte[] input) throws IOException
From source file:drakkar.mast.retrieval.parser.PdfParser.java
/** * Para extraer contenido del pdf//from w w w . j a v a2 s . c om * * @param f * @return */ public boolean analyzePdfDocument(File f) { try { pdoc = PDDocument.load(f); if (!pdoc.isEncrypted() && pdoc.getCurrentAccessPermission().canExtractContent() && pdoc.getNumberOfPages() != 0) { this.numberPages = pdoc.getNumberOfPages(); pdfText = new PDFTextStripper(); swriter = new StringWriter(); ////////////////////datos pinf = pdoc.getDocumentInformation(); if (pinf == null) { OutputMonitor.printLine("The document does not have available information.", OutputMonitor.INFORMATION_MESSAGE); } else { setTitle(pinf.getTitle()); setAuthor(pinf.getAuthor()); setNumberpages(pdoc.getNumberOfPages()); setCalCreation(pinf.getCreationDate()); setCalModification(pinf.getModificationDate()); pdfText.writeText(pdoc, swriter); allContent = swriter.getBuffer().toString(); } pdoc.close(); swriter.close(); return true; } else { OutputMonitor.printLine("Encrypted document.", OutputMonitor.INFORMATION_MESSAGE); } } catch (Exception ex) { OutputMonitor.printStream("", ex); } finally { if (pdoc != null) { try { pdoc.close(); } catch (IOException ex) { OutputMonitor.printStream("IO", ex); } } } return false; }
From source file:drakkar.mast.retrieval.parser.PdfParser.java
/** * Divide el contenido del pdf de 100 en 100 pginas de acuerdo al nmero * total para el motor de bsqueda Minion * * @param f/* w w w . j a v a2 s .co m*/ * @param indexer indexador de Minion * @throws IOException */ public void divideTextforMinion(File f, SimpleIndexer indexer) throws IOException { pdoc = PDDocument.load(f); this.numberPages = pdoc.getNumberOfPages(); if (!pdoc.isEncrypted() && pdoc.getCurrentAccessPermission().canExtractContent() && pdoc.getNumberOfPages() != 0) { String fragment = null; int start = 0, end = 0; int count = 0; if (this.numberPages > 100) { for (int i = 0; i < numberPages; i = i + 100) { count++; //para el key del document swriter = new StringWriter(); pdfText = new PDFTextStripper(); start = i; end = 99 + i; if (end > numberPages) { end = numberPages; } pdfText.setStartPage(start); pdfText.setEndPage(end); pdfText.writeText(pdoc, swriter); fragment = swriter.getBuffer().toString(); DocumentMinion docm = new DocumentMinion(indexer, f.getPath() + count); docm.addField("filepath", f.getAbsolutePath()); docm.addField("name", f.getName()); docm.addField("book", fragment); docm.closeDocument(); swriter.close(); } pdoc.close(); } else { //si tiene menos de 100 pginas toma todo el texto como est swriter = new StringWriter(); pdfText = new PDFTextStripper(); pdfText.writeText(pdoc, swriter); fragment = swriter.getBuffer().toString(); DocumentMinion docm = new DocumentMinion(indexer, f.getPath()); docm.addField("filepath", f.getAbsolutePath()); docm.addField("name", f.getName()); docm.addField("book", fragment); docm.closeDocument(); swriter.close(); pdoc.close(); } } else { OutputMonitor.printLine("Encrypted book.", OutputMonitor.INFORMATION_MESSAGE); } if (pdoc != null) { try { pdoc.close(); } catch (IOException ex) { OutputMonitor.printStream("", ex); } } }
From source file:drakkar.mast.retrieval.parser.PdfParser.java
/** * Divide el contenido del pdf de 100 en 100 pginas de acuerdo al nmero * total para el motor de bsqueda Lucene * * @param f//from w ww . j a va 2 s.c o m * @param doccs * @param doc * @param doclsi * @throws IOException */ public void divideTextforLucene(File f, DocumentLucene doccs, DocumentLucene doc, DocumentLucene doclsi) throws IOException { pdoc = PDDocument.load(f); this.numberPages = pdoc.getNumberOfPages(); if (!pdoc.isEncrypted() && pdoc.getCurrentAccessPermission().canExtractContent() && pdoc.getNumberOfPages() != 0) { String fragment = null; int start = 0, end = 0; int count = 0; if (this.numberPages > 100) { for (int i = 0; i < numberPages; i = i + 100) { count++; //para el key del document swriter = new StringWriter(); pdfText = new PDFTextStripper(); start = i; end = 99 + i; if (end > numberPages) { end = numberPages; } pdfText.setStartPage(start); pdfText.setEndPage(end); pdfText.writeText(pdoc, swriter); fragment = swriter.getBuffer().toString(); doc.addField("filepath", f.getCanonicalPath()); doccs.addField("filepathcs", f.getCanonicalPath()); doc.addField("name", f.getName()); doccs.addField("namecs", f.getName()); doc.addField("book", fragment); doccs.addField("bookcs", fragment); /////// if (doclsi != null) { doclsi.addField("book", fragment); } swriter.close(); } pdoc.close(); } else { //si tiene menos de 100 pginas toma todo el texto como est swriter = new StringWriter(); pdfText = new PDFTextStripper(); //index pdfText.writeText(pdoc, swriter); fragment = swriter.getBuffer().toString(); doc.addField("filepath", f.getCanonicalPath()); doccs.addField("filepathcs", f.getCanonicalPath()); doc.addField("name", f.getName()); doccs.addField("namecs", f.getName()); doc.addField("book", fragment); doccs.addField("bookcs", fragment); if (doclsi != null) { doclsi.addField("book", fragment); } swriter.close(); pdoc.close(); } } else { OutputMonitor.printLine("Encrypted book.", OutputMonitor.INFORMATION_MESSAGE); } if (pdoc != null) { try { 
pdoc.close(); } catch (IOException ex) { OutputMonitor.printStream("", ex); } } }
From source file:editorframework.PDDocumentAdapter.java
public PDDocumentAdapter(String fileName) { try {//from w w w . j ava 2s. c om this.pdPanel = new PDFPagePanel(); pdDocument = PDDocument.load(fileName); allPages = pdDocument.getDocumentCatalog().getAllPages(); } catch (IOException ex) { Logger.getLogger(PDDocumentAdapter.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:editorframework.PDDocumentAdapter.java
public boolean open(String fileName) { try {//from w w w .j a va2 s. co m pdDocument = PDDocument.load(new FileInputStream(fileName)); allPages = pdDocument.getDocumentCatalog().getAllPages(); return true; } catch (IOException ex) { Logger.getLogger(PDDocumentAdapter.class.getName()).log(Level.SEVERE, null, ex); } return false; }
From source file:editorframework.pdfbox.OpenTextPDF.java
public PDDocument openPDF(String pdfFilename) { //openPDF(fileName); PDDocument document = null;/*from w ww.jav a 2 s .c o m*/ try { document = PDDocument.load(new File(pdfFilename)); return document; } catch (IOException ex) { Logger.getLogger(OpenTextPDFAdapter.class.getName()).log(Level.SEVERE, null, ex); } return null; }
From source file:editorframework.pdfbox.PDFBoxDocumentAdaptee.java
private static PDDocument parseDocument(String filename) throws IOException { PDDocument document = PDDocument.load(filename); if (document.isEncrypted()) { try {// www . ja va 2s . com document.decrypt(""); } catch (org.apache.pdfbox.exceptions.CryptographyException e) { e.printStackTrace(); } } return document; }
From source file:editorframework.pdfbox.testes.MyPDFBox.java
private void init() { JFrame jFrame = new JFrame(); jFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); try {/*from w w w. j av a 2s . com*/ final PDDocument doc = PDDocument.load(new File("./simple.pdf")); List<PDPage> allPages = doc.getDocumentCatalog().getAllPages(); PDPage page = (PDPage) allPages.get(1); setPage(page); jFrame.setBackground(Color.DARK_GRAY); setLayout(new FlowLayout()); jFrame.add(this); jFrame.setBounds(40, 40, getWidth() + 100, getHeight() + 50); jFrame.setVisible(true); jFrame.addWindowListener(new WindowAdapter() { @Override public void windowClosing(WindowEvent e) { try { doc.close(); } catch (IOException e1) { e1.printStackTrace(); } } }); } catch (IOException e) { System.out.println(e.toString()); } //doc.close(); }
From source file:editorframework.pdfbox.testes.PDFReaderAdaptor.java
License:Apache License
private static PDDocument parseDocument(InputStream input) throws IOException { PDDocument document = PDDocument.load(input); if (document.isEncrypted()) { try {//from www .ja v a2s . c o m document.decrypt(""); } catch (org.apache.pdfbox.exceptions.CryptographyException e) { e.printStackTrace(); } } return document; }
From source file:editorframework.pdfbox.testes.TestesComPDFBox.java
public static void transformarPDFemImagem() throws IOException { //OPCAO 1 transformar o pdfpage em bufered image, e exibir no JFrame normal PDDocument document = PDDocument.load(new File("./simple.pdf")); List<PDPage> allPages = document.getDocumentCatalog().getAllPages(); PDPage firstPage = allPages.get(0);/* w ww. j ava2 s . c o m*/ BufferedImage bi = firstPage.convertToImage(); File outputfile = new File("image.jpg"); ImageIO.write(bi, "jpg", outputfile); }