List of usage examples for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog
public PDDocumentCatalog getDocumentCatalog()
From source file:net.sf.jabref.plugins.pdftasks.PDFTaskSidePane.java
License:Open Source License
private void doTasks() { // PDF file type representation final ExternalFileType pdf_type = Globals.prefs.getExternalFileTypeByExt("pdf"); // get Bibtex database associated with the current tab final BasePanel db_panel = frame.basePanel(); final BibtexDatabase db = db_panel.database(); final MetaData db_meta = db_panel.metaData(); // get selected Bibtex entries from the current tab final BibtexEntry[] db_entries = db_panel.getSelectedEntries(); // get Bibtex database file for current tab final File db_file = db_panel.getFile(); if (db_file == null || db_file.getParentFile() == null) { JOptionPane.showMessageDialog(frame, "Bibtex database must be saved before performing PDF tasks.", title, JOptionPane.INFORMATION_MESSAGE); return;/* w w w. ja v a2 s .c o m*/ } // get array of directories that PDF files could possibly be in final List<File> db_dirs = new LinkedList<File>(); for (String dir : db_meta.getFileDirectory(GUIGlobals.FILE_FIELD)) { db_dirs.add(new File(dir)); } if (db_dirs.size() == 0 || !db_dirs.contains(db_file.getParentFile())) { db_dirs.add(db_file.getParentFile()); } // return if no entries are selected if (db_entries.length == 0) { JOptionPane.showMessageDialog(frame, "No entries selected for PDF tasks.", title, JOptionPane.INFORMATION_MESSAGE); return; } // get PDF file directory final File pdf_dir = absoluteFile(pdf_dir_txt.getText(), db_file.getParentFile()); // do tasks encapsulated in a worker-thread class AbstractWorker tasks = new AbstractWorker() { boolean cancelled = false; boolean confirmed = false; boolean erred = false; // get user confirmation for PDF file modifications private boolean getUserConfirmation() { if (!confirmed) { confirmed = JOptionPane.showConfirmDialog(frame, "Are you sure you want to rename, move, and/or modify PDF files?\n" + "This operations cannot be undone.", title, JOptionPane.YES_NO_OPTION) == JOptionPane.YES_OPTION; } return confirmed; } public void init() { // block main window frame.block(); } public void run() { 
// for debugging purposes final boolean modifyDatabase = true; // iterate over selected Bibtex entries int entry_count = 0; for (BibtexEntry entry : db_entries) { ++entry_count; // get Bibtex key, check is not null String key = entry.getCiteKey(); if (key == null || key.length() == 0) { JOptionPane.showMessageDialog(frame, "BibTeX entry '" + entry.getId() + "' does not have a key!", title, JOptionPane.ERROR_MESSAGE); erred = true; return; } // update status bar frame.output(String.format("Processing BibTeX entry: %s (%d of %d)...", key, entry_count, db_entries.length)); // get table of file links for this Bibtex entry FileListTableModel files = new FileListTableModel(); files.setContent(entry.getField(GUIGlobals.FILE_FIELD)); for (int fileindex = 0; fileindex < files.getRowCount(); ++fileindex) { FileListEntry file_entry = files.getEntry(fileindex); // skip if this is not a PDF file link if (!file_entry.getType().equals(pdf_type)) continue; // get PDF file File pdf_file = null; for (File db_dir : db_dirs) { pdf_file = absoluteFile(file_entry.getLink(), db_dir); if (pdf_file.isFile()) { break; } pdf_file = null; } if (pdf_file == null) { String errmsg = "Could not find PDF file '" + file_entry.getLink() + "' in '" + db_dirs.get(0); for (int i = 1; i < db_dirs.size(); ++i) { errmsg += "', '" + db_dirs.get(i); } errmsg += "'!"; JOptionPane.showMessageDialog(frame, errmsg, title, JOptionPane.ERROR_MESSAGE); erred = true; return; } // get PDF file description String pdf_desc = file_entry.getDescription(); // new PDF file File new_pdf_file = pdf_file; // rename PDF file if (rename_pdfs_chk.isSelected()) { // build new PDF name String new_name = key; if (!pdf_desc.isEmpty()) { new_name += "_" + pdf_desc.replace(" ", "_"); } new_name += "." 
+ pdf_type.getExtension(); // set new PDF file new_pdf_file = absoluteFile(new_name, new_pdf_file.getParentFile()); } // move PDF file if (move_to_pdf_dir_chk.isSelected()) { new_pdf_file = absoluteFile(new_pdf_file.getName(), pdf_dir); } // if PDF file needs to be moved if (!new_pdf_file.equals(pdf_file)) { // get user confirmation if (!getUserConfirmation()) { cancelled = true; return; } // perform move/rename operations if (modifyDatabase) { String errmsg = ""; try { // create parent directories File new_pdf_dir = new_pdf_file.getParentFile(); if (new_pdf_dir != null && !new_pdf_dir.isDirectory()) { errmsg = "Could not create directory '" + new_pdf_file.getParentFile().getPath() + "'"; erred = !new_pdf_file.getParentFile().mkdirs(); } if (!erred) { // check if PDF file already exists, and ask for confirmation to replace it if (new_pdf_file.isFile()) { switch (JOptionPane.showConfirmDialog(frame, "PDF file '" + new_pdf_file.getPath() + "' already exists.\n" + "Are you sure you want to replace it with " + "PDF file '" + pdf_file.getPath() + "'?\n" + "This operation cannot be undone.", title, JOptionPane.YES_NO_CANCEL_OPTION)) { case JOptionPane.NO_OPTION: continue; case JOptionPane.CANCEL_OPTION: cancelled = true; return; case JOptionPane.YES_OPTION: errmsg = "Could not delete PDF file '" + new_pdf_file.getPath() + "'"; erred = !new_pdf_file.delete(); } } // otherwise test that we can create the new PDF file else { errmsg = "Could not access PDF file '" + new_pdf_file.getPath() + "'"; erred = !new_pdf_file.createNewFile() || !new_pdf_file.delete(); } if (!erred) { // try to move/rename PDF file errmsg = "Could not rename PDF file '" + pdf_file.getPath() + "' to '" + new_pdf_file.getPath() + "'"; erred = !pdf_file.renameTo(new_pdf_file); } } } // possible exceptions catch (SecurityException e) { erred = true; errmsg += ": insufficient permissions"; } catch (IOException e) { e.printStackTrace(); erred = true; errmsg += ": an I/O exception occurred"; } if (erred) { 
JOptionPane.showMessageDialog(frame, errmsg + ".", title, JOptionPane.ERROR_MESSAGE); return; } // everything was successful pdf_file = new_pdf_file; } } // update file entry table and Bibtex entry file_entry.setLink(relativePath(pdf_file, db_dirs.get(0))); if (modifyDatabase) { String new_files = files.getStringRepresentation(); if (!new_files.equals(entry.getField(GUIGlobals.FILE_FIELD))) { entry.setField(GUIGlobals.FILE_FIELD, new_files); db_panel.markNonUndoableBaseChanged(); } } // perform operations on PDF file contents if (write_pdf_docinfo_chk.isSelected()) { if (erase_pdf_docinfo_chk.isSelected()) { // get user confirmation if (!getUserConfirmation()) { cancelled = true; return; } // open PDF file PDDocument document = null; try { document = PDDocument.load(pdf_file); } catch (IOException e) { e.printStackTrace(); erred = true; JOptionPane.showMessageDialog(frame, "Could not open PDF file '" + pdf_file.getPath() + "': an I/O exception occurred.", title, JOptionPane.ERROR_MESSAGE); return; } // erase document information document.setDocumentInformation(new PDDocumentInformation()); // erase XML metadata document.getDocumentCatalog().setMetadata(null); // save and close PDF file try { document.save(pdf_file.getPath()); document.close(); } catch (COSVisitorException e) { e.printStackTrace(); erred = true; JOptionPane.showMessageDialog(frame, "Could not save PDF file '" + pdf_file.getPath() + "': an exception occurred.", title, JOptionPane.ERROR_MESSAGE); return; } catch (IOException e) { e.printStackTrace(); erred = true; JOptionPane.showMessageDialog(frame, "Could not save/close PDF file '" + pdf_file.getPath() + "': an I/O exception occurred.", title, JOptionPane.ERROR_MESSAGE); return; } } // write XMP / PDF document catalog metadata try { XMPUtil.writeXMP(pdf_file, entry, db); } catch (IOException e) { e.printStackTrace(); erred = true; JOptionPane.showMessageDialog(frame, "Could not write XMP to PDF file '" + pdf_file.getPath() + "': an I/O exception 
occurred.", title, JOptionPane.ERROR_MESSAGE); return; } catch (TransformerException e) { e.printStackTrace(); erred = true; JOptionPane .showMessageDialog(frame, "Could not write XMP to PDF file '" + pdf_file.getPath() + "': an exception occurred.", title, JOptionPane.ERROR_MESSAGE); return; } } } } } public void update() { // unblock main window frame.unblock(); // print to status bar if (erred) { frame.output("An error occurred during PDF Tasks"); } else if (cancelled) { frame.output("Cancelled PDF Tasks"); } else { frame.output("Completed PDF Tasks"); } } }; // run task thread (based on code in BasePanel.runCommand()) try { tasks.init(); tasks.getWorker().run(); tasks.getCallBack().update(); } catch (Throwable e) { frame.unblock(); e.printStackTrace(); } }
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/**
 * Reads the XMP metadata attached to the document catalog of the given PDF.
 *
 * @param document
 *            The PDF document whose catalog metadata is read.
 * @return The parsed metadata with the BibTeX schema namespace mapped to
 *         {@code XMPSchemaBibtex}, or null if the catalog has no metadata.
 * @throws IOException
 *             If opening, parsing or closing the metadata stream fails.
 */
private static XMPMetadata getXMPMetadata(PDDocument document) throws IOException {
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDMetadata metaRaw = catalog.getMetadata();

    if (metaRaw == null) {
        return null;
    }

    XMPMetadata meta;
    // fully qualified so that no additional import is required
    java.io.InputStream in = metaRaw.createInputStream();
    try {
        meta = new XMPMetadata(XMLUtil.parse(in));
    } finally {
        // the stream was opened here, so it is released here as well
        in.close();
    }
    meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
    return meta;
}
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntries as DublinCore XMP Schemas * /* w ww. j a v a 2s . c om*/ * Existing DublinCore schemas in the document are removed * * @param document * The pdf document to write to. * @param entries * The Bibtex entries that are written as schemas * @param database * maybenull An optional database which the given bibtex entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @throws IOException * @throws TransformerException */ @SuppressWarnings("unchecked") private static void writeDublinCore(PDDocument document, Collection<BibtexEntry> entries, BibtexDatabase database) throws IOException, TransformerException { if (database != null) { entries = database.resolveForStrings(entries, false); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw != null) { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } else { meta = new XMPMetadata(); } // Remove all current Dublin-Core schemas List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE); for (XMPSchema schema : schemas) { schema.getElement().getParentNode().removeChild(schema.getElement()); } for (BibtexEntry entry : entries) { XMPSchemaDublinCore dcSchema = new XMPSchemaDublinCore(meta); XMPUtil.writeToDCSchema(dcSchema, entry, null); meta.addSchema(dcSchema); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); }
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/** * Try to write the given BibTexEntry in the XMP-stream of the given * PDF-file.//ww w . j av a 2 s. c o m * * Throws an IOException if the file cannot be read or written, so the user * can remove a lock or cancel the operation. * * The method will overwrite existing BibTeX-XMP-data, but keep other * existing metadata. * * @param file * The file to write the entries to. * @param bibtexEntries * The entries to write to the file. * * @param database * maybenull An optional database which the given bibtex entries * belong to, which will be used to resolve strings. If the * database is null the strings will not be resolved. * @param writePDFInfo * Write information also in PDF document properties * @throws TransformerException * If the entry was malformed or unsupported. * @throws IOException * If the file could not be written to or could not be found. */ @SuppressWarnings("unchecked") public static void writeXMP(File file, Collection<BibtexEntry> bibtexEntries, BibtexDatabase database, boolean writePDFInfo) throws IOException, TransformerException { if (database != null) { bibtexEntries = database.resolveForStrings(bibtexEntries, false); } PDDocument document = null; try { document = PDDocument.load(file.getAbsoluteFile()); if (document.isEncrypted()) { throw new EncryptionNotSupportedException("Error: Cannot add metadata to encrypted document."); } if (writePDFInfo && (bibtexEntries.size() == 1)) { XMPUtil.writeDocumentInformation(document, bibtexEntries.iterator().next(), null); XMPUtil.writeDublinCore(document, bibtexEntries, null); } PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata metaRaw = catalog.getMetadata(); XMPMetadata meta; if (metaRaw != null) { meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream())); } else { meta = new XMPMetadata(); } meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class); // Remove all current Bibtex-schemas List<XMPSchema> schemas = 
meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE); for (XMPSchema schema : schemas) { XMPSchemaBibtex bib = (XMPSchemaBibtex) schema; bib.getElement().getParentNode().removeChild(bib.getElement()); } for (BibtexEntry e : bibtexEntries) { XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta); meta.addSchema(bibtex); bibtex.setBibtexEntry(e, null); } // Save to stream and then input that stream to the PDF ByteArrayOutputStream os = new ByteArrayOutputStream(); meta.save(os); ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); PDMetadata metadataStream = new PDMetadata(document, is, false); catalog.setMetadata(metadataStream); // Save try { document.save(file.getAbsolutePath()); } catch (COSVisitorException e) { throw new TransformerException("Could not write XMP-metadata: " + e.getLocalizedMessage()); } } finally { if (document != null) { document.close(); } } }
From source file:net.sf.jsignpdf.preview.Pdf2Image.java
License:Mozilla Public License
/**
 * Renders one page of the input PDF to an image with PDFBox.
 *
 * The page is rendered as {@code BufferedImage.TYPE_INT_RGB} at the screen
 * resolution reported by the default toolkit, or at 96 when running
 * headless. Any failure is printed to the error stream and results in null.
 *
 * @param aPage
 *            page in PDF (1 based)
 * @return image or null
 */
public BufferedImage getImageUsingPdfBox(final int aPage) {
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(options.getInFile());
        if (pdf.isEncrypted()) {
            pdf.decrypt(options.getPdfOwnerPwdStrX());
        }

        int dpi;
        try {
            dpi = Toolkit.getDefaultToolkit().getScreenResolution();
        } catch (HeadlessException e) {
            // no display available, fall back to a fixed resolution
            dpi = 96;
        }

        // page numbers are 1 based, the page list is 0 based
        final Object rawPage = pdf.getDocumentCatalog().getAllPages().get(aPage - 1);
        final PDPage pdfPage = (PDPage) rawPage;
        return pdfPage.convertToImage(BufferedImage.TYPE_INT_RGB, dpi);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    // only reached when rendering failed
    return null;
}
From source file:net.timendum.pdf.Images2HTML.java
License:Open Source License
/**
 * Runs {@code processStream} on the content stream of every page of the
 * given document, in page order.
 *
 * Pages without a content stream are skipped.
 *
 * @param document
 *            the PDF document whose pages are processed
 * @throws IOException
 *             if reading a page's contents or processing its stream fails
 */
public void processDocument(PDDocument document) throws IOException {
    List<?> allPages = document.getDocumentCatalog().getAllPages();
    for (Object element : allPages) {
        PDPage page = (PDPage) element;
        // getContents() yields null for a page that has no content stream;
        // dereferencing it unconditionally would fail with a NullPointerException
        if (page.getContents() == null) {
            continue;
        }
        processStream(page, page.findResources(), page.getContents().getStream());
    }
}
From source file:net.yacy.document.parser.pdfParser.java
License:Open Source License
/**
 * Collects the URI link annotations of every page of a PDF.
 *
 * @param pdf the document to parse
 * @return one collection of link URLs per page, indexed in page order;
 *         a page without URI links gets an empty collection
 */
private Collection<AnchorURL>[] extractPdfLinks(final PDDocument pdf) {
    @SuppressWarnings("unchecked")
    final List<PDPage> pages = pdf.getDocumentCatalog().getAllPages();
    @SuppressWarnings("unchecked")
    final Collection<AnchorURL>[] linksPerPage = (Collection<AnchorURL>[]) new Collection<?>[pages.size()];

    int slot = 0;
    for (final PDPage currentPage : pages) {
        final Collection<AnchorURL> found = new ArrayList<AnchorURL>();
        try {
            final List<PDAnnotation> pageAnnotations = currentPage.getAnnotations();
            if (pageAnnotations != null) {
                for (final PDAnnotation candidate : pageAnnotations) {
                    // only link annotations carry an action
                    if (!(candidate instanceof PDAnnotationLink)) {
                        continue;
                    }
                    // instanceof is false for null, so a missing action is skipped as well
                    final PDAction action = ((PDAnnotationLink) candidate).getAction();
                    if (!(action instanceof PDActionURI)) {
                        continue;
                    }
                    final String target = ((PDActionURI) action).getURI();
                    found.add(new AnchorURL(target));
                }
            }
        } catch (IOException ex) {
            // deliberately ignored: the links collected for this page so far are kept
        }
        linksPerPage[slot] = found;
        slot++;
    }
    return linksPerPage;
}
From source file:no.digipost.print.validate.PdfValidator.java
License:Apache License
/**
 * Returns the page list of the given document's catalog.
 *
 * The unchecked warning is suppressed because the page list is handed out
 * by the catalog without the element type declared here.
 *
 * @param pdDoc the document to read the pages from
 * @return all pages of the document as reported by its catalog
 */
@SuppressWarnings("unchecked")
private List<PDPage> getAllPagesFrom(final PDDocument pdDoc) {
    final List<PDPage> pages = pdDoc.getDocumentCatalog().getAllPages();
    return pages;
}
From source file:nominas.sei.form.Principal.java
private void ordenaNominas(String rutaEntrada, String rutaSalida) { ArrayList<PaginaNomina> paginasNomina = new ArrayList<PaginaNomina>(); for (int x = 0; x < 1; x++) {//RECORREMOS EL ARREGLO CON LOS NOMBRES DE ARCHIVO try {/*from ww w.j a va 2s . com*/ PDDocument pd = PDDocument.load(rutaEntrada); //CARGAR EL PDF List l = pd.getDocumentCatalog().getAllPages();//NUMERO LAS PAGINAS DEL ARCHIVO Object[] obj = l.toArray();//METO EN UN OBJETO LA LISTA DE PAGINAS PARA MANIPULARLA for (int i = 0; i < l.size(); i++) { PDPage page = (PDPage) obj[i];//PAGE ES LA PAGINA 1 DE LA QUE CONSTA EL ARCHIVO PageFormat pageFormat = pd.getPageFormat(0);//PROPIEDADES DE LA PAGINA (FORMATO) Double d1 = new Double(pageFormat.getHeight());//ALTO Double d2 = new Double(pageFormat.getWidth());//ANCHO int width = d1.intValue();//ANCHO int eigth = 1024;//ALTO PDFTextStripperByArea stripper = new PDFTextStripperByArea();//COMPONENTE PARA ACCESO AL TEXTO Rectangle rect = new Rectangle(0, 0, width, eigth);//DEFNIR AREA DONDE SE BUSCARA EL TEXTO stripper.addRegion("area1", rect);//REGISTRAMOS LA REGION CON UN NOMBRE stripper.extractRegions(page);//EXTRAE TEXTO DEL AREA String contenido = new String();//CONTENIDO = A LO QUE CONTENGA EL AREA O REGION contenido = (stripper.getTextForRegion("area1")); String[] lines = contenido.split("[\\r\\n]+"); String nombre = lines[1].substring(28, lines[1].length() - 10);//Separamos el nombre PaginaNomina nomina = new PaginaNomina(page, nombre); paginasNomina.add(nomina); } Collections.sort(paginasNomina); // Create a new empty document PDDocument document = new PDDocument(); for (int i = 0; i < paginasNomina.size(); i++) { System.out.println(paginasNomina.get(i).getNombre()); document.addPage(paginasNomina.get(i).getPagina()); } // Save the newly created document document.save(rutaSalida); // finally make sure that the document is properly // closed. 
document.close(); pd.close();//CERRAMOS OBJETO ACROBAT } catch (Exception e) { System.out.println(e.getMessage()); } //CATCH } //FOR }
From source file:nominas.sei.NominasSEI.java
/** * @param args the command line arguments *//* w w w . j av a 2 s . c o m*/ public static void main(String[] args) { ArrayList<PaginaNomina> paginasNomina = new ArrayList<PaginaNomina>(); for (int x = 0; x < 1; x++) {//RECORREMOS EL ARREGLO CON LOS NOMBRES DE ARCHIVO String ruta = new String();//VARIABLE QUE DETERMINARA LA RUTA DEL ARCHIVO A LEER. ruta = (".\\NOMINAS.pdf"); //SE ALMACENA LA RUTA DEL ARCHIVO A LEER. try { PDDocument pd = PDDocument.load(ruta); //CARGAR EL PDF List l = pd.getDocumentCatalog().getAllPages();//NUMERO LAS PAGINAS DEL ARCHIVO Object[] obj = l.toArray();//METO EN UN OBJETO LA LISTA DE PAGINAS PARA MANIPULARLA for (int i = 0; i < l.size(); i++) { PDPage page = (PDPage) obj[i];//PAGE ES LA PAGINA 1 DE LA QUE CONSTA EL ARCHIVO PageFormat pageFormat = pd.getPageFormat(0);//PROPIEDADES DE LA PAGINA (FORMATO) Double d1 = new Double(pageFormat.getHeight());//ALTO Double d2 = new Double(pageFormat.getWidth());//ANCHO int width = d1.intValue();//ANCHO int eigth = 1024;//ALTO PDFTextStripperByArea stripper = new PDFTextStripperByArea();//COMPONENTE PARA ACCESO AL TEXTO Rectangle rect = new Rectangle(0, 0, width, eigth);//DEFNIR AREA DONDE SE BUSCARA EL TEXTO stripper.addRegion("area1", rect);//REGISTRAMOS LA REGION CON UN NOMBRE stripper.extractRegions(page);//EXTRAE TEXTO DEL AREA String contenido = new String();//CONTENIDO = A LO QUE CONTENGA EL AREA O REGION contenido = (stripper.getTextForRegion("area1")); String[] lines = contenido.split("[\\r\\n]+"); String nombre = lines[1].substring(28, lines[1].length() - 10); PaginaNomina nomina = new PaginaNomina(page, nombre); paginasNomina.add(nomina); } Collections.sort(paginasNomina); // Create a new empty document PDDocument document = new PDDocument(); for (int i = 0; i < paginasNomina.size(); i++) { System.out.println(paginasNomina.get(i).getNombre()); document.addPage(paginasNomina.get(i).getPagina()); } // Save the newly created document document.save("NominasOrdenadas.pdf"); // finally make 
sure that the document is properly // closed. document.close(); pd.close();//CERRAMOS OBJETO ACROBAT } catch (Exception e) { System.out.println(e.getMessage()); } //CATCH } //FOR }