List of usage examples for org.apache.pdfbox.pdmodel.PDDocument#load.
One of its overloads: public static PDDocument load(byte[] input) throws IOException — note that the examples below also use the load(InputStream) and load(File) overloads.
From source file:net.sf.jabref.imports.PdfContentImporter.java
License:Open Source License
/**
 * Imports BibTeX entries from the first page of a PDF.
 *
 * Strategy: extract the text of page 1; if a DOI is found in it, delegate to
 * the DOI fetcher and return its results. Otherwise fall back to a heuristic,
 * line-based parse of the page text (conference/copyright line, title, author
 * block, abstract/keywords, then trailing blocks for DOI/publisher/series).
 *
 * NOTE(review): this method relies on fields that are not declared locally —
 * {@code split}, {@code i}, {@code curString} and {@code year} appear to be
 * instance fields shared with the helper methods proceedToNextNonEmptyLine(),
 * fillCurStringWithNonEmptyLines(), readLastBlock() and extractYear();
 * confirm against the rest of the class.
 *
 * @param in     stream containing the PDF to import
 * @param status printer used to report problems to the user
 * @return list of extracted entries; empty if the PDF could not be loaded or
 *         parsed (never null)
 * @throws IOException declared for the importer interface; load errors are in
 *         fact caught and logged rather than propagated
 */
@Override
public List<BibtexEntry> importEntries(InputStream in, OutputPrinter status) throws IOException {
    final ArrayList<BibtexEntry> res = new ArrayList<BibtexEntry>(1);
    PDDocument document;
    try {
        document = PDDocument.load(in);
    } catch (IOException e) {
        // a broken PDF yields an empty result rather than an exception
        PdfContentImporter.logger.log(Level.SEVERE, "Could not load document", e);
        return res;
    }
    try {
        if (document.isEncrypted()) {
            // only logged — processing continues and may still work partially
            PdfContentImporter.logger.log(Level.INFO, Globals.lang("Encrypted documents are not supported"));
            //return res;
        }
        // extract the text of page 1 only, sorted by position, one paragraph per line
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setStartPage(1);
        stripper.setEndPage(1);
        stripper.setSortByPosition(true);
        stripper.setParagraphEnd(System.getProperty("line.separator"));
        StringWriter writer = new StringWriter();
        stripper.writeText(document, writer);
        String textResult = writer.toString();
        String doi = DOIUtil.getDOI(textResult);
        if (doi.length() < textResult.length()) {
            // A DOI was found in the text
            // We do NO parsing of the text, but use the DOI fetcher
            // (note: this local 'i' shadows the instance field 'i' within this block)
            ImportInspector i = new ImportInspector() {

                @Override
                public void toFront() {
                }

                @Override
                public void setProgress(int current, int max) {
                }

                @Override
                public void addEntry(BibtexEntry entry) {
                    // add the entry to the result object
                    res.add(entry);
                }
            };
            PdfContentImporter.doiToBibTeXFetcher.processQuery(doi, i, status);
            if (res.size() != 0) {
                // if something has been found, return the result
                return res;
            } else {
                // otherwise, we just parse the PDF
            }
        }
        // fields collected by the heuristic parse; null means "not found"
        String author;
        String editor = null;
        String institution = null;
        String abstractT = null;
        String keywords = null;
        String title;
        String conference = null;
        String DOI = null;
        String series = null;
        String volume = null;
        String number = null;
        String pages = null;
        // year is a class variable as the method extractYear() uses it;
        String publisher = null;
        BibtexEntryType type = BibtexEntryType.INPROCEEDINGS;
        final String lineBreak = System.getProperty("line.separator");
        split = textResult.split(lineBreak);
        // idea: split[] contains the different lines
        // blocks are separated by empty lines
        // treat each block
        // or do special treatment at authors (which are not broken)
        // therefore, we do a line-based and not a block-based splitting
        // i points to the current line
        // curString (mostly) contains the current block
        // the different lines are joined into one and thereby separated by " "
        proceedToNextNonEmptyLine();
        if (i >= split.length) {
            // PDF could not be parsed or is empty
            // return empty list
            return res;
        }
        curString = split[i];
        i = i + 1;
        if (curString.length() > 4) {
            // special case: possibly conference as first line on the page
            extractYear();
            if (curString.contains("Conference")) {
                fillCurStringWithNonEmptyLines();
                conference = curString;
                curString = "";
            } else {
                // e.g. Copyright (c) 1998 by the Genetics Society of America
                // future work: get year using RegEx
                String lower = curString.toLowerCase();
                if (lower.contains("copyright")) {
                    fillCurStringWithNonEmptyLines();
                    publisher = curString;
                    curString = "";
                }
            }
        }
        // start: title
        fillCurStringWithNonEmptyLines();
        title = streamlineTitle(curString);
        curString = "";
        //i points to the next non-empty line
        // after title: authors
        author = null;
        while ((i < split.length) && (!split[i].equals(""))) {
            // author names are unlikely to be split among different lines
            // treat them line by line
            curString = streamlineNames(split[i]);
            if (author == null) {
                author = curString;
            } else {
                if (curString.equals("")) {
                    // if split[i] is "and" then "" is returned by streamlineNames -> do nothing
                } else {
                    author = author.concat(" and ").concat(curString);
                }
            }
            i++;
        }
        curString = "";
        i++;
        // then, abstract and keywords follow
        while (i < split.length) {
            curString = split[i];
            if ((curString.length() >= "Abstract".length())
                    && (curString.substring(0, "Abstract".length()).equalsIgnoreCase("Abstract"))) {
                if (curString.length() == "Abstract".length()) {
                    // only word "abstract" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Abstract".length() + 1).trim().concat(lineBreak);
                }
                i++;
                // fillCurStringWithNonEmptyLines() cannot be used as that uses " " as line separator
                // whereas we need linebreak as separator
                while ((i < split.length) && (!split[i].equals(""))) {
                    curString = curString.concat(split[i]).concat(lineBreak);
                    i++;
                }
                abstractT = curString;
                i++;
            } else if ((curString.length() >= "Keywords".length())
                    && (curString.substring(0, "Keywords".length()).equalsIgnoreCase("Keywords"))) {
                if (curString.length() == "Keywords".length()) {
                    // only word "Keywords" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Keywords".length() + 1).trim();
                }
                i++;
                fillCurStringWithNonEmptyLines();
                keywords = removeNonLettersAtEnd(curString);
            } else {
                // a line containing "technical" suggests a technical report;
                // its last token is taken as the report number
                String lower = curString.toLowerCase();
                int pos = lower.indexOf("technical");
                if (pos >= 0) {
                    type = BibtexEntryType.TECHREPORT;
                    pos = curString.trim().lastIndexOf(' ');
                    if (pos >= 0) {
                        // assumption: last character of curString is NOT ' '
                        // otherwise pos+1 leads to an out-of-bounds exception
                        number = curString.substring(pos + 1);
                    }
                }
                i++;
                proceedToNextNonEmptyLine();
            }
        }
        i = split.length - 1;
        // last block: DOI, detailed information
        // sometimes, this information is in the third last block etc...
        // therefore, read until the beginning of the file
        while (i >= 0) {
            readLastBlock();
            // i now points to the block before or is -1
            // curString contains the last block, separated by " "
            extractYear();
            int pos = curString.indexOf("(Eds.)");
            if ((pos >= 0) && (publisher == null)) {
                // looks like a Springer last line
                // e.g: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
                publisher = "Springer";
                editor = streamlineNames(curString.substring(0, pos - 1));
                curString = curString.substring(pos + "(Eds.)".length() + 2); //+2 because of ":" after (Eds.) and the subsequent space
                String[] springerSplit = curString.split(", ");
                if (springerSplit.length >= 4) {
                    conference = springerSplit[0];
                    String seriesData = springerSplit[1];
                    int lastSpace = seriesData.lastIndexOf(' ');
                    series = seriesData.substring(0, lastSpace);
                    volume = seriesData.substring(lastSpace + 1);
                    pages = springerSplit[2].substring(4);
                    if (springerSplit[3].length() >= 4) {
                        year = springerSplit[3].substring(0, 4);
                    }
                }
            } else {
                if (DOI == null) {
                    // look for "DOI"/"doi" followed by an optional ':' or ' ',
                    // then take everything up to the next space
                    pos = curString.indexOf("DOI");
                    if (pos < 0) {
                        pos = curString.indexOf("doi");
                    }
                    if (pos >= 0) {
                        pos += 3;
                        char delimiter = curString.charAt(pos);
                        if ((delimiter == ':') || (delimiter == ' ')) {
                            pos++;
                        }
                        int nextSpace = curString.indexOf(' ', pos);
                        if (nextSpace > 0) {
                            DOI = curString.substring(pos, nextSpace);
                        } else {
                            DOI = curString.substring(pos);
                        }
                    }
                }
                if ((publisher == null) && (curString.contains("IEEE"))) {
                    // IEEE has the conference things at the end
                    publisher = "IEEE";
                    // year is extracted by extractYear
                    // otherwise, we could it determine as follows:
                    // String yearStr = curString.substring(curString.length()-4);
                    // if (isYear(yearStr)) {
                    //     year = yearStr;
                    // }
                    if (conference == null) {
                        pos = curString.indexOf('$');
                        if (pos > 0) {
                            // we found the price
                            // before the price, the ISSN is stated
                            // skip that
                            pos -= 2;
                            while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
                                pos--;
                            }
                            if (pos > 0) {
                                conference = curString.substring(0, pos);
                            }
                        }
                    }
                }
                // String lower = curString.toLowerCase();
                // if (institution == null) {
                //
                // }
            }
        }
        // assemble the entry from whatever the heuristics found
        BibtexEntry entry = new BibtexEntry();
        entry.setType(type);
        if (author != null) {
            entry.setField("author", author);
        }
        if (editor != null) {
            entry.setField("editor", editor);
        }
        if (institution != null) {
            entry.setField("institution", institution);
        }
        if (abstractT != null) {
            entry.setField("abstract", abstractT);
        }
        if (keywords != null) {
            entry.setField("keywords", keywords);
        }
        if (title != null) {
            entry.setField("title", title);
        }
        if (conference != null) {
            entry.setField("booktitle", conference);
        }
        if (DOI != null) {
            entry.setField("doi", DOI);
        }
        if (series != null) {
            entry.setField("series", series);
        }
        if (volume != null) {
            entry.setField("volume", volume);
        }
        if (number != null) {
            entry.setField("number", number);
        }
        if (pages != null) {
            entry.setField("pages", pages);
        }
        if (year != null) {
            entry.setField("year", year);
        }
        if (publisher != null) {
            entry.setField("publisher", publisher);
        }
        // the full extracted page text is stored for manual review
        entry.setField("review", textResult);
        res.add(entry);
    } catch (NoClassDefFoundError e) {
        // PDFBox needs Bouncy Castle at runtime for some documents
        if (e.getMessage().equals("org/bouncycastle/jce/provider/BouncyCastleProvider")) {
            status.showMessage(Globals.lang(
                    "Java Bouncy Castle library not found. Please download and install it. For more information see http://www.bouncycastle.org/."));
        } else {
            PdfContentImporter.logger.log(Level.SEVERE, e.getLocalizedMessage(), e);
        }
    } finally {
        document.close();
    }
    return res;
}
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/** * Try to read the given BibTexEntry from the XMP-stream of the given * inputstream containing a PDF-file.// w w w . j av a 2 s . com * * @param inputStream * The inputstream to read from. * * @throws IOException * Throws an IOException if the file cannot be read, so the user * than remove a lock or cancel the operation. * * @return list of BibEntries retrieved from the stream. May be empty, but never null */ public static List<BibEntry> readXMP(InputStream inputStream) throws IOException { List<BibEntry> result = new LinkedList<>(); try (PDDocument document = PDDocument.load(inputStream)) { if (document.isEncrypted()) { throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document."); } Optional<XMPMetadata> meta = XMPUtil.getXMPMetadata(document); if (meta.isPresent()) { List<XMPSchema> schemas = meta.get().getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE); for (XMPSchema schema : schemas) { XMPSchemaBibtex bib = (XMPSchemaBibtex) schema; BibEntry entry = bib.getBibtexEntry(); if (entry.getType() == null) { entry.setType("misc"); } result.add(entry); } // If we did not find anything have a look if a Dublin Core exists if (result.isEmpty()) { schemas = meta.get().getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE); for (XMPSchema schema : schemas) { XMPSchemaDublinCore dc = (XMPSchemaDublinCore) schema; Optional<BibEntry> entry = XMPUtil.getBibtexEntryFromDublinCore(dc); if (entry.isPresent()) { if (entry.get().getType() == null) { entry.get().setType("misc"); } result.add(entry.get()); } } } } if (result.isEmpty()) { // If we did not find any XMP metadata, search for non XMP metadata PDDocumentInformation documentInformation = document.getDocumentInformation(); Optional<BibEntry> entry = XMPUtil.getBibtexEntryFromDocumentInformation(documentInformation); if (entry.isPresent()) { result.add(entry.get()); } } } // return empty list, if no metadata was found if (result.isEmpty()) { return Collections.emptyList(); } return 
result; }
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/** * Will read the XMPMetadata from the given pdf file, closing the file * afterwards./*from w w w.ja v a 2s . c om*/ * * @param inputStream * The inputStream representing a PDF-file to read the * XMPMetadata from. * @return The XMPMetadata object found in the file */ private static Optional<XMPMetadata> readRawXMP(InputStream inputStream) throws IOException { try (PDDocument document = PDDocument.load(inputStream)) { if (document.isEncrypted()) { throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document."); } return XMPUtil.getXMPMetadata(document); } }
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/**
 * Try to write the given BibEntries into the XMP-stream of the given
 * PDF-file.
 *
 * Throws an IOException if the file cannot be read or written, so the user
 * can remove a lock or cancel the operation.
 *
 * The method will overwrite existing BibTeX-XMP-data, but keep other
 * existing metadata.
 *
 * @param file
 *            The file to write the entries to.
 * @param bibtexEntries
 *            The entries to write to the file.
 * @param database
 *            maybenull An optional database which the given bibtex entries
 *            belong to, which will be used to resolve strings. If the
 *            database is null the strings will not be resolved.
 * @param writePDFInfo
 *            Write information also in PDF document properties
 * @throws TransformerException
 *             If the entry was malformed or unsupported.
 * @throws IOException
 *             If the file could not be written to or could not be found.
 */
public static void writeXMP(File file, Collection<BibEntry> bibtexEntries, BibDatabase database,
        boolean writePDFInfo) throws IOException, TransformerException {
    // resolve BibTeX strings against the database, if one was supplied
    Collection<BibEntry> resolvedEntries;
    if (database == null) {
        resolvedEntries = bibtexEntries;
    } else {
        resolvedEntries = database.resolveForStrings(bibtexEntries, false);
    }
    try (PDDocument document = PDDocument.load(file.getAbsoluteFile())) {
        if (document.isEncrypted()) {
            throw new EncryptionNotSupportedException("Error: Cannot add metadata to encrypted document.");
        }
        // document information / Dublin Core are only written for a single entry
        if (writePDFInfo && (resolvedEntries.size() == 1)) {
            XMPUtil.writeDocumentInformation(document, resolvedEntries.iterator().next(), null);
            XMPUtil.writeDublinCore(document, resolvedEntries, null);
        }
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();
        // start from the document's existing XMP packet, or a fresh one
        XMPMetadata meta;
        if (metaRaw == null) {
            meta = new XMPMetadata();
        } else {
            meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream()));
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
        // Remove all current Bibtex-schemas (other metadata is kept)
        List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        for (XMPSchema schema : schemas) {
            XMPSchemaBibtex bib = (XMPSchemaBibtex) schema;
            bib.getElement().getParentNode().removeChild(bib.getElement());
        }
        // add one bibtex schema per entry
        for (BibEntry e : resolvedEntries) {
            XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta);
            meta.addSchema(bibtex);
            bibtex.setBibtexEntry(e, null);
        }
        // Save to stream and then input that stream to the PDF
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        meta.save(os);
        ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
        PDMetadata metadataStream = new PDMetadata(document, is, false);
        catalog.setMetadata(metadataStream);
        // Save; PDFBox's checked COSVisitorException is mapped onto the
        // declared TransformerException
        try {
            document.save(file.getAbsolutePath());
        } catch (COSVisitorException e) {
            throw new TransformerException("Could not write XMP-metadata: " + e.getLocalizedMessage());
        }
    }
}
From source file:net.sf.jabref.PdfPreviewPanel.java
License:Open Source License
private void renderPDFFile(File file) { InputStream input;/*from ww w . java2 s . c o m*/ try { input = new FileInputStream(file); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); return; } PDDocument document; try { document = PDDocument.load(input); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); return; } @SuppressWarnings("unchecked") List<PDPage> pages = document.getDocumentCatalog().getAllPages(); PDPage page = pages.get(0); BufferedImage image; try { image = page.convertToImage(); } catch (Exception e1) { // silently ignores all rendering exceptions image = null; } if (image != null) { int width = this.getParent().getWidth(); int height = this.getParent().getHeight(); BufferedImage resImage = resizeImage(image, width, height, BufferedImage.TYPE_INT_RGB); ImageIcon icon = new ImageIcon(resImage); picLabel.setText(null); picLabel.setIcon(icon); } else { clearPreview(); } try { document.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:net.sf.jabref.plugins.pdftasks.PDFTaskSidePane.java
License:Open Source License
/**
 * Runs the configured PDF tasks (rename, move, erase/write metadata) over the
 * currently selected BibTeX entries, inside a blocking worker.
 *
 * Preconditions checked up front: the database must be saved to disk and at
 * least one entry must be selected. All file modifications require a single
 * user confirmation (remembered for the rest of the run).
 */
private void doTasks() {
    // PDF file type representation
    final ExternalFileType pdf_type = Globals.prefs.getExternalFileTypeByExt("pdf");
    // get Bibtex database associated with the current tab
    final BasePanel db_panel = frame.basePanel();
    final BibtexDatabase db = db_panel.database();
    final MetaData db_meta = db_panel.metaData();
    // get selected Bibtex entries from the current tab
    final BibtexEntry[] db_entries = db_panel.getSelectedEntries();
    // get Bibtex database file for current tab
    final File db_file = db_panel.getFile();
    if (db_file == null || db_file.getParentFile() == null) {
        JOptionPane.showMessageDialog(frame, "Bibtex database must be saved before performing PDF tasks.",
                title, JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    // get array of directories that PDF files could possibly be in;
    // the database's own directory is always a fallback
    final List<File> db_dirs = new LinkedList<File>();
    for (String dir : db_meta.getFileDirectory(GUIGlobals.FILE_FIELD)) {
        db_dirs.add(new File(dir));
    }
    if (db_dirs.size() == 0 || !db_dirs.contains(db_file.getParentFile())) {
        db_dirs.add(db_file.getParentFile());
    }
    // return if no entries are selected
    if (db_entries.length == 0) {
        JOptionPane.showMessageDialog(frame, "No entries selected for PDF tasks.", title,
                JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    // get PDF file directory
    final File pdf_dir = absoluteFile(pdf_dir_txt.getText(), db_file.getParentFile());
    // do tasks encapsulated in a worker-thread class
    AbstractWorker tasks = new AbstractWorker() {

        // outcome flags read by update() to pick the status-bar message
        boolean cancelled = false;
        boolean confirmed = false;
        boolean erred = false;

        // get user confirmation for PDF file modifications;
        // a "yes" is remembered so the user is only asked once per run
        private boolean getUserConfirmation() {
            if (!confirmed) {
                confirmed = JOptionPane.showConfirmDialog(frame,
                        "Are you sure you want to rename, move, and/or modify PDF files?\n"
                                + "This operations cannot be undone.",
                        title, JOptionPane.YES_NO_OPTION) == JOptionPane.YES_OPTION;
            }
            return confirmed;
        }

        public void init() {
            // block main window
            frame.block();
        }

        public void run() {
            // for debugging purposes
            final boolean modifyDatabase = true;
            // iterate over selected Bibtex entries
            int entry_count = 0;
            for (BibtexEntry entry : db_entries) {
                ++entry_count;
                // get Bibtex key, check is not null
                String key = entry.getCiteKey();
                if (key == null || key.length() == 0) {
                    JOptionPane.showMessageDialog(frame,
                            "BibTeX entry '" + entry.getId() + "' does not have a key!", title,
                            JOptionPane.ERROR_MESSAGE);
                    erred = true;
                    return;
                }
                // update status bar
                frame.output(String.format("Processing BibTeX entry: %s (%d of %d)...", key, entry_count,
                        db_entries.length));
                // get table of file links for this Bibtex entry
                FileListTableModel files = new FileListTableModel();
                files.setContent(entry.getField(GUIGlobals.FILE_FIELD));
                for (int fileindex = 0; fileindex < files.getRowCount(); ++fileindex) {
                    FileListEntry file_entry = files.getEntry(fileindex);
                    // skip if this is not a PDF file link
                    if (!file_entry.getType().equals(pdf_type))
                        continue;
                    // get PDF file: first existing match across the candidate directories
                    File pdf_file = null;
                    for (File db_dir : db_dirs) {
                        pdf_file = absoluteFile(file_entry.getLink(), db_dir);
                        if (pdf_file.isFile()) {
                            break;
                        }
                        pdf_file = null;
                    }
                    if (pdf_file == null) {
                        String errmsg = "Could not find PDF file '" + file_entry.getLink() + "' in '"
                                + db_dirs.get(0);
                        for (int i = 1; i < db_dirs.size(); ++i) {
                            errmsg += "', '" + db_dirs.get(i);
                        }
                        errmsg += "'!";
                        JOptionPane.showMessageDialog(frame, errmsg, title, JOptionPane.ERROR_MESSAGE);
                        erred = true;
                        return;
                    }
                    // get PDF file description
                    String pdf_desc = file_entry.getDescription();
                    // new PDF file
                    File new_pdf_file = pdf_file;
                    // rename PDF file
                    if (rename_pdfs_chk.isSelected()) {
                        // build new PDF name: "<key>[_<description>].pdf"
                        String new_name = key;
                        if (!pdf_desc.isEmpty()) {
                            new_name += "_" + pdf_desc.replace(" ", "_");
                        }
                        new_name += "." + pdf_type.getExtension();
                        // set new PDF file
                        new_pdf_file = absoluteFile(new_name, new_pdf_file.getParentFile());
                    }
                    // move PDF file
                    if (move_to_pdf_dir_chk.isSelected()) {
                        new_pdf_file = absoluteFile(new_pdf_file.getName(), pdf_dir);
                    }
                    // if PDF file needs to be moved
                    if (!new_pdf_file.equals(pdf_file)) {
                        // get user confirmation
                        if (!getUserConfirmation()) {
                            cancelled = true;
                            return;
                        }
                        // perform move/rename operations
                        if (modifyDatabase) {
                            // errmsg describes the step currently being attempted,
                            // so it is accurate whenever erred becomes true
                            String errmsg = "";
                            try {
                                // create parent directories
                                File new_pdf_dir = new_pdf_file.getParentFile();
                                if (new_pdf_dir != null && !new_pdf_dir.isDirectory()) {
                                    errmsg = "Could not create directory '"
                                            + new_pdf_file.getParentFile().getPath() + "'";
                                    erred = !new_pdf_file.getParentFile().mkdirs();
                                }
                                if (!erred) {
                                    // check if PDF file already exists, and ask for confirmation to replace it
                                    if (new_pdf_file.isFile()) {
                                        // NO skips this file link; CANCEL aborts the run;
                                        // YES falls through to deleting the existing target
                                        switch (JOptionPane.showConfirmDialog(frame,
                                                "PDF file '" + new_pdf_file.getPath() + "' already exists.\n"
                                                        + "Are you sure you want to replace it with "
                                                        + "PDF file '" + pdf_file.getPath() + "'?\n"
                                                        + "This operation cannot be undone.",
                                                title, JOptionPane.YES_NO_CANCEL_OPTION)) {
                                        case JOptionPane.NO_OPTION:
                                            continue;
                                        case JOptionPane.CANCEL_OPTION:
                                            cancelled = true;
                                            return;
                                        case JOptionPane.YES_OPTION:
                                            errmsg = "Could not delete PDF file '" + new_pdf_file.getPath()
                                                    + "'";
                                            erred = !new_pdf_file.delete();
                                        }
                                    }
                                    // otherwise test that we can create the new PDF file
                                    else {
                                        errmsg = "Could not access PDF file '" + new_pdf_file.getPath() + "'";
                                        erred = !new_pdf_file.createNewFile() || !new_pdf_file.delete();
                                    }
                                    if (!erred) {
                                        // try to move/rename PDF file
                                        errmsg = "Could not rename PDF file '" + pdf_file.getPath() + "' to '"
                                                + new_pdf_file.getPath() + "'";
                                        erred = !pdf_file.renameTo(new_pdf_file);
                                    }
                                }
                            }
                            // possible exceptions
                            catch (SecurityException e) {
                                erred = true;
                                errmsg += ": insufficient permissions";
                            } catch (IOException e) {
                                e.printStackTrace();
                                erred = true;
                                errmsg += ": an I/O exception occurred";
                            }
                            if (erred) {
                                JOptionPane.showMessageDialog(frame, errmsg + ".", title,
                                        JOptionPane.ERROR_MESSAGE);
                                return;
                            }
                            // everything was successful
                            pdf_file = new_pdf_file;
                        }
                    }
                    // update file entry table and Bibtex entry
                    file_entry.setLink(relativePath(pdf_file, db_dirs.get(0)));
                    if (modifyDatabase) {
                        String new_files = files.getStringRepresentation();
                        if (!new_files.equals(entry.getField(GUIGlobals.FILE_FIELD))) {
                            entry.setField(GUIGlobals.FILE_FIELD, new_files);
                            db_panel.markNonUndoableBaseChanged();
                        }
                    }
                    // perform operations on PDF file contents
                    if (write_pdf_docinfo_chk.isSelected()) {
                        if (erase_pdf_docinfo_chk.isSelected()) {
                            // get user confirmation
                            if (!getUserConfirmation()) {
                                cancelled = true;
                                return;
                            }
                            // open PDF file
                            PDDocument document = null;
                            try {
                                document = PDDocument.load(pdf_file);
                            } catch (IOException e) {
                                e.printStackTrace();
                                erred = true;
                                JOptionPane.showMessageDialog(frame, "Could not open PDF file '"
                                        + pdf_file.getPath() + "': an I/O exception occurred.", title,
                                        JOptionPane.ERROR_MESSAGE);
                                return;
                            }
                            // erase document information
                            document.setDocumentInformation(new PDDocumentInformation());
                            // erase XML metadata
                            document.getDocumentCatalog().setMetadata(null);
                            // save and close PDF file
                            // NOTE(review): document is not closed on these error
                            // paths — potential resource leak; confirm intent
                            try {
                                document.save(pdf_file.getPath());
                                document.close();
                            } catch (COSVisitorException e) {
                                e.printStackTrace();
                                erred = true;
                                JOptionPane.showMessageDialog(frame, "Could not save PDF file '"
                                        + pdf_file.getPath() + "': an exception occurred.", title,
                                        JOptionPane.ERROR_MESSAGE);
                                return;
                            } catch (IOException e) {
                                e.printStackTrace();
                                erred = true;
                                JOptionPane.showMessageDialog(frame, "Could not save/close PDF file '"
                                        + pdf_file.getPath() + "': an I/O exception occurred.", title,
                                        JOptionPane.ERROR_MESSAGE);
                                return;
                            }
                        }
                        // write XMP / PDF document catalog metadata
                        try {
                            XMPUtil.writeXMP(pdf_file, entry, db);
                        } catch (IOException e) {
                            e.printStackTrace();
                            erred = true;
                            JOptionPane.showMessageDialog(frame, "Could not write XMP to PDF file '"
                                    + pdf_file.getPath() + "': an I/O exception occurred.", title,
                                    JOptionPane.ERROR_MESSAGE);
                            return;
                        } catch (TransformerException e) {
                            e.printStackTrace();
                            erred = true;
                            JOptionPane.showMessageDialog(frame, "Could not write XMP to PDF file '"
                                    + pdf_file.getPath() + "': an exception occurred.", title,
                                    JOptionPane.ERROR_MESSAGE);
                            return;
                        }
                    }
                }
            }
        }

        public void update() {
            // unblock main window
            frame.unblock();
            // print to status bar
            if (erred) {
                frame.output("An error occurred during PDF Tasks");
            } else if (cancelled) {
                frame.output("Cancelled PDF Tasks");
            } else {
                frame.output("Completed PDF Tasks");
            }
        }
    };
    // run task thread (based on code in BasePanel.runCommand())
    try {
        tasks.init();
        tasks.getWorker().run();
        tasks.getCallBack().update();
    } catch (Throwable e) {
        // make sure the window is unblocked if anything goes wrong
        frame.unblock();
        e.printStackTrace();
    }
}
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/**
 * Try to read the given BibTexEntry from the XMP-stream of the given
 * inputstream containing a PDF-file.
 *
 * Lookup order: bibtex XMP schemas first, then Dublin Core schemas, then the
 * plain PDF document information dictionary.
 *
 * @param inputStream
 *            The inputstream to read from.
 * @return the entries found, or null if none were found (callers must
 *         null-check)
 * @throws IOException
 *             Throws an IOException if the file cannot be read, so the user
 *             can remove a lock or cancel the operation.
 */
@SuppressWarnings("unchecked")
public static List<BibtexEntry> readXMP(InputStream inputStream) throws IOException {
    List<BibtexEntry> result = new LinkedList<BibtexEntry>();
    PDDocument document = null;
    try {
        document = PDDocument.load(inputStream);
        if (document.isEncrypted()) {
            throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document.");
        }
        XMPMetadata meta = XMPUtil.getXMPMetadata(document);
        // if XMP metadata exists, mine it for bibtex / Dublin Core schemas
        if (meta != null) {
            List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
            for (XMPSchema schema : schemas) {
                XMPSchemaBibtex bib = (XMPSchemaBibtex) schema;
                result.add(bib.getBibtexEntry());
            }
            // If we did not find anything have a look if a Dublin Core exists
            if (result.isEmpty()) {
                schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                for (XMPSchema schema : schemas) {
                    XMPSchemaDublinCore dc = (XMPSchemaDublinCore) schema;
                    BibtexEntry entry = XMPUtil.getBibtexEntryFromDublinCore(dc);
                    if (entry != null) {
                        result.add(entry);
                    }
                }
            }
        }
        // If we did not find any XMP metadata, search for non XMP metadata
        // in the document information dictionary
        if (result.isEmpty()) {
            BibtexEntry entry = XMPUtil
                    .getBibtexEntryFromDocumentInformation(document.getDocumentInformation());
            if (entry != null) {
                result.add(entry);
            }
        }
    } finally {
        if (document != null) {
            document.close();
        }
    }
    // return null, if no metadata was found
    if (result.isEmpty()) {
        return null;
    }
    return result;
}
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/** * Will read the XMPMetadata from the given pdf file, closing the file * afterwards./*from ww w. j a va 2s. c om*/ * * @param inputStream * The inputStream representing a PDF-file to read the * XMPMetadata from. * @return The XMPMetadata object found in the file or null if none is * found. * @throws IOException */ private static XMPMetadata readRawXMP(InputStream inputStream) throws IOException { PDDocument document = null; try { document = PDDocument.load(inputStream); if (document.isEncrypted()) { throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document."); } return XMPUtil.getXMPMetadata(document); } finally { if (document != null) { document.close(); } } }
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/**
 * Try to write the given BibTexEntry in the XMP-stream of the given
 * PDF-file.
 *
 * Throws an IOException if the file cannot be read or written, so the user
 * can remove a lock or cancel the operation.
 *
 * The method will overwrite existing BibTeX-XMP-data, but keep other
 * existing metadata.
 *
 * @param file
 *            The file to write the entries to.
 * @param bibtexEntries
 *            The entries to write to the file.
 * @param database
 *            maybenull An optional database which the given bibtex entries
 *            belong to, which will be used to resolve strings. If the
 *            database is null the strings will not be resolved.
 * @param writePDFInfo
 *            Write information also in PDF document properties
 * @throws TransformerException
 *             If the entry was malformed or unsupported.
 * @throws IOException
 *             If the file could not be written to or could not be found.
 */
@SuppressWarnings("unchecked")
public static void writeXMP(File file, Collection<BibtexEntry> bibtexEntries, BibtexDatabase database,
        boolean writePDFInfo) throws IOException, TransformerException {
    // resolve BibTeX strings against the database, if one was supplied
    // (note: reassigns the parameter)
    if (database != null) {
        bibtexEntries = database.resolveForStrings(bibtexEntries, false);
    }
    PDDocument document = null;
    try {
        document = PDDocument.load(file.getAbsoluteFile());
        if (document.isEncrypted()) {
            throw new EncryptionNotSupportedException("Error: Cannot add metadata to encrypted document.");
        }
        // document information / Dublin Core are only written for a single entry
        if (writePDFInfo && (bibtexEntries.size() == 1)) {
            XMPUtil.writeDocumentInformation(document, bibtexEntries.iterator().next(), null);
            XMPUtil.writeDublinCore(document, bibtexEntries, null);
        }
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();
        // start from the document's existing XMP packet, or a fresh one
        XMPMetadata meta;
        if (metaRaw != null) {
            meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream()));
        } else {
            meta = new XMPMetadata();
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
        // Remove all current Bibtex-schemas (other metadata is kept)
        List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        for (XMPSchema schema : schemas) {
            XMPSchemaBibtex bib = (XMPSchemaBibtex) schema;
            bib.getElement().getParentNode().removeChild(bib.getElement());
        }
        // add one bibtex schema per entry
        for (BibtexEntry e : bibtexEntries) {
            XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta);
            meta.addSchema(bibtex);
            bibtex.setBibtexEntry(e, null);
        }
        // Save to stream and then input that stream to the PDF
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        meta.save(os);
        ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
        PDMetadata metadataStream = new PDMetadata(document, is, false);
        catalog.setMetadata(metadataStream);
        // Save; PDFBox's checked COSVisitorException is mapped onto the
        // declared TransformerException
        try {
            document.save(file.getAbsolutePath());
        } catch (COSVisitorException e) {
            throw new TransformerException("Could not write XMP-metadata: " + e.getLocalizedMessage());
        }
    } finally {
        if (document != null) {
            document.close();
        }
    }
}
From source file:net.sf.jsignpdf.preview.Pdf2Image.java
License:Mozilla Public License
/** * Returns image (or null if failed) generated from given page in PDF using * PDFBox tool./* w w w . ja va2 s .co m*/ * * @param aPage * page in PDF (1 based) * @return image or null */ public BufferedImage getImageUsingPdfBox(final int aPage) { BufferedImage tmpResult = null; PDDocument tmpDoc = null; try { tmpDoc = PDDocument.load(options.getInFile()); if (tmpDoc.isEncrypted()) { tmpDoc.decrypt(options.getPdfOwnerPwdStrX()); } int resolution; try { resolution = Toolkit.getDefaultToolkit().getScreenResolution(); } catch (HeadlessException e) { resolution = 96; } final PDPage page = (PDPage) tmpDoc.getDocumentCatalog().getAllPages().get(aPage - 1); tmpResult = page.convertToImage(BufferedImage.TYPE_INT_RGB, resolution); } catch (Exception e) { e.printStackTrace(); } finally { if (tmpDoc != null) { try { tmpDoc.close(); } catch (Exception e) { e.printStackTrace(); } } } return tmpResult; }