List of usage examples for com.itextpdf.text.pdf PdfReader getMetadata
public byte[] getMetadata() throws IOException
From source file:br.unifor.mia.xmpsemantico.xmp.MetadataXmp.java
License:GNU General Public License
/** * Reads the XML stream inside a PDF file into an XML file. * @param src A PDF file containing XMP data * @param dest XML file containing the XMP data extracted from the PDF * @throws IOException/*from ww w . j a v a 2 s .c om*/ */ public void readXmpMetadata() throws IOException { PdfReader reader = new PdfReader(pathPdf); FileOutputStream fos = new FileOutputStream(pathXMP); byte[] b = reader.getMetadata(); fos.write(b, 0, b.length); fos.flush(); fos.close(); reader.close(); }
From source file:org.crossref.pdfmark.Main.java
License:Open Source License
public Main(String[] args) { if (args.length == 0) { printUsage();//from w w w.ja v a 2s . c o m System.exit(2); } CmdLineParser parser = new CmdLineParser(); Option provideXmpOp = parser.addStringOption('p', "xmp-file"); Option overwriteOp = parser.addBooleanOption('f', "force"); Option outputOp = parser.addStringOption('o', "output-dir"); Option doiOp = parser.addStringOption('d', "doi"); Option searchOp = parser.addBooleanOption('s', "search-for-doi"); Option copyrightOp = parser.addBooleanOption("no-copyright"); Option rightsOp = parser.addStringOption("rights-agent"); Option apiKeyOp = parser.addStringOption("api-key"); try { parser.parse(args); } catch (CmdLineParser.OptionException e) { printUsage(); System.exit(2); } String optionalXmpPath = (String) parser.getOptionValue(provideXmpOp, ""); String outputDir = (String) parser.getOptionValue(outputOp, ""); String explicitDoi = (String) parser.getOptionValue(doiOp, ""); boolean useTheForce = (Boolean) parser.getOptionValue(overwriteOp, Boolean.FALSE); boolean searchForDoi = (Boolean) parser.getOptionValue(searchOp, Boolean.FALSE); boolean noCopyright = (Boolean) parser.getOptionValue(copyrightOp, Boolean.FALSE); String rightsAgent = (String) parser.getOptionValue(rightsOp, ""); String apiKey = (String) parser.getOptionValue(apiKeyOp, ApiKey.DEFAULT); if (!explicitDoi.equals("") && searchForDoi) { exitWithError(2, "-d and -s are mutually exclusive options."); } if (!outputDir.isEmpty() && !new File(outputDir).exists()) { exitWithError(2, "The output directory, '" + outputDir + "' does not exist."); } byte[] optionalXmpData = null; if (!optionalXmpPath.equals("")) { /* We will take XMP data from a file. */ FileInfo xmpFile = FileInfo.readFileFully(optionalXmpPath); if (xmpFile.missing) { exitWithError(2, "Error: File '" + xmpFile.path + "' does not exist."); } else if (xmpFile.error != null) { exitWithError(2, "Error: Could not read '" + xmpFile.path + "' because of:\n" + xmpFile.error); } optionalXmpData = xmpFile.data; } grabber = new MetadataGrabber(apiKey); /* Now we're ready to merge our imported or generated XMP data with what * is already in each PDF. */ for (String pdfFilePath : parser.getRemainingArgs()) { String outputPath = getOutFileName(pdfFilePath); /* Grab the leaf. */ if (outputPath.contains(File.separator)) { String[] split = outputPath.split(File.separator); outputPath = split[split.length - 1]; } if (!outputDir.isEmpty()) { outputPath = outputDir + File.separator + outputPath; } else { /* Output to the working directory. */ } File pdfFile = new File(pdfFilePath); File outputFile = new File(outputPath); byte[] resolvedXmpData = null; if (!pdfFile.exists()) { exitWithError(2, "Error: File '" + pdfFilePath + "' does not exist."); } if (outputFile.exists() && !useTheForce) { exitWithError(2, "Error: File '" + outputPath + "' already exists.\nTry using -f (force)."); } try { if (!useTheForce && isLinearizedPdf(new FileInputStream(pdfFile))) { exitWithError(2, "Error: '" + pdfFilePath + "' is a" + " linearized PDF and force is not specified." + " This tool will output non-linearized PDF." + "\nIf you don't mind that, use -f (force)."); } } catch (IOException e) { exitWithError(2, "Error: Could not determine linearization" + " because of:\n" + e); } if (!explicitDoi.equals("")) { resolvedXmpData = getXmpForDoi(explicitDoi, !noCopyright, rightsAgent); } try { new File(outputFile.getPath() + ".tmp").deleteOnExit(); FileInputStream fileIn = new FileInputStream(pdfFile); FileOutputStream fileOut = new FileOutputStream(outputFile.getPath() + ".tmp"); PdfReader reader = new PdfReader(fileIn); PdfStamper stamper = new PdfStamper(reader, fileOut); byte[] merged = reader.getMetadata(); if (optionalXmpData != null) { merged = XmpUtils.mergeXmp(merged, optionalXmpData); } if (resolvedXmpData != null) { merged = XmpUtils.mergeXmp(merged, resolvedXmpData); } stamper.setXmpMetadata(merged); stamper.close(); reader.close(); fileIn = new FileInputStream(outputFile.getPath() + ".tmp"); writeInfoDictionary(fileIn, outputFile.getPath(), merged); } catch (IOException e) { exitWithError(2, "Error: Couldn't handle '" + pdfFilePath + "' because of:\n" + e); } catch (DocumentException e) { exitWithError(2, "Error: Couldn't handle '" + pdfFilePath + "' because of:\n" + e); } catch (XmpException e) { exitWithError(2, "Error: Couldn't handle '" + pdfFilePath + "' because of:\n" + e); } catch (COSVisitorException e) { exitWithError(2, "Error: Couldn't write document info dictionary" + " because of:\n" + e); } } shutDown(); }
From source file:org.sinekartapdfa.alfresco.utils.PDFTools.java
License:Open Source License
/** * metodo di utilita' che verifica se il pdf in input e' un PDF/A * /*w w w . j a va 2 s .co m*/ * @param reader * @return */ public static boolean isPdfa(InputStream is) { if (tracer.isDebugEnabled()) tracer.debug("checking if PDF is PDF/A"); PdfReader reader = null; ByteArrayInputStream bais = null; XMLStreamReader sr = null; try { reader = new PdfReader(is); byte[] metadata = reader.getMetadata(); if (metadata == null || metadata.length == 0) return false; bais = new ByteArrayInputStream(metadata); sr = XMLInputFactory.newInstance().createXMLStreamReader(bais); boolean isConformanceTag = false; int eventCode; while (sr.hasNext()) { eventCode = sr.next(); String val = null; switch (eventCode) { case 1: val = sr.getLocalName(); if (val.equals("conformance") && sr.getNamespaceURI().equals("http://www.aiim.org/pdfa/ns/id/")) isConformanceTag = true; break; case 4: val = sr.getText(); if (isConformanceTag) { if (val.equals("A") || val.equals("B")) { if (tracer.isDebugEnabled()) tracer.debug("yes, it is"); return true; } else { if (tracer.isDebugEnabled()) tracer.debug("no, it isn't"); return false; } } break; } } } catch (Exception e) { tracer.error("Unable to read PDF. Unable to check if the pdf is a pdf/a.", e); throw new PDFException("Unable to read PDF. Unable to check if the pdf is a pdf/a.", e); } finally { try { if (reader != null) reader.close(); } catch (Exception e) { tracer.error("error on pdf reader", e); } try { if (sr != null) sr.close(); } catch (Exception e) { tracer.error("error on stax reader", e); } try { if (bais != null) bais.close(); } catch (Exception e) { tracer.error("error on input stream", e); } try { if (is != null) is.close(); } catch (Exception e) { tracer.error("error on input stream", e); } } if (tracer.isDebugEnabled()) tracer.debug("no, it isn't"); return false; }
From source file:uk.bl.wa.tika.parser.pdf.itext.PDFParser.java
License:Apache License
private static void extractMetadata(PdfReader reader, Metadata metadata) { try {//from ww w . j a v a2 s . c o m HashMap<String, String> map = reader.getInfo(); // Clone the PDF info: for (String key : map.keySet()) { metadata.set(key.toLowerCase(), map.get(key)); } // Add other data of interest: metadata.set("pdf:version", "1." + reader.getPdfVersion()); metadata.set("pdf:numPages", "" + reader.getNumberOfPages()); metadata.set("pdf:cryptoMode", "" + getCryptoModeAsString(reader)); metadata.set("pdf:openedWithFullPermissions", "" + reader.isOpenedWithFullPermissions()); metadata.set("pdf:encrypted", "" + reader.isEncrypted()); metadata.set("pdf:metadataEncrypted", "" + reader.isMetadataEncrypted()); metadata.set("pdf:128key", "" + reader.is128Key()); metadata.set("pdf:tampered", "" + reader.isTampered()); // Also grap XMP metadata, if present: byte[] xmpmd = reader.getMetadata(); if (xmpmd != null) { // This is standard Tika code for parsing standard stuff from the XMP: JempboxExtractor extractor = new JempboxExtractor(metadata); extractor.parse(new ByteArrayInputStream(xmpmd)); // This is custom XMP-handling code: XMPMetadata xmp = XMPMetadata.load(new ByteArrayInputStream(xmpmd)); // There is a special class for grabbing data in the PDF schema - not sure it will add much here: // Could parse xmp:CreatorTool and pdf:Producer etc. etc. out of here. //XMPSchemaPDF pdfxmp = xmp.getPDFSchema(); // Added a PDF/A schema class: xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class); XMPSchemaPDFA pdfaxmp = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class); if (pdfaxmp != null) { metadata.set("pdfaid:part", pdfaxmp.getPart()); metadata.set("pdfaid:conformance", pdfaxmp.getConformance()); String version = "A-" + pdfaxmp.getPart() + pdfaxmp.getConformance().toLowerCase(); //metadata.set("pdfa:version", version ); metadata.set("pdf:version", version); } } // Attempt to determine Adobe extension level: PdfDictionary extensions = reader.getCatalog().getAsDict(PdfName.EXTENSIONS); if (extensions != null) { PdfDictionary adobeExt = extensions.getAsDict(PdfName.ADBE); if (adobeExt != null) { PdfName baseVersion = adobeExt.getAsName(PdfName.BASEVERSION); int el = adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL).intValue(); metadata.set("pdf:version", baseVersion.toString().substring(1) + " Adobe Extension Level " + el); } } // Ensure the normalised metadata are mapped in: if (map.get("Title") != null) metadata.set(Metadata.TITLE, map.get("Title")); if (map.get("Author") != null) metadata.set(Metadata.AUTHOR, map.get("Author")); } catch (Exception e) { System.err.println("PDFParser.extractMetadata() caught Exception: " + e.getMessage()); e.printStackTrace(); } }