Example usage for com.itextpdf.text.pdf PdfReader getMetadata

Introduction

This page collects example usages of the method com.itextpdf.text.pdf.PdfReader.getMetadata().

Prototype

public byte[] getMetadata() throws IOException

Source Link

Document

Gets the XML metadata.

Usage

From source file:br.unifor.mia.xmpsemantico.xmp.MetadataXmp.java

License:GNU General Public License

/**
 * Reads the XML stream inside a PDF file into an XML file.
 * @param src  A PDF file containing XMP data
 * @param dest XML file containing the XMP data extracted from the PDF
 * @throws IOException/*from ww w .  j  a v  a  2 s  .c om*/
 */
public void readXmpMetadata() throws IOException {
    PdfReader reader = new PdfReader(pathPdf);
    FileOutputStream fos = new FileOutputStream(pathXMP);
    byte[] b = reader.getMetadata();
    fos.write(b, 0, b.length);
    fos.flush();
    fos.close();
    reader.close();
}

From source file:org.crossref.pdfmark.Main.java

License:Open Source License

/**
 * Parses the command line and merges XMP metadata into every PDF named on it.
 *
 * <p>Recognised options: {@code -p/--xmp-file}, {@code -f/--force},
 * {@code -o/--output-dir}, {@code -d/--doi}, {@code -s/--search-for-doi},
 * {@code --no-copyright}, {@code --rights-agent} and {@code --api-key}. The remaining
 * arguments are treated as PDF file paths. For each PDF the existing XMP is merged with
 * the XMP read from {@code --xmp-file} (if any) and the XMP resolved for {@code --doi}
 * (if any); the result is stamped into a {@code .tmp} file next to the output and then
 * handed to {@code writeInfoDictionary} to produce the final output file.
 *
 * <p>Every failure is reported through {@code exitWithError} with status 2
 * (presumably terminating the JVM - confirm against that helper).
 *
 * @param args the raw command-line arguments
 */
public Main(String[] args) {
    if (args.length == 0) {
        printUsage();
        System.exit(2);
    }

    CmdLineParser parser = new CmdLineParser();
    Option provideXmpOp = parser.addStringOption('p', "xmp-file");
    Option overwriteOp = parser.addBooleanOption('f', "force");
    Option outputOp = parser.addStringOption('o', "output-dir");
    Option doiOp = parser.addStringOption('d', "doi");
    Option searchOp = parser.addBooleanOption('s', "search-for-doi");
    Option copyrightOp = parser.addBooleanOption("no-copyright");
    Option rightsOp = parser.addStringOption("rights-agent");
    Option apiKeyOp = parser.addStringOption("api-key");

    try {
        parser.parse(args);
    } catch (CmdLineParser.OptionException e) {
        printUsage();
        System.exit(2);
    }

    String optionalXmpPath = (String) parser.getOptionValue(provideXmpOp, "");
    String outputDir = (String) parser.getOptionValue(outputOp, "");
    String explicitDoi = (String) parser.getOptionValue(doiOp, "");
    boolean useTheForce = (Boolean) parser.getOptionValue(overwriteOp, Boolean.FALSE);
    boolean searchForDoi = (Boolean) parser.getOptionValue(searchOp, Boolean.FALSE);
    boolean noCopyright = (Boolean) parser.getOptionValue(copyrightOp, Boolean.FALSE);
    String rightsAgent = (String) parser.getOptionValue(rightsOp, "");
    String apiKey = (String) parser.getOptionValue(apiKeyOp, ApiKey.DEFAULT);

    if (!explicitDoi.equals("") && searchForDoi) {
        exitWithError(2, "-d and -s are mutually exclusive options.");
    }

    if (!outputDir.isEmpty() && !new File(outputDir).exists()) {
        exitWithError(2, "The output directory, '" + outputDir + "' does not exist.");
    }

    byte[] optionalXmpData = null;

    if (!optionalXmpPath.equals("")) {
        /* We will take XMP data from a file. */
        FileInfo xmpFile = FileInfo.readFileFully(optionalXmpPath);
        if (xmpFile.missing) {
            exitWithError(2, "Error: File '" + xmpFile.path + "' does not exist.");
        } else if (xmpFile.error != null) {
            exitWithError(2, "Error: Could not read '" + xmpFile.path + "' because of:\n" + xmpFile.error);
        }

        optionalXmpData = xmpFile.data;
    }

    grabber = new MetadataGrabber(apiKey);

    /* Now we're ready to merge our imported or generated XMP data with what
     * is already in each PDF. */

    for (String pdfFilePath : parser.getRemainingArgs()) {
        /* Grab the leaf. File.getName() is used instead of String.split(File.separator)
         * because split() interprets its argument as a regular expression, and the
         * Windows separator '\' is not a valid pattern. */
        String outputPath = new File(getOutFileName(pdfFilePath)).getName();

        if (!outputDir.isEmpty()) {
            outputPath = outputDir + File.separator + outputPath;
        }
        /* Otherwise output goes to the working directory. */

        File pdfFile = new File(pdfFilePath);
        File outputFile = new File(outputPath);

        byte[] resolvedXmpData = null;

        if (!pdfFile.exists()) {
            exitWithError(2, "Error: File '" + pdfFilePath + "' does not exist.");
        }

        if (outputFile.exists() && !useTheForce) {
            exitWithError(2, "Error: File '" + outputPath + "' already exists.\nTry using -f (force).");
        }

        /* The linearization check is only needed when force is not specified. */
        if (!useTheForce) {
            FileInputStream linearizationIn = null;
            try {
                linearizationIn = new FileInputStream(pdfFile);
                if (isLinearizedPdf(linearizationIn)) {
                    exitWithError(2,
                            "Error: '" + pdfFilePath + "' is a" + " linearized PDF and force is not specified."
                                    + " This tool will output non-linearized PDF."
                                    + "\nIf you don't mind that, use -f (force).");
                }
            } catch (IOException e) {
                exitWithError(2, "Error: Could not determine linearization" + " because of:\n" + e);
            } finally {
                closeQuietly(linearizationIn);
            }
        }

        if (!explicitDoi.equals("")) {
            resolvedXmpData = getXmpForDoi(explicitDoi, !noCopyright, rightsAgent);
        }

        String tmpPath = outputFile.getPath() + ".tmp";
        FileInputStream pdfIn = null;
        FileOutputStream tmpOut = null;
        FileInputStream tmpIn = null;

        try {
            new File(tmpPath).deleteOnExit();

            pdfIn = new FileInputStream(pdfFile);
            tmpOut = new FileOutputStream(tmpPath);
            PdfReader reader = new PdfReader(pdfIn);
            PdfStamper stamper = new PdfStamper(reader, tmpOut);

            byte[] merged = reader.getMetadata();

            if (optionalXmpData != null) {
                merged = XmpUtils.mergeXmp(merged, optionalXmpData);
            }

            if (resolvedXmpData != null) {
                merged = XmpUtils.mergeXmp(merged, resolvedXmpData);
            }

            stamper.setXmpMetadata(merged);

            stamper.close();
            reader.close();

            tmpIn = new FileInputStream(tmpPath);
            writeInfoDictionary(tmpIn, outputFile.getPath(), merged);
        } catch (IOException e) {
            exitWithError(2, "Error: Couldn't handle '" + pdfFilePath + "' because of:\n" + e);
        } catch (DocumentException e) {
            exitWithError(2, "Error: Couldn't handle '" + pdfFilePath + "' because of:\n" + e);
        } catch (XmpException e) {
            exitWithError(2, "Error: Couldn't handle '" + pdfFilePath + "' because of:\n" + e);
        } catch (COSVisitorException e) {
            exitWithError(2, "Error: Couldn't write document info dictionary" + " because of:\n" + e);
        } finally {
            /* Closing an already-closed file stream is a no-op, so this is safe even if
             * the stamper or writeInfoDictionary closed some of these themselves. */
            closeQuietly(tmpIn);
            closeQuietly(tmpOut);
            closeQuietly(pdfIn);
        }
    }

    shutDown();
}

/** Closes the given stream if it is non-null, ignoring any failure to close. */
private static void closeQuietly(java.io.Closeable stream) {
    if (stream == null) {
        return;
    }
    try {
        stream.close();
    } catch (IOException ignored) {
        /* Best-effort cleanup: nothing useful can be done if close() fails. */
    }
}

From source file:org.sinekartapdfa.alfresco.utils.PDFTools.java

License:Open Source License

/**
 * metodo di utilita' che verifica se il pdf in input e' un PDF/A
 * /*w  w w  .  j a  va  2  s  .co  m*/
 * @param reader
 * @return
 */
public static boolean isPdfa(InputStream is) {
    if (tracer.isDebugEnabled())
        tracer.debug("checking if PDF is PDF/A");
    PdfReader reader = null;
    ByteArrayInputStream bais = null;
    XMLStreamReader sr = null;
    try {
        reader = new PdfReader(is);
        byte[] metadata = reader.getMetadata();
        if (metadata == null || metadata.length == 0)
            return false;
        bais = new ByteArrayInputStream(metadata);
        sr = XMLInputFactory.newInstance().createXMLStreamReader(bais);
        boolean isConformanceTag = false;
        int eventCode;
        while (sr.hasNext()) {
            eventCode = sr.next();
            String val = null;
            switch (eventCode) {
            case 1:
                val = sr.getLocalName();
                if (val.equals("conformance") && sr.getNamespaceURI().equals("http://www.aiim.org/pdfa/ns/id/"))
                    isConformanceTag = true;
                break;
            case 4:
                val = sr.getText();
                if (isConformanceTag) {
                    if (val.equals("A") || val.equals("B")) {
                        if (tracer.isDebugEnabled())
                            tracer.debug("yes, it is");
                        return true;
                    } else {
                        if (tracer.isDebugEnabled())
                            tracer.debug("no, it isn't");
                        return false;
                    }
                }
                break;
            }
        }
    } catch (Exception e) {
        tracer.error("Unable to read PDF. Unable to check if the pdf is a pdf/a.", e);
        throw new PDFException("Unable to read PDF. Unable to check if the pdf is a pdf/a.", e);
    } finally {
        try {
            if (reader != null)
                reader.close();
        } catch (Exception e) {
            tracer.error("error on pdf reader", e);
        }
        try {
            if (sr != null)
                sr.close();
        } catch (Exception e) {
            tracer.error("error on stax reader", e);
        }
        try {
            if (bais != null)
                bais.close();
        } catch (Exception e) {
            tracer.error("error on input stream", e);
        }
        try {
            if (is != null)
                is.close();
        } catch (Exception e) {
            tracer.error("error on input stream", e);
        }
    }
    if (tracer.isDebugEnabled())
        tracer.debug("no, it isn't");
    return false;
}

From source file:uk.bl.wa.tika.parser.pdf.itext.PDFParser.java

License:Apache License

/**
 * Copies information about the PDF held by {@code reader} into {@code metadata}.
 *
 * <p>The work is done in four independent stages, in this order: the document info
 * dictionary plus reader properties, the XMP packet (including the PDF/A identification
 * schema), the Adobe extension level from the catalog, and finally the normalised
 * title/author entries. A failure in one stage is reported on {@code System.err} and
 * does not prevent the later stages from running; no exception is propagated.
 *
 * @param reader   the opened PDF
 * @param metadata the metadata object to populate
 */
private static void extractMetadata(PdfReader reader, Metadata metadata) {
    HashMap<String, String> info = null;
    try {
        info = reader.getInfo();
        // Clone the PDF info. Keys are lower-cased with a fixed locale so the result
        // does not depend on the platform default (e.g. Turkish dotless i).
        for (java.util.Map.Entry<String, String> entry : info.entrySet()) {
            metadata.set(entry.getKey().toLowerCase(java.util.Locale.ROOT), entry.getValue());
        }
        // Add other data of interest:
        metadata.set("pdf:version", "1." + reader.getPdfVersion());
        metadata.set("pdf:numPages", "" + reader.getNumberOfPages());
        metadata.set("pdf:cryptoMode", "" + getCryptoModeAsString(reader));
        metadata.set("pdf:openedWithFullPermissions", "" + reader.isOpenedWithFullPermissions());
        metadata.set("pdf:encrypted", "" + reader.isEncrypted());
        metadata.set("pdf:metadataEncrypted", "" + reader.isMetadataEncrypted());
        metadata.set("pdf:128key", "" + reader.is128Key());
        metadata.set("pdf:tampered", "" + reader.isTampered());
    } catch (Exception e) {
        reportExtractionFailure(e);
    }

    try {
        extractXmpMetadata(reader, metadata);
    } catch (Exception e) {
        reportExtractionFailure(e);
    }

    try {
        extractAdobeExtensionLevel(reader, metadata);
    } catch (Exception e) {
        reportExtractionFailure(e);
    }

    try {
        // Ensure the normalised metadata are mapped in:
        if (info != null) {
            if (info.get("Title") != null)
                metadata.set(Metadata.TITLE, info.get("Title"));
            if (info.get("Author") != null)
                metadata.set(Metadata.AUTHOR, info.get("Author"));
        }
    } catch (Exception e) {
        reportExtractionFailure(e);
    }
}

/** Parses the XMP packet, if present, and records the PDF/A part and conformance. */
private static void extractXmpMetadata(PdfReader reader, Metadata metadata) throws Exception {
    byte[] xmpmd = reader.getMetadata();
    if (xmpmd == null) {
        return;
    }
    // This is standard Tika code for parsing standard stuff from the XMP:
    JempboxExtractor extractor = new JempboxExtractor(metadata);
    extractor.parse(new ByteArrayInputStream(xmpmd));
    // This is custom XMP-handling code, using the added PDF/A schema class:
    XMPMetadata xmp = XMPMetadata.load(new ByteArrayInputStream(xmpmd));
    xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class);
    XMPSchemaPDFA pdfaxmp = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class);
    if (pdfaxmp == null) {
        return;
    }
    String part = pdfaxmp.getPart();
    String conformance = pdfaxmp.getConformance();
    metadata.set("pdfaid:part", part);
    metadata.set("pdfaid:conformance", conformance);
    // Only override pdf:version when both pieces are present; a schema missing
    // either one would otherwise fail or produce a bogus "A-null..." value.
    if (part != null && conformance != null) {
        metadata.set("pdf:version", "A-" + part + conformance.toLowerCase(java.util.Locale.ROOT));
    }
}

/** Records the Adobe extension level as pdf:version when the catalog declares one. */
private static void extractAdobeExtensionLevel(PdfReader reader, Metadata metadata) {
    PdfDictionary extensions = reader.getCatalog().getAsDict(PdfName.EXTENSIONS);
    if (extensions == null) {
        return;
    }
    PdfDictionary adobeExt = extensions.getAsDict(PdfName.ADBE);
    if (adobeExt == null) {
        return;
    }
    PdfName baseVersion = adobeExt.getAsName(PdfName.BASEVERSION);
    // Both entries are needed to build the version string; skip if either is missing.
    if (baseVersion == null || adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL) == null) {
        return;
    }
    int el = adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL).intValue();
    // substring(1) drops the leading '/' of the PDF name.
    metadata.set("pdf:version", baseVersion.toString().substring(1) + " Adobe Extension Level " + el);
}

/** Reports a stage failure on System.err in the format this parser has always used. */
private static void reportExtractionFailure(Exception e) {
    System.err.println("PDFParser.extractMetadata() caught Exception: " + e.getMessage());
    e.printStackTrace();
}