Example usage for com.itextpdf.text.pdf PdfReader getMetadata

List of usage examples for com.itextpdf.text.pdf.PdfReader.getMetadata()

Introduction

On this page you can find example usages of com.itextpdf.text.pdf.PdfReader.getMetadata().

Prototype

public byte[] getMetadata() throws IOException 

Source Link

Document

Gets the XML metadata.

Usage

From source file:br.unifor.mia.xmpsemantico.xmp.MetadataXmp.java

License:GNU General Public License

/**
 * Reads the XML stream inside a PDF file into an XML file.
 * @param src  A PDF file containing XMP data
 * @param dest XML file containing the XMP data extracted from the PDF
 * @throws IOException/*from ww w .  j  a v  a  2 s  .c om*/
 */
public void readXmpMetadata() throws IOException {
    PdfReader reader = new PdfReader(pathPdf);
    FileOutputStream fos = new FileOutputStream(pathXMP);
    byte[] b = reader.getMetadata();
    fos.write(b, 0, b.length);
    fos.flush();
    fos.close();
    reader.close();
}

From source file:org.crossref.pdfmark.Main.java

License:Open Source License

/**
 * Runs the command-line tool: parses {@code args}, optionally loads XMP data
 * from a file and/or resolves it for an explicit DOI, then merges that XMP
 * into every PDF named on the command line and writes the result next to the
 * configured output directory (or the working directory).
 *
 * <p>All failures are reported through {@code exitWithError} with status 2.
 * NOTE(review): the code relies on {@code exitWithError} not returning
 * (presumably it calls {@code System.exit}) - confirm against its definition.
 *
 * @param args raw command-line arguments; an empty array prints the usage
 *             text and exits with status 2
 */
public Main(String[] args) {
    if (args.length == 0) {
        printUsage();
        System.exit(2);
    }

    CmdLineParser parser = new CmdLineParser();
    Option provideXmpOp = parser.addStringOption('p', "xmp-file");
    Option overwriteOp = parser.addBooleanOption('f', "force");
    Option outputOp = parser.addStringOption('o', "output-dir");
    Option doiOp = parser.addStringOption('d', "doi");
    Option searchOp = parser.addBooleanOption('s', "search-for-doi");
    Option copyrightOp = parser.addBooleanOption("no-copyright");
    Option rightsOp = parser.addStringOption("rights-agent");
    Option apiKeyOp = parser.addStringOption("api-key");

    try {
        parser.parse(args);
    } catch (CmdLineParser.OptionException e) {
        printUsage();
        System.exit(2);
    }

    String optionalXmpPath = (String) parser.getOptionValue(provideXmpOp, "");
    String outputDir = (String) parser.getOptionValue(outputOp, "");
    String explicitDoi = (String) parser.getOptionValue(doiOp, "");
    boolean useTheForce = (Boolean) parser.getOptionValue(overwriteOp, Boolean.FALSE);
    boolean searchForDoi = (Boolean) parser.getOptionValue(searchOp, Boolean.FALSE);
    boolean noCopyright = (Boolean) parser.getOptionValue(copyrightOp, Boolean.FALSE);
    String rightsAgent = (String) parser.getOptionValue(rightsOp, "");
    String apiKey = (String) parser.getOptionValue(apiKeyOp, ApiKey.DEFAULT);

    if (!explicitDoi.equals("") && searchForDoi) {
        exitWithError(2, "-d and -s are mutually exclusive options.");
    }

    if (!outputDir.isEmpty() && !new File(outputDir).exists()) {
        exitWithError(2, "The output directory, '" + outputDir + "' does not exist.");
    }

    byte[] optionalXmpData = null;

    if (!optionalXmpPath.equals("")) {
        /* We will take XMP data from a file. */
        FileInfo xmpFile = FileInfo.readFileFully(optionalXmpPath);
        if (xmpFile.missing) {
            exitWithError(2, "Error: File '" + xmpFile.path + "' does not exist.");
        } else if (xmpFile.error != null) {
            exitWithError(2, "Error: Could not read '" + xmpFile.path + "' because of:\n" + xmpFile.error);
        }

        optionalXmpData = xmpFile.data;
    }

    grabber = new MetadataGrabber(apiKey);

    /* Now we're ready to merge our imported or generated XMP data with what
     * is already in each PDF. */

    for (String pdfFilePath : parser.getRemainingArgs()) {
        /* Grab the leaf. File.getName() is used instead of
         * String.split(File.separator): split() treats its argument as a
         * regular expression, and the Windows separator "\" is not a valid
         * pattern. */
        String outputPath = new File(getOutFileName(pdfFilePath)).getName();

        /* With no output directory the leaf name is used as-is, i.e. the
         * file is written to the working directory. */
        if (!outputDir.isEmpty()) {
            outputPath = outputDir + File.separator + outputPath;
        }

        File pdfFile = new File(pdfFilePath);
        File outputFile = new File(outputPath);
        String tmpPath = outputFile.getPath() + ".tmp";

        byte[] resolvedXmpData = null;

        if (!pdfFile.exists()) {
            exitWithError(2, "Error: File '" + pdfFilePath + "' does not exist.");
        }

        if (outputFile.exists() && !useTheForce) {
            exitWithError(2, "Error: File '" + outputPath + "' already exists.\nTry using -f (force).");
        }

        /* The linearization probe is only needed (and the file only opened)
         * when force is not specified. */
        if (!useTheForce) {
            try {
                FileInputStream linearizationIn = new FileInputStream(pdfFile);
                boolean linearized;
                try {
                    linearized = isLinearizedPdf(linearizationIn);
                } finally {
                    try {
                        linearizationIn.close();
                    } catch (IOException ignored) {
                        /* Nothing useful can be done about a failed close of a read-only stream. */
                    }
                }

                if (linearized) {
                    exitWithError(2,
                            "Error: '" + pdfFilePath + "' is a" + " linearized PDF and force is not specified."
                                    + " This tool will output non-linearized PDF."
                                    + "\nIf you don't mind that, use -f (force).");
                }
            } catch (IOException e) {
                exitWithError(2, "Error: Could not determine linearization" + " because of:\n" + e);
            }
        }

        if (!explicitDoi.equals("")) {
            resolvedXmpData = getXmpForDoi(explicitDoi, !noCopyright, rightsAgent);
        }

        FileInputStream fileIn = null;
        FileOutputStream fileOut = null;
        FileInputStream tmpIn = null;

        try {
            new File(tmpPath).deleteOnExit();

            fileIn = new FileInputStream(pdfFile);
            fileOut = new FileOutputStream(tmpPath);
            PdfReader reader = new PdfReader(fileIn);
            PdfStamper stamper = new PdfStamper(reader, fileOut);

            /* Start from the XMP already in the PDF, then layer the
             * file-provided and DOI-resolved packets on top, in that order. */
            byte[] merged = reader.getMetadata();

            if (optionalXmpData != null) {
                merged = XmpUtils.mergeXmp(merged, optionalXmpData);
            }

            if (resolvedXmpData != null) {
                merged = XmpUtils.mergeXmp(merged, resolvedXmpData);
            }

            stamper.setXmpMetadata(merged);

            stamper.close();
            reader.close();

            /* Second pass: re-read the stamped temporary file and write the
             * final output with its info dictionary. */
            tmpIn = new FileInputStream(tmpPath);
            writeInfoDictionary(tmpIn, outputFile.getPath(), merged);
        } catch (IOException e) {
            exitWithError(2, "Error: Couldn't handle '" + pdfFilePath + "' because of:\n" + e);
        } catch (DocumentException e) {
            exitWithError(2, "Error: Couldn't handle '" + pdfFilePath + "' because of:\n" + e);
        } catch (XmpException e) {
            exitWithError(2, "Error: Couldn't handle '" + pdfFilePath + "' because of:\n" + e);
        } catch (COSVisitorException e) {
            exitWithError(2, "Error: Couldn't write document info dictionary" + " because of:\n" + e);
        } finally {
            /* Best-effort cleanup so file handles are not leaked across
             * iterations; closing an already-closed stream is harmless. */
            try {
                if (tmpIn != null) {
                    tmpIn.close();
                }
            } catch (IOException ignored) {
                /* Ignored: cleanup only. */
            }
            try {
                if (fileOut != null) {
                    fileOut.close();
                }
            } catch (IOException ignored) {
                /* Ignored: cleanup only. */
            }
            try {
                if (fileIn != null) {
                    fileIn.close();
                }
            } catch (IOException ignored) {
                /* Ignored: cleanup only. */
            }
        }
    }

    shutDown();
}

From source file:org.sinekartapdfa.alfresco.utils.PDFTools.java

License:Open Source License

/**
 * metodo di utilita' che verifica se il pdf in input e' un PDF/A
 * /*w  w w  .  j a  va  2  s  .co  m*/
 * @param reader
 * @return
 */
public static boolean isPdfa(InputStream is) {
    if (tracer.isDebugEnabled())
        tracer.debug("checking if PDF is PDF/A");
    PdfReader reader = null;
    ByteArrayInputStream bais = null;
    XMLStreamReader sr = null;
    try {
        reader = new PdfReader(is);
        byte[] metadata = reader.getMetadata();
        if (metadata == null || metadata.length == 0)
            return false;
        bais = new ByteArrayInputStream(metadata);
        sr = XMLInputFactory.newInstance().createXMLStreamReader(bais);
        boolean isConformanceTag = false;
        int eventCode;
        while (sr.hasNext()) {
            eventCode = sr.next();
            String val = null;
            switch (eventCode) {
            case 1:
                val = sr.getLocalName();
                if (val.equals("conformance") && sr.getNamespaceURI().equals("http://www.aiim.org/pdfa/ns/id/"))
                    isConformanceTag = true;
                break;
            case 4:
                val = sr.getText();
                if (isConformanceTag) {
                    if (val.equals("A") || val.equals("B")) {
                        if (tracer.isDebugEnabled())
                            tracer.debug("yes, it is");
                        return true;
                    } else {
                        if (tracer.isDebugEnabled())
                            tracer.debug("no, it isn't");
                        return false;
                    }
                }
                break;
            }
        }
    } catch (Exception e) {
        tracer.error("Unable to read PDF. Unable to check if the pdf is a pdf/a.", e);
        throw new PDFException("Unable to read PDF. Unable to check if the pdf is a pdf/a.", e);
    } finally {
        try {
            if (reader != null)
                reader.close();
        } catch (Exception e) {
            tracer.error("error on pdf reader", e);
        }
        try {
            if (sr != null)
                sr.close();
        } catch (Exception e) {
            tracer.error("error on stax reader", e);
        }
        try {
            if (bais != null)
                bais.close();
        } catch (Exception e) {
            tracer.error("error on input stream", e);
        }
        try {
            if (is != null)
                is.close();
        } catch (Exception e) {
            tracer.error("error on input stream", e);
        }
    }
    if (tracer.isDebugEnabled())
        tracer.debug("no, it isn't");
    return false;
}

From source file:uk.bl.wa.tika.parser.pdf.itext.PDFParser.java

License:Apache License

/**
 * Copies document information from an opened PDF into the given metadata
 * object: the info dictionary entries (lower-cased keys), a set of
 * {@code pdf:*} properties read from the reader, XMP-derived values
 * (including PDF/A part and conformance when present), the Adobe extension
 * level when declared, and finally the normalised title and author.
 *
 * <p>Any exception is reported on {@code System.err} and swallowed, so
 * extraction is best-effort and whatever was set before the failure is kept.
 *
 * @param reader   the opened PDF to inspect
 * @param metadata the target that receives the extracted values
 */
private static void extractMetadata(PdfReader reader, Metadata metadata) {
    try {
        HashMap<String, String> map = reader.getInfo();
        // Clone the PDF info. Keys are lower-cased with a fixed locale so the
        // resulting property names do not depend on the JVM's default locale.
        for (String key : map.keySet()) {
            metadata.set(key.toLowerCase(java.util.Locale.ROOT), map.get(key));
        }
        // Add other data of interest:
        metadata.set("pdf:version", "1." + reader.getPdfVersion());
        metadata.set("pdf:numPages", "" + reader.getNumberOfPages());
        metadata.set("pdf:cryptoMode", "" + getCryptoModeAsString(reader));
        metadata.set("pdf:openedWithFullPermissions", "" + reader.isOpenedWithFullPermissions());
        metadata.set("pdf:encrypted", "" + reader.isEncrypted());
        metadata.set("pdf:metadataEncrypted", "" + reader.isMetadataEncrypted());
        metadata.set("pdf:128key", "" + reader.is128Key());
        metadata.set("pdf:tampered", "" + reader.isTampered());
        // Also grab XMP metadata, if present:
        byte[] xmpmd = reader.getMetadata();
        if (xmpmd != null) {
            // This is standard Tika code for parsing standard stuff from the XMP:
            JempboxExtractor extractor = new JempboxExtractor(metadata);
            extractor.parse(new ByteArrayInputStream(xmpmd));
            // This is custom XMP-handling code:
            XMPMetadata xmp = XMPMetadata.load(new ByteArrayInputStream(xmpmd));
            // Register the PDF/A schema class so it can be looked up by class below.
            xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class);
            XMPSchemaPDFA pdfaxmp = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class);
            if (pdfaxmp != null) {
                String conformance = pdfaxmp.getConformance();
                metadata.set("pdfaid:part", pdfaxmp.getPart());
                metadata.set("pdfaid:conformance", conformance);
                // Only override pdf:version when a conformance level is actually
                // declared; a missing value must not abort the rest of the extraction.
                if (conformance != null) {
                    String version = "A-" + pdfaxmp.getPart() + conformance.toLowerCase(java.util.Locale.ROOT);
                    metadata.set("pdf:version", version);
                }
            }
        }
        // Attempt to determine Adobe extension level:
        PdfDictionary extensions = reader.getCatalog().getAsDict(PdfName.EXTENSIONS);
        if (extensions != null) {
            PdfDictionary adobeExt = extensions.getAsDict(PdfName.ADBE);
            if (adobeExt != null) {
                PdfName baseVersion = adobeExt.getAsName(PdfName.BASEVERSION);
                // Both entries must be present; an incomplete extensions
                // dictionary is skipped rather than failing the whole method.
                if (baseVersion != null && adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL) != null) {
                    int el = adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL).intValue();
                    metadata.set("pdf:version",
                            baseVersion.toString().substring(1) + " Adobe Extension Level " + el);
                }
            }
        }
        // Ensure the normalised metadata are mapped in:
        if (map.get("Title") != null)
            metadata.set(Metadata.TITLE, map.get("Title"));
        if (map.get("Author") != null)
            metadata.set(Metadata.AUTHOR, map.get("Author"));
    } catch (Exception e) {
        System.err.println("PDFParser.extractMetadata() caught Exception: " + e.getMessage());
        e.printStackTrace();
    }
}