Example usage for org.apache.pdfbox.pdmodel PDDocumentInformation getCustomMetadataValue

List of usage examples for org.apache.pdfbox.pdmodel PDDocumentInformation getCustomMetadataValue

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocumentInformation getCustomMetadataValue.

Prototype

public String getCustomMetadataValue(String fieldName) 

Source Link

Document

This will get the value of a custom metadata information field for the document.

Usage

From source file:adams.flow.transformer.PDFMetaData.java

License:Open Source License

/**
 * Executes the flow item./*from ww w  .j a v a 2 s  .  c om*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    File file;
    SpreadSheet sheet;
    PDDocument document;
    PDDocumentInformation info;
    Row row;
    Set<String> keys;

    result = null;

    // get file
    if (m_InputToken.getPayload() instanceof File)
        file = (File) m_InputToken.getPayload();
    else
        file = new PlaceholderFile((String) m_InputToken.getPayload());

    sheet = new DefaultSpreadSheet();
    sheet.setDataRowClass(SparseDataRow.class);
    sheet.setName("Meta-Data: " + file.getAbsolutePath());

    try {
        row = sheet.addRow();
        document = PDDocument.load(file.getAbsoluteFile());
        info = document.getDocumentInformation();

        addCell(row, "Title", info.getTitle());
        addCell(row, "Subject", info.getSubject());
        addCell(row, "Author", info.getAuthor());
        addCell(row, "Keywords", info.getKeywords());
        addCell(row, "Producer", info.getProducer());
        addCell(row, "Creation Date", info.getCreationDate());
        addCell(row, "Modification Date", info.getModificationDate());
        addCell(row, "Creator", info.getCreator());
        addCell(row, "Trapped", info.getTrapped());
        keys = info.getMetadataKeys();
        for (String key : keys)
            addCell(row, "Meta-" + key, info.getCustomMetadataValue(key));
    } catch (Exception e) {
        result = handleException("Failed to extract meta-data: ", e);
    }

    if (result == null)
        m_OutputToken = new Token(sheet);

    return result;
}

From source file:com.sustainalytics.crawlerfilter.PDFTitleGeneration.java

License:Apache License

/**
 * Extracts the creation date of a PDF file. When the document carries no
 * creation date, the custom metadata field {@code customdate} (expected in
 * {@code MM/dd/yyyy} format) is used instead.
 *
 * @param file the PDF file to read
 * @return the date as {@code year-month-day} (month is 1-based, no zero
 *         padding), or an empty string if the file could not be read or
 *         no usable date was found
 */
public static String extractDate(File file) {
    PDDocument document = null;
    String creationDateMetaData = "";
    try {
        document = PDDocument.load(file.toString());

        /* Work around for encrypted PDFs: try the empty password */
        if (document.isEncrypted()) {
            logger.info("File " + file.getName() + " is encrypted. Trying to decrypt...");
            try {
                document.decrypt("");
                document.setAllSecurityToBeRemoved(true);
                logger.info("File " + file.getName() + " successfully decrypted!");
            } catch (CryptographyException e) {
                // Extraction is still attempted below, as before; the
                // document is closed in the finally block either way.
                logger.info("Error decrypting file " + file.getName());
            }
        }

        /* Metadata extraction: we are only interested in date data */
        PDDocumentInformation info = document.getDocumentInformation();
        Calendar calendar = info.getCreationDate();

        /* No creation date is found, but sometimes there is a custom date */
        if (calendar == null) {
            String customDateValue = info.getCustomMetadataValue("customdate");
            // the custom field is optional, so only parse it when present
            if (customDateValue != null) {
                try {
                    Date customDate = new SimpleDateFormat("MM/dd/yyyy").parse(customDateValue);
                    calendar = Calendar.getInstance();
                    calendar.setTime(customDate);
                } catch (ParseException e) {
                    logger.info("Error parsing date from custom date");
                }
            }
        }

        if (calendar != null) {
            // Calendar.MONTH is zero-based, hence the + 1
            creationDateMetaData = calendar.get(Calendar.YEAR) + "-" + (calendar.get(Calendar.MONTH) + 1) + "-"
                    + calendar.get(Calendar.DATE);
        }
    } catch (IOException e) { /* The PDF could not be read, maybe it is damaged */
        logger.info("Error processing file " + file.getName());
    } finally {
        /* Close the document whenever it was opened, even after a failure */
        if (document != null) {
            try {
                document.close();
                logger.info("File " + file.getName() + " is closed successfully!");
            } catch (IOException e) {
                logger.info("Error closing file " + file.getName());
            }
        }
    }
    return creationDateMetaData;
}

From source file:org.codelibs.fess.crawler.extractor.impl.PdfExtractor.java

License:Apache License

/**
 * Passes every entry of the document information of the given PDF to
 * {@code addMetadata}, together with the target extract data.
 * Does nothing when the document has no document information.
 *
 * @param document the PDF document to read the information from
 * @param extractData the extract data handed to {@code addMetadata}
 */
private void extractMetadata(final PDDocument document, final ExtractData extractData) {
    final PDDocumentInformation docInfo = document.getDocumentInformation();
    if (docInfo != null) {
        for (final String metadataKey : docInfo.getMetadataKeys()) {
            addMetadata(extractData, metadataKey, docInfo.getCustomMetadataValue(metadataKey));
        }
    }
}

From source file:org.lockss.plugin.georgthiemeverlag.GeorgThiemeVerlagPdfFilterFactory.java

License:Open Source License

/**
 * Transforms the given PDF document: unsets the modification date and the
 * metadata, normalizes the trailer id, clears the custom information
 * entries {@code PDFDATE} and {@code PDFUSER} when they are set, and
 * removes from each page's token stream the token range reported by the
 * state machine worker.
 *
 * @param au the archival unit (not used by this implementation)
 * @param pdfDocument the document to transform; must be a {@code GtvPdfBoxDocument}
 * @throws PdfException if processing the document fails
 */
@Override
public void transform(ArchivalUnit au, PdfDocument pdfDocument) throws PdfException {
    pdfDocument.unsetModificationDate();
    PdfUtil.normalizeTrailerId(pdfDocument);
    pdfDocument.unsetMetadata();

    // clear the custom information entries, but only those actually present
    PDDocumentInformation docInfo = ((GtvPdfBoxDocument) pdfDocument).getPdDocumentInformation();
    for (String customKey : new String[] { GtvPdfBoxDocument.PDFDATE, GtvPdfBoxDocument.PDFUSER }) {
        if (docInfo.getCustomMetadataValue(customKey) != null) {
            docInfo.setCustomMetadataValue(customKey, null);
        }
    }

    PdfStateMachineWorker worker = new PdfStateMachineWorker();
    boolean transformed = false;
    for (PdfPage page : pdfDocument.getPages()) {
        PdfTokenStream stream = page.getPageTokenStream();
        worker.process(stream);
        if (!worker.getResult()) {
            continue;
        }
        transformed = true;
        // drop the tokens from begin to end (inclusive), including text markers
        List<PdfToken> pageTokens = stream.getTokens();
        pageTokens.subList(worker.getBegin(), worker.getEnd() + 1).clear();
        stream.setTokens(pageTokens);
    }

    if (log.isDebug2()) {
        log.debug2("Transform: " + transformed);
    }
}

From source file:org.pdfmetamodifier.MetadataHelper.java

License:Apache License

/**
 * Convert Metadata object to list of lines.
 * /* w  ww  .  ja v a2s .  c  o  m*/
 * @param metadata
 *            Source Metadata object.
 * @return list of lines with Metadata representation.
 */
public static List<String> metadataToLineList(final PDDocumentInformation documentInformation) {
    final List<String> lineList = new ArrayList<>();

    if (documentInformation != null) {
        final List<String> matadataKeys = new ArrayList<>(documentInformation.getMetadataKeys());
        Collections.sort(matadataKeys);

        for (String key : matadataKeys) {
            final String value = documentInformation.getCustomMetadataValue(key);
            if (value != null) {
                lineList.add(String.format(METADATA_LINE_TEMPLATE, key, value));
            }
        }
    }

    return lineList;
}