Example usage for org.apache.pdfbox.pdmodel PDDocumentNameDictionary getEmbeddedFiles

Introduction

This page collects example usages of the getEmbeddedFiles method of org.apache.pdfbox.pdmodel.PDDocumentNameDictionary.

Prototype

public PDEmbeddedFilesNameTreeNode getEmbeddedFiles()

Source Link

Document

Gets the embedded files name tree node.

Usage

From source file:algorithm.PDFFileAttacher.java

License:Apache License

/**
 * Attaches the given payload files to the PDF stored in {@code outputFile} and
 * saves the document back to that same file.
 * <p>
 * A new embedded-files name tree is installed on the document catalog; each file
 * specification is registered under the key {@code "PericlesMetadata-" + index},
 * where the index is its position in the list returned by
 * {@code getFileSpecifications}.
 *
 * @param outputFile  the PDF that is loaded, modified and then overwritten
 * @param payloadList the files to attach
 * @throws IOException if the document cannot be loaded, saved or closed
 */
private void attachAll(File outputFile, List<File> payloadList) throws IOException {
    PDDocument document = PDDocument.load(outputFile);
    try {
        List<PDComplexFileSpecification> fileSpecifications = getFileSpecifications(document, payloadList);
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());

        Map<String, COSObjectable> fileMap = new HashMap<String, COSObjectable>();
        for (int i = 0; i < fileSpecifications.size(); i++) {
            fileMap.put("PericlesMetadata-" + i, fileSpecifications.get(i));
        }

        // The tree is always created from scratch (the previous code fetched the
        // existing tree and immediately discarded it), so any embedded-files tree
        // already present in the dictionary is replaced.
        PDEmbeddedFilesNameTreeNode filesTree = new PDEmbeddedFilesNameTreeNode();
        filesTree.setNames(fileMap);
        namesDictionary.setEmbeddedFiles(filesTree);
        document.getDocumentCatalog().setNames(namesDictionary);

        try {
            document.save(outputFile);
        } catch (COSVisitorException e) {
            // A failed save must not go unnoticed: the caller would otherwise assume
            // that the payload has been attached.
            throw new IOException("Could not save PDF with attachments: " + outputFile, e);
        }
    } finally {
        // Release the document even when attaching or saving fails.
        document.close();
    }
}

From source file:algorithm.PDFFileAttacher.java

License:Apache License

/**
 * Restores the payload files that were attached to a PDF carrier.
 * <p>
 * The carrier is first obtained through {@code getRestoredCarrier}. Attachments
 * are then looked up in the embedded-files name tree under the keys
 * {@code "PericlesMetadata-0"}, {@code "PericlesMetadata-1"}, ... until a key is
 * missing; each one is written below {@code RESTORED_DIRECTORY} using the file
 * name recorded in its file specification. Finally every restored file (payloads
 * and carrier) is linked to this algorithm and to all other restored files.
 *
 * @param originalPdf the PDF carrier to restore from
 * @return the restored payload files followed by the restored carrier
 * @throws IOException if the PDF cannot be read or a payload cannot be written
 */
@Override
public List<RestoredFile> restore(File originalPdf) throws IOException {
    RestoredFile copiedPdf = getRestoredCarrier(originalPdf);
    List<RestoredFile> restoredFiles = new ArrayList<RestoredFile>();
    PDDocument document = PDDocument.load(copiedPdf);
    try {
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
        PDEmbeddedFilesNameTreeNode filesTree = namesDictionary.getEmbeddedFiles();
        if (filesTree != null) {
            for (int i = 0;; i++) {
                PDComplexFileSpecification fileSpecification = (PDComplexFileSpecification) filesTree
                        .getValue("PericlesMetadata-" + i);
                if (fileSpecification == null) {
                    // Keys are consecutive, so the first missing index ends the scan.
                    break;
                }
                File oldAttachedFile = new File(fileSpecification.getFile());
                RestoredFile restoredPayload = new RestoredFile(RESTORED_DIRECTORY + oldAttachedFile.getName());
                PDEmbeddedFile embeddedFile = fileSpecification.getEmbeddedFile();

                // Both streams are closed before the file is post-processed, so the
                // payload is completely written and no handle is leaked on failure.
                InputStream inputStream = embeddedFile.createInputStream();
                try {
                    FileOutputStream outputStream = new FileOutputStream(restoredPayload);
                    try {
                        IOUtils.copy(inputStream, outputStream);
                    } finally {
                        outputStream.close();
                    }
                } finally {
                    inputStream.close();
                }

                removeBuggyLineEnding(restoredPayload);
                restoredPayload.wasPayload = true;
                restoredPayload.checksumValid = true;
                restoredPayload.restorationNote = "Checksum wasn't calculated, because this algorithm isn't using restoration metadata. The original payload file survives the encapsulation with this algorithm.";
                restoredFiles.add(restoredPayload);
            }
        }
    } finally {
        // Release the document even when reading an attachment fails.
        document.close();
    }
    copiedPdf.wasCarrier = true;
    copiedPdf.checksumValid = false;
    copiedPdf.restorationNote = "Checksum can't be valid, because attached payload files can't be removed from carrier.";
    restoredFiles.add(copiedPdf);
    // Cross-link: every restored file knows this algorithm and all of its siblings.
    for (RestoredFile file : restoredFiles) {
        file.algorithm = this;
        for (RestoredFile relatedFile : restoredFiles) {
            if (file != relatedFile) {
                file.relatedFiles.add(relatedFile);
            }
        }
    }
    return restoredFiles;
}

From source file:dev.ztgnrw.ExtractEmbeddedFiles.java

License:Apache License

/**
 * This is the main method./* ww w .ja  v  a2s  .co m*/
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void extractEmbeddedFiles(String file) throws IOException {

    PDDocument document = null;
    try {
        File pdfFile = new File(file);
        String filePath = pdfFile.getParent() + System.getProperty("file.separator");
        document = PDDocument.load(pdfFile);
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
        PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
        if (efTree != null) {
            Map<String, PDComplexFileSpecification> names = efTree.getNames();
            if (names != null) {
                extractFiles(names, filePath);
            } else {
                List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
                for (PDNameTreeNode<PDComplexFileSpecification> node : kids) {
                    names = node.getNames();
                    extractFiles(names, filePath);
                }
            }
        }

        // extract files from annotations
        for (PDPage page : document.getPages()) {
            for (PDAnnotation annotation : page.getAnnotations()) {
                if (annotation instanceof PDAnnotationFileAttachment) {
                    PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                    PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment
                            .getFile();
                    PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
                    extractFile(filePath, fileSpec.getFilename(), embeddedFile);
                }
            }
        }

    } finally {
        if (document != null) {
            document.close();
        }
    }

}

From source file:eu.europa.ejusticeportal.dss.controller.signature.PdfUtils.java

License:EUPL

/**
 * Extracts an attachment from a PDF.
 * <p>
 * Any trailing {@code \r\n} sequences are removed from the returned bytes,
 * because PdfBox ({@code PDDocument.saveIncremental}) appends them. The removal
 * works directly on the bytes, so binary attachments are not altered by a
 * character-set round trip.
 *
 * @param pdf the PDF; this is unaffected by the method
 * @param name the name of the attachment
 * @return the attachment, or {@code null} if the PDF has no embedded file with that name
 * @throws SigningException if the PDF cannot be read
 */
public static byte[] extractAttachment(final byte[] pdf, final String name) {
    PDDocument doc = null;
    try {
        InputStream is = new ByteArrayInputStream(pdf);
        doc = PDDocument.load(is);
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(doc.getDocumentCatalog());
        PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
        if (efTree == null) {
            // The document has no embedded-files tree at all.
            return null;
        }
        Map<String, COSObjectable> names = efTree.getNames();
        if (names == null) {
            return null;
        }
        PDComplexFileSpecification attach = (PDComplexFileSpecification) names.get(name);
        if (attach == null || attach.getEmbeddedFile() == null) {
            // Unknown attachment name, or a specification without an embedded stream.
            return null;
        }
        byte[] data = attach.getEmbeddedFile().getByteArray();
        if (data == null) {
            return null;
        }

        // Remove the \r\n added by PdfBox (PDDocument.saveIncremental).
        int end = data.length;
        while (end >= 2 && data[end - 2] == '\r' && data[end - 1] == '\n') {
            end -= 2;
        }
        if (end == data.length) {
            return data;
        }
        byte[] trimmed = new byte[end];
        System.arraycopy(data, 0, trimmed, 0, end);
        return trimmed;

    } catch (IOException e) {
        LOGGER.error("Error detaching.", e);
        throw new SigningException(e);
    } finally {
        closeQuietly(doc);
    }
}

From source file:eu.europa.ejusticeportal.dss.controller.signature.PdfUtils.java

License:EUPL

/**
 * Get a map of file name/byte for all embedded files in the pdf
 * /*from   w ww.  j  ava  2s. c  o m*/
 * @param doc the pdf
 * @return the map of attachments
 * @throws IOException
 */
private static Map<String, byte[]> getAttachments(PDDocument doc) throws IOException {
    PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
    Map<String, byte[]> attachments = new LinkedHashMap<String, byte[]>();
    if (names.getEmbeddedFiles() != null && names.getEmbeddedFiles().getNames() != null) {
        for (String key : names.getEmbeddedFiles().getNames().keySet()) {
            attachments.put(key, ((PDComplexFileSpecification) names.getEmbeddedFiles().getNames().get(key))
                    .getEmbeddedFile().getByteArray());
        }
    }
    return attachments;
}

From source file:eu.europa.ejusticeportal.dss.controller.signature.PdfUtils.java

License:EUPL

/**
 * Get the names of all embedded files in the pdf.
 * <p>
 * Only the names stored directly on the embedded-files name tree are read. The
 * result is empty when the document has no embedded-files tree or the tree has
 * no names.
 *
 * @param doc the pdf
 * @return the set of attachment names
 * @throws IOException if the name tree cannot be read
 */
private static Set<String> getAttachmentsNames(PDDocument doc) throws IOException {
    PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
    Set<String> attachments = new HashSet<String>();
    if (names.getEmbeddedFiles() == null) {
        return attachments;
    }
    // Resolve the name map once; its keys are the attachment names.
    Map<String, ?> fileSpecs = names.getEmbeddedFiles().getNames();
    if (fileSpecs != null) {
        attachments.addAll(fileSpecs.keySet());
    }
    return attachments;
}

From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceExtractor.java

License:Open Source License

/**
 * Returns the embedded-files name tree of the given name dictionary.
 *
 * @param names the document name dictionary to query
 * @return the embedded-files name tree, never {@code null}
 * @throws InvoiceExtractionError with {@code NO_FILE} when the dictionary has no embedded-files tree
 */
private static final PDEmbeddedFilesNameTreeNode listEmbeddedFiles(PDDocumentNameDictionary names) {
    final PDEmbeddedFilesNameTreeNode tree = names.getEmbeddedFiles();
    if (tree != null) {
        return tree;
    }
    throw new InvoiceExtractionError(NO_FILE);
}

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java

License:Apache License

/**
 * Hands the documents embedded in a PDF to an {@code EmbeddedDocumentExtractor}.
 * <p>
 * The extractor is taken from the parse context; when the context has none, a
 * {@code ParsingEmbeddedDocumentExtractor} is created. Names are read from the
 * embedded-files tree itself or, if it has none, from each of its direct kids.
 * This mirrors pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java; deeper levels
 * of the tree are not searched.
 *
 * @param document the PDF document to inspect
 * @param handler  the content handler (not used by the statements in this method)
 */
private void extractEmbeddedDocuments(PDDocument document, ContentHandler handler)
        throws IOException, SAXException, TikaException {
    PDDocumentNameDictionary nameDictionary = document.getDocumentCatalog().getNames();
    if (nameDictionary == null) {
        return;
    }
    PDEmbeddedFilesNameTreeNode fileTree = nameDictionary.getEmbeddedFiles();
    if (fileTree == null) {
        return;
    }

    // Prefer an extractor supplied through the context, fall back to the parsing one.
    EmbeddedDocumentExtractor extractor = context.get(EmbeddedDocumentExtractor.class);
    if (extractor == null) {
        extractor = new ParsingEmbeddedDocumentExtractor(context);
    }

    Map<String, COSObjectable> topLevelNames = fileTree.getNames();
    if (topLevelNames != null) {
        processEmbeddedDocNames(topLevelNames, extractor);
        return;
    }

    // No names on the root node: look one level down, at the direct kids only.
    List<PDNameTreeNode> children = fileTree.getKids();
    if (children != null) {
        for (PDNameTreeNode child : children) {
            Map<String, COSObjectable> namesOfChild = child.getNames();
            if (namesOfChild != null) {
                processEmbeddedDocNames(namesOfChild, extractor);
            }
        }
    }
}

From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * A Catalog shall not contain the EmbeddedFiles entry.
 * /*from   w w w  .ja  v a  2  s . co  m*/
 * @param handler
 * @param catalog
 * @param result
 * @throws ValidationException
 */
protected void validateNames(DocumentHandler handler, PDDocumentCatalog catalog, List<ValidationError> result)
        throws ValidationException {
    PDDocumentNameDictionary names = catalog.getNames();
    if (names != null) {
        PDEmbeddedFilesNameTreeNode efs = names.getEmbeddedFiles();
        if (efs != null) {
            result.add(new ValidationError(ERROR_SYNTAX_TRAILER_CATALOG_EMBEDDEDFILES,
                    "EmbeddedFile entry is present in the Names dictionary"));
        }
    }
}

From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java

License:Apache License

/**
 * Processes the documents embedded in a PDF.
 * <p>
 * Names are read from the embedded-files tree itself or, if it has none, from
 * each of its direct kids. This mirrors
 * pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java; a fully recursive search
 * of the tree is not performed.
 *
 * @param document the PDF document to inspect
 */
private void extractEmbeddedDocuments(PDDocument document) throws IOException, SAXException, TikaException {
    PDEmbeddedFilesNameTreeNode fileTree = new PDDocumentNameDictionary(document.getDocumentCatalog())
            .getEmbeddedFiles();
    if (fileTree == null) {
        return;
    }

    Map<String, PDComplexFileSpecification> rootNames = fileTree.getNames();
    if (rootNames != null) {
        processEmbeddedDocNames(rootNames);
        return;
    }

    // No names on the root node: look one level down, at the direct kids only.
    List<PDNameTreeNode<PDComplexFileSpecification>> children = fileTree.getKids();
    if (children != null) {
        for (PDNameTreeNode<PDComplexFileSpecification> child : children) {
            Map<String, PDComplexFileSpecification> childNames = child.getNames();
            if (childNames != null) {
                processEmbeddedDocNames(childNames);
            }
        }
    }
}