List of usage examples for org.apache.pdfbox.pdmodel.PDDocumentNameDictionary#getEmbeddedFiles
public PDEmbeddedFilesNameTreeNode getEmbeddedFiles()
From source file:algorithm.PDFFileAttacher.java
License:Apache License
private void attachAll(File outputFile, List<File> payloadList) throws IOException { PDDocument document = PDDocument.load(outputFile); List<PDComplexFileSpecification> fileSpecifications = getFileSpecifications(document, payloadList); PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode filesTree = namesDictionary.getEmbeddedFiles(); filesTree = new PDEmbeddedFilesNameTreeNode(); Map<String, COSObjectable> fileMap = new HashMap<String, COSObjectable>(); for (int i = 0; i < fileSpecifications.size(); i++) { fileMap.put("PericlesMetadata-" + i, fileSpecifications.get(i)); }//from w ww .j a v a2 s. c o m filesTree.setNames(fileMap); namesDictionary.setEmbeddedFiles(filesTree); document.getDocumentCatalog().setNames(namesDictionary); try { document.save(outputFile); } catch (COSVisitorException e) { } document.close(); }
From source file:algorithm.PDFFileAttacher.java
License:Apache License
/**
 * Restores the payload files that were attached to a carrier PDF.
 * <p>
 * Attachments are looked up in the embedded-files name tree under the keys
 * {@code "PericlesMetadata-0"}, {@code "PericlesMetadata-1"}, ... until a key
 * is missing. Each one is written to {@code RESTORED_DIRECTORY} under the
 * file name recorded in its file specification. The carrier obtained from
 * {@code getRestoredCarrier} is appended to the result as the last element,
 * and every returned file is linked to all the others via {@code relatedFiles}.
 *
 * @param originalPdf the carrier PDF to restore from
 * @return the restored payload files followed by the restored carrier
 * @throws IOException if the PDF cannot be read or a payload cannot be written
 */
@Override
public List<RestoredFile> restore(File originalPdf) throws IOException {
    RestoredFile copiedPdf = getRestoredCarrier(originalPdf);
    List<RestoredFile> restoredFiles = new ArrayList<RestoredFile>();
    PDDocument document = PDDocument.load(copiedPdf);
    try {
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
        PDEmbeddedFilesNameTreeNode filesTree = namesDictionary.getEmbeddedFiles();
        if (filesTree != null) {
            for (int i = 0;; i++) {
                PDComplexFileSpecification fileSpecification = (PDComplexFileSpecification) filesTree
                        .getValue("PericlesMetadata-" + i);
                if (fileSpecification == null) {
                    break;
                }
                // Only the bare file name is used, so the payload always lands in RESTORED_DIRECTORY.
                File oldAttachedFile = new File(fileSpecification.getFile());
                RestoredFile restoredPayload = new RestoredFile(RESTORED_DIRECTORY + oldAttachedFile.getName());
                PDEmbeddedFile embeddedFile = fileSpecification.getEmbeddedFile();
                InputStream inputStream = embeddedFile.createInputStream();
                try {
                    FileOutputStream outputStream = new FileOutputStream(restoredPayload);
                    try {
                        IOUtils.copy(inputStream, outputStream);
                    } finally {
                        outputStream.close();
                    }
                } finally {
                    inputStream.close();
                }
                // Both streams are closed before the restored file is post-processed.
                removeBuggyLineEnding(restoredPayload);
                restoredPayload.wasPayload = true;
                restoredPayload.checksumValid = true;
                restoredPayload.restorationNote = "Checksum wasn't calculated, because this algorithm isn't using restoration metadata. The original payload file survives the encapsulation with this algorithm.";
                restoredFiles.add(restoredPayload);
            }
        }
    } finally {
        // Release the document even when reading an attachment fails.
        document.close();
    }

    copiedPdf.wasCarrier = true;
    copiedPdf.checksumValid = false;
    copiedPdf.restorationNote = "Checksum can't be valid, because attached payload files can't be removed from carrier.";
    restoredFiles.add(copiedPdf);

    for (RestoredFile file : restoredFiles) {
        file.algorithm = this;
        for (RestoredFile relatedFile : restoredFiles) {
            if (file != relatedFile) {
                file.relatedFiles.add(relatedFile);
            }
        }
    }
    return restoredFiles;
}
From source file:dev.ztgnrw.ExtractEmbeddedFiles.java
License:Apache License
/** * This is the main method./* ww w .ja v a2s .co m*/ * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. */ public static void extractEmbeddedFiles(String file) throws IOException { PDDocument document = null; try { File pdfFile = new File(file); String filePath = pdfFile.getParent() + System.getProperty("file.separator"); document = PDDocument.load(pdfFile); PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles(); if (efTree != null) { Map<String, PDComplexFileSpecification> names = efTree.getNames(); if (names != null) { extractFiles(names, filePath); } else { List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids(); for (PDNameTreeNode<PDComplexFileSpecification> node : kids) { names = node.getNames(); extractFiles(names, filePath); } } } // extract files from annotations for (PDPage page : document.getPages()) { for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationFileAttachment) { PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation; PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment .getFile(); PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec); extractFile(filePath, fileSpec.getFilename(), embeddedFile); } } } } finally { if (document != null) { document.close(); } } }
From source file:eu.europa.ejusticeportal.dss.controller.signature.PdfUtils.java
License:EUPL
/** * Extracts an attachment from a PDF//from w w w . ja v a 2 s . c o m * * @param pdf the PDF; this is unaffected by the method * @param name the name of the attachment * @return the attachment */ public static byte[] extractAttachment(final byte[] pdf, final String name) { PDDocument doc = null; try { InputStream is = new ByteArrayInputStream(pdf); doc = PDDocument.load(is); PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(doc.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles(); Map<String, COSObjectable> names = efTree.getNames(); byte[] data = null; if (names != null) { PDComplexFileSpecification attach = (PDComplexFileSpecification) names.get(name); data = attach.getEmbeddedFile().getByteArray(); } // Remove the \r\n added by PdfBox (PDDocument.saveIncremental). if (data != null) { String newString = new String(data, "UTF-8"); while (newString.endsWith("\r\n")) { newString = newString.replaceAll("\\r\\n$", ""); data = newString.getBytes(Charset.forName("UTF-8")); } } return data; } catch (IOException e) { LOGGER.error("Error detaching.", e); throw new SigningException(e); } finally { closeQuietly(doc); } }
From source file:eu.europa.ejusticeportal.dss.controller.signature.PdfUtils.java
License:EUPL
/** * Get a map of file name/byte for all embedded files in the pdf * /*from w ww. j ava 2s. c o m*/ * @param doc the pdf * @return the map of attachments * @throws IOException */ private static Map<String, byte[]> getAttachments(PDDocument doc) throws IOException { PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog()); Map<String, byte[]> attachments = new LinkedHashMap<String, byte[]>(); if (names.getEmbeddedFiles() != null && names.getEmbeddedFiles().getNames() != null) { for (String key : names.getEmbeddedFiles().getNames().keySet()) { attachments.put(key, ((PDComplexFileSpecification) names.getEmbeddedFiles().getNames().get(key)) .getEmbeddedFile().getByteArray()); } } return attachments; }
From source file:eu.europa.ejusticeportal.dss.controller.signature.PdfUtils.java
License:EUPL
/**
 * Get the names of all embedded files in the pdf.
 * <p>
 * Only the names held directly by the embedded-files tree are read.
 *
 * @param doc the pdf
 * @return the set of attachment names; empty if the pdf has no embedded files
 * @throws IOException if the embedded files cannot be read
 */
private static Set<String> getAttachmentsNames(PDDocument doc) throws IOException {
    PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
    Set<String> attachments = new HashSet<String>();

    // Resolve the tree and its name map once rather than on every access.
    PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
    if (embeddedFiles != null) {
        Map<String, COSObjectable> entries = embeddedFiles.getNames();
        if (entries != null) {
            attachments.addAll(entries.keySet());
        }
    }
    return attachments;
}
From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceExtractor.java
License:Open Source License
/**
 * Returns the embedded-files name tree held by the given names dictionary.
 *
 * @param names the document's names dictionary
 * @return the embedded-files tree, never {@code null}
 * @throws InvoiceExtractionError with {@code NO_FILE} when the dictionary has
 *         no embedded-files entry
 */
private static final PDEmbeddedFilesNameTreeNode listEmbeddedFiles(PDDocumentNameDictionary names) {
    final PDEmbeddedFilesNameTreeNode tree = names.getEmbeddedFiles();
    if (tree != null) {
        return tree;
    }
    throw new InvoiceExtractionError(NO_FILE);
}
From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java
License:Apache License
private void extractEmbeddedDocuments(PDDocument document, ContentHandler handler) throws IOException, SAXException, TikaException { PDDocumentCatalog catalog = document.getDocumentCatalog(); PDDocumentNameDictionary names = catalog.getNames(); if (names == null) { return;/*from w w w. ja v a 2 s.co m*/ } PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles(); if (embeddedFiles == null) { return; } EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class); if (embeddedExtractor == null) { embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context); } Map<String, COSObjectable> embeddedFileNames = embeddedFiles.getNames(); // For now, try to get the embeddedFileNames out of embeddedFiles or its // kids. // This code follows: pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java // If there is a need we could add a fully recursive search to find a // non-null // Map<String, COSObjectable> that contains the doc info. if (embeddedFileNames != null) { processEmbeddedDocNames(embeddedFileNames, embeddedExtractor); } else { List<PDNameTreeNode> kids = embeddedFiles.getKids(); if (kids == null) { return; } for (PDNameTreeNode n : kids) { Map<String, COSObjectable> childNames = n.getNames(); if (childNames != null) { processEmbeddedDocNames(childNames, embeddedExtractor); } } } }
From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java
License:Apache License
/** * A Catalog shall not contain the EmbeddedFiles entry. * /*from w w w .ja v a 2 s . co m*/ * @param handler * @param catalog * @param result * @throws ValidationException */ protected void validateNames(DocumentHandler handler, PDDocumentCatalog catalog, List<ValidationError> result) throws ValidationException { PDDocumentNameDictionary names = catalog.getNames(); if (names != null) { PDEmbeddedFilesNameTreeNode efs = names.getEmbeddedFiles(); if (efs != null) { result.add(new ValidationError(ERROR_SYNTAX_TRAILER_CATALOG_EMBEDDEDFILES, "EmbeddedFile entry is present in the Names dictionary")); } } }
From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java
License:Apache License
private void extractEmbeddedDocuments(PDDocument document) throws IOException, SAXException, TikaException { PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles(); if (efTree == null) { return;//from ww w . j a v a 2 s . c om } Map<String, PDComplexFileSpecification> embeddedFileNames = efTree.getNames(); //For now, try to get the embeddedFileNames out of embeddedFiles or its kids. //This code follows: pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java //If there is a need we could add a fully recursive search to find a non-null //Map<String, COSObjectable> that contains the doc info. if (embeddedFileNames != null) { processEmbeddedDocNames(embeddedFileNames); } else { List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids(); if (kids == null) { return; } for (PDNameTreeNode<PDComplexFileSpecification> node : kids) { embeddedFileNames = node.getNames(); if (embeddedFileNames != null) { processEmbeddedDocNames(embeddedFileNames); } } } }