Example usage for org.apache.pdfbox.pdmodel.common.filespecification PDComplexFileSpecification getFile

List of usage examples for org.apache.pdfbox.pdmodel.common.filespecification PDComplexFileSpecification getFile

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.common.filespecification PDComplexFileSpecification getFile.

Prototype

@Override
public String getFile() 

Source Link

Document

This will get the file name.

Usage

From source file:algorithm.PDFFileAttacher.java

License:Apache License

/**
 * Restores the payload files that were attached to a carrier PDF.
 *
 * <p>The carrier is first copied via {@code getRestoredCarrier}. Embedded files are then looked
 * up in the document's embedded-files name tree under the consecutive keys
 * {@code "PericlesMetadata-0"}, {@code "PericlesMetadata-1"}, ... until a key yields no value.
 * Each embedded file is written to {@code RESTORED_DIRECTORY} using the simple name of the
 * originally attached file.
 *
 * @param originalPdf the carrier PDF to restore from
 * @return the restored payload files followed by the copied carrier; every entry has its
 *         {@code algorithm} set to this instance and lists all other entries as related files
 * @throws IOException if the PDF cannot be loaded or a payload cannot be read or written
 */
@Override
public List<RestoredFile> restore(File originalPdf) throws IOException {
    RestoredFile copiedPdf = getRestoredCarrier(originalPdf);
    List<RestoredFile> restoredFiles = new ArrayList<RestoredFile>();
    PDDocument document = PDDocument.load(copiedPdf);
    try {
        PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
        PDEmbeddedFilesNameTreeNode filesTree = namesDictionary.getEmbeddedFiles();
        if (filesTree != null) {
            int i = 0;
            while (true) {
                PDComplexFileSpecification fileSpecification = (PDComplexFileSpecification) filesTree
                        .getValue("PericlesMetadata-" + i);
                if (fileSpecification == null) {
                    // No attachment under this index: all payloads have been visited.
                    break;
                }
                File oldAttachedFile = new File(fileSpecification.getFile());
                // Only the simple file name is used, so the payload always lands in RESTORED_DIRECTORY.
                RestoredFile restoredPayload = new RestoredFile(RESTORED_DIRECTORY + oldAttachedFile.getName());
                PDEmbeddedFile embeddedFile = fileSpecification.getEmbeddedFile();
                InputStream inputStream = embeddedFile.createInputStream();
                try {
                    FileOutputStream outputStream = new FileOutputStream(restoredPayload);
                    try {
                        IOUtils.copy(inputStream, outputStream);
                    } finally {
                        // Close before post-processing so the file handle is released.
                        outputStream.close();
                    }
                } finally {
                    inputStream.close();
                }
                removeBuggyLineEnding(restoredPayload);
                restoredPayload.wasPayload = true;
                restoredPayload.checksumValid = true;
                restoredPayload.restorationNote = "Checksum wasn't calculated, because this algorithm isn't using restoration metadata. The original payload file survives the encapsulation with this algorithm.";
                restoredFiles.add(restoredPayload);
                i++;
            }
        }
    } finally {
        // Release the document even if extracting a payload failed.
        document.close();
    }
    copiedPdf.wasCarrier = true;
    copiedPdf.checksumValid = false;
    copiedPdf.restorationNote = "Checksum can't be valid, because attached payload files can't be removed from carrier.";
    restoredFiles.add(copiedPdf);
    // Cross-link every restored file with all the others.
    for (RestoredFile file : restoredFiles) {
        file.algorithm = this;
        for (RestoredFile relatedFile : restoredFiles) {
            if (file != relatedFile) {
                file.relatedFiles.add(relatedFile);
            }
        }
    }
    return restoredFiles;
}

From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java

License:Apache License

/**
 * Hands every platform variant of an embedded file specification to
 * {@code extractPDEmbeddedFile}: the generic entry first, then the Mac, DOS and Unix entries.
 * Does nothing when {@code spec} is {@code null}.
 */
private void extractMultiOSPDEmbeddedFiles(String displayName, PDComplexFileSpecification spec,
        AttributesImpl attributes) throws IOException, SAXException, TikaException {
    if (spec != null) {
        // All variants are pulled on purpose, rather than stopping at the first non-null one.

        // generic entry
        extractPDEmbeddedFile(displayName, spec.getFileUnicode(),
                spec.getFile(), spec.getEmbeddedFile(), attributes);
        // Mac entry
        extractPDEmbeddedFile(displayName, spec.getFileUnicode(),
                spec.getFileMac(), spec.getEmbeddedFileMac(), attributes);
        // DOS entry
        extractPDEmbeddedFile(displayName, spec.getFileUnicode(),
                spec.getFileDos(), spec.getEmbeddedFileDos(), attributes);
        // Unix entry
        extractPDEmbeddedFile(displayName, spec.getFileUnicode(),
                spec.getFileUnix(), spec.getEmbeddedFileUnix(), attributes);
    }
}

From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java

License:Apache License

/**
 * Passes every platform variant of an embedded file specification to
 * {@code extractPDEmbeddedFile}: the generic entry first, then the Mac, DOS and Unix entries.
 * Does nothing when {@code spec} is {@code null}.
 */
private void extractMultiOSPDEmbeddedFiles(String defaultName, PDComplexFileSpecification spec,
        EmbeddedDocumentExtractor extractor) throws IOException, SAXException, TikaException {
    if (spec != null) {
        // All variants are pulled on purpose, rather than stopping at the first non-null one.

        // generic entry
        extractPDEmbeddedFile(defaultName,
                spec.getFile(), spec.getEmbeddedFile(), extractor);
        // Mac entry
        extractPDEmbeddedFile(defaultName,
                spec.getFileMac(), spec.getEmbeddedFileMac(), extractor);
        // DOS entry
        extractPDEmbeddedFile(defaultName,
                spec.getFileDos(), spec.getEmbeddedFileDos(), extractor);
        // Unix entry
        extractPDEmbeddedFile(defaultName,
                spec.getFileUnix(), spec.getEmbeddedFileUnix(), extractor);
    }
}

From source file:org.paxle.parser.pdf.impl.PdfParser.java

License:Open Source License

/**
 * A function to extract the content of embedded files from a PDF document.
 *
 * <p>Every entry of the document's embedded-files name tree that is a
 * {@link PDComplexFileSpecification} is parsed with the sub-parser registered for the embedded
 * file's mime-type, and the result is added to {@code parserDoc} as a sub-document keyed by
 * the embedded file name. Entries without an embedded file stream, without a mime-type, or
 * without a matching sub-parser are skipped with a warning.
 *
 * @param location the location of the PDF document, used for parsing and in log messages
 * @param parserDoc the parser document that receives the parsed sub-documents
 * @param pddDoc the PDF document whose embedded files are extracted
 * @throws IOException if reading an embedded file fails
 */
protected void extractEmbeddedFiles(URI location, IParserDocument parserDoc, PDDocument pddDoc)
        throws IOException {
    final PDDocumentCatalog pddDocCatalog = pddDoc.getDocumentCatalog();
    if (pddDocCatalog == null) {
        return;
    }

    final PDDocumentNameDictionary nameDic = pddDocCatalog.getNames();
    if (nameDic == null) {
        return;
    }

    final PDEmbeddedFilesNameTreeNode embeddedFiles = nameDic.getEmbeddedFiles();
    if (embeddedFiles == null) {
        return;
    }

    @SuppressWarnings("unchecked")
    final Map<String, Object> names = embeddedFiles.getNames();
    if (names == null || names.isEmpty()) {
        return;
    }

    final IParserContext context = this.contextLocal.getCurrentContext();

    for (Entry<String, Object> name : names.entrySet()) {
        final Object fileObj = name.getValue();
        if (fileObj == null) {
            continue;
        }

        if (fileObj instanceof PDComplexFileSpecification) {
            final PDComplexFileSpecification embeddedFileSpec = (PDComplexFileSpecification) fileObj;
            final PDEmbeddedFile embeddedFile = embeddedFileSpec.getEmbeddedFile();

            // getting the embedded file name and mime-type
            final String fileName = embeddedFileSpec.getFile();
            if (embeddedFile == null) {
                // A file specification may reference a file without embedding its content.
                this.logger.warn(String.format("No embedded file stream found for embedded file '%s#%s'.",
                        location, fileName));
                continue;
            }
            final String fileMimeType = embeddedFile.getSubtype();
            if (fileMimeType == null) {
                this.logger.warn(String.format("No mime-type specified form embedded file '%s#%s'.", location,
                        fileName));
                continue;
            }

            // getting a parser to parse the content
            final ISubParser sp = context.getParser(fileMimeType);
            if (sp == null) {
                this.logger.warn(String.format("No parser found to parse embedded file '%s#%s' with type '%s'.",
                        location, fileName, fileMimeType));
                continue;
            }

            // parsing content
            InputStream embeddedFileStream = null;
            try {
                embeddedFileStream = embeddedFile.createInputStream();
                final IParserDocument subParserDoc = sp.parse(location, "UTF-8", embeddedFileStream);
                if (subParserDoc.getMimeType() == null) {
                    // fall back to the mime-type declared in the PDF
                    subParserDoc.setMimeType(fileMimeType);
                }

                parserDoc.addSubDocument(fileName, subParserDoc);
            } catch (ParserException e) {
                // a broken attachment must not abort the extraction of the remaining ones
                this.logger.error(String.format(
                        "Unexpected error while parsing parse embedded file '%s#%s' with type '%s': %s",
                        location, fileName, fileMimeType, e.getMessage()));
            } finally {
                if (embeddedFileStream != null) {
                    try {
                        embeddedFileStream.close();
                    } catch (Exception e) {
                        this.logger.error(e);
                    }
                }
            }
        }
    }
}