Example usage for org.apache.pdfbox.pdmodel PDDocumentCatalog getNames

List of usage examples for org.apache.pdfbox.pdmodel PDDocumentCatalog getNames

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocumentCatalog getNames.

Prototype

public PDDocumentNameDictionary getNames() 

Source Link

Usage

From source file:com.fangxin365.core.utils.PDFMerger.java

License:Apache License

/**
 * append all pages from source to destination.
 * /*from   w w  w  .j ava 2 s  .  c o m*/
 * @param destination
 *            the document to receive the pages
 * @param source
 *            the document originating the new pages
 * 
 * @throws IOException
 *             If there is an error accessing data from either document.
 */
public void appendDocument(PDDocument destination, PDDocument source) throws IOException {
    if (destination.isEncrypted()) {
        System.out.println("Error: destination PDF is encrypted, can't append encrypted PDF documents.");
    }
    if (source.isEncrypted()) {
        System.out.println("Error: source PDF is encrypted, can't append encrypted PDF documents.");
    }
    PDDocumentInformation destInfo = destination.getDocumentInformation();
    PDDocumentInformation srcInfo = source.getDocumentInformation();
    destInfo.getDictionary().mergeInto(srcInfo.getDictionary());

    PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
    PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

    // use the highest version number for the resulting pdf
    float destVersion = destination.getDocument().getVersion();
    float srcVersion = source.getDocument().getVersion();

    if (destVersion < srcVersion) {
        destination.getDocument().setVersion(srcVersion);
    }

    if (destCatalog.getOpenAction() == null) {
        destCatalog.setOpenAction(srcCatalog.getOpenAction());
    }

    // maybe there are some shared resources for all pages
    COSDictionary srcPages = (COSDictionary) srcCatalog.getCOSDictionary().getDictionaryObject(COSName.PAGES);
    COSDictionary srcResources = (COSDictionary) srcPages.getDictionaryObject(COSName.RESOURCES);
    COSDictionary destPages = (COSDictionary) destCatalog.getCOSDictionary().getDictionaryObject(COSName.PAGES);
    COSDictionary destResources = (COSDictionary) destPages.getDictionaryObject(COSName.RESOURCES);
    if (srcResources != null) {
        if (destResources != null) {
            destResources.mergeInto(srcResources);
        } else {
            destPages.setItem(COSName.RESOURCES, srcResources);
        }
    }

    PDFCloneUtility cloner = new PDFCloneUtility(destination);

    try {
        PDAcroForm destAcroForm = destCatalog.getAcroForm();
        PDAcroForm srcAcroForm = srcCatalog.getAcroForm();
        if (destAcroForm == null) {
            cloner.cloneForNewDocument(srcAcroForm);
            destCatalog.setAcroForm(srcAcroForm);
        } else {
            if (srcAcroForm != null) {
                mergeAcroForm(cloner, destAcroForm, srcAcroForm);
            }
        }
    } catch (Exception e) {
        // if we are not ignoring exceptions, we'll re-throw this
        if (!ignoreAcroFormErrors) {
            throw (IOException) e;
        }
    }

    COSArray destThreads = (COSArray) destCatalog.getCOSDictionary().getDictionaryObject(COSName.THREADS);
    COSArray srcThreads = (COSArray) cloner
            .cloneForNewDocument(destCatalog.getCOSDictionary().getDictionaryObject(COSName.THREADS));
    if (destThreads == null) {
        destCatalog.getCOSDictionary().setItem(COSName.THREADS, srcThreads);
    } else {
        destThreads.addAll(srcThreads);
    }

    PDDocumentNameDictionary destNames = destCatalog.getNames();
    PDDocumentNameDictionary srcNames = srcCatalog.getNames();
    if (srcNames != null) {
        if (destNames == null) {
            destCatalog.getCOSDictionary().setItem(COSName.NAMES, cloner.cloneForNewDocument(srcNames));
        } else {
            cloner.cloneMerge(srcNames, destNames);
        }

    }

    PDDocumentOutline destOutline = destCatalog.getDocumentOutline();
    PDDocumentOutline srcOutline = srcCatalog.getDocumentOutline();
    if (srcOutline != null) {
        if (destOutline == null) {
            PDDocumentOutline cloned = new PDDocumentOutline(
                    (COSDictionary) cloner.cloneForNewDocument(srcOutline));
            destCatalog.setDocumentOutline(cloned);
        } else {
            PDOutlineItem first = srcOutline.getFirstChild();
            if (first != null) {
                PDOutlineItem clonedFirst = new PDOutlineItem(
                        (COSDictionary) cloner.cloneForNewDocument(first));
                destOutline.appendChild(clonedFirst);
            }
        }
    }

    String destPageMode = destCatalog.getPageMode();
    String srcPageMode = srcCatalog.getPageMode();
    if (destPageMode == null) {
        destCatalog.setPageMode(srcPageMode);
    }

    COSDictionary destLabels = (COSDictionary) destCatalog.getCOSDictionary()
            .getDictionaryObject(COSName.PAGE_LABELS);
    COSDictionary srcLabels = (COSDictionary) srcCatalog.getCOSDictionary()
            .getDictionaryObject(COSName.PAGE_LABELS);
    if (srcLabels != null) {
        int destPageCount = destination.getNumberOfPages();
        COSArray destNums = null;
        if (destLabels == null) {
            destLabels = new COSDictionary();
            destNums = new COSArray();
            destLabels.setItem(COSName.NUMS, destNums);
            destCatalog.getCOSDictionary().setItem(COSName.PAGE_LABELS, destLabels);
        } else {
            destNums = (COSArray) destLabels.getDictionaryObject(COSName.NUMS);
        }
        COSArray srcNums = (COSArray) srcLabels.getDictionaryObject(COSName.NUMS);
        if (srcNums != null) {
            for (int i = 0; i < srcNums.size(); i += 2) {
                COSNumber labelIndex = (COSNumber) srcNums.getObject(i);
                long labelIndexValue = labelIndex.intValue();
                destNums.add(COSInteger.get(labelIndexValue + destPageCount));
                destNums.add(cloner.cloneForNewDocument(srcNums.getObject(i + 1)));
            }
        }
    }

    COSStream destMetadata = (COSStream) destCatalog.getCOSDictionary().getDictionaryObject(COSName.METADATA);
    COSStream srcMetadata = (COSStream) srcCatalog.getCOSDictionary().getDictionaryObject(COSName.METADATA);
    if (destMetadata == null && srcMetadata != null) {
        PDStream newStream = new PDStream(destination, srcMetadata.getUnfilteredStream(), false);
        newStream.getStream().mergeInto(srcMetadata);
        newStream.addCompression();
        destCatalog.getCOSDictionary().setItem(COSName.METADATA, newStream);
    }

    // finally append the pages
    @SuppressWarnings("unchecked")
    List<PDPage> pages = srcCatalog.getAllPages();
    Iterator<PDPage> pageIter = pages.iterator();
    while (pageIter.hasNext()) {
        PDPage page = pageIter.next();
        PDPage newPage = new PDPage((COSDictionary) cloner.cloneForNewDocument(page.getCOSDictionary()));
        newPage.setCropBox(page.findCropBox());
        newPage.setMediaBox(page.findMediaBox());
        newPage.setRotation(page.findRotation());
        destination.addPage(newPage);
    }
}

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java

License:Apache License

/**
 * Hands the files embedded in the given PDF to an embedded-document
 * extractor. Does nothing when the catalog has no names dictionary or no
 * embedded-files name tree.
 * <p>
 * Only the root node of the name tree and its direct kids are inspected,
 * following pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java; a fully
 * recursive search could be added if needed.
 */
private void extractEmbeddedDocuments(PDDocument document, ContentHandler handler)
        throws IOException, SAXException, TikaException {
    PDDocumentNameDictionary nameDictionary = document.getDocumentCatalog().getNames();
    if (nameDictionary == null) {
        return;
    }
    PDEmbeddedFilesNameTreeNode fileTree = nameDictionary.getEmbeddedFiles();
    if (fileTree == null) {
        return;
    }

    // prefer an extractor supplied through the parse context, else use the default one
    EmbeddedDocumentExtractor extractor = context.get(EmbeddedDocumentExtractor.class);
    if (extractor == null) {
        extractor = new ParsingEmbeddedDocumentExtractor(context);
    }

    Map<String, COSObjectable> rootNames = fileTree.getNames();
    if (rootNames != null) {
        // the root node carries the names itself
        processEmbeddedDocNames(rootNames, extractor);
        return;
    }

    // otherwise look one level down
    List<PDNameTreeNode> children = fileTree.getKids();
    if (children == null) {
        return;
    }
    for (PDNameTreeNode child : children) {
        Map<String, COSObjectable> namesOfChild = child.getNames();
        if (namesOfChild == null) {
            continue;
        }
        processEmbeddedDocNames(namesOfChild, extractor);
    }
}

From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * Checks the Names dictionary of the catalog: it shall not contain the
 * EmbeddedFiles entry. When it does, a validation error is appended to
 * {@code result}.
 *
 * @param handler
 *            not used by this check
 * @param catalog
 *            the catalog whose Names dictionary is inspected
 * @param result
 *            the list receiving the validation error, if any
 * @throws ValidationException
 */
protected void validateNames(DocumentHandler handler, PDDocumentCatalog catalog, List<ValidationError> result)
        throws ValidationException {
    PDDocumentNameDictionary nameDictionary = catalog.getNames();
    if (nameDictionary == null) {
        // no Names dictionary, nothing to check
        return;
    }

    PDEmbeddedFilesNameTreeNode embeddedFilesTree = nameDictionary.getEmbeddedFiles();
    if (embeddedFilesTree == null) {
        return;
    }

    ValidationError embeddedFilesError = new ValidationError(ERROR_SYNTAX_TRAILER_CATALOG_EMBEDDEDFILES,
            "EmbeddedFile entry is present in the Names dictionary");
    result.add(embeddedFilesError);
}

From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java

License:Apache License

/**
 * Processes the files embedded in the given PDF. Does nothing when the
 * catalog has no names dictionary or no embedded-files name tree.
 * <p>
 * Only the root node of the name tree and its direct kids are inspected,
 * following pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java; a fully
 * recursive search could be added if needed.
 */
private void extractEmbeddedDocuments(PDDocument document, ContentHandler handler)
        throws IOException, SAXException, TikaException {
    PDDocumentNameDictionary nameDictionary = document.getDocumentCatalog().getNames();
    if (nameDictionary == null) {
        return;
    }
    PDEmbeddedFilesNameTreeNode fileTree = nameDictionary.getEmbeddedFiles();
    if (fileTree == null) {
        return;
    }

    Map<String, COSObjectable> rootNames = fileTree.getNames();
    if (rootNames != null) {
        // the root node carries the names itself
        processEmbeddedDocNames(rootNames);
        return;
    }

    // otherwise look one level down
    List<PDNameTreeNode> children = fileTree.getKids();
    if (children == null) {
        return;
    }
    for (PDNameTreeNode child : children) {
        Map<String, COSObjectable> namesOfChild = child.getNames();
        if (namesOfChild == null) {
            continue;
        }
        processEmbeddedDocNames(namesOfChild);
    }
}

From source file:org.paxle.parser.pdf.impl.PdfParser.java

License:Open Source License

/**
 * Extracts the content of files embedded in a PDF document and attaches each
 * successfully parsed file to {@code parserDoc} as a sub-document.
 * <p>
 * Entries are skipped (with a log message) when they carry no embedded
 * stream, declare no mime-type, have no matching sub-parser, or fail to
 * parse. Entries that are not {@link PDComplexFileSpecification}s are
 * ignored silently.
 *
 * @param location
 *            the location of the PDF; used in log messages and passed on to
 *            the sub-parser
 * @param parserDoc
 *            the parser document receiving the sub-documents
 * @param pddDoc
 *            the PDF document to inspect
 * @throws IOException
 *             declared for failures while reading the name tree or opening an
 *             embedded file's stream
 */
protected void extractEmbeddedFiles(URI location, IParserDocument parserDoc, PDDocument pddDoc)
        throws IOException {
    final PDDocumentCatalog pddDocCatalog = pddDoc.getDocumentCatalog();
    if (pddDocCatalog == null)
        return;

    final PDDocumentNameDictionary nameDic = pddDocCatalog.getNames();
    if (nameDic == null)
        return;

    final PDEmbeddedFilesNameTreeNode embeddedFiles = nameDic.getEmbeddedFiles();
    if (embeddedFiles == null)
        return;

    @SuppressWarnings("unchecked")
    final Map<String, Object> names = embeddedFiles.getNames();
    if (names == null || names.isEmpty())
        return;

    final IParserContext context = this.contextLocal.getCurrentContext();

    for (Entry<String, Object> name : names.entrySet()) {
        final Object fileObj = name.getValue();
        // instanceof is false for null, so missing values are skipped here too
        if (!(fileObj instanceof PDComplexFileSpecification))
            continue;

        final PDComplexFileSpecification embeddedFileSpec = (PDComplexFileSpecification) fileObj;
        final String fileName = embeddedFileSpec.getFile();

        // a file specification may reference a file without embedding its data
        final PDEmbeddedFile embeddedFile = embeddedFileSpec.getEmbeddedFile();
        if (embeddedFile == null) {
            this.logger.warn(String.format("No embedded data found for file specification '%s#%s'.", location,
                    fileName));
            continue;
        }

        // getting the mime-type of the embedded file
        final String fileMimeType = embeddedFile.getSubtype();
        if (fileMimeType == null) {
            this.logger.warn(String.format("No mime-type specified form embedded file '%s#%s'.", location,
                    fileName));
            continue;
        }

        // getting a parser to parse the content
        final ISubParser sp = context.getParser(fileMimeType);
        if (sp == null) {
            this.logger.warn(String.format("No parser found to parse embedded file '%s#%s' with type '%s'.",
                    location, fileName, fileMimeType));
            continue;
        }

        // parsing content
        InputStream embeddedFileStream = null;
        try {
            embeddedFileStream = embeddedFile.createInputStream();
            final IParserDocument subParserDoc = sp.parse(location, "UTF-8", embeddedFileStream);
            if (subParserDoc.getMimeType() == null) {
                subParserDoc.setMimeType(fileMimeType);
            }

            parserDoc.addSubDocument(fileName, subParserDoc);
        } catch (ParserException e) {
            // a single unparsable attachment must not abort the remaining ones
            this.logger.error(String.format(
                    "Unexpected error while parsing parse embedded file '%s#%s' with type '%s': %s",
                    location, fileName, fileMimeType, e.getMessage()));
        } finally {
            if (embeddedFileStream != null) {
                try {
                    embeddedFileStream.close();
                } catch (Exception e) {
                    // failure to close is logged only; it does not affect the parsed result
                    this.logger.error(e);
                }
            }
        }
    }
}