Example usage for org.apache.pdfbox.pdmodel PDDocumentCatalog getNames

Introduction

In this page you can find the example usage for org.apache.pdfbox.pdmodel PDDocumentCatalog getNames.

Prototype

public PDDocumentNameDictionary getNames()

Source Link

Usage

From source file:com.fangxin365.core.utils.PDFMerger.java

License:Apache License

/**
 * append all pages from source to destination.
 * /*from   w w  w  .j ava 2 s  .  c o m*/
 * @param destination
 *            the document to receive the pages
 * @param source
 *            the document originating the new pages
 * 
 * @throws IOException
 *             If there is an error accessing data from either document.
 */
public void appendDocument(PDDocument destination, PDDocument source) throws IOException {
    if (destination.isEncrypted()) {
        System.out.println("Error: destination PDF is encrypted, can't append encrypted PDF documents.");
    }
    if (source.isEncrypted()) {
        System.out.println("Error: source PDF is encrypted, can't append encrypted PDF documents.");
    }
    PDDocumentInformation destInfo = destination.getDocumentInformation();
    PDDocumentInformation srcInfo = source.getDocumentInformation();
    destInfo.getDictionary().mergeInto(srcInfo.getDictionary());

    PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
    PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

    // use the highest version number for the resulting pdf
    float destVersion = destination.getDocument().getVersion();
    float srcVersion = source.getDocument().getVersion();

    if (destVersion < srcVersion) {
        destination.getDocument().setVersion(srcVersion);
    }

    if (destCatalog.getOpenAction() == null) {
        destCatalog.setOpenAction(srcCatalog.getOpenAction());
    }

    // maybe there are some shared resources for all pages
    COSDictionary srcPages = (COSDictionary) srcCatalog.getCOSDictionary().getDictionaryObject(COSName.PAGES);
    COSDictionary srcResources = (COSDictionary) srcPages.getDictionaryObject(COSName.RESOURCES);
    COSDictionary destPages = (COSDictionary) destCatalog.getCOSDictionary().getDictionaryObject(COSName.PAGES);
    COSDictionary destResources = (COSDictionary) destPages.getDictionaryObject(COSName.RESOURCES);
    if (srcResources != null) {
        if (destResources != null) {
            destResources.mergeInto(srcResources);
        } else {
            destPages.setItem(COSName.RESOURCES, srcResources);
        }
    }

    PDFCloneUtility cloner = new PDFCloneUtility(destination);

    try {
        PDAcroForm destAcroForm = destCatalog.getAcroForm();
        PDAcroForm srcAcroForm = srcCatalog.getAcroForm();
        if (destAcroForm == null) {
            cloner.cloneForNewDocument(srcAcroForm);
            destCatalog.setAcroForm(srcAcroForm);
        } else {
            if (srcAcroForm != null) {
                mergeAcroForm(cloner, destAcroForm, srcAcroForm);
            }
        }
    } catch (Exception e) {
        // if we are not ignoring exceptions, we'll re-throw this
        if (!ignoreAcroFormErrors) {
            throw (IOException) e;
        }
    }

    COSArray destThreads = (COSArray) destCatalog.getCOSDictionary().getDictionaryObject(COSName.THREADS);
    COSArray srcThreads = (COSArray) cloner
            .cloneForNewDocument(destCatalog.getCOSDictionary().getDictionaryObject(COSName.THREADS));
    if (destThreads == null) {
        destCatalog.getCOSDictionary().setItem(COSName.THREADS, srcThreads);
    } else {
        destThreads.addAll(srcThreads);
    }

    PDDocumentNameDictionary destNames = destCatalog.getNames();
    PDDocumentNameDictionary srcNames = srcCatalog.getNames();
    if (srcNames != null) {
        if (destNames == null) {
            destCatalog.getCOSDictionary().setItem(COSName.NAMES, cloner.cloneForNewDocument(srcNames));
        } else {
            cloner.cloneMerge(srcNames, destNames);
        }

    }

    PDDocumentOutline destOutline = destCatalog.getDocumentOutline();
    PDDocumentOutline srcOutline = srcCatalog.getDocumentOutline();
    if (srcOutline != null) {
        if (destOutline == null) {
            PDDocumentOutline cloned = new PDDocumentOutline(
                    (COSDictionary) cloner.cloneForNewDocument(srcOutline));
            destCatalog.setDocumentOutline(cloned);
        } else {
            PDOutlineItem first = srcOutline.getFirstChild();
            if (first != null) {
                PDOutlineItem clonedFirst = new PDOutlineItem(
                        (COSDictionary) cloner.cloneForNewDocument(first));
                destOutline.appendChild(clonedFirst);
            }
        }
    }

    String destPageMode = destCatalog.getPageMode();
    String srcPageMode = srcCatalog.getPageMode();
    if (destPageMode == null) {
        destCatalog.setPageMode(srcPageMode);
    }

    COSDictionary destLabels = (COSDictionary) destCatalog.getCOSDictionary()
            .getDictionaryObject(COSName.PAGE_LABELS);
    COSDictionary srcLabels = (COSDictionary) srcCatalog.getCOSDictionary()
            .getDictionaryObject(COSName.PAGE_LABELS);
    if (srcLabels != null) {
        int destPageCount = destination.getNumberOfPages();
        COSArray destNums = null;
        if (destLabels == null) {
            destLabels = new COSDictionary();
            destNums = new COSArray();
            destLabels.setItem(COSName.NUMS, destNums);
            destCatalog.getCOSDictionary().setItem(COSName.PAGE_LABELS, destLabels);
        } else {
            destNums = (COSArray) destLabels.getDictionaryObject(COSName.NUMS);
        }
        COSArray srcNums = (COSArray) srcLabels.getDictionaryObject(COSName.NUMS);
        if (srcNums != null) {
            for (int i = 0; i < srcNums.size(); i += 2) {
                COSNumber labelIndex = (COSNumber) srcNums.getObject(i);
                long labelIndexValue = labelIndex.intValue();
                destNums.add(COSInteger.get(labelIndexValue + destPageCount));
                destNums.add(cloner.cloneForNewDocument(srcNums.getObject(i + 1)));
            }
        }
    }

    COSStream destMetadata = (COSStream) destCatalog.getCOSDictionary().getDictionaryObject(COSName.METADATA);
    COSStream srcMetadata = (COSStream) srcCatalog.getCOSDictionary().getDictionaryObject(COSName.METADATA);
    if (destMetadata == null && srcMetadata != null) {
        PDStream newStream = new PDStream(destination, srcMetadata.getUnfilteredStream(), false);
        newStream.getStream().mergeInto(srcMetadata);
        newStream.addCompression();
        destCatalog.getCOSDictionary().setItem(COSName.METADATA, newStream);
    }

    // finally append the pages
    @SuppressWarnings("unchecked")
    List<PDPage> pages = srcCatalog.getAllPages();
    Iterator<PDPage> pageIter = pages.iterator();
    while (pageIter.hasNext()) {
        PDPage page = pageIter.next();
        PDPage newPage = new PDPage((COSDictionary) cloner.cloneForNewDocument(page.getCOSDictionary()));
        newPage.setCropBox(page.findCropBox());
        newPage.setMediaBox(page.findMediaBox());
        newPage.setRotation(page.findRotation());
        destination.addPage(newPage);
    }
}

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java

License:Apache License

private void extractEmbeddedDocuments(PDDocument document, ContentHandler handler)
        throws IOException, SAXException, TikaException {
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDDocumentNameDictionary names = catalog.getNames();
    if (names == null) {
        return;/*  www.  j  a va 2s  .  c  om*/
    }
    PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();

    if (embeddedFiles == null) {
        return;
    }

    EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class);
    if (embeddedExtractor == null) {
        embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
    }

    Map<String, COSObjectable> embeddedFileNames = embeddedFiles.getNames();
    // For now, try to get the embeddedFileNames out of embeddedFiles or its
    // kids.
    // This code follows: pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java
    // If there is a need we could add a fully recursive search to find a
    // non-null
    // Map<String, COSObjectable> that contains the doc info.
    if (embeddedFileNames != null) {
        processEmbeddedDocNames(embeddedFileNames, embeddedExtractor);
    } else {
        List<PDNameTreeNode> kids = embeddedFiles.getKids();
        if (kids == null) {
            return;
        }
        for (PDNameTreeNode n : kids) {
            Map<String, COSObjectable> childNames = n.getNames();
            if (childNames != null) {
                processEmbeddedDocNames(childNames, embeddedExtractor);
            }
        }
    }
}

From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * A Catalog shall not contain the EmbeddedFiles entry.
 * //from  w w  w .  j a va  2 s . c o m
 * @param handler
 * @param catalog
 * @param result
 * @throws ValidationException
 */
protected void validateNames(DocumentHandler handler, PDDocumentCatalog catalog, List<ValidationError> result)
        throws ValidationException {
    PDDocumentNameDictionary names = catalog.getNames();
    if (names != null) {
        PDEmbeddedFilesNameTreeNode efs = names.getEmbeddedFiles();
        if (efs != null) {
            result.add(new ValidationError(ERROR_SYNTAX_TRAILER_CATALOG_EMBEDDEDFILES,
                    "EmbeddedFile entry is present in the Names dictionary"));
        }
    }
}

From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java

License:Apache License

private void extractEmbeddedDocuments(PDDocument document, ContentHandler handler)
        throws IOException, SAXException, TikaException {
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDDocumentNameDictionary names = catalog.getNames();
    if (names == null) {
        return;/*from w  ww .j ava2  s  .c om*/
    }
    PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();

    if (embeddedFiles == null) {
        return;
    }

    Map<String, COSObjectable> embeddedFileNames = embeddedFiles.getNames();
    //For now, try to get the embeddedFileNames out of embeddedFiles or its kids.
    //This code follows: pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java
    //If there is a need we could add a fully recursive search to find a non-null
    //Map<String, COSObjectable> that contains the doc info.
    if (embeddedFileNames != null) {
        processEmbeddedDocNames(embeddedFileNames);
    } else {
        List<PDNameTreeNode> kids = embeddedFiles.getKids();
        if (kids == null) {
            return;
        }
        for (PDNameTreeNode n : kids) {
            Map<String, COSObjectable> childNames = n.getNames();
            if (childNames != null) {
                processEmbeddedDocNames(childNames);
            }
        }
    }
}

From source file:org.paxle.parser.pdf.impl.PdfParser.java

License:Open Source License

/**
 * A function to extract the content of embedded files from a PDF document.
 *///from  w ww .ja v  a2 s .  c  o  m
protected void extractEmbeddedFiles(URI location, IParserDocument parserDoc, PDDocument pddDoc)
        throws IOException {
    final PDDocumentCatalog pddDocCatalog = pddDoc.getDocumentCatalog();
    if (pddDocCatalog == null)
        return;

    final PDDocumentNameDictionary nameDic = pddDocCatalog.getNames();
    if (nameDic == null)
        return;

    final PDEmbeddedFilesNameTreeNode embeddedFiles = nameDic.getEmbeddedFiles();
    if (embeddedFiles == null)
        return;

    @SuppressWarnings("unchecked")
    final Map<String, Object> names = embeddedFiles.getNames();
    if (names == null || names.isEmpty())
        return;

    final IParserContext context = this.contextLocal.getCurrentContext();

    for (Entry<String, Object> name : names.entrySet()) {
        // final String fileDesc = name.getKey();
        final Object fileObj = name.getValue();
        if (fileObj == null)
            continue;

        if (fileObj instanceof PDComplexFileSpecification) {
            final PDComplexFileSpecification embeddedFileSpec = (PDComplexFileSpecification) fileObj;
            final PDEmbeddedFile embeddedFile = embeddedFileSpec.getEmbeddedFile();

            // getting the embedded file name and mime-type
            final String fileName = embeddedFileSpec.getFile();
            final String fileMimeType = embeddedFile.getSubtype();
            if (fileMimeType == null) {
                this.logger.warn(String.format("No mime-type specified form embedded file '%s#%s'.", location,
                        fileName));
                continue;
            }

            // getting a parser to parse the content
            final ISubParser sp = context.getParser(fileMimeType);
            if (sp == null) {
                this.logger.warn(String.format("No parser found to parse embedded file '%s#%s' with type '%s'.",
                        location, fileName, fileMimeType));
                continue;
            }

            // parsing content
            InputStream embeddedFileStream = null;
            try {
                embeddedFileStream = embeddedFile.createInputStream();
                final IParserDocument subParserDoc = sp.parse(location, "UTF-8", embeddedFileStream);
                if (subParserDoc.getMimeType() == null) {
                    subParserDoc.setMimeType(fileMimeType);
                }

                parserDoc.addSubDocument(fileName, subParserDoc);
            } catch (ParserException e) {
                this.logger.error(String.format(
                        "Unexpected error while parsing parse embedded file '%s#%s' with type '%s': %s",
                        location, fileName, fileMimeType, e.getMessage()));
            } finally {
                if (embeddedFileStream != null)
                    try {
                        embeddedFileStream.close();
                    } catch (Exception e) {
                        this.logger.error(e);
                    }
            }
        }
    }
}