Example usage for com.lowagie.text.pdf PdfArray getArrayList

List of usage examples for com.lowagie.text.pdf PdfArray getArrayList

Introduction

In this page you can find the example usage for com.lowagie.text.pdf PdfArray getArrayList.

Prototype

public ArrayList getArrayList() 

Source Link

Document

Get the internal arrayList for this PdfArray.

Usage

From source file:com.cyberway.issue.crawler.extractor.PDFParser.java

License:Open Source License

/**
 * Parse a PdfDictionary, looking for URIs recursively and adding
 * them to foundURIs//  w  w  w  .j  a  v a  2 s .  c o  m
 * @param entity
 */
protected void extractURIs(PdfObject entity) {

    // deal with dictionaries
    if (entity.isDictionary()) {

        PdfDictionary dictionary = (PdfDictionary) entity;

        @SuppressWarnings("unchecked")
        Set<PdfName> allkeys = dictionary.getKeys();
        for (PdfName key : allkeys) {
            PdfObject value = dictionary.get(key);

            // see if it's the key is a UR[I,L]
            if (key.toString().equals("/URI") || key.toString().equals("/URL")) {
                foundURIs.add(value.toString());

            } else {
                this.extractURIs(value);
            }

        }

        // deal with arrays
    } else if (entity.isArray()) {

        PdfArray array = (PdfArray) entity;
        ArrayList arrayObjects = array.getArrayList();
        Iterator objectList = arrayObjects.iterator();

        while (objectList.hasNext()) {
            this.extractURIs((PdfObject) objectList.next());
        }

        // deal with indirect references
    } else if (entity.getClass() == PRIndirectReference.class) {

        PRIndirectReference indirect = (PRIndirectReference) entity;

        // if we've already seen a reference to this object
        if (haveSeen(indirect.getGeneration(), indirect.getNumber())) {
            return;

            // note that we've seen it if it's new
        } else {
            markAsSeen(indirect.getGeneration(), indirect.getNumber());
        }

        // dereference the "pointer" and process the object
        indirect.getReader(); // FIXME: examine side-effects
        PdfObject direct = PdfReader.getPdfObject(indirect);

        this.extractURIs(direct);
    }
}

From source file:crawler.PDFParser.java

License:Open Source License

/**
 * Parse a PdfDictionary, looking for URIs recursively and adding
 * them to foundURIs/*from www  . ja va 2 s  . co m*/
 * @param entity
 */
protected void extractURIs(PdfObject entity) {
    // deal with dictionaries
    if (entity.isDictionary()) {
        PdfDictionary dictionary = (PdfDictionary) entity;

        @SuppressWarnings("unchecked")
        Set<PdfName> allkeys = dictionary.getKeys();
        for (PdfName key : allkeys) {
            PdfObject value = dictionary.get(key);

            // see if it's the key is a UR[I,L]
            if (key.toString().equals("/URI") || key.toString().equals("/URL"))
                foundURIs.add(value.toString());
            else
                this.extractURIs(value);
        }

        // deal with arrays
    } else if (entity.isArray()) {
        PdfArray array = (PdfArray) entity;
        ArrayList arrayObjects = array.getArrayList();
        Iterator objectList = arrayObjects.iterator();

        while (objectList.hasNext())
            this.extractURIs((PdfObject) objectList.next());

        // deal with indirect references
    } else if (entity.getClass() == PRIndirectReference.class) {
        PRIndirectReference indirect = (PRIndirectReference) entity;

        // if we've already seen a reference to this object
        if (haveSeen(indirect.getGeneration(), indirect.getNumber()))
            return;

        // note that we've seen it if it's new
        else
            markAsSeen(indirect.getGeneration(), indirect.getNumber());

        // dereference the "pointer" and process the object
        indirect.getReader();
        PdfObject direct = PdfReader.getPdfObject(indirect);

        this.extractURIs(direct);
    }
}

From source file:org.archive.modules.extractor.PDFParser.java

License:Apache License

/**
 * Parse a PdfDictionary, looking for URIs recursively and adding
 * them to foundURIs/*from   w w  w.jav  a2  s.c o m*/
 * @param entity
 */
@SuppressWarnings("unchecked")
protected void extractURIs(PdfObject entity) {

    // deal with dictionaries
    if (entity.isDictionary()) {

        PdfDictionary dictionary = (PdfDictionary) entity;

        Set<PdfName> allkeys = dictionary.getKeys();
        for (PdfName key : allkeys) {
            PdfObject value = dictionary.get(key);

            // see if it's the key is a UR[I,L]
            if (key.toString().equals("/URI") || key.toString().equals("/URL")) {
                foundURIs.add(value.toString());

            } else {
                this.extractURIs(value);
            }

        }

        // deal with arrays
    } else if (entity.isArray()) {

        PdfArray array = (PdfArray) entity;
        for (PdfObject pdfObject : (Iterable<PdfObject>) array.getArrayList()) {
            this.extractURIs(pdfObject);
        }

        // deal with indirect references
    } else if (entity.getClass() == PRIndirectReference.class) {

        PRIndirectReference indirect = (PRIndirectReference) entity;

        // if we've already seen a reference to this object
        if (haveSeen(indirect.getGeneration(), indirect.getNumber())) {
            return;

            // note that we've seen it if it's new
        } else {
            markAsSeen(indirect.getGeneration(), indirect.getNumber());
        }

        // dereference the "pointer" and process the object
        indirect.getReader(); // FIXME: examine side-effects
        PdfObject direct = PdfReader.getPdfObject(indirect);

        this.extractURIs(direct);
    }
}

From source file:org.kuali.coeus.propdev.impl.budget.subaward.PropDevPropDevBudgetSubAwardServiceImpl.java

License:Open Source License

/**
 * extracts attachments from PDF File/* w  w  w  . ja va 2  s. c  o m*/
 */

@SuppressWarnings("unchecked")
protected Map extractAttachments(PdfReader reader) throws IOException {
    Map fileMap = new HashMap();
    PdfDictionary catalog = reader.getCatalog();
    PdfDictionary names = (PdfDictionary) PdfReader.getPdfObject(catalog.get(PdfName.NAMES));
    if (names != null) {
        PdfDictionary embFiles = (PdfDictionary) PdfReader
                .getPdfObject(names.get(new PdfName("EmbeddedFiles")));
        if (embFiles != null) {
            HashMap embMap = PdfNameTree.readTree(embFiles);
            for (Iterator i = embMap.values().iterator(); i.hasNext();) {
                PdfDictionary filespec = (PdfDictionary) PdfReader.getPdfObject((PdfObject) i.next());
                Object fileInfo[] = unpackFile(reader, filespec);
                if (fileMap.containsKey(fileInfo[0])) {
                    throw new RuntimeException(DUPLICATE_FILE_NAMES);
                }
                fileMap.put(fileInfo[0], fileInfo[1]);
            }
        }
    }
    for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
        PdfArray annots = (PdfArray) PdfReader.getPdfObject(reader.getPageN(k).get(PdfName.ANNOTS));
        if (annots == null) {
            continue;
        }
        for (Iterator i = annots.getArrayList().listIterator(); i.hasNext();) {
            PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject((PdfObject) i.next());
            PdfName subType = (PdfName) PdfReader.getPdfObject(annot.get(PdfName.SUBTYPE));
            if (!PdfName.FILEATTACHMENT.equals(subType)) {
                continue;
            }
            PdfDictionary filespec = (PdfDictionary) PdfReader.getPdfObject(annot.get(PdfName.FS));
            Object fileInfo[] = unpackFile(reader, filespec);
            if (fileMap.containsKey(fileInfo[0])) {
                throw new RuntimeException(DUPLICATE_FILE_NAMES);
            }

            fileMap.put(fileInfo[0], fileInfo[1]);
        }
    }

    return fileMap;
}