Example usage for com.itextpdf.text.pdf PdfArray size

List of usage examples for com.itextpdf.text.pdf PdfArray size

Introduction

On this page you can find example usages of the method com.itextpdf.text.pdf.PdfArray.size().

Prototype

public int size() 

Source Link

Documentation

Returns the number of entries in the array.

Usage

From source file:com.poet.ar.remover.AnnotationRemover.java

/**
 * Removes every content stream of the page whose extracted text matches one of the keywords.
 *
 * <p>Only pages whose {@code /Contents} entry is an array are processed; for any other
 * page nothing is removed and 0 is returned.
 *
 * @param page the page dictionary to clean
 * @return count of removed content streams
 */
private static int doRemoveContent(PdfDictionary page) {

    PdfArray contentArray = page.getAsArray(PdfName.CONTENTS);
    if (contentArray == null) {
        return 0;
    }

    PdfDictionary resources = page.getAsDict(PdfName.RESOURCES);
    List<Integer> willRemovedIx = new ArrayList<Integer>();

    for (int i = 0; i < contentArray.size(); i++) {
        PdfStream stream = contentArray.getAsStream(i);
        if (stream == null) {
            // The entry is not a stream, so there is no text to compare.
            continue;
        }

        try {
            String text = StreamContentExtractor.extractFromPdfStream(stream, resources);
            if (keywords.contains(text)) {
                willRemovedIx.add(i);
            }
        } catch (Exception ignored) {
            // Deliberate best effort: text extraction is known to fail on some streams
            // (a ClassCastException from PdfArray to PdfLiteral was observed, see the
            // FIXME dated 2016/1/27). Such a stream is kept rather than aborting the page.
        }
    }

    // Remove from back to front so the indices still pending stay valid.
    for (int k = willRemovedIx.size() - 1; k >= 0; k--) {
        contentArray.remove(willRemovedIx.get(k).intValue());
    }

    return willRemovedIx.size();
}

From source file:com.poet.ar.remover.AnnotationRemover.java

/**
 * remove annotation that matches keywords
 *
 * @param page/* w  w w.j a v a 2 s .c om*/
 * @return count of removed annotations
 */
private static int doRemoveAnnotation(PdfDictionary page) {

    // all annotations in page i
    PdfArray annoArray = page.getAsArray(PdfName.ANNOTS);
    List<Integer> willRemovedIx = new ArrayList<Integer>();

    if (annoArray != null) {

        PdfDictionary annotation = null;
        PdfDictionary a = null;
        PdfString uri = null;
        for (int i = 0; i < annoArray.size(); i++) {

            annotation = annoArray.getAsDict(i);

            if (annotation == null) {
                continue;
            }

            a = annotation.getAsDict(PdfName.A);

            if (a == null) {
                continue;
            }

            uri = a.getAsString(PdfName.URI);

            if (uri == null) {
                continue;
            }

            String uriStr = uri.toString().trim();

            if (keywords.contains(uriStr)) {
                willRemovedIx.add(i);
            }

        }

        int i = 0;
        for (Integer ix : willRemovedIx) {
            annoArray.remove(ix - i++);
        }

    }

    return willRemovedIx.size();
}

From source file:de.gbv.marginalia.Annotation.java

License:Open Source License

/**
 * Serialize the annotation in XML format.
 * The annotation is emitted as stream of SAX events to a ContentHandler.
 * The XML is XFDF with additional Marginalia elements in its own namespace.
 *
 * <p>The annotation dictionary itself is only read, never modified.
 *
 * @param handler receiver of the SAX events
 * @throws SAXException if emitting an event fails
 */
public void serializeXML(ContentHandler handler) throws SAXException {
    SimpleXMLCreator xml = new SimpleXMLCreator(handler, namespaces);

    // NOTE: an earlier version removed TYPE, SUBTYPE, PARENT, CONTENTS, POPUP and P from
    // this.dict.getKeys() to track the "remaining" keys. That set is a live view of the
    // dictionary, so the removals deleted the entries from the annotation itself. The
    // result was never read, so the bookkeeping has been dropped. If the remaining keys
    // are ever emitted (TODO: as "m:unknown" elements), work on a copy of the key set.

    Map<String, String> attrs = new HashMap<String, String>();
    for (String aName : this.FIELDS.keySet()) {
        Field f = this.FIELDS.get(aName);
        String value = f.getFrom(this.dict);
        if (value != null) { // TODO: encoding & exception
            attrs.put(aName, value);
        }
    }

    // TODO: find out where page number is stored
    // (the page dictionary under /P carries e.g. CropBox, Rotate, MediaBox)
    if (attrs.get("page") == null) {
        attrs.put("page", "" + this.pageNum);
    }

    String element = subtypes.get(this.subtype);
    if (element == null) { // TODO
        element = this.subtype.toString();
    }

    xml.startElement(element, attrs);

    if (element.equals("ink")) {
        PdfArray inklist = this.dict.getAsArray(new PdfName("InkList"));
        if (inklist != null) {
            xml.startElement("inklist");
            for (int i = 0; i < inklist.size(); i++) {
                PdfArray pathArray = inklist.getAsArray(i);
                if (pathArray == null) {
                    // Entry is not an array, so there is no path to emit.
                    continue;
                }
                // Each path is serialized as "x,y;x,y;...". A trailing unpaired
                // coordinate is ignored instead of overrunning the array.
                StringBuilder gesture = new StringBuilder();
                for (int j = 0; j + 1 < pathArray.size(); j += 2) {
                    if (j > 0) {
                        gesture.append(';');
                    }
                    gesture.append(pathArray.getAsNumber(j).floatValue());
                    gesture.append(',');
                    gesture.append(pathArray.getAsNumber(j + 1).floatValue());
                }
                xml.contentElement("gesture", gesture.toString());
            }
            xml.endElement();
        }
    }

    if (attrs.get("rect") != null) {
        Map<String, String> a = new HashMap<String, String>();
        RectField rf = (RectField) this.FIELDS.get("rect");
        PdfRectangle r = null;
        if (rf != null) {
            r = (PdfRectangle) rf.getObjectFrom(this.dict);
        }
        if (r != null) {
            a.put("left", "" + r.left());
            a.put("bottom", "" + r.bottom());
            a.put("right", "" + r.right());
            a.put("top", "" + r.top());
            xml.emptyElement("m", "rect", a);
        }
    }

    if (this.content != null && !this.content.equals("")) {
        // TODO: encode content if not UTF-8 ?
        xml.contentElement("content", content.toString());
    }
    // TODO: contents-richtext
    // TODO: popup

    // closes the element opened for the annotation itself
    xml.endElement();
}

From source file:de.gbv.marginalia.Marginalia.java

License:Open Source License

/**
 * Inspect a PDF file and write the info to a writer
 * @param writer Writer to a text file/*from ww  w. j  a va  2 s.  com*/
 * @param filename Path to the PDF file
 * @throws IOException
 */
public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException {
    //        writer.println(filename);
    writer.flush();

    PdfReader reader = new PdfReader(filename);

    ContentHandler xmlhandler = new SimpleXMLWriter(writer);
    xmlhandler.startDocument();

    SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true);

    /*
            writer.println("Number of pages: "+reader.getNumberOfPages());
            Rectangle mediabox = reader.getPageSize(1);
            writer.print("Size of page 1: [");
            writer.print(mediabox.getLeft());
            writer.print(',');
            writer.print(mediabox.getBottom());
            writer.print(',');
            writer.print(mediabox.getRight());
            writer.print(',');
            writer.print(mediabox.getTop());
            writer.println("]");
            writer.print("Rotation of page 1: ");
            writer.println(reader.getPageRotation(1));
            writer.print("Page size with rotation of page 1: ");
            writer.println(reader.getPageSizeWithRotation(1));
            writer.println();
            writer.flush();
    */
    List<Annotation> annots = new LinkedList<Annotation>();
    xml.startElement("annots");

    // TODO: The following elements may be added:
    // - optionally write <f href="Document.pdf"/>
    // - optionally write <ids original="ID" modified="ID" />

    xml.startElement("m", "pages");
    for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) {
        PdfDictionary pageDic = reader.getPageN(pageNum);

        Map<String, String> attr = new HashMap<String, String>();
        attr.put("number", "" + pageNum);
        attr.put("rotate", "" + reader.getPageRotation(pageNum));

        Rectangle mediabox = reader.getPageSize(pageNum);
        attr.put("left", "" + mediabox.getLeft());
        attr.put("bottom", "" + mediabox.getBottom());
        attr.put("right", "" + mediabox.getRight());
        attr.put("top", "" + mediabox.getTop());

        xml.contentElement("m", "page", "", attr);

        PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS);
        if (rawannots == null || rawannots.isEmpty()) {
            // writer.println("page "+pageNum+" contains no annotations");
            continue;
        }

        // writer.println("page "+pageNum+" has "+rawannots.size()+" annotations");

        for (int i = 0; i < rawannots.size(); i++) {
            PdfObject obj = rawannots.getDirectObject(i);
            if (!obj.isDictionary())
                continue;
            Annotation a = new Annotation((PdfDictionary) obj, pageNum);
            annots.add(a);
        }

        /**
        // Now we have all highlight and similar annotations, we need
        // to find out what words are actually highlighted! PDF in fact
        // is a dump format to express documents.
        // For some hints see
        // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file
                
        // We could reuse code from LocationTextExtractionStrategy (TODO)
        // LocationTextExtractionStrategy extr = new LocationTextExtractionStrategy();
        String fulltext = PdfTextExtractor.getTextFromPage(reader,pageNum);//,extr
        writer.println(fulltext);
        */
    }
    xml.endElement();

    for (Annotation a : annots) {
        a.serializeXML(xmlhandler);
    }
    // TODO: add page information (page size and orientation)

    xml.endAll();
}

From source file:de.gbv.marginalia.Marginalia.java

License:Open Source License

/**
 * Prints every entry of the given array to standard output, one entry per line,
 * each line consisting of the entry's index immediately followed by its string form.
 * Nothing is printed for a {@code null} array.
 *
 * @param a the array to dump, may be {@code null}
 */
public static void dumpArray(PdfArray a) {
    if (a != null) {
        int index = 0;
        while (index < a.size()) {
            String entry = a.getPdfObject(index).toString();
            System.out.println(index + entry);
            index++;
        }
    }
}

From source file:de.rub.dez6a3.jpdfsigner.control.JPodPDFViewer.java

License:Open Source License

/**
 * Collects the files embedded in the configured PDF document.
 *
 * <p>Only the {@code /Names} array directly below the catalog's
 * {@code /Names /EmbeddedFiles} dictionary is read; an empty list is returned when that
 * structure is absent. For every file specification that has an {@code /F} entry in its
 * {@code /EF} dictionary, one {@link Hashtable} is added holding file name, description,
 * size and, if the stream could be read, the file bytes. Values that cannot be read keep
 * their placeholder ("-" for texts, -1 for the size) and a warning is logged.
 *
 * @return list with one {@link Hashtable} per attachment, never {@code null}
 * @throws IOException if the PDF file cannot be opened
 */
@Override
public ArrayList getAttachments() throws IOException {
    ArrayList files = new ArrayList();
    PdfReader reader = new PdfReader(conf.getPDFFile());
    try {
        PdfDictionary root = reader.getCatalog();
        PdfDictionary documentnames = root.getAsDict(PdfName.NAMES);
        PdfDictionary embeddedfiles = (documentnames == null) ? null
                : documentnames.getAsDict(PdfName.EMBEDDEDFILES);
        PdfArray filespecs = (embeddedfiles == null) ? null
                : embeddedfiles.getAsArray(PdfName.NAMES);
        if (filespecs == null) {
            // The document has no embedded files in a structure this method reads.
            return files;
        }

        // The array holds (name, file specification) pairs; a trailing unpaired
        // entry is ignored instead of overrunning the array.
        for (int i = 0; i + 1 < filespecs.size(); i += 2) {
            PdfDictionary filespec = filespecs.getAsDict(i + 1);
            if (filespec == null) {
                continue;
            }
            PdfDictionary refs = filespec.getAsDict(PdfName.EF);
            if (refs == null || !refs.contains(PdfName.F)) {
                continue;
            }

            String filename = "-";
            String desc = "-";
            int size = -1;
            byte[] attBytes = null;

            try {
                filename = filespec.getAsString(PdfName.F).toString();
            } catch (Exception e) {
                log.warn("Cannot load attachment-name - " + e.getMessage());
            }
            try {
                desc = filespec.getAsString(PdfName.DESC).toString();
            } catch (Exception e) {
                log.warn("Cannot load attachment-description - " + e.getMessage());
            }
            try {
                PRStream stream = (PRStream) PdfReader.getPdfObject(refs.getAsIndirectObject(PdfName.F));
                attBytes = PdfReader.getStreamBytes(stream);
                size = attBytes.length;
            } catch (Exception e) {
                log.warn("Cannot load attachment-size - " + e.getMessage());
            }

            Hashtable fileData = new Hashtable();
            fileData.put(ATTACHMENT_FILENAME_STRING, filename); //filename
            fileData.put(ATTACHMENT_DESCRIPTION_STRING, desc); //Description
            fileData.put(ATTACHMENT_SIZE_INT, size); //size
            if (attBytes != null) {
                // Hashtable rejects null values; when the stream could not be read the
                // key is left out, so a lookup yields null.
                fileData.put(ATTACHMENT_BYTES_ARR, attBytes); //bytes
            }
            files.add(fileData);
        }
        return files;
    } finally {
        reader.close();
    }
}

From source file:mkl.testarea.itext5.pdfcleanup.PdfCleanUpContentOperator.java

License:Open Source License

/**
 * Example./*from   w w w  .  j a va2 s .c o  m*/
 *      TJ = [(h) 3 4 (q) 7 (w) (e)]
 *      Result = {0:0, 1:7, 2:7, 3:0, 4:0}
 *
 * @return Map whose key is an ordinal number of the string in the TJ array and value
 *         is the position adjustment.
 */
private Map<Integer, Float> structureTJarray(PdfArray array) {
    Map<Integer, Float> structuredTJoperands = new HashMap<Integer, Float>();

    if (array.size() == 0) {
        return structuredTJoperands;
    }

    Integer previousStrNum = 0;
    structuredTJoperands.put(previousStrNum, 0f);

    for (int i = 0; i < array.size(); ++i) {
        PdfObject currentObj = array.getPdfObject(i);

        if (currentObj instanceof PdfString && ((PdfString) currentObj).toUnicodeString().length() > 0) {
            ++previousStrNum;
            structuredTJoperands.put(previousStrNum, 0f);
        } else {
            Float oldOffset = structuredTJoperands.get(previousStrNum);
            structuredTJoperands.put(previousStrNum, oldOffset + ((PdfNumber) currentObj).floatValue());
        }
    }

    return structuredTJoperands;
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Extracts locations from the redact annotations contained in the document and applied to the given page.
 *
 * <p>Entries of the page's annotation array that are not dictionaries, or whose subtype
 * is missing or not Redact, are skipped.
 *
 * @param page number of the page the dictionary belongs to
 * @param pageDict dictionary of that page
 * @return the clean up locations of all redact annotations of the page; empty if there are none
 */
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnots(int page, PdfDictionary pageDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();

    // getAsArray yields null both when the entry is absent and when it is not an array.
    PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);
    if (annotsArray == null) {
        return locations;
    }

    for (int i = 0; i < annotsArray.size(); ++i) {
        PdfDictionary annotDict = annotsArray.getAsDict(i);
        if (annotDict == null || !PdfName.REDACT.equals(annotDict.getAsName(PdfName.SUBTYPE))) {
            continue;
        }

        PdfIndirectReference annotIndirRef = annotsArray.getAsIndirectObject(i);
        if (annotIndirRef != null) {
            // A directly embedded annotation has no reference that could be recorded.
            saveRedactAnnotIndirRef(page, annotIndirRef.toString());
        }
        locations.addAll(extractLocationsFromRedactAnnot(page, i, annotDict));
    }

    return locations;
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Extracts locations from the concrete annotation.
 * Note: annotation can consist not only of one area specified by the RECT entry, but also of multiple areas specified
 * by the QuadPoints entry in the annotation dictionary.
 *
 * <p>The rectangles found are also registered in {@code clippingRects} under the annotation index.
 *
 * @param page number of the page the annotation is applied to
 * @param annotIndex index of the annotation in the page's annotation array
 * @param annotDict dictionary of the redact annotation
 * @return one location per marked rectangle; empty if the annotation defines no usable area
 */
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnot(int page, int annotIndex,
        PdfDictionary annotDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();
    List<Rectangle> markedRectangles = new ArrayList<Rectangle>();
    PdfArray quadPoints = annotDict.getAsArray(PdfName.QUADPOINTS);

    // QuadPoints may be missing; such an annotation is described by its RECT entry only.
    if (quadPoints != null && quadPoints.size() != 0) {
        markedRectangles.addAll(translateQuadPointsToRectangles(quadPoints));
    } else {
        PdfArray annotRect = annotDict.getAsArray(PdfName.RECT);
        if (annotRect != null && annotRect.size() >= 4) {
            markedRectangles
                    .add(new Rectangle(annotRect.getAsNumber(0).floatValue(), annotRect.getAsNumber(1).floatValue(),
                            annotRect.getAsNumber(2).floatValue(), annotRect.getAsNumber(3).floatValue()));
        }
    }

    clippingRects.put(annotIndex, markedRectangles);

    BaseColor cleanUpColor = null;
    PdfArray ic = annotDict.getAsArray(PdfName.IC);

    // Only an interior color with at least three components is read as RGB.
    if (ic != null && ic.size() >= 3) {
        cleanUpColor = new BaseColor(ic.getAsNumber(0).floatValue(), ic.getAsNumber(1).floatValue(),
                ic.getAsNumber(2).floatValue());
    }

    PdfStream ro = annotDict.getAsStream(PdfName.RO);

    // When an RO stream is present, no clean up color is used.
    if (ro != null) {
        cleanUpColor = null;
    }

    for (Rectangle rect : markedRectangles) {
        locations.add(new PdfCleanUpLocation(page, rect, cleanUpColor));
    }

    return locations;
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Converts a QuadPoints array into rectangles, one per complete group of eight numbers.
 * A trailing group with fewer than eight numbers is ignored.
 *
 * @param quadPoints the QuadPoints array of an annotation
 * @return the rectangles described by the array, in array order
 */
private List<Rectangle> translateQuadPointsToRectangles(PdfArray quadPoints) {
    List<Rectangle> rectangles = new ArrayList<Rectangle>();

    // Require a full group of eight numbers so the reads below stay inside the array.
    for (int i = 0; i + 8 <= quadPoints.size(); i += 8) {
        rectangles.add(new Rectangle(quadPoints.getAsNumber(i + 4).floatValue(), // QuadPoints have "Z" order
                quadPoints.getAsNumber(i + 5).floatValue(), quadPoints.getAsNumber(i + 2).floatValue(),
                quadPoints.getAsNumber(i + 3).floatValue()));
    }

    return rectangles;
}