Example usage for com.itextpdf.text.pdf PdfName ANNOTS

List of usage examples for com.itextpdf.text.pdf PdfName ANNOTS

Introduction

On this page you can find example usages of com.itextpdf.text.pdf PdfName ANNOTS.

Prototype

PdfName ANNOTS

To view the source code for com.itextpdf.text.pdf PdfName ANNOTS, click the Source Link.

Document

A name

Usage

From source file:com.poet.ar.remover.AnnotationRemover.java

/**
 * remove annotation that matches keywords
 *
 * @param page/*from ww  w  . j a  va  2  s  .  c o m*/
 * @return count of removed annotations
 */
private static int doRemoveAnnotation(PdfDictionary page) {

    // all annotations in page i
    PdfArray annoArray = page.getAsArray(PdfName.ANNOTS);
    List<Integer> willRemovedIx = new ArrayList<Integer>();

    if (annoArray != null) {

        PdfDictionary annotation = null;
        PdfDictionary a = null;
        PdfString uri = null;
        for (int i = 0; i < annoArray.size(); i++) {

            annotation = annoArray.getAsDict(i);

            if (annotation == null) {
                continue;
            }

            a = annotation.getAsDict(PdfName.A);

            if (a == null) {
                continue;
            }

            uri = a.getAsString(PdfName.URI);

            if (uri == null) {
                continue;
            }

            String uriStr = uri.toString().trim();

            if (keywords.contains(uriStr)) {
                willRemovedIx.add(i);
            }

        }

        int i = 0;
        for (Integer ix : willRemovedIx) {
            annoArray.remove(ix - i++);
        }

    }

    return willRemovedIx.size();
}

From source file:commentextractor.CommentExtractorApp.java

License:GNU General Public License

/**
 * Collects the text of the annotation comments ({@code CONTENTS} entries) found on a
 * range of pages of a PDF file.
 *
 * <p>For every annotation that has a contents string, a {@code ----------} separator
 * line, a {@code Page N} line and the comment text are appended; each carriage return in
 * the comment text is replaced by CR+LF. Annotation entries that are not dictionaries,
 * or whose contents entry is not a string, are skipped.
 *
 * <p>Any failure is logged at {@code SEVERE} level and the text collected so far is
 * returned; if the file cannot be opened at all the result is an empty string.
 *
 * @param filename path of the PDF file to read
 * @param first    first page to inspect (page numbers start at 1)
 * @param last     last page to inspect, inclusive; {@code -1}, or any value at or beyond
 *                 the page count, means "up to the last page"
 * @return the collected comments, never {@code null}
 */
static String extractComments(String filename, int first, int last) {
    // Allocated before the reader is opened so that a failure to open the file yields an
    // empty result instead of a NullPointerException on return.
    StringBuilder output = new StringBuilder(1024);
    PdfReader reader = null;
    try {
        reader = new PdfReader(filename);

        int pageCount = reader.getNumberOfPages();
        if (last >= pageCount || last == -1) {
            last = pageCount;
        }

        for (int i = first; i <= last; i++) {

            PdfDictionary page = reader.getPageN(i);
            if (page == null) {
                // No such page (e.g. a page number below 1): nothing to extract.
                continue;
            }

            PdfArray annotsArray = page.getAsArray(PdfName.ANNOTS);
            if (annotsArray == null) {
                continue;
            }

            for (int k = 0; k < annotsArray.size(); k++) {
                // getAsDict/getAsString return null for entries of another type, so one
                // malformed annotation no longer aborts the whole extraction.
                PdfDictionary annot = annotsArray.getAsDict(k);
                if (annot == null) {
                    continue;
                }
                PdfString content = annot.getAsString(PdfName.CONTENTS);
                if (content != null) {
                    output.append("----------\n");
                    output.append("Page ").append(i);
                    output.append("\n");
                    output.append(content.toUnicodeString().replaceAll("\r", "\r\n"));
                    output.append("\n");
                }
            }
        }
    } catch (Exception e) {
        // Best effort: report the problem and return whatever was gathered so far.
        Logger.getLogger(CommentExtractorApp.class.getName()).log(Level.SEVERE, null, e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    return output.toString();
}

From source file:de.gbv.marginalia.Marginalia.java

License:Open Source License

/**
 * Inspect a PDF file and write the info to a writer.
 *
 * <p>The information is emitted as XML through a {@code SimpleXMLWriter} wrapped around
 * the given writer: an {@code annots} element is opened, a {@code pages} element with one
 * {@code page} entry per page (page number, rotation and the left/bottom/right/top
 * coordinates of the page size) is written, and afterwards every dictionary entry found
 * in the pages' annotation arrays is serialized via {@code Annotation.serializeXML}.
 *
 * @param writer Writer to a text file
 * @param filename Path to the PDF file
 * @throws IOException if the PDF file cannot be read
 * @throws SAXException propagated from the XML output calls
 */
public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException {
    writer.flush();

    PdfReader reader = new PdfReader(filename);
    try {
        ContentHandler xmlhandler = new SimpleXMLWriter(writer);
        xmlhandler.startDocument();

        SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true);

        List<Annotation> annots = new LinkedList<Annotation>();
        xml.startElement("annots");

        // TODO: The following elements may be added:
        // - optionally write <f href="Document.pdf"/>
        // - optionally write <ids original="ID" modified="ID" />

        xml.startElement("m", "pages");
        int pageCount = reader.getNumberOfPages();
        for (int pageNum = 1; pageNum <= pageCount; pageNum++) {
            PdfDictionary pageDic = reader.getPageN(pageNum);

            Rectangle mediabox = reader.getPageSize(pageNum);
            Map<String, String> attr = new HashMap<String, String>();
            attr.put("number", String.valueOf(pageNum));
            attr.put("rotate", String.valueOf(reader.getPageRotation(pageNum)));
            attr.put("left", String.valueOf(mediabox.getLeft()));
            attr.put("bottom", String.valueOf(mediabox.getBottom()));
            attr.put("right", String.valueOf(mediabox.getRight()));
            attr.put("top", String.valueOf(mediabox.getTop()));

            xml.contentElement("m", "page", "", attr);

            PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS);
            if (rawannots == null || rawannots.isEmpty()) {
                continue;
            }

            for (int i = 0; i < rawannots.size(); i++) {
                PdfObject obj = rawannots.getDirectObject(i);
                // An entry that cannot be resolved comes back as null; skip it together
                // with everything that is not a dictionary.
                if (obj == null || !obj.isDictionary()) {
                    continue;
                }
                annots.add(new Annotation((PdfDictionary) obj, pageNum));
            }

            // TODO: Now that all highlight and similar annotations are known, find out
            // which words are actually highlighted. For some hints see
            // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file
            // Code from LocationTextExtractionStrategy could be reused, e.g. via
            // PdfTextExtractor.getTextFromPage(reader, pageNum).
        }
        xml.endElement();

        // Annotations are written only after the page list has been closed.
        for (Annotation a : annots) {
            a.serializeXML(xmlhandler);
        }
        // TODO: add page information (page size and orientation)

        xml.endAll();
    } finally {
        // Release the reader even when XML generation fails.
        reader.close();
    }
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Extracts locations from the redact annotations contained in the document and applied to the given page.
 *
 * <p>Entries of the page's annotation array that are not dictionaries, or whose
 * {@code SUBTYPE} is missing or different from {@code REDACT}, are skipped. For every
 * redact annotation the string form of its indirect reference is stored through
 * {@code saveRedactAnnotIndirRef} and its locations are added to the result.
 *
 * @param page     page number handed on to the bookkeeping and extraction helpers
 * @param pageDict dictionary of that page
 * @return locations of all redact annotations found on the page; empty if there are none
 */
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnots(int page, PdfDictionary pageDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();

    // getAsArray returns null both when the entry is absent and when it is not an array,
    // so a single null check covers both cases.
    PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);
    if (annotsArray == null) {
        return locations;
    }

    for (int i = 0; i < annotsArray.size(); ++i) {
        PdfDictionary annotDict = annotsArray.getAsDict(i);
        if (annotDict == null) {
            continue;
        }

        // Constant-first comparison: an annotation without a subtype must not raise a
        // NullPointerException.
        if (!PdfName.REDACT.equals(annotDict.getAsName(PdfName.SUBTYPE))) {
            continue;
        }

        // NOTE(review): a redact annotation embedded directly in the array (not via an
        // indirect reference) would presumably yield null here and fail on toString(), as
        // in the original code - confirm how such annotations should be tracked.
        PdfIndirectReference annotIndirRef = annotsArray.getAsIndirectObject(i);
        saveRedactAnnotIndirRef(page, annotIndirRef.toString());
        locations.addAll(extractLocationsFromRedactAnnot(page, i, annotDict));
    }

    return locations;
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Deletes redact annotations from the page and substitutes them with either OverlayText or RO object if it's needed.
 *
 * <p>Returns immediately when {@code redactAnnotIndirRefs} holds no (or an empty) set for
 * the page. Otherwise every entry of the page's annotation array is removed whose own
 * indirect reference string, or the string returned by {@code getParentIndRefStr}, is
 * contained in that set. Before removal, and only if {@code fillCleanedArea} is set, the
 * annotation's {@code RO} stream is drawn via {@code insertFormXObj}; if there is no such
 * stream, a non-empty {@code OVERLAYTEXT} string is drawn via {@code drawOverlayText}.
 * When the array ends up empty, the {@code ANNOTS} entry is removed from the page.
 *
 * @param pageNum number of the page to process
 * @throws IOException declared by this method; presumably raised by the drawing helpers - confirm
 * @throws DocumentException declared by this method; presumably raised by the drawing helpers - confirm
 */
private void deleteRedactAnnots(int pageNum) throws IOException, DocumentException {
    Set<String> indirRefs = redactAnnotIndirRefs.get(pageNum);

    if (indirRefs == null || indirRefs.isEmpty()) {
        return;
    }

    PdfReader reader = pdfStamper.getReader();
    PdfContentByte canvas = pdfStamper.getOverContent(pageNum);
    PdfDictionary pageDict = reader.getPageN(pageNum);
    // NOTE(review): no null check - assumes the page still has an annotation array
    // whenever references are recorded for it; confirm.
    PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);

    // i is the current position in the (shrinking) annotation array and is stepped back
    // after each removal. j is never stepped back, so it keeps counting the position the
    // entry had before any removal in this call; it is the index used for clippingRects.
    for (int i = 0, j = 0; i < annotsArray.size(); ++i, ++j) {
        PdfIndirectReference annotIndRef = annotsArray.getAsIndirectObject(i);
        PdfDictionary annotDict = annotsArray.getAsDict(i);

        // Match either the annotation itself or the reference string derived from it by
        // getParentIndRefStr.
        if (indirRefs.contains(annotIndRef.toString()) || indirRefs.contains(getParentIndRefStr(annotDict))) {
            PdfStream formXObj = annotDict.getAsStream(PdfName.RO);
            PdfString overlayText = annotDict.getAsString(PdfName.OVERLAYTEXT);

            // The RO stream takes precedence over the overlay text.
            if (fillCleanedArea && formXObj != null) {
                // Build the annotation rectangle from the four numbers of its RECT array.
                PdfArray rectArray = annotDict.getAsArray(PdfName.RECT);
                Rectangle annotRect = new Rectangle(rectArray.getAsNumber(0).floatValue(),
                        rectArray.getAsNumber(1).floatValue(), rectArray.getAsNumber(2).floatValue(),
                        rectArray.getAsNumber(3).floatValue());

                insertFormXObj(canvas, pageDict, formXObj, clippingRects.get(j), annotRect);
            } else if (fillCleanedArea && overlayText != null && overlayText.toUnicodeString().length() > 0) {
                drawOverlayText(canvas, clippingRects.get(j), overlayText, annotDict.getAsString(PdfName.DA),
                        annotDict.getAsNumber(PdfName.Q), annotDict.getAsBoolean(PdfName.REPEAT));
            }

            annotsArray.remove(i--); // the array shrank, so revisit the same position on the next pass
        }
    }

    // Do not leave an empty annotation array behind on the page.
    if (annotsArray.size() == 0) {
        pageDict.remove(PdfName.ANNOTS);
    }
}

From source file:org.sejda.impl.itext5.component.PdfUnpacker.java

License:Open Source License

/**
 * Collects the {@code FS} dictionaries of all file attachment annotations in the document.
 *
 * <p>Every page's annotation array is scanned; entries that are not dictionaries are
 * ignored. For each annotation whose {@code SUBTYPE} is {@code FILEATTACHMENT}, the
 * result of {@code getAsDict(PdfName.FS)} is added to the returned set.
 *
 * @param reader reader whose pages are scanned
 * @return the collected dictionaries, held in a {@code NullSafeSet} (presumably one that
 *         drops {@code null} elements - verify against its implementation)
 */
private Set<PdfDictionary> getFileAttachmentsDictionaries(PdfReader reader) {
    Set<PdfDictionary> retSet = new NullSafeSet<PdfDictionary>();
    for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
        PdfArray annots = reader.getPageN(k).getAsArray(PdfName.ANNOTS);
        if (annots == null) {
            continue;
        }
        for (int i = 0; i < annots.size(); ++i) {
            // getAsDict returns null for entries that are not dictionaries, which avoids
            // the ClassCastException / NullPointerException an unchecked cast would raise.
            PdfDictionary annot = annots.getAsDict(i);
            if (annot != null && PdfName.FILEATTACHMENT.equals(annot.getAsName(PdfName.SUBTYPE))) {
                retSet.add(annot.getAsDict(PdfName.FS));
            }
        }
    }
    return retSet;
}