Example usage for com.itextpdf.text.pdf PdfDictionary getAsArray

Introduction

In this page you can find the example usage for com.itextpdf.text.pdf PdfDictionary getAsArray.

Prototype

public PdfArray getAsArray(final PdfName key)

Source Link

Document

Returns a PdfObject as a PdfArray, resolving indirect references.

Usage

From source file:com.poet.ar.remover.AnnotationRemover.java

/**
 * remove content that matches keywords//from  w  w w  .  j a  v a2 s.  co  m
 *
 * @param page
 * @return count of removed content
 */
private static int doRemoveContent(PdfDictionary page) {

    // all contents in page i
    PdfArray contentArray = page.getAsArray(PdfName.CONTENTS);
    PdfDictionary resources = page.getAsDict(PdfName.RESOURCES);
    List<Integer> willRemovedIx = new ArrayList<Integer>();

    if (contentArray != null) {

        PdfStream stream = null;
        for (int i = 0; i < contentArray.size(); i++) {

            stream = contentArray.getAsStream(i);

            PRStream pr = (PRStream) stream;

            // TODO // FIXME: 2016/1/27 0027 java.lang.ClassCastException: com.itextpdf.text.pdf.PdfArray cannot be cast to com.itextpdf.text.pdf.PdfLiteral
            // get display text
            //                String text = StreamContentExtractor.extractFromPdfStream(stream, resources);
            //
            //                if (keywords.contains(text)) {
            //                    willRemovedIx.add(i);
            //                }

            try {
                String text = StreamContentExtractor.extractFromPdfStream(stream, resources);

                if (keywords.contains(text)) {
                    willRemovedIx.add(i);
                }
            } catch (Exception ex) {
            }

        }

        int i = 0;
        for (Integer ix : willRemovedIx) {
            contentArray.remove(ix - i++);
        }
    }

    return willRemovedIx.size();
}

From source file:com.poet.ar.remover.AnnotationRemover.java

/**
 * remove annotation that matches keywords
 *
 * @param page//from  w w  w  . j  av  a 2  s  . com
 * @return count of removed annotations
 */
private static int doRemoveAnnotation(PdfDictionary page) {

    // all annotations in page i
    PdfArray annoArray = page.getAsArray(PdfName.ANNOTS);
    List<Integer> willRemovedIx = new ArrayList<Integer>();

    if (annoArray != null) {

        PdfDictionary annotation = null;
        PdfDictionary a = null;
        PdfString uri = null;
        for (int i = 0; i < annoArray.size(); i++) {

            annotation = annoArray.getAsDict(i);

            if (annotation == null) {
                continue;
            }

            a = annotation.getAsDict(PdfName.A);

            if (a == null) {
                continue;
            }

            uri = a.getAsString(PdfName.URI);

            if (uri == null) {
                continue;
            }

            String uriStr = uri.toString().trim();

            if (keywords.contains(uriStr)) {
                willRemovedIx.add(i);
            }

        }

        int i = 0;
        for (Integer ix : willRemovedIx) {
            annoArray.remove(ix - i++);
        }

    }

    return willRemovedIx.size();
}

From source file:commentextractor.CommentExtractorApp.java

License:GNU General Public License

static String extractComments(String filename, int first, int last) {
    StringBuffer output = null;/*from w  ww .  j a v a 2  s.  co  m*/
    try {
        PdfReader reader = new PdfReader(filename);

        if (last >= reader.getNumberOfPages() || (last == -1)) {
            last = reader.getNumberOfPages();
        }

        output = new StringBuffer(1024);

        for (int i = first; i <= last; i++) {

            PdfDictionary page = reader.getPageN(i);
            PdfArray annotsArray = null;

            if (page.getAsArray(PdfName.ANNOTS) == null) {
                continue;
            }

            annotsArray = page.getAsArray(PdfName.ANNOTS);
            for (ListIterator<PdfObject> iter = annotsArray.listIterator(); iter.hasNext();) {
                PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject(iter.next());
                PdfString content = (PdfString) PdfReader.getPdfObject(annot.get(PdfName.CONTENTS));
                if (content != null) {
                    output.append("----------\n");
                    output.append("Page " + i);
                    output.append("\n");
                    output.append(content.toUnicodeString().replaceAll("\r", "\r\n"));
                    output.append("\n");
                }
            }
        }
    } catch (Exception e) {
        Logger.getLogger(CommentExtractorApp.class.getName()).log(Level.SEVERE, null, e);
    }
    return new String(output);
}

From source file:de.gbv.marginalia.Marginalia.java

License:Open Source License

/**
 * Inspect a PDF file and write the info to a writer
 * @param writer Writer to a text file/*from  w  ww. j a  v  a 2 s  . co  m*/
 * @param filename Path to the PDF file
 * @throws IOException
 */
public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException {
    //        writer.println(filename);
    writer.flush();

    PdfReader reader = new PdfReader(filename);

    ContentHandler xmlhandler = new SimpleXMLWriter(writer);
    xmlhandler.startDocument();

    SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true);

    /*
            writer.println("Number of pages: "+reader.getNumberOfPages());
            Rectangle mediabox = reader.getPageSize(1);
            writer.print("Size of page 1: [");
            writer.print(mediabox.getLeft());
            writer.print(',');
            writer.print(mediabox.getBottom());
            writer.print(',');
            writer.print(mediabox.getRight());
            writer.print(',');
            writer.print(mediabox.getTop());
            writer.println("]");
            writer.print("Rotation of page 1: ");
            writer.println(reader.getPageRotation(1));
            writer.print("Page size with rotation of page 1: ");
            writer.println(reader.getPageSizeWithRotation(1));
            writer.println();
            writer.flush();
    */
    List<Annotation> annots = new LinkedList<Annotation>();
    xml.startElement("annots");

    // TODO: The following elements may be added:
    // - optionally write <f href="Document.pdf"/>
    // - optionally write <ids original="ID" modified="ID" />

    xml.startElement("m", "pages");
    for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) {
        PdfDictionary pageDic = reader.getPageN(pageNum);

        Map<String, String> attr = new HashMap<String, String>();
        attr.put("number", "" + pageNum);
        attr.put("rotate", "" + reader.getPageRotation(pageNum));

        Rectangle mediabox = reader.getPageSize(pageNum);
        attr.put("left", "" + mediabox.getLeft());
        attr.put("bottom", "" + mediabox.getBottom());
        attr.put("right", "" + mediabox.getRight());
        attr.put("top", "" + mediabox.getTop());

        xml.contentElement("m", "page", "", attr);

        PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS);
        if (rawannots == null || rawannots.isEmpty()) {
            // writer.println("page "+pageNum+" contains no annotations");
            continue;
        }

        // writer.println("page "+pageNum+" has "+rawannots.size()+" annotations");

        for (int i = 0; i < rawannots.size(); i++) {
            PdfObject obj = rawannots.getDirectObject(i);
            if (!obj.isDictionary())
                continue;
            Annotation a = new Annotation((PdfDictionary) obj, pageNum);
            annots.add(a);
        }

        /**
        // Now we have all highlight and similar annotations, we need
        // to find out what words are actually highlighted! PDF in fact
        // is a dump format to express documents.
        // For some hints see
        // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file
                
        // We could reuse code from LocationTextExtractionStrategy (TODO)
        // LocationTextExtractionStrategy extr = new LocationTextExtractionStrategy();
        String fulltext = PdfTextExtractor.getTextFromPage(reader,pageNum);//,extr
        writer.println(fulltext);
        */
    }
    xml.endElement();

    for (Annotation a : annots) {
        a.serializeXML(xmlhandler);
    }
    // TODO: add page information (page size and orientation)

    xml.endAll();
}

From source file:de.rub.dez6a3.jpdfsigner.control.JPodPDFViewer.java

License:Open Source License

@Override
public ArrayList getAttachments() throws IOException {
    ArrayList files = new ArrayList();
    PdfReader reader = new PdfReader(conf.getPDFFile());
    PdfDictionary root = reader.getCatalog();
    PdfDictionary documentnames = root.getAsDict(PdfName.NAMES);
    PdfDictionary embeddedfiles = documentnames.getAsDict(PdfName.EMBEDDEDFILES);
    PdfArray filespecs = embeddedfiles.getAsArray(PdfName.NAMES);
    PdfDictionary filespec;//from  w  w  w  .  j  a  v  a  2s  .c o m
    PdfDictionary refs;
    for (int i = 0; i < filespecs.size();) {
        filespecs.getAsName(i++);
        filespec = filespecs.getAsDict(i++);
        refs = filespec.getAsDict(PdfName.EF);
        Iterator it = refs.getKeys().iterator();
        while (it.hasNext()) {
            PdfName key = (PdfName) it.next();
            if (key.toString().equals("/F")) {

                String filename = "-";
                String desc = "-";
                int size = -1;
                String moddate = "-";
                String compsize = "-";
                PdfObject pdfobj = null;

                try {
                    filename = filespec.getAsString(key).toString();
                } catch (Exception e) {
                    log.warn("Cannot load attachment-name - " + e.getMessage());
                }
                try {
                    desc = filespec.getAsString(PdfName.DESC).toString();
                } catch (Exception e) {
                    log.warn("Cannot load attachment-description - " + e.getMessage());
                }
                byte[] attBytes = null;
                try {
                    PRStream stream = (PRStream) PdfReader.getPdfObject(refs.getAsIndirectObject(key));
                    attBytes = PdfReader.getStreamBytes(stream);
                    size = attBytes.length;
                } catch (Exception e) {
                    log.warn("Cannot load attachment-size - " + e.getMessage());
                }
                try {
                    pdfobj = PdfReader.getPdfObject(refs.getAsIndirectObject(key));
                } catch (Exception e) {
                    log.warn("Cannot load attachment-pdfobject - " + e.getMessage());
                }

                Hashtable fileData = new Hashtable();
                fileData.put(ATTACHMENT_FILENAME_STRING, filename); //filename
                fileData.put(ATTACHMENT_DESCRIPTION_STRING, desc); //Description
                fileData.put(ATTACHMENT_SIZE_INT, size); //size
                fileData.put(ATTACHMENT_BYTES_ARR, attBytes); //bytes
                files.add(fileData);
            }
        }
    }
    return files;
}

From source file:io.konik.carriage.itext.ITextInvoiceExtractor.java

License:Open Source License

private static PdfArray getValidAf(PdfDictionary catalog) {
    if (catalog.contains(AF)) {
        PdfArray af = catalog.getAsArray(AF);
        if (!af.isEmpty() && af.getDirectObject(0).isDictionary()) {
            return af;
        }//from  w  ww .  ja  v  a  2  s.c om
    }
    throw new InvoiceExtractionError("Pdf catalog does not contain Valid AF Entry");
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Extracts locations from the redact annotations contained in the document and applied to the given page.
 *//*from   w ww.  j  a v  a 2  s . c o m*/
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnots(int page, PdfDictionary pageDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();

    if (pageDict.contains(PdfName.ANNOTS)) {
        PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);

        for (int i = 0; i < annotsArray.size(); ++i) {
            PdfIndirectReference annotIndirRef = annotsArray.getAsIndirectObject(i);
            PdfDictionary annotDict = annotsArray.getAsDict(i);
            PdfName annotSubtype = annotDict.getAsName(PdfName.SUBTYPE);

            if (annotSubtype.equals(PdfName.REDACT)) {
                saveRedactAnnotIndirRef(page, annotIndirRef.toString());
                locations.addAll(extractLocationsFromRedactAnnot(page, i, annotDict));
            }
        }
    }

    return locations;
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Extracts locations from the concrete annotation.
 * Note: annotation can consist not only of one area specified by the RECT entry, but also of multiple areas specified
 * by the QuadPoints entry in the annotation dictionary.
 *///  w ww  . j a v  a 2s .c om
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnot(int page, int annotIndex,
        PdfDictionary annotDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();
    List<Rectangle> markedRectangles = new ArrayList<Rectangle>();
    PdfArray quadPoints = annotDict.getAsArray(PdfName.QUADPOINTS);

    if (quadPoints.size() != 0) {
        markedRectangles.addAll(translateQuadPointsToRectangles(quadPoints));
    } else {
        PdfArray annotRect = annotDict.getAsArray(PdfName.RECT);
        markedRectangles
                .add(new Rectangle(annotRect.getAsNumber(0).floatValue(), annotRect.getAsNumber(1).floatValue(),
                        annotRect.getAsNumber(2).floatValue(), annotRect.getAsNumber(3).floatValue()));
    }

    clippingRects.put(annotIndex, markedRectangles);

    BaseColor cleanUpColor = null;
    PdfArray ic = annotDict.getAsArray(PdfName.IC);

    if (ic != null) {
        cleanUpColor = new BaseColor(ic.getAsNumber(0).floatValue(), ic.getAsNumber(1).floatValue(),
                ic.getAsNumber(2).floatValue());
    }

    PdfStream ro = annotDict.getAsStream(PdfName.RO);

    if (ro != null) {
        cleanUpColor = null;
    }

    for (Rectangle rect : markedRectangles) {
        locations.add(new PdfCleanUpLocation(page, rect, cleanUpColor));
    }

    return locations;
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Deletes redact annotations from the page and substitutes them with either OverlayText or RO object if it's needed.
 *///from  www .  ja va  2 s.com
private void deleteRedactAnnots(int pageNum) throws IOException, DocumentException {
    Set<String> indirRefs = redactAnnotIndirRefs.get(pageNum);

    if (indirRefs == null || indirRefs.isEmpty()) {
        return;
    }

    PdfReader reader = pdfStamper.getReader();
    PdfContentByte canvas = pdfStamper.getOverContent(pageNum);
    PdfDictionary pageDict = reader.getPageN(pageNum);
    PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);

    // j is for access annotRect (i can be decreased, so we need to store additional index,
    // indicating current position in ANNOTS array in case if we don't remove anything
    for (int i = 0, j = 0; i < annotsArray.size(); ++i, ++j) {
        PdfIndirectReference annotIndRef = annotsArray.getAsIndirectObject(i);
        PdfDictionary annotDict = annotsArray.getAsDict(i);

        if (indirRefs.contains(annotIndRef.toString()) || indirRefs.contains(getParentIndRefStr(annotDict))) {
            PdfStream formXObj = annotDict.getAsStream(PdfName.RO);
            PdfString overlayText = annotDict.getAsString(PdfName.OVERLAYTEXT);

            if (fillCleanedArea && formXObj != null) {
                PdfArray rectArray = annotDict.getAsArray(PdfName.RECT);
                Rectangle annotRect = new Rectangle(rectArray.getAsNumber(0).floatValue(),
                        rectArray.getAsNumber(1).floatValue(), rectArray.getAsNumber(2).floatValue(),
                        rectArray.getAsNumber(3).floatValue());

                insertFormXObj(canvas, pageDict, formXObj, clippingRects.get(j), annotRect);
            } else if (fillCleanedArea && overlayText != null && overlayText.toUnicodeString().length() > 0) {
                drawOverlayText(canvas, clippingRects.get(j), overlayText, annotDict.getAsString(PdfName.DA),
                        annotDict.getAsNumber(PdfName.Q), annotDict.getAsBoolean(PdfName.REPEAT));
            }

            annotsArray.remove(i--); // array size is changed, so we need to decrease i
        }
    }

    if (annotsArray.size() == 0) {
        pageDict.remove(PdfName.ANNOTS);
    }
}

From source file:org.gmdev.pdftrick.utils.CustomExtraImgReader.java

License:Open Source License

/**
 * Read a png image with if all other method fails
 * @param ref//from  w ww. j a  v  a  2 s .  co  m
 * @param resultFile
 * @return The BufferedImage obj
 * @throws IOException
 * @throws ImageReadException
 */
public static BufferedImage readIndexedPNG(int ref, String resultFile) throws IOException, ImageReadException {

    PdfReader reader = new PdfReader(resultFile);
    PRStream stream = (PRStream) reader.getPdfObject(ref);
    PdfDictionary dic = stream;
    byte[] content = PdfReader.getStreamBytesRaw(stream);

    int width = dic.getAsNumber(PdfName.WIDTH).intValue();
    int height = dic.getAsNumber(PdfName.HEIGHT).intValue();
    int pngBitDepth = dic.getAsNumber(PdfName.BITSPERCOMPONENT).intValue();

    PdfObject colorspace = dic.getDirectObject(PdfName.COLORSPACE);
    PdfArray decode = dic.getAsArray(PdfName.DECODE);
    PdfArray carray = (PdfArray) colorspace;
    PdfObject id2 = carray.getDirectObject(3);

    byte[] palette = null;
    if (id2 instanceof PdfString) {
        palette = ((PdfString) id2).getBytes();
    } else if (id2 instanceof PRStream) {
        palette = PdfReader.getStreamBytes(((PRStream) id2));
    }

    Map<PdfName, FilterHandlers.FilterHandler> handlers = new HashMap<PdfName, FilterHandlers.FilterHandler>(
            FilterHandlers.getDefaultFilterHandlers());
    byte[] imageBytes = PdfReader.decodeBytes(content, dic, handlers);

    int stride = (width * pngBitDepth + 7) / 8;
    ByteArrayOutputStream ms = new ByteArrayOutputStream();
    PngWriter png = new PngWriter(ms);

    if (decode != null) {
        if (pngBitDepth == 1) {
            // if the decode array is 1,0, then we need to invert the image
            if (decode.getAsNumber(0).intValue() == 1 && decode.getAsNumber(1).intValue() == 0) {
                int len = imageBytes.length;
                for (int t = 0; t < len; ++t) {
                    imageBytes[t] ^= 0xff;
                }
            } else {
                // if the decode array is 0,1, do nothing.  It's possible that the array could be 0,0 or 1,1 - but that would be silly, so we'll just ignore that case
            }
        } else {
            // todo: add decode transformation for other depths
        }
    }
    int pngColorType = 0;
    png.writeHeader(width, height, pngBitDepth, pngColorType);

    if (palette != null) {
        png.writePalette(palette);
    }
    png.writeData(imageBytes, stride);
    png.writeEnd();

    imageBytes = ms.toByteArray();

    InputStream in = new ByteArrayInputStream(imageBytes);
    ImageInputStream ima_stream = ImageIO.createImageInputStream(in);

    BufferedImage buffImg = null;
    BufferedImage buffPic = ImageIO.read(ima_stream);

    // check if image contains a mask image ... experimental for this type of image
    BufferedImage buffMask = null;
    PRStream maskStream = (PRStream) dic.getAsStream(PdfName.SMASK);
    if (maskStream != null) {
        PdfImageObject maskImage = new PdfImageObject(maskStream);
        buffMask = maskImage.getBufferedImage();

        Image img = PdfTrickUtils.TransformGrayToTransparency(buffMask);
        buffImg = PdfTrickUtils.ApplyTransparency(buffPic, img);
    } else {
        buffImg = buffPic;
    }

    reader.close();
    ms.close();
    in.close();
    return buffImg;
}