Example usage for com.itextpdf.text.pdf PdfDictionary getAsArray

List of usage examples for com.itextpdf.text.pdf PdfDictionary getAsArray

Introduction

In this page you can find the example usage for com.itextpdf.text.pdf PdfDictionary getAsArray.

Prototype

public PdfArray getAsArray(final PdfName key) 

Source Link

Document

Returns a PdfObject as a PdfArray, resolving indirect references.

Usage

From source file:com.poet.ar.remover.AnnotationRemover.java

/**
 * remove content that matches keywords//from  w  w w  .  j a  v a2 s.  co  m
 *
 * @param page
 * @return count of removed content
 */
private static int doRemoveContent(PdfDictionary page) {

    // all contents in page i
    PdfArray contentArray = page.getAsArray(PdfName.CONTENTS);
    PdfDictionary resources = page.getAsDict(PdfName.RESOURCES);
    List<Integer> willRemovedIx = new ArrayList<Integer>();

    if (contentArray != null) {

        PdfStream stream = null;
        for (int i = 0; i < contentArray.size(); i++) {

            stream = contentArray.getAsStream(i);

            PRStream pr = (PRStream) stream;

            // TODO // FIXME: 2016/1/27 0027 java.lang.ClassCastException: com.itextpdf.text.pdf.PdfArray cannot be cast to com.itextpdf.text.pdf.PdfLiteral
            // get display text
            //                String text = StreamContentExtractor.extractFromPdfStream(stream, resources);
            //
            //                if (keywords.contains(text)) {
            //                    willRemovedIx.add(i);
            //                }

            try {
                String text = StreamContentExtractor.extractFromPdfStream(stream, resources);

                if (keywords.contains(text)) {
                    willRemovedIx.add(i);
                }
            } catch (Exception ex) {
            }

        }

        int i = 0;
        for (Integer ix : willRemovedIx) {
            contentArray.remove(ix - i++);
        }
    }

    return willRemovedIx.size();
}

From source file:com.poet.ar.remover.AnnotationRemover.java

/**
 * remove annotation that matches keywords
 *
 * @param page//from  w w  w  . j  av  a 2  s  . com
 * @return count of removed annotations
 */
private static int doRemoveAnnotation(PdfDictionary page) {

    // all annotations in page i
    PdfArray annoArray = page.getAsArray(PdfName.ANNOTS);
    List<Integer> willRemovedIx = new ArrayList<Integer>();

    if (annoArray != null) {

        PdfDictionary annotation = null;
        PdfDictionary a = null;
        PdfString uri = null;
        for (int i = 0; i < annoArray.size(); i++) {

            annotation = annoArray.getAsDict(i);

            if (annotation == null) {
                continue;
            }

            a = annotation.getAsDict(PdfName.A);

            if (a == null) {
                continue;
            }

            uri = a.getAsString(PdfName.URI);

            if (uri == null) {
                continue;
            }

            String uriStr = uri.toString().trim();

            if (keywords.contains(uriStr)) {
                willRemovedIx.add(i);
            }

        }

        int i = 0;
        for (Integer ix : willRemovedIx) {
            annoArray.remove(ix - i++);
        }

    }

    return willRemovedIx.size();
}

From source file:commentextractor.CommentExtractorApp.java

License:GNU General Public License

static String extractComments(String filename, int first, int last) {
    StringBuffer output = null;/*from w  ww .  j a v a 2  s.  co  m*/
    try {
        PdfReader reader = new PdfReader(filename);

        if (last >= reader.getNumberOfPages() || (last == -1)) {
            last = reader.getNumberOfPages();
        }

        output = new StringBuffer(1024);

        for (int i = first; i <= last; i++) {

            PdfDictionary page = reader.getPageN(i);
            PdfArray annotsArray = null;

            if (page.getAsArray(PdfName.ANNOTS) == null) {
                continue;
            }

            annotsArray = page.getAsArray(PdfName.ANNOTS);
            for (ListIterator<PdfObject> iter = annotsArray.listIterator(); iter.hasNext();) {
                PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject(iter.next());
                PdfString content = (PdfString) PdfReader.getPdfObject(annot.get(PdfName.CONTENTS));
                if (content != null) {
                    output.append("----------\n");
                    output.append("Page " + i);
                    output.append("\n");
                    output.append(content.toUnicodeString().replaceAll("\r", "\r\n"));
                    output.append("\n");
                }
            }
        }
    } catch (Exception e) {
        Logger.getLogger(CommentExtractorApp.class.getName()).log(Level.SEVERE, null, e);
    }
    return new String(output);
}

From source file:de.gbv.marginalia.Marginalia.java

License:Open Source License

/**
 * Inspect a PDF file and write the info to a writer
 * @param writer Writer to a text file/*from  w  ww. j a  v  a 2 s  . co  m*/
 * @param filename Path to the PDF file
 * @throws IOException
 */
public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException {
    //        writer.println(filename);
    writer.flush();

    PdfReader reader = new PdfReader(filename);

    ContentHandler xmlhandler = new SimpleXMLWriter(writer);
    xmlhandler.startDocument();

    SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true);

    /*
            writer.println("Number of pages: "+reader.getNumberOfPages());
            Rectangle mediabox = reader.getPageSize(1);
            writer.print("Size of page 1: [");
            writer.print(mediabox.getLeft());
            writer.print(',');
            writer.print(mediabox.getBottom());
            writer.print(',');
            writer.print(mediabox.getRight());
            writer.print(',');
            writer.print(mediabox.getTop());
            writer.println("]");
            writer.print("Rotation of page 1: ");
            writer.println(reader.getPageRotation(1));
            writer.print("Page size with rotation of page 1: ");
            writer.println(reader.getPageSizeWithRotation(1));
            writer.println();
            writer.flush();
    */
    List<Annotation> annots = new LinkedList<Annotation>();
    xml.startElement("annots");

    // TODO: The following elements may be added:
    // - optionally write <f href="Document.pdf"/>
    // - optionally write <ids original="ID" modified="ID" />

    xml.startElement("m", "pages");
    for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) {
        PdfDictionary pageDic = reader.getPageN(pageNum);

        Map<String, String> attr = new HashMap<String, String>();
        attr.put("number", "" + pageNum);
        attr.put("rotate", "" + reader.getPageRotation(pageNum));

        Rectangle mediabox = reader.getPageSize(pageNum);
        attr.put("left", "" + mediabox.getLeft());
        attr.put("bottom", "" + mediabox.getBottom());
        attr.put("right", "" + mediabox.getRight());
        attr.put("top", "" + mediabox.getTop());

        xml.contentElement("m", "page", "", attr);

        PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS);
        if (rawannots == null || rawannots.isEmpty()) {
            // writer.println("page "+pageNum+" contains no annotations");
            continue;
        }

        // writer.println("page "+pageNum+" has "+rawannots.size()+" annotations");

        for (int i = 0; i < rawannots.size(); i++) {
            PdfObject obj = rawannots.getDirectObject(i);
            if (!obj.isDictionary())
                continue;
            Annotation a = new Annotation((PdfDictionary) obj, pageNum);
            annots.add(a);
        }

        /**
        // Now we have all highlight and similar annotations, we need
        // to find out what words are actually highlighted! PDF in fact
        // is a dump format to express documents.
        // For some hints see
        // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file
                
        // We could reuse code from LocationTextExtractionStrategy (TODO)
        // LocationTextExtractionStrategy extr = new LocationTextExtractionStrategy();
        String fulltext = PdfTextExtractor.getTextFromPage(reader,pageNum);//,extr
        writer.println(fulltext);
        */
    }
    xml.endElement();

    for (Annotation a : annots) {
        a.serializeXML(xmlhandler);
    }
    // TODO: add page information (page size and orientation)

    xml.endAll();
}

From source file:de.rub.dez6a3.jpdfsigner.control.JPodPDFViewer.java

License:Open Source License

@Override
public ArrayList getAttachments() throws IOException {
    ArrayList files = new ArrayList();
    PdfReader reader = new PdfReader(conf.getPDFFile());
    PdfDictionary root = reader.getCatalog();
    PdfDictionary documentnames = root.getAsDict(PdfName.NAMES);
    PdfDictionary embeddedfiles = documentnames.getAsDict(PdfName.EMBEDDEDFILES);
    PdfArray filespecs = embeddedfiles.getAsArray(PdfName.NAMES);
    PdfDictionary filespec;//from  w  w  w  .  j  a  v  a  2s  .c o m
    PdfDictionary refs;
    for (int i = 0; i < filespecs.size();) {
        filespecs.getAsName(i++);
        filespec = filespecs.getAsDict(i++);
        refs = filespec.getAsDict(PdfName.EF);
        Iterator it = refs.getKeys().iterator();
        while (it.hasNext()) {
            PdfName key = (PdfName) it.next();
            if (key.toString().equals("/F")) {

                String filename = "-";
                String desc = "-";
                int size = -1;
                String moddate = "-";
                String compsize = "-";
                PdfObject pdfobj = null;

                try {
                    filename = filespec.getAsString(key).toString();
                } catch (Exception e) {
                    log.warn("Cannot load attachment-name - " + e.getMessage());
                }
                try {
                    desc = filespec.getAsString(PdfName.DESC).toString();
                } catch (Exception e) {
                    log.warn("Cannot load attachment-description - " + e.getMessage());
                }
                byte[] attBytes = null;
                try {
                    PRStream stream = (PRStream) PdfReader.getPdfObject(refs.getAsIndirectObject(key));
                    attBytes = PdfReader.getStreamBytes(stream);
                    size = attBytes.length;
                } catch (Exception e) {
                    log.warn("Cannot load attachment-size - " + e.getMessage());
                }
                try {
                    pdfobj = PdfReader.getPdfObject(refs.getAsIndirectObject(key));
                } catch (Exception e) {
                    log.warn("Cannot load attachment-pdfobject - " + e.getMessage());
                }

                Hashtable fileData = new Hashtable();
                fileData.put(ATTACHMENT_FILENAME_STRING, filename); //filename
                fileData.put(ATTACHMENT_DESCRIPTION_STRING, desc); //Description
                fileData.put(ATTACHMENT_SIZE_INT, size); //size
                fileData.put(ATTACHMENT_BYTES_ARR, attBytes); //bytes
                files.add(fileData);
            }
        }
    }
    return files;
}

From source file:io.konik.carriage.itext.ITextInvoiceExtractor.java

License:Open Source License

private static PdfArray getValidAf(PdfDictionary catalog) {
    if (catalog.contains(AF)) {
        PdfArray af = catalog.getAsArray(AF);
        if (!af.isEmpty() && af.getDirectObject(0).isDictionary()) {
            return af;
        }//from  w  ww .  ja  v  a  2  s.c om
    }
    throw new InvoiceExtractionError("Pdf catalog does not contain Valid AF Entry");
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Extracts locations from the redact annotations contained in the document and applied to the given page.
 *//*from   w ww.  j  a v  a 2  s . c o m*/
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnots(int page, PdfDictionary pageDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();

    if (pageDict.contains(PdfName.ANNOTS)) {
        PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);

        for (int i = 0; i < annotsArray.size(); ++i) {
            PdfIndirectReference annotIndirRef = annotsArray.getAsIndirectObject(i);
            PdfDictionary annotDict = annotsArray.getAsDict(i);
            PdfName annotSubtype = annotDict.getAsName(PdfName.SUBTYPE);

            if (annotSubtype.equals(PdfName.REDACT)) {
                saveRedactAnnotIndirRef(page, annotIndirRef.toString());
                locations.addAll(extractLocationsFromRedactAnnot(page, i, annotDict));
            }
        }
    }

    return locations;
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Extracts locations from the concrete annotation.
 * Note: annotation can consist not only of one area specified by the RECT entry, but also of multiple areas specified
 * by the QuadPoints entry in the annotation dictionary.
 *///  w ww  . j a v  a 2s .c om
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnot(int page, int annotIndex,
        PdfDictionary annotDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();
    List<Rectangle> markedRectangles = new ArrayList<Rectangle>();
    PdfArray quadPoints = annotDict.getAsArray(PdfName.QUADPOINTS);

    if (quadPoints.size() != 0) {
        markedRectangles.addAll(translateQuadPointsToRectangles(quadPoints));
    } else {
        PdfArray annotRect = annotDict.getAsArray(PdfName.RECT);
        markedRectangles
                .add(new Rectangle(annotRect.getAsNumber(0).floatValue(), annotRect.getAsNumber(1).floatValue(),
                        annotRect.getAsNumber(2).floatValue(), annotRect.getAsNumber(3).floatValue()));
    }

    clippingRects.put(annotIndex, markedRectangles);

    BaseColor cleanUpColor = null;
    PdfArray ic = annotDict.getAsArray(PdfName.IC);

    if (ic != null) {
        cleanUpColor = new BaseColor(ic.getAsNumber(0).floatValue(), ic.getAsNumber(1).floatValue(),
                ic.getAsNumber(2).floatValue());
    }

    PdfStream ro = annotDict.getAsStream(PdfName.RO);

    if (ro != null) {
        cleanUpColor = null;
    }

    for (Rectangle rect : markedRectangles) {
        locations.add(new PdfCleanUpLocation(page, rect, cleanUpColor));
    }

    return locations;
}

From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java

License:Open Source License

/**
 * Deletes redact annotations from the page and substitutes them with either OverlayText or RO object if it's needed.
 *///from  www .  ja va  2 s.com
private void deleteRedactAnnots(int pageNum) throws IOException, DocumentException {
    Set<String> indirRefs = redactAnnotIndirRefs.get(pageNum);

    if (indirRefs == null || indirRefs.isEmpty()) {
        return;
    }

    PdfReader reader = pdfStamper.getReader();
    PdfContentByte canvas = pdfStamper.getOverContent(pageNum);
    PdfDictionary pageDict = reader.getPageN(pageNum);
    PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);

    // j is for access annotRect (i can be decreased, so we need to store additional index,
    // indicating current position in ANNOTS array in case if we don't remove anything
    for (int i = 0, j = 0; i < annotsArray.size(); ++i, ++j) {
        PdfIndirectReference annotIndRef = annotsArray.getAsIndirectObject(i);
        PdfDictionary annotDict = annotsArray.getAsDict(i);

        if (indirRefs.contains(annotIndRef.toString()) || indirRefs.contains(getParentIndRefStr(annotDict))) {
            PdfStream formXObj = annotDict.getAsStream(PdfName.RO);
            PdfString overlayText = annotDict.getAsString(PdfName.OVERLAYTEXT);

            if (fillCleanedArea && formXObj != null) {
                PdfArray rectArray = annotDict.getAsArray(PdfName.RECT);
                Rectangle annotRect = new Rectangle(rectArray.getAsNumber(0).floatValue(),
                        rectArray.getAsNumber(1).floatValue(), rectArray.getAsNumber(2).floatValue(),
                        rectArray.getAsNumber(3).floatValue());

                insertFormXObj(canvas, pageDict, formXObj, clippingRects.get(j), annotRect);
            } else if (fillCleanedArea && overlayText != null && overlayText.toUnicodeString().length() > 0) {
                drawOverlayText(canvas, clippingRects.get(j), overlayText, annotDict.getAsString(PdfName.DA),
                        annotDict.getAsNumber(PdfName.Q), annotDict.getAsBoolean(PdfName.REPEAT));
            }

            annotsArray.remove(i--); // array size is changed, so we need to decrease i
        }
    }

    if (annotsArray.size() == 0) {
        pageDict.remove(PdfName.ANNOTS);
    }
}

From source file:org.gmdev.pdftrick.utils.CustomExtraImgReader.java

License:Open Source License

/**
 * Read a png image with if all other method fails
 * @param ref//from  w ww. j a  v  a  2 s .  co  m
 * @param resultFile
 * @return The BufferedImage obj
 * @throws IOException
 * @throws ImageReadException
 */
public static BufferedImage readIndexedPNG(int ref, String resultFile) throws IOException, ImageReadException {

    PdfReader reader = new PdfReader(resultFile);
    PRStream stream = (PRStream) reader.getPdfObject(ref);
    PdfDictionary dic = stream;
    byte[] content = PdfReader.getStreamBytesRaw(stream);

    int width = dic.getAsNumber(PdfName.WIDTH).intValue();
    int height = dic.getAsNumber(PdfName.HEIGHT).intValue();
    int pngBitDepth = dic.getAsNumber(PdfName.BITSPERCOMPONENT).intValue();

    PdfObject colorspace = dic.getDirectObject(PdfName.COLORSPACE);
    PdfArray decode = dic.getAsArray(PdfName.DECODE);
    PdfArray carray = (PdfArray) colorspace;
    PdfObject id2 = carray.getDirectObject(3);

    byte[] palette = null;
    if (id2 instanceof PdfString) {
        palette = ((PdfString) id2).getBytes();
    } else if (id2 instanceof PRStream) {
        palette = PdfReader.getStreamBytes(((PRStream) id2));
    }

    Map<PdfName, FilterHandlers.FilterHandler> handlers = new HashMap<PdfName, FilterHandlers.FilterHandler>(
            FilterHandlers.getDefaultFilterHandlers());
    byte[] imageBytes = PdfReader.decodeBytes(content, dic, handlers);

    int stride = (width * pngBitDepth + 7) / 8;
    ByteArrayOutputStream ms = new ByteArrayOutputStream();
    PngWriter png = new PngWriter(ms);

    if (decode != null) {
        if (pngBitDepth == 1) {
            // if the decode array is 1,0, then we need to invert the image
            if (decode.getAsNumber(0).intValue() == 1 && decode.getAsNumber(1).intValue() == 0) {
                int len = imageBytes.length;
                for (int t = 0; t < len; ++t) {
                    imageBytes[t] ^= 0xff;
                }
            } else {
                // if the decode array is 0,1, do nothing.  It's possible that the array could be 0,0 or 1,1 - but that would be silly, so we'll just ignore that case
            }
        } else {
            // todo: add decode transformation for other depths
        }
    }
    int pngColorType = 0;
    png.writeHeader(width, height, pngBitDepth, pngColorType);

    if (palette != null) {
        png.writePalette(palette);
    }
    png.writeData(imageBytes, stride);
    png.writeEnd();

    imageBytes = ms.toByteArray();

    InputStream in = new ByteArrayInputStream(imageBytes);
    ImageInputStream ima_stream = ImageIO.createImageInputStream(in);

    BufferedImage buffImg = null;
    BufferedImage buffPic = ImageIO.read(ima_stream);

    // check if image contains a mask image ... experimental for this type of image
    BufferedImage buffMask = null;
    PRStream maskStream = (PRStream) dic.getAsStream(PdfName.SMASK);
    if (maskStream != null) {
        PdfImageObject maskImage = new PdfImageObject(maskStream);
        buffMask = maskImage.getBufferedImage();

        Image img = PdfTrickUtils.TransformGrayToTransparency(buffMask);
        buffImg = PdfTrickUtils.ApplyTransparency(buffPic, img);
    } else {
        buffImg = buffPic;
    }

    reader.close();
    ms.close();
    in.close();
    return buffImg;
}