Example usage for com.itextpdf.text.pdf PdfArray getDirectObject

List of usage examples for com.itextpdf.text.pdf PdfArray getDirectObject

Introduction

In this page you can find the example usage for com.itextpdf.text.pdf PdfArray getDirectObject.

Prototype

public PdfObject getDirectObject(final int idx) 

Source Link

Document

Returns the PdfObject with the specified index, resolving a possible indirect reference to a direct object.

Usage

From source file:de.gbv.marginalia.Marginalia.java

License:Open Source License

/**
 * Inspect a PDF file and write the info to a writer
 * @param writer Writer to a text file//  ww w . j av a2s . c  o m
 * @param filename Path to the PDF file
 * @throws IOException
 */
public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException {
    //        writer.println(filename);
    writer.flush();

    PdfReader reader = new PdfReader(filename);

    ContentHandler xmlhandler = new SimpleXMLWriter(writer);
    xmlhandler.startDocument();

    SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true);

    /*
            writer.println("Number of pages: "+reader.getNumberOfPages());
            Rectangle mediabox = reader.getPageSize(1);
            writer.print("Size of page 1: [");
            writer.print(mediabox.getLeft());
            writer.print(',');
            writer.print(mediabox.getBottom());
            writer.print(',');
            writer.print(mediabox.getRight());
            writer.print(',');
            writer.print(mediabox.getTop());
            writer.println("]");
            writer.print("Rotation of page 1: ");
            writer.println(reader.getPageRotation(1));
            writer.print("Page size with rotation of page 1: ");
            writer.println(reader.getPageSizeWithRotation(1));
            writer.println();
            writer.flush();
    */
    List<Annotation> annots = new LinkedList<Annotation>();
    xml.startElement("annots");

    // TODO: The following elements may be added:
    // - optionally write <f href="Document.pdf"/>
    // - optionally write <ids original="ID" modified="ID" />

    xml.startElement("m", "pages");
    for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) {
        PdfDictionary pageDic = reader.getPageN(pageNum);

        Map<String, String> attr = new HashMap<String, String>();
        attr.put("number", "" + pageNum);
        attr.put("rotate", "" + reader.getPageRotation(pageNum));

        Rectangle mediabox = reader.getPageSize(pageNum);
        attr.put("left", "" + mediabox.getLeft());
        attr.put("bottom", "" + mediabox.getBottom());
        attr.put("right", "" + mediabox.getRight());
        attr.put("top", "" + mediabox.getTop());

        xml.contentElement("m", "page", "", attr);

        PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS);
        if (rawannots == null || rawannots.isEmpty()) {
            // writer.println("page "+pageNum+" contains no annotations");
            continue;
        }

        // writer.println("page "+pageNum+" has "+rawannots.size()+" annotations");

        for (int i = 0; i < rawannots.size(); i++) {
            PdfObject obj = rawannots.getDirectObject(i);
            if (!obj.isDictionary())
                continue;
            Annotation a = new Annotation((PdfDictionary) obj, pageNum);
            annots.add(a);
        }

        /**
        // Now we have all highlight and similar annotations, we need
        // to find out what words are actually highlighted! PDF in fact
        // is a dump format to express documents.
        // For some hints see
        // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file
                
        // We could reuse code from LocationTextExtractionStrategy (TODO)
        // LocationTextExtractionStrategy extr = new LocationTextExtractionStrategy();
        String fulltext = PdfTextExtractor.getTextFromPage(reader,pageNum);//,extr
        writer.println(fulltext);
        */
    }
    xml.endElement();

    for (Annotation a : annots) {
        a.serializeXML(xmlhandler);
    }
    // TODO: add page information (page size and orientation)

    xml.endAll();
}

From source file:io.konik.carriage.itext.ITextInvoiceExtractor.java

License:Open Source License

private static PdfArray getValidAf(PdfDictionary catalog) {
    if (catalog.contains(AF)) {
        PdfArray af = catalog.getAsArray(AF);
        if (!af.isEmpty() && af.getDirectObject(0).isDictionary()) {
            return af;
        }/*from   w w  w .j  a v a  2s  .c o  m*/
    }
    throw new InvoiceExtractionError("Pdf catalog does not contain Valid AF Entry");
}

From source file:org.gmdev.pdftrick.utils.CustomExtraImgReader.java

License:Open Source License

/**
 * Read a png image with if all other method fails
 * @param ref/*from ww w .j av a  2  s  . c  o  m*/
 * @param resultFile
 * @return The BufferedImage obj
 * @throws IOException
 * @throws ImageReadException
 */
public static BufferedImage readIndexedPNG(int ref, String resultFile) throws IOException, ImageReadException {

    PdfReader reader = new PdfReader(resultFile);
    PRStream stream = (PRStream) reader.getPdfObject(ref);
    PdfDictionary dic = stream;
    byte[] content = PdfReader.getStreamBytesRaw(stream);

    int width = dic.getAsNumber(PdfName.WIDTH).intValue();
    int height = dic.getAsNumber(PdfName.HEIGHT).intValue();
    int pngBitDepth = dic.getAsNumber(PdfName.BITSPERCOMPONENT).intValue();

    PdfObject colorspace = dic.getDirectObject(PdfName.COLORSPACE);
    PdfArray decode = dic.getAsArray(PdfName.DECODE);
    PdfArray carray = (PdfArray) colorspace;
    PdfObject id2 = carray.getDirectObject(3);

    byte[] palette = null;
    if (id2 instanceof PdfString) {
        palette = ((PdfString) id2).getBytes();
    } else if (id2 instanceof PRStream) {
        palette = PdfReader.getStreamBytes(((PRStream) id2));
    }

    Map<PdfName, FilterHandlers.FilterHandler> handlers = new HashMap<PdfName, FilterHandlers.FilterHandler>(
            FilterHandlers.getDefaultFilterHandlers());
    byte[] imageBytes = PdfReader.decodeBytes(content, dic, handlers);

    int stride = (width * pngBitDepth + 7) / 8;
    ByteArrayOutputStream ms = new ByteArrayOutputStream();
    PngWriter png = new PngWriter(ms);

    if (decode != null) {
        if (pngBitDepth == 1) {
            // if the decode array is 1,0, then we need to invert the image
            if (decode.getAsNumber(0).intValue() == 1 && decode.getAsNumber(1).intValue() == 0) {
                int len = imageBytes.length;
                for (int t = 0; t < len; ++t) {
                    imageBytes[t] ^= 0xff;
                }
            } else {
                // if the decode array is 0,1, do nothing.  It's possible that the array could be 0,0 or 1,1 - but that would be silly, so we'll just ignore that case
            }
        } else {
            // todo: add decode transformation for other depths
        }
    }
    int pngColorType = 0;
    png.writeHeader(width, height, pngBitDepth, pngColorType);

    if (palette != null) {
        png.writePalette(palette);
    }
    png.writeData(imageBytes, stride);
    png.writeEnd();

    imageBytes = ms.toByteArray();

    InputStream in = new ByteArrayInputStream(imageBytes);
    ImageInputStream ima_stream = ImageIO.createImageInputStream(in);

    BufferedImage buffImg = null;
    BufferedImage buffPic = ImageIO.read(ima_stream);

    // check if image contains a mask image ... experimental for this type of image
    BufferedImage buffMask = null;
    PRStream maskStream = (PRStream) dic.getAsStream(PdfName.SMASK);
    if (maskStream != null) {
        PdfImageObject maskImage = new PdfImageObject(maskStream);
        buffMask = maskImage.getBufferedImage();

        Image img = PdfTrickUtils.TransformGrayToTransparency(buffMask);
        buffImg = PdfTrickUtils.ApplyTransparency(buffPic, img);
    } else {
        buffImg = buffPic;
    }

    reader.close();
    ms.close();
    in.close();
    return buffImg;
}