List of usage examples for com.itextpdf.text.pdf PdfArray getDirectObject
public PdfObject getDirectObject(final int idx)
PdfObject
with the specified index, resolving a possible indirect reference to a direct object. From source file:de.gbv.marginalia.Marginalia.java
License:Open Source License
/** * Inspect a PDF file and write the info to a writer * @param writer Writer to a text file// ww w . j av a2s . c o m * @param filename Path to the PDF file * @throws IOException */ public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException { // writer.println(filename); writer.flush(); PdfReader reader = new PdfReader(filename); ContentHandler xmlhandler = new SimpleXMLWriter(writer); xmlhandler.startDocument(); SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true); /* writer.println("Number of pages: "+reader.getNumberOfPages()); Rectangle mediabox = reader.getPageSize(1); writer.print("Size of page 1: ["); writer.print(mediabox.getLeft()); writer.print(','); writer.print(mediabox.getBottom()); writer.print(','); writer.print(mediabox.getRight()); writer.print(','); writer.print(mediabox.getTop()); writer.println("]"); writer.print("Rotation of page 1: "); writer.println(reader.getPageRotation(1)); writer.print("Page size with rotation of page 1: "); writer.println(reader.getPageSizeWithRotation(1)); writer.println(); writer.flush(); */ List<Annotation> annots = new LinkedList<Annotation>(); xml.startElement("annots"); // TODO: The following elements may be added: // - optionally write <f href="Document.pdf"/> // - optionally write <ids original="ID" modified="ID" /> xml.startElement("m", "pages"); for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) { PdfDictionary pageDic = reader.getPageN(pageNum); Map<String, String> attr = new HashMap<String, String>(); attr.put("number", "" + pageNum); attr.put("rotate", "" + reader.getPageRotation(pageNum)); Rectangle mediabox = reader.getPageSize(pageNum); attr.put("left", "" + mediabox.getLeft()); attr.put("bottom", "" + mediabox.getBottom()); attr.put("right", "" + mediabox.getRight()); attr.put("top", "" + mediabox.getTop()); xml.contentElement("m", "page", "", attr); PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS); if (rawannots == null || rawannots.isEmpty()) { // writer.println("page "+pageNum+" contains no annotations"); continue; } // writer.println("page "+pageNum+" has "+rawannots.size()+" annotations"); for (int i = 0; i < rawannots.size(); i++) { PdfObject obj = rawannots.getDirectObject(i); if (!obj.isDictionary()) continue; Annotation a = new Annotation((PdfDictionary) obj, pageNum); annots.add(a); } /** // Now we have all highlight and similar annotations, we need // to find out what words are actually highlighted! PDF in fact // is a dump format to express documents. // For some hints see // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file // We could reuse code from LocationTextExtractionStrategy (TODO) // LocationTextExtractionStrategy extr = new LocationTextExtractionStrategy(); String fulltext = PdfTextExtractor.getTextFromPage(reader,pageNum);//,extr writer.println(fulltext); */ } xml.endElement(); for (Annotation a : annots) { a.serializeXML(xmlhandler); } // TODO: add page information (page size and orientation) xml.endAll(); }
From source file:io.konik.carriage.itext.ITextInvoiceExtractor.java
License:Open Source License
private static PdfArray getValidAf(PdfDictionary catalog) { if (catalog.contains(AF)) { PdfArray af = catalog.getAsArray(AF); if (!af.isEmpty() && af.getDirectObject(0).isDictionary()) { return af; }/*from w w w .j a v a 2s .c o m*/ } throw new InvoiceExtractionError("Pdf catalog does not contain Valid AF Entry"); }
From source file:org.gmdev.pdftrick.utils.CustomExtraImgReader.java
License:Open Source License
/** * Read a png image with if all other method fails * @param ref/*from ww w .j av a 2 s . c o m*/ * @param resultFile * @return The BufferedImage obj * @throws IOException * @throws ImageReadException */ public static BufferedImage readIndexedPNG(int ref, String resultFile) throws IOException, ImageReadException { PdfReader reader = new PdfReader(resultFile); PRStream stream = (PRStream) reader.getPdfObject(ref); PdfDictionary dic = stream; byte[] content = PdfReader.getStreamBytesRaw(stream); int width = dic.getAsNumber(PdfName.WIDTH).intValue(); int height = dic.getAsNumber(PdfName.HEIGHT).intValue(); int pngBitDepth = dic.getAsNumber(PdfName.BITSPERCOMPONENT).intValue(); PdfObject colorspace = dic.getDirectObject(PdfName.COLORSPACE); PdfArray decode = dic.getAsArray(PdfName.DECODE); PdfArray carray = (PdfArray) colorspace; PdfObject id2 = carray.getDirectObject(3); byte[] palette = null; if (id2 instanceof PdfString) { palette = ((PdfString) id2).getBytes(); } else if (id2 instanceof PRStream) { palette = PdfReader.getStreamBytes(((PRStream) id2)); } Map<PdfName, FilterHandlers.FilterHandler> handlers = new HashMap<PdfName, FilterHandlers.FilterHandler>( FilterHandlers.getDefaultFilterHandlers()); byte[] imageBytes = PdfReader.decodeBytes(content, dic, handlers); int stride = (width * pngBitDepth + 7) / 8; ByteArrayOutputStream ms = new ByteArrayOutputStream(); PngWriter png = new PngWriter(ms); if (decode != null) { if (pngBitDepth == 1) { // if the decode array is 1,0, then we need to invert the image if (decode.getAsNumber(0).intValue() == 1 && decode.getAsNumber(1).intValue() == 0) { int len = imageBytes.length; for (int t = 0; t < len; ++t) { imageBytes[t] ^= 0xff; } } else { // if the decode array is 0,1, do nothing. It's possible that the array could be 0,0 or 1,1 - but that would be silly, so we'll just ignore that case } } else { // todo: add decode transformation for other depths } } int pngColorType = 0; png.writeHeader(width, height, pngBitDepth, pngColorType); if (palette != null) { png.writePalette(palette); } png.writeData(imageBytes, stride); png.writeEnd(); imageBytes = ms.toByteArray(); InputStream in = new ByteArrayInputStream(imageBytes); ImageInputStream ima_stream = ImageIO.createImageInputStream(in); BufferedImage buffImg = null; BufferedImage buffPic = ImageIO.read(ima_stream); // check if image contains a mask image ... experimental for this type of image BufferedImage buffMask = null; PRStream maskStream = (PRStream) dic.getAsStream(PdfName.SMASK); if (maskStream != null) { PdfImageObject maskImage = new PdfImageObject(maskStream); buffMask = maskImage.getBufferedImage(); Image img = PdfTrickUtils.TransformGrayToTransparency(buffMask); buffImg = PdfTrickUtils.ApplyTransparency(buffPic, img); } else { buffImg = buffPic; } reader.close(); ms.close(); in.close(); return buffImg; }