List of usage examples for com.itextpdf.text.pdf PdfArray isEmpty
public boolean isEmpty()
true
if the array is empty. From source file:de.gbv.marginalia.Marginalia.java
License:Open Source License
/** * Inspect a PDF file and write the info to a writer * @param writer Writer to a text file/* www .j a va 2s . c o m*/ * @param filename Path to the PDF file * @throws IOException */ public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException { // writer.println(filename); writer.flush(); PdfReader reader = new PdfReader(filename); ContentHandler xmlhandler = new SimpleXMLWriter(writer); xmlhandler.startDocument(); SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true); /* writer.println("Number of pages: "+reader.getNumberOfPages()); Rectangle mediabox = reader.getPageSize(1); writer.print("Size of page 1: ["); writer.print(mediabox.getLeft()); writer.print(','); writer.print(mediabox.getBottom()); writer.print(','); writer.print(mediabox.getRight()); writer.print(','); writer.print(mediabox.getTop()); writer.println("]"); writer.print("Rotation of page 1: "); writer.println(reader.getPageRotation(1)); writer.print("Page size with rotation of page 1: "); writer.println(reader.getPageSizeWithRotation(1)); writer.println(); writer.flush(); */ List<Annotation> annots = new LinkedList<Annotation>(); xml.startElement("annots"); // TODO: The following elements may be added: // - optionally write <f href="Document.pdf"/> // - optionally write <ids original="ID" modified="ID" /> xml.startElement("m", "pages"); for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) { PdfDictionary pageDic = reader.getPageN(pageNum); Map<String, String> attr = new HashMap<String, String>(); attr.put("number", "" + pageNum); attr.put("rotate", "" + reader.getPageRotation(pageNum)); Rectangle mediabox = reader.getPageSize(pageNum); attr.put("left", "" + mediabox.getLeft()); attr.put("bottom", "" + mediabox.getBottom()); attr.put("right", "" + mediabox.getRight()); attr.put("top", "" + mediabox.getTop()); xml.contentElement("m", "page", "", attr); PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS); if (rawannots == null || rawannots.isEmpty()) { // writer.println("page "+pageNum+" contains no annotations"); continue; } // writer.println("page "+pageNum+" has "+rawannots.size()+" annotations"); for (int i = 0; i < rawannots.size(); i++) { PdfObject obj = rawannots.getDirectObject(i); if (!obj.isDictionary()) continue; Annotation a = new Annotation((PdfDictionary) obj, pageNum); annots.add(a); } /** // Now we have all highlight and similar annotations, we need // to find out what words are actually highlighted! PDF in fact // is a dump format to express documents. // For some hints see // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file // We could reuse code from LocationTextExtractionStrategy (TODO) // LocationTextExtractionStrategy extr = new LocationTextExtractionStrategy(); String fulltext = PdfTextExtractor.getTextFromPage(reader,pageNum);//,extr writer.println(fulltext); */ } xml.endElement(); for (Annotation a : annots) { a.serializeXML(xmlhandler); } // TODO: add page information (page size and orientation) xml.endAll(); }
From source file:io.konik.carriage.itext.ITextInvoiceExtractor.java
License:Open Source License
private static PdfArray getValidAf(PdfDictionary catalog) { if (catalog.contains(AF)) { PdfArray af = catalog.getAsArray(AF); if (!af.isEmpty() && af.getDirectObject(0).isDictionary()) { return af; }// w w w . j av a 2 s .c o m } throw new InvoiceExtractionError("Pdf catalog does not contain Valid AF Entry"); }
From source file:io.konik.carriage.itext.ITextInvoiceExtractor.java
License:Open Source License
private static PdfDictionary getValidFileSpec(PdfArray af) { if (af.isEmpty() || af.getAsDict(0) == null) { throw new InvoiceExtractionError("Pdf does not contain a FileSpec Entry"); }//from ww w. ja va 2s . c o m return af.getAsDict(0); }