List of usage examples for com.itextpdf.text.pdf PdfArray size
public int size()
From source file:com.poet.ar.remover.AnnotationRemover.java
/** * remove content that matches keywords//from www .j av a 2 s. co m * * @param page * @return count of removed content */ private static int doRemoveContent(PdfDictionary page) { // all contents in page i PdfArray contentArray = page.getAsArray(PdfName.CONTENTS); PdfDictionary resources = page.getAsDict(PdfName.RESOURCES); List<Integer> willRemovedIx = new ArrayList<Integer>(); if (contentArray != null) { PdfStream stream = null; for (int i = 0; i < contentArray.size(); i++) { stream = contentArray.getAsStream(i); PRStream pr = (PRStream) stream; // TODO // FIXME: 2016/1/27 0027 java.lang.ClassCastException: com.itextpdf.text.pdf.PdfArray cannot be cast to com.itextpdf.text.pdf.PdfLiteral // get display text // String text = StreamContentExtractor.extractFromPdfStream(stream, resources); // // if (keywords.contains(text)) { // willRemovedIx.add(i); // } try { String text = StreamContentExtractor.extractFromPdfStream(stream, resources); if (keywords.contains(text)) { willRemovedIx.add(i); } } catch (Exception ex) { } } int i = 0; for (Integer ix : willRemovedIx) { contentArray.remove(ix - i++); } } return willRemovedIx.size(); }
From source file:com.poet.ar.remover.AnnotationRemover.java
/** * remove annotation that matches keywords * * @param page/* w w w.j a v a 2 s .c om*/ * @return count of removed annotations */ private static int doRemoveAnnotation(PdfDictionary page) { // all annotations in page i PdfArray annoArray = page.getAsArray(PdfName.ANNOTS); List<Integer> willRemovedIx = new ArrayList<Integer>(); if (annoArray != null) { PdfDictionary annotation = null; PdfDictionary a = null; PdfString uri = null; for (int i = 0; i < annoArray.size(); i++) { annotation = annoArray.getAsDict(i); if (annotation == null) { continue; } a = annotation.getAsDict(PdfName.A); if (a == null) { continue; } uri = a.getAsString(PdfName.URI); if (uri == null) { continue; } String uriStr = uri.toString().trim(); if (keywords.contains(uriStr)) { willRemovedIx.add(i); } } int i = 0; for (Integer ix : willRemovedIx) { annoArray.remove(ix - i++); } } return willRemovedIx.size(); }
From source file:de.gbv.marginalia.Annotation.java
License:Open Source License
/** * Serialize the annotation in XML format. * The annotation is emitted as stream of SAX events to a ContentHandler. * The XML is XFDF with additional Marginalia elements in its own namespace. */// ww w . ja v a 2 s . c om public void serializeXML(ContentHandler handler) throws SAXException { SimpleXMLCreator xml = new SimpleXMLCreator(handler, namespaces); Set<PdfName> allkeys = this.dict.getKeys(); allkeys.remove(PdfName.TYPE); allkeys.remove(PdfName.SUBTYPE); allkeys.remove(PdfName.PARENT); allkeys.remove(PdfName.CONTENTS); allkeys.remove(PdfName.POPUP); Map<String, String> attrs = new HashMap<String, String>(); for (String aName : this.FIELDS.keySet()) { Field f = this.FIELDS.get(aName); String value = f.getFrom(this.dict); if (value != null) { // TODO: encoding & exception attrs.put(aName, value); // allkeys.remove( f.name ); } } PdfDictionary pg = getAsDictionary(this.dict, PdfName.P); allkeys.remove(PdfName.P); //CropBox=[0, 0, 595, 842] //Rotate //MediaBox=[0, 0, 595, 842] // TODO: find out where page number is stored if (attrs.get("page") == null) attrs.put("page", "" + this.pageNum); String element = subtypes.get(this.subtype); if (element == null) { // TODO element = this.subtype.toString(); } xml.startElement(element, attrs); if (element.equals("ink")) { PdfArray inklist = this.dict.getAsArray(new PdfName("InkList")); if (inklist != null) { xml.startElement("inklist"); for (int i = 0; i < inklist.size(); i++) { PdfArray pathArray = inklist.getAsArray(i); String s = ""; for (int j = 0; j < pathArray.size(); j += 2) { if (j > 0) s += ";"; s += "" + pathArray.getAsNumber(j).floatValue() + ","; s += "" + pathArray.getAsNumber(j + 1).floatValue(); } xml.contentElement("gesture", s); } xml.endElement(); } } if (attrs.get("rect") != null) { Map<String, String> a = new HashMap<String, String>(); RectField rf = (RectField) this.FIELDS.get("rect"); PdfRectangle r = null; if (rf != null) r = (PdfRectangle) rf.getObjectFrom(this.dict); if (r != null) { 
a.put("left", "" + r.left()); a.put("bottom", "" + r.bottom()); a.put("right", "" + r.right()); a.put("top", "" + r.top()); xml.emptyElement("m", "rect", a); } } if (this.content != null && !this.content.equals("")) { // TODO: encode content if not UTF-8 ? xml.contentElement("content", content.toString()); } // TODO: contents-richtext // TODO: popup /* if ( this.popup != null ) { out.println("<!--popup>"); for ( PdfName n : this.popup.getKeys() ) { out.println( n + "=" + this.popup.getDirectObject(n) ); } out.println("</popup-->"); } */ // remaining dictionary elements /* for ( PdfName name : allkeys ) { Map<String,String> a = new HashMap<String,String>(); a.put("name",name.toString()); a.put("value",this.dict.getDirectObject(name).toString()); xml.emptyElement( "m","unknown", a ); } */ xml.endElement(); }
From source file:de.gbv.marginalia.Marginalia.java
License:Open Source License
/** * Inspect a PDF file and write the info to a writer * @param writer Writer to a text file/*from ww w. j a va 2 s. com*/ * @param filename Path to the PDF file * @throws IOException */ public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException { // writer.println(filename); writer.flush(); PdfReader reader = new PdfReader(filename); ContentHandler xmlhandler = new SimpleXMLWriter(writer); xmlhandler.startDocument(); SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true); /* writer.println("Number of pages: "+reader.getNumberOfPages()); Rectangle mediabox = reader.getPageSize(1); writer.print("Size of page 1: ["); writer.print(mediabox.getLeft()); writer.print(','); writer.print(mediabox.getBottom()); writer.print(','); writer.print(mediabox.getRight()); writer.print(','); writer.print(mediabox.getTop()); writer.println("]"); writer.print("Rotation of page 1: "); writer.println(reader.getPageRotation(1)); writer.print("Page size with rotation of page 1: "); writer.println(reader.getPageSizeWithRotation(1)); writer.println(); writer.flush(); */ List<Annotation> annots = new LinkedList<Annotation>(); xml.startElement("annots"); // TODO: The following elements may be added: // - optionally write <f href="Document.pdf"/> // - optionally write <ids original="ID" modified="ID" /> xml.startElement("m", "pages"); for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) { PdfDictionary pageDic = reader.getPageN(pageNum); Map<String, String> attr = new HashMap<String, String>(); attr.put("number", "" + pageNum); attr.put("rotate", "" + reader.getPageRotation(pageNum)); Rectangle mediabox = reader.getPageSize(pageNum); attr.put("left", "" + mediabox.getLeft()); attr.put("bottom", "" + mediabox.getBottom()); attr.put("right", "" + mediabox.getRight()); attr.put("top", "" + mediabox.getTop()); xml.contentElement("m", "page", "", attr); PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS); if 
(rawannots == null || rawannots.isEmpty()) { // writer.println("page "+pageNum+" contains no annotations"); continue; } // writer.println("page "+pageNum+" has "+rawannots.size()+" annotations"); for (int i = 0; i < rawannots.size(); i++) { PdfObject obj = rawannots.getDirectObject(i); if (!obj.isDictionary()) continue; Annotation a = new Annotation((PdfDictionary) obj, pageNum); annots.add(a); } /** // Now we have all highlight and similar annotations, we need // to find out what words are actually highlighted! PDF in fact // is a dump format to express documents. // For some hints see // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file // We could reuse code from LocationTextExtractionStrategy (TODO) // LocationTextExtractionStrategy extr = new LocationTextExtractionStrategy(); String fulltext = PdfTextExtractor.getTextFromPage(reader,pageNum);//,extr writer.println(fulltext); */ } xml.endElement(); for (Annotation a : annots) { a.serializeXML(xmlhandler); } // TODO: add page information (page size and orientation) xml.endAll(); }
From source file:de.gbv.marginalia.Marginalia.java
License:Open Source License
/**
 * Prints every element of the array to standard output, one line per element,
 * each line being the element index immediately followed by the element's text.
 * Does nothing for a null array.
 */
public static void dumpArray(PdfArray a) {
    if (a != null) {
        int index = 0;
        while (index < a.size()) {
            String rendered = a.getPdfObject(index).toString();
            System.out.println(index + rendered);
            index++;
        }
    }
}
From source file:de.rub.dez6a3.jpdfsigner.control.JPodPDFViewer.java
License:Open Source License
@Override public ArrayList getAttachments() throws IOException { ArrayList files = new ArrayList(); PdfReader reader = new PdfReader(conf.getPDFFile()); PdfDictionary root = reader.getCatalog(); PdfDictionary documentnames = root.getAsDict(PdfName.NAMES); PdfDictionary embeddedfiles = documentnames.getAsDict(PdfName.EMBEDDEDFILES); PdfArray filespecs = embeddedfiles.getAsArray(PdfName.NAMES); PdfDictionary filespec;//from ww w .j a v a 2 s . c o m PdfDictionary refs; for (int i = 0; i < filespecs.size();) { filespecs.getAsName(i++); filespec = filespecs.getAsDict(i++); refs = filespec.getAsDict(PdfName.EF); Iterator it = refs.getKeys().iterator(); while (it.hasNext()) { PdfName key = (PdfName) it.next(); if (key.toString().equals("/F")) { String filename = "-"; String desc = "-"; int size = -1; String moddate = "-"; String compsize = "-"; PdfObject pdfobj = null; try { filename = filespec.getAsString(key).toString(); } catch (Exception e) { log.warn("Cannot load attachment-name - " + e.getMessage()); } try { desc = filespec.getAsString(PdfName.DESC).toString(); } catch (Exception e) { log.warn("Cannot load attachment-description - " + e.getMessage()); } byte[] attBytes = null; try { PRStream stream = (PRStream) PdfReader.getPdfObject(refs.getAsIndirectObject(key)); attBytes = PdfReader.getStreamBytes(stream); size = attBytes.length; } catch (Exception e) { log.warn("Cannot load attachment-size - " + e.getMessage()); } try { pdfobj = PdfReader.getPdfObject(refs.getAsIndirectObject(key)); } catch (Exception e) { log.warn("Cannot load attachment-pdfobject - " + e.getMessage()); } Hashtable fileData = new Hashtable(); fileData.put(ATTACHMENT_FILENAME_STRING, filename); //filename fileData.put(ATTACHMENT_DESCRIPTION_STRING, desc); //Description fileData.put(ATTACHMENT_SIZE_INT, size); //size fileData.put(ATTACHMENT_BYTES_ARR, attBytes); //bytes files.add(fileData); } } } return files; }
From source file:mkl.testarea.itext5.pdfcleanup.PdfCleanUpContentOperator.java
License:Open Source License
/** * Example./*from w w w . j a va2 s .c o m*/ * TJ = [(h) 3 4 (q) 7 (w) (e)] * Result = {0:0, 1:7, 2:7, 3:0, 4:0} * * @return Map whose key is an ordinal number of the string in the TJ array and value * is the position adjustment. */ private Map<Integer, Float> structureTJarray(PdfArray array) { Map<Integer, Float> structuredTJoperands = new HashMap<Integer, Float>(); if (array.size() == 0) { return structuredTJoperands; } Integer previousStrNum = 0; structuredTJoperands.put(previousStrNum, 0f); for (int i = 0; i < array.size(); ++i) { PdfObject currentObj = array.getPdfObject(i); if (currentObj instanceof PdfString && ((PdfString) currentObj).toUnicodeString().length() > 0) { ++previousStrNum; structuredTJoperands.put(previousStrNum, 0f); } else { Float oldOffset = structuredTJoperands.get(previousStrNum); structuredTJoperands.put(previousStrNum, oldOffset + ((PdfNumber) currentObj).floatValue()); } } return structuredTJoperands; }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/** * Extracts locations from the redact annotations contained in the document and applied to the given page. *///from w ww . j a v a 2s . c o m private List<PdfCleanUpLocation> extractLocationsFromRedactAnnots(int page, PdfDictionary pageDict) { List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>(); if (pageDict.contains(PdfName.ANNOTS)) { PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS); for (int i = 0; i < annotsArray.size(); ++i) { PdfIndirectReference annotIndirRef = annotsArray.getAsIndirectObject(i); PdfDictionary annotDict = annotsArray.getAsDict(i); PdfName annotSubtype = annotDict.getAsName(PdfName.SUBTYPE); if (annotSubtype.equals(PdfName.REDACT)) { saveRedactAnnotIndirRef(page, annotIndirRef.toString()); locations.addAll(extractLocationsFromRedactAnnot(page, i, annotDict)); } } } return locations; }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/**
 * Extracts locations from the concrete annotation.
 * Note: annotation can consist not only of one area specified by the RECT entry, but also of multiple areas specified
 * by the QuadPoints entry in the annotation dictionary.
 *
 * The rectangles found are also recorded in clippingRects under the annotation index.
 *
 * @param page number of the page, passed through to the created locations
 * @param annotIndex index of the annotation, used as key into clippingRects
 * @param annotDict dictionary of the redact annotation
 * @return one location per marked rectangle
 */
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnot(int page, int annotIndex, PdfDictionary annotDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();
    List<Rectangle> markedRectangles = new ArrayList<Rectangle>();
    PdfArray quadPoints = annotDict.getAsArray(PdfName.QUADPOINTS);

    // A missing QuadPoints entry yields null here, so it must be treated like an
    // empty one and fall back to the annotation's Rect.
    if (quadPoints != null && quadPoints.size() != 0) {
        markedRectangles.addAll(translateQuadPointsToRectangles(quadPoints));
    } else {
        PdfArray annotRect = annotDict.getAsArray(PdfName.RECT);
        markedRectangles.add(new Rectangle(annotRect.getAsNumber(0).floatValue(),
                annotRect.getAsNumber(1).floatValue(),
                annotRect.getAsNumber(2).floatValue(),
                annotRect.getAsNumber(3).floatValue()));
    }

    clippingRects.put(annotIndex, markedRectangles);

    BaseColor cleanUpColor = null;
    PdfArray ic = annotDict.getAsArray(PdfName.IC);

    if (ic != null) {
        cleanUpColor = new BaseColor(ic.getAsNumber(0).floatValue(),
                ic.getAsNumber(1).floatValue(),
                ic.getAsNumber(2).floatValue());
    }

    // When an RO stream is present the IC colour is discarded.
    PdfStream ro = annotDict.getAsStream(PdfName.RO);

    if (ro != null) {
        cleanUpColor = null;
    }

    for (Rectangle rect : markedRectangles) {
        locations.add(new PdfCleanUpLocation(page, rect, cleanUpColor));
    }

    return locations;
}
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
private List<Rectangle> translateQuadPointsToRectangles(PdfArray quadPoints) { List<Rectangle> rectangles = new ArrayList<Rectangle>(); for (int i = 0; i < quadPoints.size(); i += 8) { rectangles.add(new Rectangle(quadPoints.getAsNumber(i + 4).floatValue(), // QuadPoints have "Z" order quadPoints.getAsNumber(i + 5).floatValue(), quadPoints.getAsNumber(i + 2).floatValue(), quadPoints.getAsNumber(i + 3).floatValue())); }//from www . j av a2 s .c o m return rectangles; }