List of usage examples for com.itextpdf.text.pdf PdfName ANNOTS
PdfName ANNOTS
To view the source code for com.itextpdf.text.pdf PdfName ANNOTS, click the Source Link.
From source file:com.poet.ar.remover.AnnotationRemover.java
/** * remove annotation that matches keywords * * @param page/*from ww w . j a va 2 s . c o m*/ * @return count of removed annotations */ private static int doRemoveAnnotation(PdfDictionary page) { // all annotations in page i PdfArray annoArray = page.getAsArray(PdfName.ANNOTS); List<Integer> willRemovedIx = new ArrayList<Integer>(); if (annoArray != null) { PdfDictionary annotation = null; PdfDictionary a = null; PdfString uri = null; for (int i = 0; i < annoArray.size(); i++) { annotation = annoArray.getAsDict(i); if (annotation == null) { continue; } a = annotation.getAsDict(PdfName.A); if (a == null) { continue; } uri = a.getAsString(PdfName.URI); if (uri == null) { continue; } String uriStr = uri.toString().trim(); if (keywords.contains(uriStr)) { willRemovedIx.add(i); } } int i = 0; for (Integer ix : willRemovedIx) { annoArray.remove(ix - i++); } } return willRemovedIx.size(); }
From source file:commentextractor.CommentExtractorApp.java
License:GNU General Public License
static String extractComments(String filename, int first, int last) { StringBuffer output = null;//w ww . j a v a 2 s . c om try { PdfReader reader = new PdfReader(filename); if (last >= reader.getNumberOfPages() || (last == -1)) { last = reader.getNumberOfPages(); } output = new StringBuffer(1024); for (int i = first; i <= last; i++) { PdfDictionary page = reader.getPageN(i); PdfArray annotsArray = null; if (page.getAsArray(PdfName.ANNOTS) == null) { continue; } annotsArray = page.getAsArray(PdfName.ANNOTS); for (ListIterator<PdfObject> iter = annotsArray.listIterator(); iter.hasNext();) { PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject(iter.next()); PdfString content = (PdfString) PdfReader.getPdfObject(annot.get(PdfName.CONTENTS)); if (content != null) { output.append("----------\n"); output.append("Page " + i); output.append("\n"); output.append(content.toUnicodeString().replaceAll("\r", "\r\n")); output.append("\n"); } } } } catch (Exception e) { Logger.getLogger(CommentExtractorApp.class.getName()).log(Level.SEVERE, null, e); } return new String(output); }
From source file:de.gbv.marginalia.Marginalia.java
License:Open Source License
/** * Inspect a PDF file and write the info to a writer * @param writer Writer to a text file//from w ww . ja v a2 s . c om * @param filename Path to the PDF file * @throws IOException */ public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException { // writer.println(filename); writer.flush(); PdfReader reader = new PdfReader(filename); ContentHandler xmlhandler = new SimpleXMLWriter(writer); xmlhandler.startDocument(); SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true); /* writer.println("Number of pages: "+reader.getNumberOfPages()); Rectangle mediabox = reader.getPageSize(1); writer.print("Size of page 1: ["); writer.print(mediabox.getLeft()); writer.print(','); writer.print(mediabox.getBottom()); writer.print(','); writer.print(mediabox.getRight()); writer.print(','); writer.print(mediabox.getTop()); writer.println("]"); writer.print("Rotation of page 1: "); writer.println(reader.getPageRotation(1)); writer.print("Page size with rotation of page 1: "); writer.println(reader.getPageSizeWithRotation(1)); writer.println(); writer.flush(); */ List<Annotation> annots = new LinkedList<Annotation>(); xml.startElement("annots"); // TODO: The following elements may be added: // - optionally write <f href="Document.pdf"/> // - optionally write <ids original="ID" modified="ID" /> xml.startElement("m", "pages"); for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) { PdfDictionary pageDic = reader.getPageN(pageNum); Map<String, String> attr = new HashMap<String, String>(); attr.put("number", "" + pageNum); attr.put("rotate", "" + reader.getPageRotation(pageNum)); Rectangle mediabox = reader.getPageSize(pageNum); attr.put("left", "" + mediabox.getLeft()); attr.put("bottom", "" + mediabox.getBottom()); attr.put("right", "" + mediabox.getRight()); attr.put("top", "" + mediabox.getTop()); xml.contentElement("m", "page", "", attr); PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS); if 
(rawannots == null || rawannots.isEmpty()) { // writer.println("page "+pageNum+" contains no annotations"); continue; } // writer.println("page "+pageNum+" has "+rawannots.size()+" annotations"); for (int i = 0; i < rawannots.size(); i++) { PdfObject obj = rawannots.getDirectObject(i); if (!obj.isDictionary()) continue; Annotation a = new Annotation((PdfDictionary) obj, pageNum); annots.add(a); } /** // Now we have all highlight and similar annotations, we need // to find out what words are actually highlighted! PDF in fact // is a dump format to express documents. // For some hints see // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file // We could reuse code from LocationTextExtractionStrategy (TODO) // LocationTextExtractionStrategy extr = new LocationTextExtractionStrategy(); String fulltext = PdfTextExtractor.getTextFromPage(reader,pageNum);//,extr writer.println(fulltext); */ } xml.endElement(); for (Annotation a : annots) { a.serializeXML(xmlhandler); } // TODO: add page information (page size and orientation) xml.endAll(); }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/**
 * Extracts locations from the redact annotations contained in the document and applied to the given page.
 *
 * <p>Every annotation whose {@code /Subtype} is {@code /Redact} has its indirect reference
 * recorded via {@code saveRedactAnnotIndirRef} and its clean-up locations added to the result.
 * Entries that are not dictionaries, or that carry no subtype, are ignored.
 *
 * @param page     number of the page the annotations belong to
 * @param pageDict dictionary of that page
 * @return the clean-up locations of all redact annotations on the page; empty if there are none
 */
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnots(int page, PdfDictionary pageDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();

    // getAsArray returns null both when /Annots is absent and when it is not an array.
    PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);
    if (annotsArray == null) {
        return locations;
    }

    for (int i = 0; i < annotsArray.size(); ++i) {
        PdfIndirectReference annotIndirRef = annotsArray.getAsIndirectObject(i);
        PdfDictionary annotDict = annotsArray.getAsDict(i);
        if (annotDict == null) {
            // not a dictionary, hence not a redact annotation
            continue;
        }

        PdfName annotSubtype = annotDict.getAsName(PdfName.SUBTYPE);
        // Constant-first comparison: an annotation without /Subtype is simply not a redact annotation.
        if (PdfName.REDACT.equals(annotSubtype)) {
            // NOTE(review): a redact annotation stored as a direct object has no indirect
            // reference and still fails here, as before; skipping it silently would leave
            // content unredacted, so the loud failure is kept on purpose.
            saveRedactAnnotIndirRef(page, annotIndirRef.toString());
            locations.addAll(extractLocationsFromRedactAnnot(page, i, annotDict));
        }
    }

    return locations;
}
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/** * Deletes redact annotations from the page and substitutes them with either OverlayText or RO object if it's needed. *///from w w w . ja v a 2s. c om private void deleteRedactAnnots(int pageNum) throws IOException, DocumentException { Set<String> indirRefs = redactAnnotIndirRefs.get(pageNum); if (indirRefs == null || indirRefs.isEmpty()) { return; } PdfReader reader = pdfStamper.getReader(); PdfContentByte canvas = pdfStamper.getOverContent(pageNum); PdfDictionary pageDict = reader.getPageN(pageNum); PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS); // j is for access annotRect (i can be decreased, so we need to store additional index, // indicating current position in ANNOTS array in case if we don't remove anything for (int i = 0, j = 0; i < annotsArray.size(); ++i, ++j) { PdfIndirectReference annotIndRef = annotsArray.getAsIndirectObject(i); PdfDictionary annotDict = annotsArray.getAsDict(i); if (indirRefs.contains(annotIndRef.toString()) || indirRefs.contains(getParentIndRefStr(annotDict))) { PdfStream formXObj = annotDict.getAsStream(PdfName.RO); PdfString overlayText = annotDict.getAsString(PdfName.OVERLAYTEXT); if (fillCleanedArea && formXObj != null) { PdfArray rectArray = annotDict.getAsArray(PdfName.RECT); Rectangle annotRect = new Rectangle(rectArray.getAsNumber(0).floatValue(), rectArray.getAsNumber(1).floatValue(), rectArray.getAsNumber(2).floatValue(), rectArray.getAsNumber(3).floatValue()); insertFormXObj(canvas, pageDict, formXObj, clippingRects.get(j), annotRect); } else if (fillCleanedArea && overlayText != null && overlayText.toUnicodeString().length() > 0) { drawOverlayText(canvas, clippingRects.get(j), overlayText, annotDict.getAsString(PdfName.DA), annotDict.getAsNumber(PdfName.Q), annotDict.getAsBoolean(PdfName.REPEAT)); } annotsArray.remove(i--); // array size is changed, so we need to decrease i } } if (annotsArray.size() == 0) { pageDict.remove(PdfName.ANNOTS); } }
From source file:org.sejda.impl.itext5.component.PdfUnpacker.java
License:Open Source License
private Set<PdfDictionary> getFileAttachmentsDictionaries(PdfReader reader) { Set<PdfDictionary> retSet = new NullSafeSet<PdfDictionary>(); for (int k = 1; k <= reader.getNumberOfPages(); ++k) { PdfArray annots = reader.getPageN(k).getAsArray(PdfName.ANNOTS); if (annots != null) { for (PdfObject current : annots) { PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject(current); if (PdfName.FILEATTACHMENT.equals(annot.getAsName(PdfName.SUBTYPE))) { retSet.add(annot.getAsDict(PdfName.FS)); }//from w w w . j a va 2 s . c om } } } return retSet; }