List of usage examples for com.itextpdf.text.pdf PdfDictionary getAsArray
public PdfArray getAsArray(final PdfName key)
PdfObject
as a PdfArray
, resolving indirect references. From source file:com.poet.ar.remover.AnnotationRemover.java
/** * remove content that matches keywords//from w w w . j a v a2 s. co m * * @param page * @return count of removed content */ private static int doRemoveContent(PdfDictionary page) { // all contents in page i PdfArray contentArray = page.getAsArray(PdfName.CONTENTS); PdfDictionary resources = page.getAsDict(PdfName.RESOURCES); List<Integer> willRemovedIx = new ArrayList<Integer>(); if (contentArray != null) { PdfStream stream = null; for (int i = 0; i < contentArray.size(); i++) { stream = contentArray.getAsStream(i); PRStream pr = (PRStream) stream; // TODO // FIXME: 2016/1/27 0027 java.lang.ClassCastException: com.itextpdf.text.pdf.PdfArray cannot be cast to com.itextpdf.text.pdf.PdfLiteral // get display text // String text = StreamContentExtractor.extractFromPdfStream(stream, resources); // // if (keywords.contains(text)) { // willRemovedIx.add(i); // } try { String text = StreamContentExtractor.extractFromPdfStream(stream, resources); if (keywords.contains(text)) { willRemovedIx.add(i); } } catch (Exception ex) { } } int i = 0; for (Integer ix : willRemovedIx) { contentArray.remove(ix - i++); } } return willRemovedIx.size(); }
From source file:com.poet.ar.remover.AnnotationRemover.java
/** * remove annotation that matches keywords * * @param page//from w w w . j av a 2 s . com * @return count of removed annotations */ private static int doRemoveAnnotation(PdfDictionary page) { // all annotations in page i PdfArray annoArray = page.getAsArray(PdfName.ANNOTS); List<Integer> willRemovedIx = new ArrayList<Integer>(); if (annoArray != null) { PdfDictionary annotation = null; PdfDictionary a = null; PdfString uri = null; for (int i = 0; i < annoArray.size(); i++) { annotation = annoArray.getAsDict(i); if (annotation == null) { continue; } a = annotation.getAsDict(PdfName.A); if (a == null) { continue; } uri = a.getAsString(PdfName.URI); if (uri == null) { continue; } String uriStr = uri.toString().trim(); if (keywords.contains(uriStr)) { willRemovedIx.add(i); } } int i = 0; for (Integer ix : willRemovedIx) { annoArray.remove(ix - i++); } } return willRemovedIx.size(); }
From source file:commentextractor.CommentExtractorApp.java
License:GNU General Public License
static String extractComments(String filename, int first, int last) { StringBuffer output = null;/*from w ww . j a v a 2 s. co m*/ try { PdfReader reader = new PdfReader(filename); if (last >= reader.getNumberOfPages() || (last == -1)) { last = reader.getNumberOfPages(); } output = new StringBuffer(1024); for (int i = first; i <= last; i++) { PdfDictionary page = reader.getPageN(i); PdfArray annotsArray = null; if (page.getAsArray(PdfName.ANNOTS) == null) { continue; } annotsArray = page.getAsArray(PdfName.ANNOTS); for (ListIterator<PdfObject> iter = annotsArray.listIterator(); iter.hasNext();) { PdfDictionary annot = (PdfDictionary) PdfReader.getPdfObject(iter.next()); PdfString content = (PdfString) PdfReader.getPdfObject(annot.get(PdfName.CONTENTS)); if (content != null) { output.append("----------\n"); output.append("Page " + i); output.append("\n"); output.append(content.toUnicodeString().replaceAll("\r", "\r\n")); output.append("\n"); } } } } catch (Exception e) { Logger.getLogger(CommentExtractorApp.class.getName()).log(Level.SEVERE, null, e); } return new String(output); }
From source file:de.gbv.marginalia.Marginalia.java
License:Open Source License
/** * Inspect a PDF file and write the info to a writer * @param writer Writer to a text file/*from w ww. j a v a 2 s . co m*/ * @param filename Path to the PDF file * @throws IOException */ public static void inspect(PrintWriter writer, String filename) throws IOException, SAXException { // writer.println(filename); writer.flush(); PdfReader reader = new PdfReader(filename); ContentHandler xmlhandler = new SimpleXMLWriter(writer); xmlhandler.startDocument(); SimpleXMLCreator xml = new SimpleXMLCreator(xmlhandler, Annotation.namespaces, true); /* writer.println("Number of pages: "+reader.getNumberOfPages()); Rectangle mediabox = reader.getPageSize(1); writer.print("Size of page 1: ["); writer.print(mediabox.getLeft()); writer.print(','); writer.print(mediabox.getBottom()); writer.print(','); writer.print(mediabox.getRight()); writer.print(','); writer.print(mediabox.getTop()); writer.println("]"); writer.print("Rotation of page 1: "); writer.println(reader.getPageRotation(1)); writer.print("Page size with rotation of page 1: "); writer.println(reader.getPageSizeWithRotation(1)); writer.println(); writer.flush(); */ List<Annotation> annots = new LinkedList<Annotation>(); xml.startElement("annots"); // TODO: The following elements may be added: // - optionally write <f href="Document.pdf"/> // - optionally write <ids original="ID" modified="ID" /> xml.startElement("m", "pages"); for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); pageNum++) { PdfDictionary pageDic = reader.getPageN(pageNum); Map<String, String> attr = new HashMap<String, String>(); attr.put("number", "" + pageNum); attr.put("rotate", "" + reader.getPageRotation(pageNum)); Rectangle mediabox = reader.getPageSize(pageNum); attr.put("left", "" + mediabox.getLeft()); attr.put("bottom", "" + mediabox.getBottom()); attr.put("right", "" + mediabox.getRight()); attr.put("top", "" + mediabox.getTop()); xml.contentElement("m", "page", "", attr); PdfArray rawannots = pageDic.getAsArray(PdfName.ANNOTS); if (rawannots == null || rawannots.isEmpty()) { // writer.println("page "+pageNum+" contains no annotations"); continue; } // writer.println("page "+pageNum+" has "+rawannots.size()+" annotations"); for (int i = 0; i < rawannots.size(); i++) { PdfObject obj = rawannots.getDirectObject(i); if (!obj.isDictionary()) continue; Annotation a = new Annotation((PdfDictionary) obj, pageNum); annots.add(a); } /** // Now we have all highlight and similar annotations, we need // to find out what words are actually highlighted! PDF in fact // is a dump format to express documents. // For some hints see // http://stackoverflow.com/questions/4028240/extract-each-column-of-a-pdf-file // We could reuse code from LocationTextExtractionStrategy (TODO) // LocationTextExtractionStrategy extr = new LocationTextExtractionStrategy(); String fulltext = PdfTextExtractor.getTextFromPage(reader,pageNum);//,extr writer.println(fulltext); */ } xml.endElement(); for (Annotation a : annots) { a.serializeXML(xmlhandler); } // TODO: add page information (page size and orientation) xml.endAll(); }
From source file:de.rub.dez6a3.jpdfsigner.control.JPodPDFViewer.java
License:Open Source License
@Override public ArrayList getAttachments() throws IOException { ArrayList files = new ArrayList(); PdfReader reader = new PdfReader(conf.getPDFFile()); PdfDictionary root = reader.getCatalog(); PdfDictionary documentnames = root.getAsDict(PdfName.NAMES); PdfDictionary embeddedfiles = documentnames.getAsDict(PdfName.EMBEDDEDFILES); PdfArray filespecs = embeddedfiles.getAsArray(PdfName.NAMES); PdfDictionary filespec;//from w w w . j a v a 2s .c o m PdfDictionary refs; for (int i = 0; i < filespecs.size();) { filespecs.getAsName(i++); filespec = filespecs.getAsDict(i++); refs = filespec.getAsDict(PdfName.EF); Iterator it = refs.getKeys().iterator(); while (it.hasNext()) { PdfName key = (PdfName) it.next(); if (key.toString().equals("/F")) { String filename = "-"; String desc = "-"; int size = -1; String moddate = "-"; String compsize = "-"; PdfObject pdfobj = null; try { filename = filespec.getAsString(key).toString(); } catch (Exception e) { log.warn("Cannot load attachment-name - " + e.getMessage()); } try { desc = filespec.getAsString(PdfName.DESC).toString(); } catch (Exception e) { log.warn("Cannot load attachment-description - " + e.getMessage()); } byte[] attBytes = null; try { PRStream stream = (PRStream) PdfReader.getPdfObject(refs.getAsIndirectObject(key)); attBytes = PdfReader.getStreamBytes(stream); size = attBytes.length; } catch (Exception e) { log.warn("Cannot load attachment-size - " + e.getMessage()); } try { pdfobj = PdfReader.getPdfObject(refs.getAsIndirectObject(key)); } catch (Exception e) { log.warn("Cannot load attachment-pdfobject - " + e.getMessage()); } Hashtable fileData = new Hashtable(); fileData.put(ATTACHMENT_FILENAME_STRING, filename); //filename fileData.put(ATTACHMENT_DESCRIPTION_STRING, desc); //Description fileData.put(ATTACHMENT_SIZE_INT, size); //size fileData.put(ATTACHMENT_BYTES_ARR, attBytes); //bytes files.add(fileData); } } } return files; }
From source file:io.konik.carriage.itext.ITextInvoiceExtractor.java
License:Open Source License
private static PdfArray getValidAf(PdfDictionary catalog) { if (catalog.contains(AF)) { PdfArray af = catalog.getAsArray(AF); if (!af.isEmpty() && af.getDirectObject(0).isDictionary()) { return af; }//from w ww . ja v a 2 s.c om } throw new InvoiceExtractionError("Pdf catalog does not contain Valid AF Entry"); }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/** * Extracts locations from the redact annotations contained in the document and applied to the given page. *//*from w ww. j a v a 2 s . c o m*/ private List<PdfCleanUpLocation> extractLocationsFromRedactAnnots(int page, PdfDictionary pageDict) { List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>(); if (pageDict.contains(PdfName.ANNOTS)) { PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS); for (int i = 0; i < annotsArray.size(); ++i) { PdfIndirectReference annotIndirRef = annotsArray.getAsIndirectObject(i); PdfDictionary annotDict = annotsArray.getAsDict(i); PdfName annotSubtype = annotDict.getAsName(PdfName.SUBTYPE); if (annotSubtype.equals(PdfName.REDACT)) { saveRedactAnnotIndirRef(page, annotIndirRef.toString()); locations.addAll(extractLocationsFromRedactAnnot(page, i, annotDict)); } } } return locations; }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/** * Extracts locations from the concrete annotation. * Note: annotation can consist not only of one area specified by the RECT entry, but also of multiple areas specified * by the QuadPoints entry in the annotation dictionary. */// w ww . j a v a 2s .c om private List<PdfCleanUpLocation> extractLocationsFromRedactAnnot(int page, int annotIndex, PdfDictionary annotDict) { List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>(); List<Rectangle> markedRectangles = new ArrayList<Rectangle>(); PdfArray quadPoints = annotDict.getAsArray(PdfName.QUADPOINTS); if (quadPoints.size() != 0) { markedRectangles.addAll(translateQuadPointsToRectangles(quadPoints)); } else { PdfArray annotRect = annotDict.getAsArray(PdfName.RECT); markedRectangles .add(new Rectangle(annotRect.getAsNumber(0).floatValue(), annotRect.getAsNumber(1).floatValue(), annotRect.getAsNumber(2).floatValue(), annotRect.getAsNumber(3).floatValue())); } clippingRects.put(annotIndex, markedRectangles); BaseColor cleanUpColor = null; PdfArray ic = annotDict.getAsArray(PdfName.IC); if (ic != null) { cleanUpColor = new BaseColor(ic.getAsNumber(0).floatValue(), ic.getAsNumber(1).floatValue(), ic.getAsNumber(2).floatValue()); } PdfStream ro = annotDict.getAsStream(PdfName.RO); if (ro != null) { cleanUpColor = null; } for (Rectangle rect : markedRectangles) { locations.add(new PdfCleanUpLocation(page, rect, cleanUpColor)); } return locations; }
From source file:mkl.testarea.itext5.pdfcleanup.StrictPdfCleanUpProcessor.java
License:Open Source License
/** * Deletes redact annotations from the page and substitutes them with either OverlayText or RO object if it's needed. *///from www . ja va 2 s.com private void deleteRedactAnnots(int pageNum) throws IOException, DocumentException { Set<String> indirRefs = redactAnnotIndirRefs.get(pageNum); if (indirRefs == null || indirRefs.isEmpty()) { return; } PdfReader reader = pdfStamper.getReader(); PdfContentByte canvas = pdfStamper.getOverContent(pageNum); PdfDictionary pageDict = reader.getPageN(pageNum); PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS); // j is for access annotRect (i can be decreased, so we need to store additional index, // indicating current position in ANNOTS array in case if we don't remove anything for (int i = 0, j = 0; i < annotsArray.size(); ++i, ++j) { PdfIndirectReference annotIndRef = annotsArray.getAsIndirectObject(i); PdfDictionary annotDict = annotsArray.getAsDict(i); if (indirRefs.contains(annotIndRef.toString()) || indirRefs.contains(getParentIndRefStr(annotDict))) { PdfStream formXObj = annotDict.getAsStream(PdfName.RO); PdfString overlayText = annotDict.getAsString(PdfName.OVERLAYTEXT); if (fillCleanedArea && formXObj != null) { PdfArray rectArray = annotDict.getAsArray(PdfName.RECT); Rectangle annotRect = new Rectangle(rectArray.getAsNumber(0).floatValue(), rectArray.getAsNumber(1).floatValue(), rectArray.getAsNumber(2).floatValue(), rectArray.getAsNumber(3).floatValue()); insertFormXObj(canvas, pageDict, formXObj, clippingRects.get(j), annotRect); } else if (fillCleanedArea && overlayText != null && overlayText.toUnicodeString().length() > 0) { drawOverlayText(canvas, clippingRects.get(j), overlayText, annotDict.getAsString(PdfName.DA), annotDict.getAsNumber(PdfName.Q), annotDict.getAsBoolean(PdfName.REPEAT)); } annotsArray.remove(i--); // array size is changed, so we need to decrease i } } if (annotsArray.size() == 0) { pageDict.remove(PdfName.ANNOTS); } }
From source file:org.gmdev.pdftrick.utils.CustomExtraImgReader.java
License:Open Source License
/** * Read a png image with if all other method fails * @param ref//from w ww. j a v a 2 s . co m * @param resultFile * @return The BufferedImage obj * @throws IOException * @throws ImageReadException */ public static BufferedImage readIndexedPNG(int ref, String resultFile) throws IOException, ImageReadException { PdfReader reader = new PdfReader(resultFile); PRStream stream = (PRStream) reader.getPdfObject(ref); PdfDictionary dic = stream; byte[] content = PdfReader.getStreamBytesRaw(stream); int width = dic.getAsNumber(PdfName.WIDTH).intValue(); int height = dic.getAsNumber(PdfName.HEIGHT).intValue(); int pngBitDepth = dic.getAsNumber(PdfName.BITSPERCOMPONENT).intValue(); PdfObject colorspace = dic.getDirectObject(PdfName.COLORSPACE); PdfArray decode = dic.getAsArray(PdfName.DECODE); PdfArray carray = (PdfArray) colorspace; PdfObject id2 = carray.getDirectObject(3); byte[] palette = null; if (id2 instanceof PdfString) { palette = ((PdfString) id2).getBytes(); } else if (id2 instanceof PRStream) { palette = PdfReader.getStreamBytes(((PRStream) id2)); } Map<PdfName, FilterHandlers.FilterHandler> handlers = new HashMap<PdfName, FilterHandlers.FilterHandler>( FilterHandlers.getDefaultFilterHandlers()); byte[] imageBytes = PdfReader.decodeBytes(content, dic, handlers); int stride = (width * pngBitDepth + 7) / 8; ByteArrayOutputStream ms = new ByteArrayOutputStream(); PngWriter png = new PngWriter(ms); if (decode != null) { if (pngBitDepth == 1) { // if the decode array is 1,0, then we need to invert the image if (decode.getAsNumber(0).intValue() == 1 && decode.getAsNumber(1).intValue() == 0) { int len = imageBytes.length; for (int t = 0; t < len; ++t) { imageBytes[t] ^= 0xff; } } else { // if the decode array is 0,1, do nothing. It's possible that the array could be 0,0 or 1,1 - but that would be silly, so we'll just ignore that case } } else { // todo: add decode transformation for other depths } } int pngColorType = 0; png.writeHeader(width, height, pngBitDepth, pngColorType); if (palette != null) { png.writePalette(palette); } png.writeData(imageBytes, stride); png.writeEnd(); imageBytes = ms.toByteArray(); InputStream in = new ByteArrayInputStream(imageBytes); ImageInputStream ima_stream = ImageIO.createImageInputStream(in); BufferedImage buffImg = null; BufferedImage buffPic = ImageIO.read(ima_stream); // check if image contains a mask image ... experimental for this type of image BufferedImage buffMask = null; PRStream maskStream = (PRStream) dic.getAsStream(PdfName.SMASK); if (maskStream != null) { PdfImageObject maskImage = new PdfImageObject(maskStream); buffMask = maskImage.getBufferedImage(); Image img = PdfTrickUtils.TransformGrayToTransparency(buffMask); buffImg = PdfTrickUtils.ApplyTransparency(buffPic, img); } else { buffImg = buffPic; } reader.close(); ms.close(); in.close(); return buffImg; }