Example usage for org.apache.pdfbox.pdfparser PDFStreamParser getTokens

List of usage examples for org.apache.pdfbox.pdfparser PDFStreamParser getTokens

Introduction

On this page you can find example usages of org.apache.pdfbox.pdfparser.PDFStreamParser.getTokens().

Prototype

public List<Object> getTokens() 

Source Link

Document

This will get the tokens that were parsed from the stream by the parse() method.

Usage

From source file:airviewer.TextInAnnotationReplacer.java

License:Apache License

/**
 * Replaces the text drawn by the normal appearance stream of an annotation.
 *
 * <p>The appearance stream is parsed into tokens, the string operand of every {@code Tj}
 * operator and every string element of every {@code TJ} array operand is overwritten with
 * {@code newContents} (encoded as UTF-8), and the tokens are written back into the same
 * stream, which is then set again as the annotation's normal appearance.
 *
 * <p>Nothing happens when the annotation has no appearance or no normal appearance. An
 * {@link IOException} raised while parsing or writing is logged at SEVERE level and not
 * rethrown.
 *
 * @param document the document the annotation belongs to; not used by this method
 * @param anAnnotation the annotation whose normal appearance stream is rewritten
 * @param newContents the replacement text
 */
static void replaceText(PDDocument document, PDAnnotation anAnnotation, String newContents) {

    if (null == anAnnotation.getAppearance() || null == anAnnotation.getAppearance().getNormalAppearance()) {
        return;
    }
    try {
        PDAppearanceStream annotationAppearanceStream = anAnnotation.getAppearance().getNormalAppearance()
                .getAppearanceStream();

        PDFStreamParser parser = new PDFStreamParser(annotationAppearanceStream);
        parser.parse();
        List<Object> tokens = parser.getTokens();
        // Start at 1: an operator in the first position has no preceding operand to update.
        for (int j = 1; j < tokens.size(); j++) {
            Object next = tokens.get(j);
            if (!(next instanceof Operator)) {
                continue;
            }
            String operatorName = ((Operator) next).getName();
            Object operand = tokens.get(j - 1);
            // Tj and TJ are the two operators that display strings in a PDF.
            // The operand type is checked so that an unexpected token is skipped instead of
            // aborting the whole replacement with a ClassCastException.
            if ("Tj".equals(operatorName) && operand instanceof COSString) {
                // Tj takes a single string operand: overwrite it.
                ((COSString) operand).setValue(newContents.getBytes(Charset.forName("UTF-8")));
            } else if ("TJ".equals(operatorName) && operand instanceof COSArray) {
                // TJ takes an array operand; only its string elements are overwritten.
                // NOTE(review): every string element receives the full replacement text, so an
                // array with several string segments carries it several times - confirm intended.
                COSArray segments = (COSArray) operand;
                for (int k = 0; k < segments.size(); k++) {
                    Object arrElement = segments.getObject(k);
                    if (arrElement instanceof COSString) {
                        ((COSString) arrElement).setValue(newContents.getBytes(Charset.forName("UTF-8")));
                    }
                }
            }
        }

        // Write the modified tokens back into the appearance stream.
        try (OutputStream out = annotationAppearanceStream.getStream().createOutputStream()) {
            ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
            tokenWriter.writeTokens(tokens);
        }

        anAnnotation.getAppearance().setNormalAppearance(annotationAppearanceStream);
    } catch (IOException ex) {
        Logger.getLogger(TextInAnnotationReplacer.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:org.apache.fop.render.pdf.pdfbox.PDFWriter.java

License:Apache License

/**
 * Parses the given PDF stream and appends a textual form of its operators to {@code s}.
 *
 * <p>Non-operator tokens are collected as pending operands. For each operator the pending
 * operands are handed to {@code readPDFArguments}, the operator name and a newline are
 * appended to {@code s}, and the operand list is cleared. When the operator carries image
 * parameters, their keys and values are passed to {@code readPDFArguments} as well, followed
 * by {@code "ID "} plus the image data decoded as ISO-8859-1, and then {@code "EI\n"}.
 *
 * <p>{@code s} and {@code readPDFArguments} are declared outside this method.
 *
 * @param pdStream the stream to parse
 * @return the result of {@code s.toString()} after all tokens have been processed
 * @throws IOException if parsing the stream or decoding the image data fails
 */
public String writeText(PDStream pdStream) throws IOException {
    PDFStreamParser pdfStreamParser = new PDFStreamParser(pdStream);
    pdfStreamParser.parse();
    List<Object> tokens = pdfStreamParser.getTokens();
    List<COSBase> operands = new ArrayList<COSBase>();
    for (Object token : tokens) {
        if (!(token instanceof Operator)) {
            // Anything that is not an operator is an operand of the next operator.
            operands.add((COSBase) token);
            continue;
        }

        Operator operator = (Operator) token;
        readPDFArguments(operator, operands);
        s.append(operator.getName() + "\n");
        operands.clear();

        if (operator.getImageParameters() == null) {
            continue;
        }

        // Operator with image parameters: emit the key/value pairs, then the raw image data.
        for (Map.Entry<COSName, COSBase> parameter : operator.getImageParameters().entrySet()) {
            operands.add(parameter.getKey());
            operands.add(parameter.getValue());
        }
        readPDFArguments(operator, operands);
        s.append("ID " + new String(operator.getImageData(), "ISO-8859-1"));
        operands.clear();
        s.append("EI\n");
    }
    return s.toString();
}

From source file:org.lockss.pdf.MockPdfTokenStream.java

License:Open Source License

/**
 * <p>
 * Builds a mock PDF token stream by parsing the given input stream and
 * converting every parsed PDFBox token with {@code convert}.
 * </p>
 * 
 * @param inputStream
 *          An input stream of PDF token stream source.
 * @throws IOException
 *           if parsing fails or an I/O error occurs.
 * @since 1.67
 */
public MockPdfTokenStream(InputStream inputStream) throws IOException {
    PDFStreamParser streamParser = new PDFStreamParser(inputStream, new RandomAccessBuffer());
    streamParser.parse();
    List<Object> parsedTokens = streamParser.getTokens();
    int tokenCount = parsedTokens.size();
    // Pre-size the list: exactly one converted token is stored per parsed token.
    this.pdfTokens = new ArrayList<PdfToken>(tokenCount);
    for (int index = 0; index < tokenCount; index++) {
        this.pdfTokens.add(convert(parsedTokens.get(index)));
    }
}

From source file:Utilities.GlobalVar.java

/**
 * Replaces placeholder text on each page of a document with a running sequence label.
 *
 * <p>Each page's content stream is parsed into tokens. A {@code Tj} string operand containing
 * {@code "/0"}, or a string element of a {@code TJ} array operand containing {@code "/00"},
 * is replaced by {@code cycle + "/" + GlobalVar.globalCountGenerator5Digit(sequenceNum)},
 * encoded as ISO-8859-1. The sequence number starts at 1 and is incremented after every
 * replacement. The tokens are then written to a new stream that becomes the page's contents.
 * Strings that do not match are left untouched.
 *
 * @param doc the document whose pages are rewritten in place
 * @param cycle the prefix placed before the {@code "/"} of every generated label
 * @throws IOException if a content stream cannot be parsed or written
 */
public static void updateSeqNum(PDDocument doc, String cycle) throws IOException {
    int sequenceNum = 1;
    List<?> pages = doc.getDocumentCatalog().getAllPages();

    for (int i = 0; i < pages.size(); i++) {
        PDPage page = (PDPage) pages.get(i);
        PDStream contents = page.getContents();
        if (contents == null) {
            // A page without a content stream has nothing to rewrite.
            continue;
        }
        PDFStreamParser parser = new PDFStreamParser(contents.getStream());
        parser.parse();
        List<Object> tokens = parser.getTokens();
        // Start at 1: an operator in the first position has no preceding operand.
        for (int j = 1; j < tokens.size(); j++) {
            Object next = tokens.get(j);
            if (!(next instanceof PDFOperator)) {
                continue;
            }
            String operation = ((PDFOperator) next).getOperation();
            Object operand = tokens.get(j - 1);
            // Tj and TJ are the two operators that display strings in a PDF.
            // Operand types are checked so an unexpected token is skipped rather than
            // aborting the whole document with a ClassCastException.
            if (operation.equals("Tj") && operand instanceof COSString) {
                // Tj takes one operand: the string to display.
                COSString previous = (COSString) operand;
                if (previous.getString().contains("/0")) {
                    // The whole string is replaced by the label. It is assigned directly rather
                    // than through String.replaceFirst, which would interpret the PDF text as a
                    // regular expression and the label as a replacement template.
                    String seq = cycle + "/" + GlobalVar.globalCountGenerator5Digit(sequenceNum);
                    previous.reset();
                    previous.append(seq.getBytes("ISO-8859-1"));
                    sequenceNum++;
                    // Stops scanning the remaining tokens of this page.
                    break;
                }
            } else if (operation.equals("TJ") && operand instanceof COSArray) {
                COSArray previous = (COSArray) operand;
                for (int k = 0; k < previous.size(); k++) {
                    Object arrElement = previous.getObject(k);
                    if (arrElement instanceof COSString) {
                        COSString cosString = (COSString) arrElement;
                        if (cosString.getString().contains("/00")) {
                            String seq = cycle + "/" + GlobalVar.globalCountGenerator5Digit(sequenceNum);
                            cosString.reset();
                            cosString.append(seq.getBytes("ISO-8859-1"));
                            sequenceNum++;
                            // NOTE(review): this only leaves the array loop, so later tokens on
                            // the page are still scanned, unlike the Tj branch; the marker
                            // ("/00" vs "/0") differs too - confirm both are intended.
                            break;
                        }
                    }
                }
            }
        }
        // Now that the tokens are updated, replace the page content stream.
        PDStream updatedStream = new PDStream(doc);
        OutputStream out = updatedStream.createOutputStream();
        try {
            ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
            tokenWriter.writeTokens(tokens);
        } finally {
            // Close the stream so the written tokens are flushed and the handle is released.
            out.close();
        }
        page.setContents(updatedStream);
    }
}