Example usage for javax.swing.text.html HTMLDocument getText

List of usage examples for javax.swing.text.html HTMLDocument getText

Introduction

On this page you can find usage examples for the getText method of javax.swing.text.html.HTMLDocument.

Prototype

public String getText(int offset, int length) throws BadLocationException 

Source Link

Document

Gets a sequence of text from the document.

Usage

From source file:MainClass.java

/**
 * Fetches http://www.java2s.com, parses it into an {@link HTMLDocument} and
 * prints every anchor as {@code href - link text}.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the page cannot be read or parsed, or if an anchor's
 *                   offsets are rejected by {@code getText}
 */
public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.java2s.com");
    URLConnection connection = url.openConnection();
    BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream()));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
        parser.parse(br, callback, true);
        // flush() is documented as the last call on the reader: it pushes the
        // still-buffered elements into the document. Without it the anchors at
        // the end of the page are never seen by the iterator below.
        callback.flush();
    } finally {
        // Closing the outermost reader also closes the connection's stream.
        br.close();
    }

    HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
    while (iterator.isValid()) {
        AttributeSet attributes = iterator.getAttributes();
        String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
        System.out.print(srcString);

        // getText takes (offset, length), so convert the end offset to a length.
        int startOffset = iterator.getStartOffset();
        int length = iterator.getEndOffset() - startOffset;
        String text = htmlDoc.getText(startOffset, length);
        System.out.println(" - " + text);

        iterator.next();
    }
}

From source file:MainClass.java

/**
 * Fetches http://www.google.com, parses it into an {@link HTMLDocument} and
 * prints every anchor as {@code href - link text}.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the page cannot be read or parsed, or if an anchor's
 *                   offsets are rejected by {@code getText}
 */
public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.com");
    URLConnection connection = url.openConnection();
    BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream()));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
        parser.parse(br, callback, true);
        // The callback buffers parsed elements; flush() is the documented final
        // call that moves the remaining ones into the document.
        callback.flush();
    } finally {
        // Release the network stream whether or not parsing succeeded.
        br.close();
    }

    HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
    while (iterator.isValid()) {
        AttributeSet attributes = iterator.getAttributes();
        String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
        System.out.print(srcString);

        // The link text is the document range covered by the anchor element.
        int startOffset = iterator.getStartOffset();
        int length = iterator.getEndOffset() - startOffset;
        String text = htmlDoc.getText(startOffset, length);
        System.out.println(" - " + text);

        iterator.next();
    }
}

From source file:MainClass.java

/**
 * Fetches http://www.google.com, parses it into an {@link HTMLDocument} and
 * prints every anchor's href followed by two spaces and the link text.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the page cannot be read or parsed, or if an anchor's
 *                   offsets are rejected by {@code getText}
 */
public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.com");
    URLConnection connection = url.openConnection();
    BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream()));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
        parser.parse(br, callback, true);
        // Push the elements still held in the callback's buffer into the
        // document; otherwise the end of the page is missing from it.
        callback.flush();
    } finally {
        // Release the network stream whether or not parsing succeeded.
        br.close();
    }

    HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
    while (iterator.isValid()) {
        AttributeSet attributes = iterator.getAttributes();
        String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
        System.out.print(srcString);

        // getText takes (offset, length), so convert the end offset to a length.
        int startOffset = iterator.getStartOffset();
        int length = iterator.getEndOffset() - startOffset;
        String text = htmlDoc.getText(startOffset, length);
        System.out.println("  " + text);

        iterator.next();
    }
}

From source file:DocumentIteratorExample.java

/**
 * Fetches http://www.java2s.com, parses it into an {@link HTMLDocument},
 * prints every anchor as {@code href - link text} and then exits the JVM.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the page cannot be read or parsed, or if an anchor's
 *                   offsets are rejected by {@code getText}
 */
public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.java2s.com");
    URLConnection connection = url.openConnection();
    BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream()));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
        parser.parse(br, callback, true);
        // flush() is the documented last call on the reader; it inserts the
        // elements that are still buffered, i.e. the tail of the page.
        callback.flush();
    } finally {
        // Release the network stream whether or not parsing succeeded.
        br.close();
    }

    HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
    while (iterator.isValid()) {
        AttributeSet attributes = iterator.getAttributes();
        String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
        System.out.print(srcString);

        // The link text is the document range covered by the anchor element.
        int startOffset = iterator.getStartOffset();
        int length = iterator.getEndOffset() - startOffset;
        String text = htmlDoc.getText(startOffset, length);
        System.out.println(" - " + text);

        iterator.next();
    }
    System.exit(0);
}

From source file:MainClass.java

/**
 * Fetches http://www.java2s.com, parses it into an {@link HTMLDocument} and
 * prints the text of every H1 element as {@code h1: text}.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the page cannot be read or parsed, or if an element's
 *                   offsets are rejected by {@code getText}
 */
public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.java2s.com");
    URLConnection connection = url.openConnection();
    BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream()));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
        parser.parse(br, callback, true);
        // flush() is documented as the last call on the reader: it inserts the
        // elements that are still buffered, so headings near the end of the
        // page are not lost.
        callback.flush();
    } finally {
        // Release the network stream whether or not parsing succeeded.
        br.close();
    }

    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null) {
        Object name = element.getAttributes().getAttribute(StyleConstants.NameAttribute);
        // Identity comparison against the tag constant already excludes any
        // non-tag name, so no separate instanceof check is needed.
        if (name != HTML.Tag.H1) {
            continue;
        }

        // A heading's text may be split over several CONTENT leaves (for
        // example around inline markup), so concatenate all of them.
        StringBuffer text = new StringBuffer();
        int count = element.getElementCount();
        for (int i = 0; i < count; i++) {
            Element child = element.getElement(i);
            Object childName = child.getAttributes().getAttribute(StyleConstants.NameAttribute);
            if (childName == HTML.Tag.CONTENT) {
                int startOffset = child.getStartOffset();
                int length = child.getEndOffset() - startOffset;
                text.append(htmlDoc.getText(startOffset, length));
            }
        }
        System.out.println(name + ": " + text.toString());
    }
}

From source file:MainClass.java

/**
 * Fetches http://www.google.com, parses it into an {@link HTMLDocument} and
 * prints, one per line, the text of every CONTENT child of each H1, H2 and P
 * element.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the page cannot be read or parsed, or if an element's
 *                   offsets are rejected by {@code getText}
 */
public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.com");
    URLConnection connection = url.openConnection();
    BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream()));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
        parser.parse(br, callback, true);
        // flush() is documented as the last call on the reader: it inserts the
        // elements that are still buffered, i.e. the tail of the page.
        callback.flush();
    } finally {
        // Release the network stream whether or not parsing succeeded.
        br.close();
    }

    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null) {
        Object name = element.getAttributes().getAttribute(StyleConstants.NameAttribute);
        // Identity comparison against the tag constants already excludes any
        // non-tag name, so no separate instanceof check is needed.
        boolean wanted = name == HTML.Tag.H1 || name == HTML.Tag.H2 || name == HTML.Tag.P;
        if (!wanted) {
            continue;
        }

        // Each CONTENT leaf is printed on its own line rather than joined.
        int count = element.getElementCount();
        for (int i = 0; i < count; i++) {
            Element child = element.getElement(i);
            Object childName = child.getAttributes().getAttribute(StyleConstants.NameAttribute);
            if (childName == HTML.Tag.CONTENT) {
                int startOffset = child.getStartOffset();
                int length = child.getEndOffset() - startOffset;
                System.out.println(htmlDoc.getText(startOffset, length));
            }
        }
    }
}

From source file:ElementIteratorExample.java

/**
 * Loads the HTML page at the URL given on the command line and prints the
 * text of every H1, H2 and H3 element as {@code tag: text}.
 *
 * <p>If the argument count is not exactly one, a usage message is written to
 * standard error and the method returns without touching the network.
 *
 * @param args exactly one element: the URL of the page to read
 * @throws Exception if the page cannot be read or parsed, or if an element's
 *                   offsets are rejected by {@code getText}
 */
public static void main(String args[]) throws Exception {

    if (args.length != 1) {
        System.err.println("Usage: java ElementIteratorExample input-URL");
        // Stop here: falling through would index args[0] and fail with an
        // ArrayIndexOutOfBoundsException when no argument was supplied.
        return;
    }

    // Load HTML file synchronously
    URL url = new URL(args[0]);
    URLConnection connection = url.openConnection();
    BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream()));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    try {
        parser.parse(br, callback, true);
        // flush() is documented as the last call on the reader: it inserts the
        // elements that are still buffered, so headings near the end of the
        // page are not lost.
        callback.flush();
    } finally {
        // Release the network stream whether or not parsing succeeded.
        br.close();
    }

    // Walk every element and pick out the headings
    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null) {
        Object name = element.getAttributes().getAttribute(StyleConstants.NameAttribute);
        // Identity comparison against the tag constants already excludes any
        // non-tag name, so no separate instanceof check is needed.
        boolean isHeading = name == HTML.Tag.H1 || name == HTML.Tag.H2 || name == HTML.Tag.H3;
        if (!isHeading) {
            continue;
        }

        // Build up content text as it may be within multiple elements
        StringBuffer text = new StringBuffer();
        int count = element.getElementCount();
        for (int i = 0; i < count; i++) {
            Element child = element.getElement(i);
            Object childName = child.getAttributes().getAttribute(StyleConstants.NameAttribute);
            if (childName == HTML.Tag.CONTENT) {
                int startOffset = child.getStartOffset();
                int length = child.getEndOffset() - startOffset;
                text.append(htmlDoc.getText(startOffset, length));
            }
        }
        System.out.println(name + ": " + text.toString());
    }
    System.exit(0);
}

From source file:com.hexidec.ekit.action.ListAutomationAction.java

/**
 * Creates, extends or reverts a list at the caret or over the current
 * selection of the parent editor's text pane.
 *
 * <p>Without a selection: unless the action command is {@code "newListPoint"},
 * a caret already inside an OL/UL reverts that list; otherwise a new
 * {@code <li>} is inserted into the enclosing list of this action's type, or
 * a new one-item list is created. With a selection: an enclosing OL/UL is
 * reverted; otherwise the selected text is replaced by a list with one item
 * per line (split on CR/LF if present, else on {@code <br>} elements).
 *
 * @param ae the triggering event; only its action command is read
 */
public void actionPerformed(ActionEvent ae) {
    try {
        JEditorPane jepEditor = (JEditorPane) (parentEkit.getTextPane());
        String selTextBase = jepEditor.getSelectedText();
        boolean nothingSelected = (selTextBase == null || selTextBase.length() < 1);
        if (nothingSelected) {
            int pos = parentEkit.getCaretPosition();
            parentEkit.setCaretPosition(pos);
            // Compare by value: the original used != on the String reference,
            // which only works when both sides happen to be the same interned
            // literal. A null command still counts as "not newListPoint".
            if (!"newListPoint".equals(ae.getActionCommand())) {
                if (htmlUtilities.checkParentsTag(HTML.Tag.OL) || htmlUtilities.checkParentsTag(HTML.Tag.UL)) {
                    revertList(htmlUtilities.getListItemContainer());
                    return;
                }
            }
            String sListType = (baseTag == HTML.Tag.OL ? "ol" : "ul");
            StringBuffer sbNew = new StringBuffer();
            if (htmlUtilities.checkParentsTag(baseTag)) {
                // Already inside a list of this type: just add another item.
                sbNew.append("<li></li>");
                insertHTML(parentEkit.getTextPane(), parentEkit.getExtendedHtmlDoc(),
                        parentEkit.getTextPane().getCaretPosition(), sbNew.toString(), 0, 0, HTML.Tag.LI);
            } else {
                int caretPos = parentEkit.getCaretPosition();
                boolean isLast = (caretPos == parentEkit.getExtendedHtmlDoc().getLength());
                // When the list is created at the very end of the document, an
                // extra paragraph is appended after it.
                sbNew.append("<" + sListType + "><li></li></" + sListType + ">"
                        + (isLast ? "<p style=\"margin-top: 0\">&nbsp;</p>" : ""));
                insertHTML(parentEkit.getTextPane(), parentEkit.getExtendedHtmlDoc(),
                        parentEkit.getTextPane().getCaretPosition(), sbNew.toString(), 0, 0,
                        (sListType.equals("ol") ? HTML.Tag.OL : HTML.Tag.UL));
                parentEkit.setCaretPosition(caretPos + 1);
            }
            parentEkit.refreshOnUpdate();
        } else {
            if (htmlUtilities.checkParentsTag(HTML.Tag.OL) || htmlUtilities.checkParentsTag(HTML.Tag.UL)) {
                revertList(htmlUtilities.getListItemContainer());
                return;
            } else {
                String sListType = (baseTag == HTML.Tag.OL ? "ol" : "ul");
                HTMLDocument htmlDoc = (HTMLDocument) (jepEditor.getDocument());
                int iStart = jepEditor.getSelectionStart();
                int iEnd = jepEditor.getSelectionEnd();
                String selText = htmlDoc.getText(iStart, iEnd - iStart);
                StringBuffer sbNew = new StringBuffer();
                sbNew.append("<" + sListType + ">");
                // tokenize on known linebreaks if present, otherwise manually parse on <br> break tags
                if ((selText.indexOf("\r") > -1) || (selText.indexOf("\n") > -1)) {
                    String sToken = ((selText.indexOf("\r") > -1) ? "\r" : "\n");
                    StringTokenizer stTokenizer = new StringTokenizer(selText, sToken);
                    while (stTokenizer.hasMoreTokens()) {
                        sbNew.append("<li>");
                        sbNew.append(stTokenizer.nextToken());
                        sbNew.append("</li>");
                    }
                } else {
                    StringBuffer sbItem = new StringBuffer();
                    for (int ch = 0; ch < selText.length(); ch++) {
                        String elem = (htmlDoc.getCharacterElement(iStart + ch) != null
                                ? htmlDoc.getCharacterElement(iStart + ch).getName().toLowerCase()
                                : "[null]");
                        if (elem.equals("br") && sbItem.length() > 0) {
                            sbNew.append("<li>");
                            sbNew.append(sbItem.toString());
                            sbNew.append("</li>");
                            sbItem.delete(0, sbItem.length());
                        } else {
                            sbItem.append(selText.charAt(ch));
                        }
                    }
                    // Emit whatever follows the last <br>. The selection is
                    // removed from the document below, so dropping this
                    // remainder (as the original did) silently deleted text.
                    if (sbItem.length() > 0) {
                        sbNew.append("<li>");
                        sbNew.append(sbItem.toString());
                        sbNew.append("</li>");
                    }
                }
                sbNew.append("</" + sListType + ">");
                htmlDoc.remove(iStart, iEnd - iStart);
                insertHTML(jepEditor, htmlDoc, iStart, sbNew.toString(), 1, 1, null);
            }
        }
    } catch (BadLocationException ble) {
        // Kept as best-effort, as in the original: the failure is ignored.
        // NOTE(review): the offsets come from the editor's own caret/selection,
        // so this is presumably not expected to fire; consider logging it.
    }
}

From source file:org.deegree.tools.metadata.InspireValidator.java

/**
 * parse INSPIRE metadata validator response and print out result onto the console
 * /*from w w  w . j  a  va 2s  . c o m*/
 * @param response
 * @throws IOException
 * @throws IllegalStateException
 */
private void parseServiceResponse(HttpResponse response) throws Exception {
    String s = FileUtils.readTextFile(((BasicHttpResponse) response).getEntity().getContent()).toString();
    if (response.getStatusLine().getStatusCode() != 200) {
        outputWriter.println(s);
        outputWriter.println();
        return;
    }
    s = "<html><head></head><body>" + s + "</body></html>";
    BufferedReader br = new BufferedReader(new StringReader(s));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
    parser.parse(br, callback, true);

    // Parse
    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null) {
        AttributeSet attributes = element.getAttributes();
        Object name = attributes.getAttribute(StyleConstants.NameAttribute);
        if ((name instanceof HTML.Tag) && ((name == HTML.Tag.IMPLIED))) {
            // Build up content text as it may be within multiple elements
            StringBuffer text = new StringBuffer();
            int count = element.getElementCount();
            for (int i = 0; i < count; i++) {
                Element child = element.getElement(i);
                AttributeSet childAttributes = child.getAttributes();
                if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) {
                    int startOffset = child.getStartOffset();
                    int endOffset = child.getEndOffset();
                    int length = endOffset - startOffset;
                    text.append(htmlDoc.getText(startOffset, length));
                }
            }
            outputWriter.println(text.toString());
        }
    }
    outputWriter.println("---------------------------------------------------------------------");
    outputWriter.println();
}