Example usage for javax.swing.text EditorKit read

List of usage examples for javax.swing.text EditorKit read

Introduction

On this page you can find example usages of the javax.swing.text.EditorKit.read method.

Prototype

public abstract void read(Reader in, Document doc, int pos) throws IOException, BadLocationException;

Source Link

Document

Inserts content from the given stream which is expected to be in a format appropriate for this kind of content handler.

Usage

From source file:Main.java

/**
 * Downloads a web page and prints each run of text the HTML parser reports.
 *
 * <p>The document's {@code getReader} is overridden so that the parser callback handed to the
 * editor kit simply prints the text it receives.
 *
 * @param argv command-line arguments (unused)
 * @throws Exception if the URI is malformed, the connection fails, or the content cannot be read
 */
public static void main(String[] argv) throws Exception {
    HTMLDocument doc = new HTMLDocument() {
        @Override
        public HTMLEditorKit.ParserCallback getReader(int pos) {
            return new HTMLEditorKit.ParserCallback() {
                @Override
                public void handleText(char[] data, int pos) {
                    // println(char[]) prints the characters themselves, not the array reference.
                    System.out.println(data);
                }
            };
        }
    };

    URL url = new URI("http://www.google.com").toURL();
    URLConnection conn = url.openConnection();
    EditorKit kit = new HTMLEditorKit();

    // try-with-resources guarantees the reader (and the stream it wraps) is closed,
    // even when parsing fails part-way through.
    try (Reader rd = new InputStreamReader(conn.getInputStream())) {
        kit.read(rd, doc, 0);
    }
}

From source file:Main.java

/**
 * Downloads a web page, parses it into an {@link HTMLDocument}, and prints the {@code href}
 * of every anchor tag found in it.
 *
 * @param argv command-line arguments (unused)
 * @throws Exception if the URI is malformed, the connection fails, or the content cannot be read
 */
public static void main(String[] argv) throws Exception {
    URL url = new URI("http://www.google.com").toURL();
    URLConnection conn = url.openConnection();

    EditorKit kit = new HTMLEditorKit();
    HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();

    // try-with-resources guarantees the reader (and the stream it wraps) is closed,
    // even when parsing fails part-way through.
    try (Reader rd = new InputStreamReader(conn.getInputStream())) {
        kit.read(rd, doc, 0);
    }

    HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A);
    while (it.isValid()) {
        // The iterator is only declared to return an AttributeSet, so work against that
        // interface instead of downcasting to one concrete implementation.
        // (Fully qualified so this snippet needs no additional import.)
        javax.swing.text.AttributeSet attrs = it.getAttributes();

        Object link = (attrs == null) ? null : attrs.getAttribute(HTML.Attribute.HREF);
        if (link != null) {
            System.out.println(link);
        }
        it.next();
    }
}

From source file:HTML.java

/**
 * Converts HTML markup to plain text by parsing it into a Swing document and
 * reading back the document's text.
 *
 * @param html the string containing HTML
 * @return the text of the parsed document, or an empty string if parsing or
 *         text retrieval throws
 */
public static final String html2text(String html) {
    EditorKit htmlKit = new HTMLEditorKit();
    Document parsed = htmlKit.createDefaultDocument();
    parsed.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

    String text = "";
    try {
        htmlKit.read(new StringReader(html), parsed, 0);
        text = parsed.getText(0, parsed.getLength());
    } catch (Exception ignored) {
        // Best effort: on any failure the empty-string default is returned.
    }
    return text;
}

From source file:org.python.pydev.core.docutils.StringUtils.java

/**
 * Extracts the plain text from the given HTML markup.
 *
 * @param html the markup to parse
 * @return the text held by the parsed document, or an empty string if any
 *         exception is thrown along the way
 */
public static String extractTextFromHTML(String html) {
    try {
        EditorKit htmlKit = new HTMLEditorKit();
        Document target = htmlKit.createDefaultDocument();

        // Charset directives are not handled properly by the document, so ignore them.
        target.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

        // Parse the markup straight from an in-memory reader into the document.
        htmlKit.read(new StringReader(html), target, 0);

        // The document now holds the text of the HTML.
        int length = target.getLength();
        return target.getText(0, length);
    } catch (Exception ignored) {
        // Best effort: any failure falls back to the empty string.
        return "";
    }
}