Parse HTML loaded from the web using jsoup - Java HTML

Java examples for HTML: jsoup

Description

Parse HTML loaded from the web using jsoup.

Demo Code

import java.net.URL;
import java.io.File;
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.jsoup.select.Elements;
import org.jsoup.nodes.Element;

/**
 * Small jsoup demos that download Wikipedia pages and pull pieces out of the parsed HTML.
 *
 * <p>Every method here performs a live HTTP request through {@code Jsoup.connect(...).get()},
 * so running this class requires network access.
 */
public class ParseWiki {
    /**
     * Runs the three demos in order: {@link #parseInfoBox()}, {@link #test()}, {@link #parsing()}.
     *
     * @param args command-line arguments; not used
     * @throws IOException if any of the page downloads fails
     */
    public static void main(String[] args) throws IOException {
        parseInfoBox();
        test();
        parsing();
    }

    /**
     * Fetches the "Boston" article and reads several views of the page and of its first
     * {@code <a>} element: page text, link target, link text, and the link's outer/inner HTML.
     *
     * <p>The values are only extracted to show the accessor calls; nothing is printed.
     * If the page contains no {@code <a>} element the method returns without doing anything.
     *
     * @throws IOException if the page download fails
     */
    public static void test() throws IOException {
        Document doc = Jsoup.connect("http://en.wikipedia.org/wiki/Boston").get();
        Element link = doc.select("a").first();
        if (link == null) {
            // first() yields null for an empty selection; there is no link to inspect.
            return;
        }

        String text = doc.body().text();       // text of the whole <body>, markup stripped
        String linkHref = link.attr("href");   // raw value of the href attribute
        String linkText = link.text();         // text inside the link, markup stripped

        String linkOuterH = link.outerHtml();  // the <a ...>...</a> element itself as HTML
        String linkInnerH = link.html();       // only the HTML nested inside the <a> element
    }

    /**
     * Fetches the "boston" article and prints the text of the first {@code <p>} that is a
     * direct child of {@code div#mw-content-text}.
     *
     * <p>If no such paragraph exists the method returns without printing anything.
     *
     * @throws IOException if the page download fails
     */
    public static void parsing() throws IOException {
        Document doc = Jsoup.connect("http://en.wikipedia.org/wiki/boston").get();
        Element firstParagraph = doc.select("div[id=mw-content-text] > p").first();
        if (firstParagraph == null) {
            // Empty selection: avoid the NullPointerException the text() call would raise.
            return;
        }
        System.out.println(firstParagraph.text());
    }

    /**
     * Fetches the "Tom_Cruise" article and prints the outer HTML of the first {@code <table>}
     * whose {@code class} attribute contains the substring {@code "infobox"}.
     *
     * <p>Prints nothing when no table matches.
     *
     * @throws IOException if the page download fails
     */
    public static void parseInfoBox() throws IOException {
        Document doc = Jsoup.connect("http://en.wikipedia.org/wiki/Tom_Cruise").get();
        Elements tables = doc.body().getElementsByTag("table");
        for (Element table : tables) {
            // Substring match on the class attribute, so "infobox vcard" etc. also qualify.
            if (table.className().contains("infobox")) {
                System.out.println(table.outerHtml());
                break; // only the first matching table is wanted
            }
        }
    }

}

Related Tutorials