Java HTML / XML How to - Parse tags inside CDATA








Question

We would like to know how to parse tags inside CDATA.

Answer

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/* Source: www.java2s.com */
/**
 * Example: extract the {@code src} of an image whose markup is carried as
 * text inside a {@code <description>} element.
 *
 * <p>The outer document is parsed with jsoup, the text content of each
 * {@code <description>} element is parsed a second time as HTML, and the
 * {@code src} attribute of the first {@code <img>} found there is printed.
 *
 * <p>NOTE(review): the sample payload is elided as {@code "..."}; presumably
 * it is meant to hold escaped or CDATA-wrapped markup containing an
 * {@code <img>} tag. Confirm against the real input.
 */
public class Main {
    /**
     * Runs the example and prints one line per description that contains an
     * image, followed by {@code Done}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception kept for compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        String html = "<description>"
                        + "..."
                    + "</description>";

        Document doc = Jsoup.parse(html);
        for (Element desc : doc.select("description")) {
            // text() yields the element's character data, which is then
            // re-parsed as markup by the helper.
            String src = firstImageSrc(desc.text());
            if (src != null) {
                System.out.println(src);
            }
        }
        System.out.println("Done");
    }

    /**
     * Parses {@code markup} as HTML and returns the {@code src} attribute of
     * the first {@code <img>} element.
     *
     * @param markup HTML text to search
     * @return the attribute value as returned by {@code Element.attr}, or
     *         {@code null} when the markup contains no {@code <img>} element
     */
    private static String firstImageSrc(String markup) {
        // select(...).first() returns null on an empty match; guard it so a
        // description without an image does not abort the whole run.
        Element img = Jsoup.parse(markup).select("img").first();
        return img == null ? null : img.attr("src");
    }

}