Java HTML Parse Jsoup parseUTF8HTMLDocument(String html)

Here you can find the source of parseUTF8HTMLDocument(String html)

Description

Parses an HTML string into a jsoup Document.

License

BSD License

Declaration

public static Document parseUTF8HTMLDocument(String html) 

Method Source Code


//package com.java2s;
//License from project: BSD License 

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;

/**
 * Static helpers that turn HTML text, or the body of an HTTP response,
 * into a jsoup {@link Document}.
 */
public class Main {
    /**
     * Base URI handed to jsoup. The second argument of
     * {@code Parser.parse(String, String)} is the document's base URI, not a
     * charset name; the origin of the markup is unknown here, so it is empty.
     */
    private static final String NO_BASE_URI = "";

    /** Size, in chars, of the buffer used while copying the response body. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Parses already-decoded HTML text into a document.
     *
     * @param html the HTML markup to parse
     * @return the document produced by jsoup's parser
     */
    public static Document parseUTF8HTMLDocument(String html) {
        // A String is already decoded, so no charset is involved at this point.
        return Parser.parse(html, NO_BASE_URI);
    }

    /**
     * Reads the body of {@code response} as UTF-8 and parses it into a document.
     *
     * @param response the HTTP response whose entity holds the HTML
     * @return the document parsed from whatever body text could be read
     */
    public static Document parseUTF8HTMLDocument(CloseableHttpResponse response) {
        return parseUTF8HTMLDocument(readContent(response));
    }

    /**
     * Reads the entity body of {@code response}, decoding it as UTF-8.
     *
     * <p>The body is copied verbatim, line terminators included, so markup
     * that spans several lines is not fused together. Reading is best-effort:
     * on failure the stack trace is printed and the text read so far is
     * returned. The response itself is left open for the caller to close.
     *
     * @param response the HTTP response to read; may be {@code null}
     * @return the decoded body, or an empty string when there is no response
     *         or no entity
     */
    public static String readContent(CloseableHttpResponse response) {
        StringBuilder content = new StringBuilder();
        if (response == null || response.getEntity() == null) {
            return content.toString();
        }
        // try-with-resources closes the content stream even when reading fails.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8))) {
            char[] buffer = new char[BUFFER_SIZE];
            int read;
            // Copy raw chars instead of readLine(), which would drop line breaks.
            while ((read = reader.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        } catch (IOException | RuntimeException e) {
            // Best-effort by design: report the failure and return the partial body.
            e.printStackTrace();
        }
        return content.toString();
    }
}

Related

  1. parseInfoHeader(Element element)
  2. parsePropertyTable(Element table)
  3. parseTable2ArrayList(Document doc, String selectorRow, String selectorCol)
  4. parseTemplate1_1(Element element)
  5. parseTemplate1_2(Element element)
  6. parseWithAdultCheck(URL url, int timeout)
  7. prettyPrint(String html)
  8. processHtml(String html)
  9. removeAllHtmlTags(String unsafe)