Java Utility Methods HTML to String

List of utility methods for converting HTML to a String

Description

The methods for converting HTML to a String are organized into topic(s).

Method

String html2Plain(String text)
html Plain
StringBuilder result = new StringBuilder();
for (int i = 0; i < text.length(); i++) {
    if (text.charAt(i) != '&' || i >= text.length() - 2) {
        result.append(text.charAt(i));
    } else {
        i++;
        if (text.charAt(i) != '#') {
            result.append('&');
...
String html2Text(String html)
Cuts all the HTML tags/comments/styles from the HTML text and returns only the printable text.
StringBuilder sb = new StringBuilder();
int state = 0;
int styleStart = html.toLowerCase().indexOf("<style");
int styleEnd = html.toLowerCase().indexOf("/style>");
while (styleStart != -1 && styleEnd > styleStart) {
    html = html.substring(0, styleStart) + html.substring(styleEnd + "/style>".length());
    styleStart = html.toLowerCase().indexOf("<style");
    styleEnd = html.toLowerCase().indexOf("/style>");
...
String htmlToStr(String htmlStr, int max_count)
html To Str
String result = "";
boolean flag = true;
if (htmlStr == null) {
    return null;
char[] a = htmlStr.toCharArray();
int length = a.length;
for (int i = 0; i < length; i++) {
...
String htmlToString(String aS_Text)
html To String
if (aS_Text == null) {
    return null;
StringBuffer l_StringBuffer = new StringBuffer();
int li_length = aS_Text.length();
for (int li_index = 0; li_index < li_length; li_index++) {
    char l_char = aS_Text.charAt(li_index);
    switch (l_char) {
...
String htmlToString(String s)
Unescape HTML escape characters in String.
StringBuilder sb = new StringBuilder(s.length());
int len = s.length();
for (int i = 0; i < len; ++i) {
    char c = s.charAt(i);
    if (c != '&') {
        sb.append(c);
    } else {
        if (i + 2 < len && s.charAt(i + 1) == '#') { 
...
String htmlToString(String string)
html To String
// Converts an HTML fragment to plain text: removes markup, then decodes the
// four basic entities (&quot; &lt; &gt; &amp;).
//
// Strip tags FIRST, while '<' and '>' can only be real tag delimiters. Decoding
// first would turn escaped text such as "&lt;b&gt;" into "<b>" and then delete
// it as if it were markup. (?s) lets '.' span line breaks so that tags wrapped
// over several lines are removed as well.
String ans = string.replaceAll("(?s)<.+?>", "");
// The entity names are literals, so use replace() rather than regex replaceAll().
ans = ans.replace("&quot;", "\"");
ans = ans.replace("&lt;", "<");
ans = ans.replace("&gt;", ">");
// "&amp;" must be decoded last: otherwise "&amp;lt;" would first become "&lt;"
// and then be decoded a second time into "<".
ans = ans.replace("&amp;", "&");
return ans;
String htmlToText(String html)
Simply removes the <...> tags.
char ch = '\u0000';
int idx = 0;
int len = html.length();
StringBuffer sb = new StringBuffer();
for (int i = 0; i < len; i++) {
    ch = html.charAt(i);
    if (ch != '<') {
        sb.append(ch);
...
String htmlToText(String html)
html To Text
// Replaces the six named entities for a-ring, a-umlaut and o-umlaut (lower and
// upper case) with the corresponding characters. No other entity and no markup
// is touched. A null input is returned unchanged.
if (html != null) {
    // Lower-case forms.
    html = html.replace("&aring;", "\u00e5");
    html = html.replace("&auml;", "\u00e4");
    html = html.replace("&ouml;", "\u00f6");
    // Upper-case forms; entity names are case-sensitive, so these are distinct.
    html = html.replace("&Aring;", "\u00c5");
    html = html.replace("&Auml;", "\u00c4");
    html = html.replace("&Ouml;", "\u00d6");
}
return html;
String htmlToText(String input)
Converts HTML to plain text, according to the following rules:
  • Replaces any newlines or carriage returns in the source text with single spaces.
    if (input == null)
        input = "";
    input = input.replaceAll("[\r\n]+", " ");
    StringBuilder buf = new StringBuilder(input.trim());
    int openIdx = 0;
    while ((openIdx = buf.indexOf("<", openIdx)) >= 0) {
        int closeIdx = buf.indexOf(">", openIdx);
        if (closeIdx < 0) {
    ...
    
String htmlToText(String sHTML)
html To Text
String sText = "";
if (sHTML != null) {
    int i = 0;
    while (i < sHTML.length()) {
        char c = sHTML.charAt(i);
        switch (c) {
        case '<':
            i++;
...