Java HTML Parse Jsoup clearBody(String html)

Here you can find the source of clearBody(String html)

Description

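Removes all HTML markup from the given string with jsoup and returns the remaining text, inserting a newline at each <br> and <p> element and collapsing consecutive line breaks into one.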

License

Open Source License

Declaration

public static String clearBody(String html) 

Method Source Code


//package com.java2s;
//License from project: Open Source License 

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Whitelist;

/**
 * Holder for a jsoup-based helper that reduces HTML markup to text.
 */
public class Main {
    /**
     * Strips the markup from {@code html} while keeping a line structure.
     *
     * <p>The input is parsed with jsoup, a newline is appended to every
     * {@code br} element and prepended to every {@code p} element, and the
     * serialized document is then cleaned with {@link Whitelist#none()}.
     * Pretty-printing is switched off for both the serialization and the
     * cleaning step so that jsoup does not reformat the inserted newlines.
     *
     * <p>The cleaned text is finally normalized: carriage returns become
     * newlines, a single space directly following a newline is dropped, and
     * runs of consecutive line breaks are collapsed into one newline.
     *
     * <p>NOTE(review): the value comes straight from {@code Jsoup.clean}, so it
     * is presumably still HTML-escaped (e.g. {@code &amp;}) - confirm before
     * treating it as plain text.
     *
     * @param html the HTML markup to reduce to text
     * @return the text content of {@code html} with normalized line breaks
     */
    public static String clearBody(String html) {
        Document parsed = Jsoup.parse(html);
        // Keep line breaks and spacing exactly as they are when calling html().
        parsed.outputSettings(rawOutput());

        // Mark the places where the markup implies a line break.
        parsed.select("br").append("\n");
        parsed.select("p").prepend("\n");

        String markup = parsed.html();
        String stripped = Jsoup.clean(markup, "", Whitelist.none(), rawOutput());

        return stripped
                .replace("\r", "\n")
                .replace("\n ", "\n")
                .replaceAll("[\\n\\r]+", "\n");
    }

    /** Creates fresh output settings with pretty-printing disabled. */
    private static Document.OutputSettings rawOutput() {
        return new Document.OutputSettings().prettyPrint(false);
    }
}

Related

  1. cleanHTML(final String html)
  2. cleanHtmlCode(String html)
  3. cleanHtmlFromString(String stringToClean)
  4. cleanHTMLTags(String str)
  5. cleanupHtmlDoc(String s)
  6. coverTag(String html, String... tagNames)
  7. extractRssUrl(String html, URI base)
  8. filter(String html)
  9. fixHtml(String htmlContent, String outputFile, String contentFile)