Java HTML Parse Jsoup sanitizeHTML(String html)

Here you can find the source of sanitizeHTML(String html)

Description

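Sanitizes an HTML string with jsoup's relaxed whitelist, additionally allowing style attributes, class on span, common table layout attributes, and relative links.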

License

Open Source License

Declaration

public static String sanitizeHTML(String html) 

Method Source Code

//package com.java2s;
/*
 * Copyright 2015 Westfälische Hochschule
 *
 * This file is part of Poodle.
 *
 * Poodle is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Poodle is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with Poodle.  If not, see <http://www.gnu.org/licenses/>.
 */

import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;

public class Main {
    /*
     * HACK: jsoup only accepts relative links in <img src=""> when a base URL is supplied,
     * because without one it cannot check whether the link's protocol is allowed. The real
     * base URL is not available here, so a placeholder is passed instead.
     */
    private static final String DUMMY_BASE_URI = "https://dummydomain.com/";

    /**
     * Cleans the given HTML with jsoup, keeping only the markup permitted by the
     * whitelist built in {@link #buildWhitelist()}.
     *
     * @param html the HTML fragment to sanitize
     * @return the result of {@code Jsoup.clean} for the fragment
     */
    public static String sanitizeHTML(String html) {
        Whitelist allowed = buildWhitelist();
        return Jsoup.clean(html, DUMMY_BASE_URI, allowed);
    }

    /**
     * Builds the whitelist used for sanitizing: jsoup's relaxed whitelist plus a few
     * extra attributes, with relative links preserved.
     */
    private static Whitelist buildWhitelist() {
        Whitelist allowed = Whitelist.relaxed();

        // Permit the style attribute on every element.
        allowed.addAttributes(":all", "style");

        // The CKEditor MathJax plugin needs class on span elements.
        allowed.addAttributes("span", "class");

        // Layout attributes for tables.
        allowed.addAttributes("table", "border", "align", "cellspacing", "cellpadding");

        // Keep our relative links to images or linked exercises intact.
        allowed.preserveRelativeLinks(true);

        return allowed;
    }
}

Related

  1. prettyPrint(String html)
  2. processHtml(String html)
  3. removeAllHtmlTags(String unsafe)
  4. removeHTMLTags(final String text)
  5. removeTag(String html)
  6. stripHTML(final String value)
  7. stripHtml(String html)
  8. tidyHtml(String html)
  9. truncateHTML(String content, int len)