Java Regex Matcher remove HTML tag

Description

Java Regex Matcher remove HTML tag


import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Main {
    public static void main(String[] argv) throws Exception {
        String str = "<b>demo2s.com</b>";
        System.out.println(filterHtml(str));
    }//  ww  w.ja  v  a2  s . c o  m

    private final static String regxpForHtml = "<([^>]*)>";

    public static String filterHtml(String str) {
        Pattern pattern = Pattern.compile(regxpForHtml);
        Matcher matcher = pattern.matcher(str);
        StringBuffer sb = new StringBuffer();
        boolean result1 = matcher.find();
        while (result1) {
            matcher.appendReplacement(sb, "");
            result1 = matcher.find();
        }
        matcher.appendTail(sb);
        return sb.toString();
    }
}

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Main {
    public static void main(String[] argv) throws Exception {
        String htmlStr = "<a>demo2s.com</a>";
        System.out.println(delHTMLTag(htmlStr));
    }/*from   w ww  .java 2  s  . c  om*/

    public static String delHTMLTag(String htmlStr) {
        String regEx_script = "<script[^>]*?>[\\s\\S]*?<\\/script>"; 
        String regEx_style = "<style[^>]*?>[\\s\\S]*?<\\/style>"; 
        String regEx_html = "<[^>]+>";

        Pattern p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
        Matcher m_script = p_script.matcher(htmlStr);
        htmlStr = m_script.replaceAll("");

        Pattern p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);
        Matcher m_style = p_style.matcher(htmlStr);
        htmlStr = m_style.replaceAll(""); 

        Pattern p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
        Matcher m_html = p_html.matcher(htmlStr);
        htmlStr = m_html.replaceAll("");

        return htmlStr.trim();
    }
}



PreviousNext

Related