Java Regular Expression: Remove HTML Tags

Description

The following code shows how to use Java regular expressions to remove HTML tags from a string.


import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates stripping HTML markup from a string with regular expressions.
 *
 * <p>This is a lightweight, regex-based cleaner intended for simple, mostly
 * well-formed snippets. It is not a full HTML parser and must not be relied
 * on as a security sanitizer for untrusted markup.
 */
public class Main {
    /**
     * Matches a whole {@code <script>} element including its body. The end tag
     * may carry trailing whitespace before the closing bracket.
     */
    private static final Pattern SCRIPT_BLOCK = Pattern.compile(
            "<script[^>]*?>[\\s\\S]*?</script[^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Matches a whole {@code <style>} element including its body. The end tag
     * may carry trailing whitespace before the closing bracket.
     */
    private static final Pattern STYLE_BLOCK = Pattern.compile(
            "<style[^>]*?>[\\s\\S]*?</style[^>]*>", Pattern.CASE_INSENSITIVE);

    /** Matches an HTML comment, even when its text contains a closing bracket. */
    private static final Pattern COMMENT_BLOCK = Pattern.compile("<!--[\\s\\S]*?-->");

    /** Matches any remaining single tag, opening or closing. */
    private static final Pattern ANY_TAG = Pattern.compile("<[^>]+>");

    /**
     * Runs the demo: strips the tags from a sample anchor element and prints
     * the remaining text.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        String htmlStr = "<a>demo2s.com</a>";
        System.out.println(delHTMLTag(htmlStr));
    }

    /**
     * Removes HTML markup from the given string.
     *
     * <p>Script elements, style elements and comments are removed together
     * with their content; every other tag is removed but the text between
     * tags is kept. Leading and trailing whitespace of the result is trimmed.
     *
     * @param htmlStr the HTML text to clean; may be {@code null}
     * @return the text with markup removed, or an empty string when
     *         {@code htmlStr} is {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null) {
            return "";
        }

        // Order matters: drop elements whose *content* must disappear before
        // the generic tag pattern runs, otherwise their bodies would survive
        // as plain text.
        String text = SCRIPT_BLOCK.matcher(htmlStr).replaceAll("");
        text = STYLE_BLOCK.matcher(text).replaceAll("");
        // Comments go before generic tags because a '>' inside a comment would
        // otherwise end the generic match early and leak the rest of the comment.
        text = COMMENT_BLOCK.matcher(text).replaceAll("");
        text = ANY_TAG.matcher(text).replaceAll("");

        return text.trim();
    }
}



Previous | Next

Related