Remove tags from an HTML string : HTML « Network « C# / C Sharp






Remove tags from an HTML string

 

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.ComponentModel;

namespace NearForums
{
  /// <summary>
  /// Regex-based helpers for detecting, stripping and rewriting markup in HTML strings.
  /// </summary>
  public static class Utils
  {
    // The patterns are constructed once and reused by every call instead of
    // being handed to the static Regex helpers as strings each time.
    private static readonly Regex HtmlFragmentPattern = new Regex(@"</?(p|div)>");
    private static readonly Regex ClosingTagPattern = new Regex(@"</[^>]+?>");
    private static readonly Regex AnyTagPattern = new Regex(@"<[^>]+?>");
    private static readonly Regex StyleOrScriptPattern = new Regex("(<style.+?</style>)|(<script.+?</script>)", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // ".*?" (not ".+?") so that an empty comment "<!---->" closes on its own
    // terminator instead of running on to the next "-->" and deleting the text in between.
    private static readonly Regex CommentPattern = new Regex("<!--.*?-->", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    private static readonly Regex AnchorPattern = new Regex("<a[^>]+href=\"?'?(?!#[\\w-]+)([^'\">]+)\"?'?[^>]*>(.*?)</a>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Tells whether a string contains an opening or closing p or div tag.
    /// Only lower-case tags written without attributes are recognised.
    /// </summary>
    /// <param name="value">Text to inspect; may be null.</param>
    /// <returns>True when such a tag is found; false otherwise, including for null.</returns>
    public static bool IsHtmlFragment(string value)
    {
      if (value == null)
      {
        return false;
      }
      return HtmlFragmentPattern.IsMatch(value);
    }

    /// <summary>
    /// Remove tags from a html string
    /// </summary>
    /// <param name="value">Html text to strip; may be null.</param>
    /// <returns>
    /// The trimmed text with comments, style/script blocks and all tags removed,
    /// or null when <paramref name="value"/> is null.
    /// </returns>
    public static string RemoveTags(string value)
    {
      if (value == null)
      {
        return null;
      }

      value = CleanHtmlComments(value);
      value = CleanHtmlBehaviour(value);
      // Closing tags become a space so that the text of adjacent elements does not run together.
      value = ClosingTagPattern.Replace(value, " ");
      // Every remaining tag is dropped without a replacement.
      value = AnyTagPattern.Replace(value, "");
      return value.Trim();
    }

    /// <summary>
    /// Clean script and styles html tags and content
    /// </summary>
    /// <param name="value">Html text to clean; may be null.</param>
    /// <returns>The text without style and script elements, or null when the input is null.</returns>
    public static string CleanHtmlBehaviour(string value)
    {
      if (value == null)
      {
        return null;
      }
      return StyleOrScriptPattern.Replace(value, "");
    }

    /// <summary>
    /// Remove the html comments (also html ifs of msword), including empty comments.
    /// </summary>
    /// <param name="value">Html text to clean; may be null.</param>
    /// <returns>The text without html comments, or null when the input is null.</returns>
    public static string CleanHtmlComments(string value)
    {
      if (value == null)
      {
        return null;
      }
      return CommentPattern.Replace(value, "");
    }

    /// <summary>
    /// Adds rel=nofollow to html anchors
    /// </summary>
    /// <remarks>
    /// Each matched anchor is rebuilt with only its href, rel="nofollow" and
    /// target="_blank"; any other attributes it carried are dropped. Anchors whose
    /// href is an in-page fragment (starts with #) are left untouched, as are anchors
    /// whose content contains a line break, because the pattern is not single-line.
    /// </remarks>
    /// <param name="value">Html text to rewrite; may be null.</param>
    /// <returns>The rewritten html, or null when the input is null.</returns>
    public static string HtmlLinkAddNoFollow(string value)
    {
      if (value == null)
      {
        return null;
      }
      return AnchorPattern.Replace(value, "<a href=\"$1\" rel=\"nofollow\" target=\"_blank\">$2</a>");
    }
  }
}

   
  








Related examples in the same category

1.Get Links From HTML
2.Parses the value information from any INPUT tag in an HTML string where the name="" attribute matched the tagID parameter
3.Html Utilities
4.Convert HTML To Text
5.Converts a FontUnit to a size for the HTML FONT tag
6.Strip HTML
7.Sanitize any potentially dangerous tags from the provided raw HTML input using a whitelist based approach
8.Get Type As Html
9.HTML-encodes a string and returns the encoded string.
10.Strips all HTML tags from the specified string.
11.Removes the HTML whitespace.
12.Array To Html Breaked String
13.Show Html Page in String with Process