Removes HTML tags and comments. - CSharp System

CSharp examples for System:String HTML

Description

Removes HTML tags and comments.

Demo Code

// BDHero is free software: you can redistribute it and/or modify
using System.Threading;
using System.Text.RegularExpressions;
using System.Net;
using System.Collections.Generic;
using System;/*from   w  w  w.j  a v  a2 s  . co  m*/

/// <summary>
/// String extension methods for stripping HTML markup and for performing
/// regular-expression based replacements.
/// </summary>
/// <remarks>
/// The class is static because C# only permits extension methods
/// (methods whose first parameter is marked <c>this</c>) inside static classes.
/// </remarks>
public static class Main
{
    // Matches HTML comments. Singleline makes '.' match newlines so that
    // comments spanning several lines are removed as well.
    private static readonly Regex HtmlCommentRegex =
        new Regex(@"<!--.*?-->", RegexOptions.Singleline);

    // Matches opening and closing HTML tags such as <p>, </p> or <a href="x">.
    private static readonly Regex HtmlTagRegex =
        new Regex(@"</?[a-z][a-z0-9]*[^<>]*>", RegexOptions.IgnoreCase);

    // Matches any run of whitespace (\s already covers \n, \r and \f).
    private static readonly Regex WhitespaceRegex = new Regex(@"\s+");

    /// <summary>
    /// Removes HTML tags and comments, collapses every run of whitespace
    /// into a single space and trims the result.
    /// </summary>
    /// <param name="str">The HTML text to clean.</param>
    /// <returns>The text with tags and comments removed.</returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="str"/> is null.
    /// </exception>
    // TODO: Write unit tests
    public static string StripHtml(this string str)
    {
        // Comments go first: the tag pattern would otherwise consume the "-->"
        // terminator of a comment such as "<!-- <b -->" and leave "<!--" behind.
        string withoutComments = HtmlCommentRegex.Replace(str, "");
        string withoutTags = HtmlTagRegex.Replace(withoutComments, "");
        return WhitespaceRegex.Replace(withoutTags, " ").Trim();
    }

    /// <summary>
    /// Perform a substring replace using a regular expression and a match evaluator.
    /// </summary>
    /// <param name="input">
    /// The target of the replacement.
    /// </param>
    /// <param name="pattern">
    /// The pattern to match.
    /// </param>
    /// <param name="evaluator">
    /// Callback that produces the replacement text for each match.
    /// </param>
    /// <returns>
    /// A new string.
    /// </returns>
    public static string RegexReplace(this String input, string pattern, MatchEvaluator evaluator)
    {
        return Regex.Replace(input, pattern, evaluator);
    }

    /// <summary>
    /// Perform a substring replace using a regular expression and one or more patterns.
    /// Patterns are applied in order, each one to the output of the previous one.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="patterns"/> or <paramref name="replacements"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// Thrown when the list of replacements is not the same length as the list of patterns.
    /// </exception>
    /// <param name="input">
    /// The target of the replacement.
    /// </param>
    /// <param name="patterns">
    /// The patterns.
    /// </param>
    /// <param name="replacements">
    /// The replacements; element i replaces matches of pattern i.
    /// </param>
    /// <returns>
    /// A new string.
    /// </returns>
    public static String RegexReplace(this String input, IEnumerable<string> patterns, IEnumerable<string> replacements)
    {
        if (patterns == null)
        {
            throw new ArgumentNullException("patterns");
        }
        if (replacements == null)
        {
            throw new ArgumentNullException("replacements");
        }

        // Materialize once so that the sequences are enumerated a single time
        // and can be indexed in lock-step.
        List<string> patternList = new List<string>(patterns);
        List<string> replacementList = new List<string>(replacements);
        if (replacementList.Count != patternList.Count)
        {
            throw new ArgumentException("Mismatched pattern and replacement lists.");
        }

        for (var i = 0; i < patternList.Count; i++)
        {
            input = Regex.Replace(input, patternList[i], replacementList[i]);
        }

        return input;
    }

    #region CsQuery.ExtensionMethods

    // https://github.com/jamietre/CsQuery/blob/master/source/CsQuery/ExtensionMethods/ExtensionMethods.cs

    /// <summary>
    /// Perform a substring replace using a regular expression.
    /// </summary>
    ///
    /// <param name="input">
    /// The target of the replacement.
    /// </param>
    /// <param name="pattern">
    /// The pattern to match.
    /// </param>
    /// <param name="replacement">
    /// The replacement string.
    /// </param>
    ///
    /// <returns>
    /// A new string.
    /// </returns>
    public static String RegexReplace(this String input, string pattern, string replacement)
    {
        // A single pattern needs no intermediate arrays or lists.
        return Regex.Replace(input, pattern, replacement);
    }

    #endregion
}

Related Tutorials