Convert HTML To Text : HTML « Network « C# / C Sharp






Convert HTML To Text

 
// Copyright (c) 2010
// by http://openlightgroup.net/

using System;
using System.Data;
using System.Linq;
using System.Web;
using System.Xml.Linq;
using System.Net.Mail;
using System.Web.Mail;
using System.Text;
using System.Collections.Generic;
using System.IO;
using System.Web.Security;

namespace SilverlightDebateForum
{
    /// <summary>
    /// Static helpers that reduce an HTML fragment to plain text by turning
    /// line-break tags into newlines and removing every remaining tag.
    /// </summary>
    public class Utility
    {
        // Matches any spelling of a line-break tag -- <br>, <BR>, <br/>, <br />,
        // < br / > -- together with the whitespace surrounding it. The slash is
        // optional so that the non-self-closing forms are matched as well.
        private static readonly System.Text.RegularExpressions.Regex BrTagRegex =
            new System.Text.RegularExpressions.Regex("\\s*<\\s*[bB][rR]\\s*/?\\s*>\\s*");

        // Matches anything that looks like a tag: '<', any run of non-'>' characters, '>'.
        private static readonly System.Text.RegularExpressions.Regex AnyTagRegex =
            new System.Text.RegularExpressions.Regex("<[^>]*>");

        #region ConvertToText
        /// <summary>
        /// Converts an HTML fragment to plain text: line-break tags become
        /// <see cref="Environment.NewLine"/> and every other tag is replaced by a
        /// single space. Character entities are not decoded.
        /// </summary>
        /// <param name="sHTML">The HTML to convert. May be null or empty.</param>
        /// <returns>
        /// The plain-text rendering, or an empty string when
        /// <paramref name="sHTML"/> is null or empty.
        /// </returns>
        public static string ConvertToText(string sHTML)
        {
            // Nothing to convert; avoid dereferencing a null reference below.
            if (string.IsNullOrEmpty(sHTML))
            {
                return string.Empty;
            }

            // The two most common spellings are replaced literally first so that
            // whitespace around them is preserved exactly as before.
            string sContent = sHTML
                .Replace("<br />", Environment.NewLine)
                .Replace("<br>", Environment.NewLine);

            // Remaining variants (<BR>, <br/>, < br / >, ...) are handled by the regex.
            sContent = FormatText(sContent, true);

            return StripTags(sContent, true);
        }
        #endregion

        #region FormatText
        /// <summary>
        /// Replaces every variant of the line-break tag (any letter case, with or
        /// without the closing slash, with embedded spaces), including the
        /// whitespace immediately around it, by <see cref="Environment.NewLine"/>.
        /// </summary>
        /// <param name="HTML">The HTML to process.</param>
        /// <param name="RetainSpace">
        /// Currently ignored; kept so that existing callers continue to compile.
        /// </param>
        /// <returns>The input with line-break tags replaced by newlines.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="HTML"/> is null.</exception>
        public static string FormatText(string HTML, bool RetainSpace)
        {
            // Replace tags by the replacement string and return the modified string.
            return BrTagRegex.Replace(HTML, Environment.NewLine);
        }
        #endregion

        #region StripTags
        /// <summary>
        /// Removes every tag-like sequence (<c>&lt;...&gt;</c>) from the input.
        /// </summary>
        /// <param name="HTML">The HTML to process.</param>
        /// <param name="RetainSpace">
        /// When true each tag is replaced by a single space; when false tags are
        /// removed without leaving anything behind.
        /// </param>
        /// <returns>The input with all tags replaced.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="HTML"/> is null.</exception>
        public static string StripTags(string HTML, bool RetainSpace)
        {
            // Set up the replacement string.
            string RepString = RetainSpace ? " " : "";

            // Replace tags by the replacement string and return the modified string.
            return AnyTagRegex.Replace(HTML, RepString);
        }
        #endregion
    }
}

   
  








Related examples in the same category

1.Get Links From HTML
2.Parses the value information from any INPUT tag in an HTML string where the name="" attribute matched the tagID parameter
3.Html Utilities
4.Converts a FontUnit to a size for the HTML FONT tag
5.Strip HTML
6.Remove tags from a html string
7.Sanitize any potentially dangerous tags from the provided raw HTML input using a whitelist based approach
8.Get Type As Html
9.HTML-encodes a string and returns the encoded string.
10.Strips all HTML tags from the specified string.
11.Removes the HTML whitespace.
12.Array To Html Breaked String
13.Show Html Page in String with Process