Implements a StringTokenizer class for splitting a string into substrings using a set of delimiters. : String Split « Data Types « C# / C Sharp






Implements a StringTokenizer class for splitting a string into substrings using a set of delimiters.

   

#region Using Directives
using System;
using System.Collections.Generic;
#endregion

/*
 * $author: QmQ
 * $source: http://www.codeproject.com/useritems/SimpleStringTokenizer.asp
 * $date:   10-June-2006
 */
namespace CBRSystem.Data
{

    #region [Summary and remarks]
    /// <summary>
    /// Implements a StringTokenizer class for splitting a string
    /// into substrings using a set of delimiters.
    /// </summary>
    /// <remarks>
    /// C# version of the java.util.StringTokenizer class.<para/>
    /// The whole input is tokenized eagerly in the constructor; the resulting tokens are then
    /// served through a forward-only cursor (<c>HasMoreTokens</c> / <c>NextToken</c>), by index,
    /// or through <c>foreach</c>.<para/>
    /// It implements all of its Java equivalent's methods apart from those only needed by the Enumeration interface.
    /// The Java methods are exposed as properties with C#-style Pascal-cased names, so for example
    /// Java's <c>nextToken()</c> corresponds to the <c>NextToken</c> property.
    /// </remarks>
    #endregion
    public class StringTokenizer : IEnumerable<string>
    {
        /// <summary>
        /// String containing the default set of delimiters which are <c>" \t\n\r\f"</c>:
        /// the space character, the tab character, the newline character, the carriage-return character, and the form-feed character.
        /// </summary>
        public const string DefaultDelimiters = " \t\n\r\f";

        // Every character of this string is an individual delimiter.
        private readonly string delims = DefaultDelimiters;
        // All tokens, extracted once at construction time.
        private readonly string[] tokens;
        // String substituted for empty tokens when they are requested.
        private readonly string empty;
        // Cursor used by HasMoreTokens / NextToken / CountTokens / Reset.
        private int index = 0;

        #region [Constructors]
        /// <summary>
        /// Constructs a string tokenizer for the specified string using the <see cref="DefaultDelimiters">default delimiters</see>.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
        public StringTokenizer(string str)
            : this(str, DefaultDelimiters, false, false, String.Empty)
        {
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string (each <see cref="System.Char"/> will be used as a delimiter).
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
        public StringTokenizer(string str, string delims)
            : this(str, delims, false, false, String.Empty)
        {
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string.
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
        public StringTokenizer(string str, params char[] delims)
            : this(str, delims == null ? null : new string(delims), false, false, String.Empty)
        {
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters and optionally returning them as tokens.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string (each <see cref="System.Char"/> will be used as a delimiter).
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <param name="returnDelims">If set to <c>true</c> the encountered delimiters will also be returned as tokens.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
        public StringTokenizer(string str, string delims, bool returnDelims)
            : this(str, delims, returnDelims, false, String.Empty)
        {
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters,
        /// optionally returning them as tokens. Also empty tokens may be returned using the <see cref="System.String.Empty"/> string.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string (each <see cref="System.Char"/> will be used as a delimiter).
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <param name="returnDelims">If set to <c>true</c> the encountered delimiters will also be returned as tokens.</param>
        /// <param name="returnEmpty">If set to <c>true</c> empty tokens will also be returned.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
        public StringTokenizer(string str, string delims, bool returnDelims, bool returnEmpty)
            : this(str, delims, returnDelims, returnEmpty, String.Empty)
        {
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters,
        /// optionally returning them as tokens. Also empty tokens may be returned using the <paramref name="empty"/> string.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string (each <see cref="System.Char"/> will be used as a delimiter).
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <param name="returnDelims">If set to <c>true</c> the encountered delimiters will also be returned as tokens.</param>
        /// <param name="returnEmpty">If set to <c>true</c> empty tokens will also be returned.</param>
        /// <param name="empty">The string to be returned as an empty token.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
        public StringTokenizer(string str, string delims, bool returnDelims, bool returnEmpty, string empty)
        {
            if (delims != null)
            {
                this.delims = delims;
            }
            this.empty = empty;
            this.tokens = Tokenize(str, this.delims, returnDelims, returnEmpty, empty);
        }
        #endregion

        #region [The big tokenization method]
        /// <summary>
        /// Splits <paramref name="str"/> into the final token array.
        /// </summary>
        /// <param name="str">The string to be tokenized; a <c>null</c> value results in a <see cref="System.NullReferenceException"/>.</param>
        /// <param name="delims">The delimiter characters (never <c>null</c> here).</param>
        /// <param name="returnDelims">Whether every delimiter character is emitted as a one-character token.</param>
        /// <param name="returnEmpty">Whether empty tokens are emitted (see the branch comments for where).</param>
        /// <param name="empty">The string emitted in place of an empty token.</param>
        /// <returns>The extracted tokens in input order.</returns>
        private static string[] Tokenize(string str, string delims, bool returnDelims, bool returnEmpty, string empty)
        {
            if (returnDelims)
            {
                return TokenizeWithDelimiters(str, delims, returnEmpty, empty);
            }

            char[] separators = delims.ToCharArray();
            if (!returnEmpty)
            {
                return str.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            }

            // Keep every empty entry produced by Split (including leading and trailing ones)
            // and substitute the caller-supplied placeholder when it differs from String.Empty.
            string[] parts = str.Split(separators, StringSplitOptions.None);
            if (empty != String.Empty)
            {
                for (int i = 0; i < parts.Length; i++)
                {
                    if (parts[i] == String.Empty)
                    {
                        parts[i] = empty;
                    }
                }
            }
            return parts;
        }

        /// <summary>
        /// Single-pass scanner used when delimiters must be returned as tokens.
        /// Emits each run of non-delimiter characters as one token and each delimiter character
        /// as its own token. When <paramref name="returnEmpty"/> is set, the <paramref name="empty"/>
        /// string is emitted between two directly adjacent delimiters (not before a leading
        /// or after a trailing delimiter).
        /// </summary>
        private static string[] TokenizeWithDelimiters(string str, string delims, bool returnEmpty, string empty)
        {
            // String.Split treats an empty separator set as "split on white space"; use the same
            // rule here so that both tokenization modes agree on what a delimiter is.
            bool splitOnWhiteSpace = delims.Length == 0;

            List<string> result = new List<string>();
            int runStart = 0; // start of the current run of non-delimiter characters

            for (int i = 0; i < str.Length; i++)
            {
                char c = str[i];
                bool isDelimiter = splitOnWhiteSpace ? Char.IsWhiteSpace(c) : delims.IndexOf(c) >= 0;
                if (!isDelimiter)
                {
                    continue;
                }

                if (i > runStart)
                {
                    result.Add(str.Substring(runStart, i - runStart));
                }
                else if (returnEmpty && i > 0)
                {
                    // i == runStart and i > 0: the previous character was a delimiter too.
                    result.Add(empty);
                }

                result.Add(new string(c, 1));
                runStart = i + 1;
            }

            // Trailing run after the last delimiter (or the whole string when none was found).
            if (runStart < str.Length)
            {
                result.Add(str.Substring(runStart));
            }

            return result.ToArray();
        }
        #endregion

        #region [Properties covering Java methods]
        /// <summary>
        /// Tests if there are more tokens available from this tokenizer's string.
        /// If this property returns <c>true</c>, then a subsequent
        /// use of the <see cref="NextToken"/> property will successfully return a token.
        /// </summary>
        /// <value>
        ///   <c>true</c> if more tokens are available; otherwise <c>false</c>.
        /// </value>
        public bool HasMoreTokens
        {
            get { return this.index < this.tokens.Length; }
        }

        /// <summary>
        /// Gets the next token and advances the current position.
        /// </summary>
        /// <remarks>Reading this property has a side effect: every read moves the position forward by one token.</remarks>
        /// <value>The next token.</value>
        /// <exception cref="System.IndexOutOfRangeException">Thrown when trying to get a token which doesn't exist.
        /// Usually caused by not checking if the <see cref="HasMoreTokens"/> property returns <c>true</c> before trying to get the next token.</exception>
        public string NextToken
        {
            get { return this.tokens[this.index++]; }
        }

        /// <summary>
        /// Counts the remaining tokens - the number of times the
        /// <see cref="NextToken"/> property can be used before it throws an exception.
        /// </summary>
        /// <value>The number of remaining tokens.</value>
        /// <seealso cref="Count"/>
        public int CountTokens
        {
            get { return this.tokens.Length - this.index; }
        }
        #endregion

        #region [New methods/properties]
        /// <summary>
        /// Gets the total number of tokens extracted.
        /// </summary>
        /// <remarks>
        /// Equivalent not available in Java!
        /// This property returns the total number of extracted tokens,
        /// contrary to <see cref="CountTokens"/>.
        /// </remarks>
        /// <value>The number of tokens extracted.</value>
        /// <seealso cref="CountTokens"/>
        public int Count
        {
            get { return this.tokens.Length; }
        }

        /// <summary>
        /// Gets the token with the specified index from the tokenizer without moving the current position index.
        /// </summary>
        /// <remarks>Equivalent not available in Java!</remarks>
        /// <param name="index">The index of the token to get.</param>
        /// <value>The token with the given index</value>
        /// <exception cref="System.IndexOutOfRangeException">Thrown when trying to get a token which doesn't exist, that is when
        /// <paramref name="index"/> is equal to or greater than <see cref="Count"/> or <paramref name="index"/> is negative.</exception>
        public string this[int index]
        {
            get { return this.tokens[index]; }
        }

        /// <summary>
        /// Resets the current position index so that the tokens can be extracted again.
        /// </summary>
        /// <remarks>Equivalent not available in Java!</remarks>
        public void Reset()
        {
            this.index = 0;
        }

        /// <summary>
        /// Gets the currently set string for empty tokens.
        /// </summary>
        /// <remarks>Default is <c>System.String.Empty</c></remarks>
        /// <value>The empty token string.</value>
        public string EmptyString
        {
            get { return this.empty; }
        }
        #endregion

        #region [Java-compliant methods]
        // The Java-named aliases below are intentionally left disabled.
        /*
        /// <summary>
        /// Tests if there are more tokens available from this tokenizer's string.
        /// If this method returns <c>true</c>, then a subsequent call to <see cref="M:nextToken"/> will successfully return a token.
        /// </summary>
        /// <returns>
        ///   <c>true</c> if and only if there is at least one token in the string after the current position; otherwise <c>false</c>.
        /// </returns>
        /// <seealso cref="M:nextToken"/>
        public bool hasMoreTokens()
        {
            return HasMoreTokens;
        }

        /// <summary>
        /// Returns the next token from this string tokenizer.
        /// </summary>
        /// <returns>The next token from this string tokenizer.</returns>
        public string nextToken()
        {
            return NextToken;
        }

        /// <summary>
        /// Calculates the number of times that this tokenizer's <see cref="M:nextToken"/> method can be called before it generates an exception. The current position is not advanced.
        /// </summary>
        /// <returns>The number of tokens remaining in the string using the current delimiter set.</returns>
        public int countTokens()
        {
            return CountTokens;
        }
        */
        #endregion

        #region [IEnumerable implementation]
        /// <summary>
        /// Returns an enumerator that iterates through the remaining tokens.
        /// </summary>
        /// <remarks>
        /// The enumerator shares the tokenizer's current position: it starts at the token
        /// <see cref="NextToken"/> would return and advances that position as it goes.
        /// Call <see cref="Reset"/> before enumerating again.
        /// </remarks>
        /// <returns>
        /// A <see cref="T:System.Collections.Generic.IEnumerator`1"/> that can be used to iterate through the remaining tokens.
        /// </returns>
        public IEnumerator<string> GetEnumerator()
        {
            while (this.HasMoreTokens)
            {
                yield return this.NextToken;
            }
        }

        /// <summary>
        /// Non-generic counterpart of <see cref="GetEnumerator"/>; forwards to it.
        /// </summary>
        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }
        #endregion

    }
}

   
    
    
  








Related examples in the same category

1.Split with space
2.Split with a list of dividers
3.Split spaces with space
4.Show string in proper case
5.use the Split() method to split strings
6.Splitting Strings
7.Split and join strings
8.Tokenize strings
9.Char String foreach
10.Split a string delimited by another string and return 2 non-empty elements
11.Split a string delimited by another string and return 2 elements
12.Split the original string at the delimiter and return all non-empty elements
13.Split a string delimited by another string and return all elements
14.Split a string delimited by characters and return all elements
15.Split a string delimited by characters and return 2 non-empty elements
16.Split a string delimited by characters and return 2 elements
17.Split a string delimited by characters and return all non-empty elements
18.Use StringSplitOptions enumeration to include or exclude substrings generated by the Split method.
19.String Tokenizer
20.Can parse a string representing a string[] into an actual string[].
21.Converts a collection of tags to a single string representation containing the tags separated by a comma
22.Takes in any string and convert it into a Byte array, suitable for e.g. insertion into a REG_BINARY Registry value.
23.Splits the string into lines.
24.Splits the string into words (all white space is removed).
25.Split Quoted String
26.Helper class to split a long word into a single one.
27.Splits the string into an array, using the separator.
28.Splits string name into a readable string based on camel casing.