Unescape XML - CSharp System.Xml

CSharp examples for System.Xml:XML String

Description

Unescape XML

Demo Code

/* Source: www.java2s.com
         * Returns the IANA encoding name that is auto-detected from
         * the bytes specified, with the endian-ness of that encoding where appropriate.
         * (method found in org.apache.xerces.impl.XMLEntityManager, originally published
         * by the Apache Software Foundation under the Apache Software License; now being
         * used in iText under the MPL)
         * @param b4    The first four bytes of the input.
         * @return an IANA-encoding string
         * @since 5.0.6
         */
using System.Text;
using System;

/// <summary>
/// Helpers for turning XML character/entity references back into plain text.
/// </summary>
public class Main
{
    /// <summary>
    /// Unescapes a string, replacing decimal numeric character references
    /// (<c>&amp;#nn;</c>) and the five predefined XML entities (<c>lt</c>,
    /// <c>gt</c>, <c>amp</c>, <c>quot</c>, <c>apos</c>) with the characters
    /// they stand for.
    /// </summary>
    /// <remarks>
    /// A numeric reference whose value is not a decimal integer accepted by
    /// <see cref="IsValidCharacterValue(int)"/> (this includes hexadecimal
    /// <c>&amp;#x..;</c> references) is removed from the output entirely.
    /// An ampersand that does not start a recognised entity is copied through
    /// unchanged.
    /// </remarks>
    /// <param name="s">A string that may contain entities.</param>
    /// <returns>The unescaped string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public static String UnescapeXML(String s)
    {
        if (s == null)
        {
            throw new ArgumentNullException("s");
        }

        char[] chars = s.ToCharArray();
        int len = chars.Length;
        StringBuilder sb = new StringBuilder(len);

        for (int i = 0; i < len; i++)
        {
            char current = chars[i];
            if (current == '&')
            {
                // The shortest reference body is two characters ("lt", "#9"),
                // so the terminating ';' cannot appear before index i + 3.
                int pos = FindInArray(';', chars, i + 3);
                if (pos > -1)
                {
                    String esc = new String(chars, i + 1, pos - i - 1);
                    if (esc.StartsWith("#", StringComparison.Ordinal))
                    {
                        int codePoint;
                        if (TryParseCharacterValue(esc.Substring(1), out codePoint))
                        {
                            // ConvertFromUtf32 yields a surrogate pair for code points
                            // above U+FFFF; a plain (char) cast would truncate them.
                            sb.Append(char.ConvertFromUtf32(codePoint));
                        }
                        // Valid or not, the whole reference is consumed; an invalid
                        // one therefore contributes nothing to the output.
                        i = pos;
                        continue;
                    }

                    int replacement = Unescape(esc);
                    if (replacement > 0)
                    {
                        sb.Append((char)replacement);
                        i = pos;
                        continue;
                    }
                }
            }

            sb.Append(current);
        }

        return sb.ToString();
    }

    /// <summary>
    /// Checks if a character value should be escaped/unescaped. The accepted
    /// ranges are those of the XML 1.0 <c>Char</c> production.
    /// </summary>
    /// <param name="c">A character value (Unicode code point).</param>
    /// <returns>true if it's OK to escape or unescape this value.</returns>
    public static bool IsValidCharacterValue(int c)
    {
        return c == 0x9 || c == 0xA || c == 0xD
            || (c >= 0x20 && c <= 0xD7FF)
            || (c >= 0xE000 && c <= 0xFFFD)
            || (c >= 0x10000 && c <= 0x10FFFF);
    }

    /// <summary>
    /// Checks if a character value, given as decimal text, should be
    /// escaped/unescaped.
    /// </summary>
    /// <param name="s">The string representation of an integer.</param>
    /// <returns>
    /// true if <paramref name="s"/> parses as an integer that
    /// <see cref="IsValidCharacterValue(int)"/> accepts; false otherwise
    /// (including for null or non-numeric text).
    /// </returns>
    public static bool IsValidCharacterValue(String s)
    {
        int value;
        return TryParseCharacterValue(s, out value);
    }

    /// <summary>
    /// Parses decimal text into a code point and validates it in one step, so
    /// callers do not have to parse the same text twice.
    /// </summary>
    private static bool TryParseCharacterValue(String s, out int value)
    {
        // Invariant culture: entity text is machine data, not user-facing text.
        return int.TryParse(
                   s,
                   System.Globalization.NumberStyles.Integer,
                   System.Globalization.CultureInfo.InvariantCulture,
                   out value)
               && IsValidCharacterValue(value);
    }

    /// <summary>
    /// Maps a predefined XML entity name (without '&amp;' and ';') to its
    /// character, or returns -1 when the name is not one of the five.
    /// </summary>
    private static int Unescape(String name)
    {
        switch (name)
        {
            case "apos": return '\'';
            case "quot": return '"';
            case "lt": return '<';
            case "gt": return '>';
            case "amp": return '&';
            default: return -1;
        }
    }

    /// <summary>
    /// Returns the index of the first occurrence of <paramref name="needle"/>
    /// in <paramref name="haystack"/> at or after <paramref name="start"/>,
    /// or -1 when there is none (including when start is past the end).
    /// </summary>
    private static int FindInArray(char needle, char[] haystack, int start)
    {
        for (int i = start; i < haystack.Length; i++)
        {
            if (haystack[i] == needle)
            {
                return i;
            }
        }
        return -1;
    }
}

Related Tutorials