Unescapes the String text according to the format in escape(XML String text) - Android XML

Android examples for XML:XML String Escape

Description

Unescapes a String by decoding the backslash-u hexadecimal sequences produced by the companion escape(String text) method.

Demo Code

/*
    * Copyright (c) 2008, SQL Power Group Inc.
    *
    * This file is part of SQL Power Library.
    *
    * SQL Power Library is free software; you can redistribute it and/or modify
    * it under the terms of the GNU General Public License as published by
    * the Free Software Foundation; either version 3 of the License, or
    * (at your option) any later version.
    *
    * SQL Power Library is distributed in the hope that it will be useful,
    * but WITHOUT ANY WARRANTY; without even the implied warranty of
    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    * GNU General Public License for more details.
    *
    * You should have received a copy of the GNU General Public License
    * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
    */
    //package com.java2s;
    import java.io.PrintWriter;

    /**
     * Helpers for writing text into XML output using an application-level
     * escaping scheme, plus the matching decoder.
     * <p>
     * Characters that may not appear in the output are written in the
     * six-character "escaped form" {@code \\uNNNN}: a single backslash, a
     * lowercase u, and exactly four hexadecimal digits. (The backslash is
     * doubled in this comment only because the Java compiler processes
     * backslash-u sequences inside comments as well.)
     */
    public class Main {
        /** Length of one escaped form: backslash, 'u', and four hex digits. */
        private static final int ESCAPE_LENGTH = 6;

        public static void main(String[] argv) throws Exception {
            String text = "java2s.com";
            System.out.println(unescape(text));
        }

        /** Number of spaces written before the text by {@link #print} and {@link #println}. */
        public int indent;

        /**
         * Reverses {@link #escape(String)}: every well-formed escaped form
         * (backslash, lowercase u, four hex digits) is replaced by the
         * character with that code; everything else is copied unchanged.
         * <p>
         * A backslash-u that is not followed by four hex digits (for example
         * because the input ends early) is not an escaped form and is copied
         * through literally instead of raising an exception.
         *
         * @param text the string to unescape; may be null
         * @return the unescaped string, or null if {@code text} is null
         */
        static String unescape(String text) {
            if (text == null) {
                return null;
            }

            final int n = text.length();
            StringBuilder unescapedText = new StringBuilder(n);

            int i = 0;
            while (i < n) {
                char ch = text.charAt(i);
                int charVal = (ch == '\\') ? decodeEscapeAt(text, i) : -1;
                if (charVal >= 0) {
                    unescapedText.append((char) charVal);
                    // Skip the whole escaped form so a decoded backslash is
                    // never re-scanned as the start of another escape.
                    i += ESCAPE_LENGTH;
                } else {
                    unescapedText.append(ch);
                    i++;
                }
            }

            return unescapedText.toString();
        }

        /**
         * Decodes the escaped form that starts at {@code start}, if there is one.
         *
         * @param text the string being scanned
         * @param start index of a backslash in {@code text}
         * @return the decoded character value (0..0xffff), or -1 if the
         *         characters at {@code start} are not a complete escaped form
         */
        private static int decodeEscapeAt(String text, int start) {
            if (start + ESCAPE_LENGTH > text.length() || text.charAt(start + 1) != 'u') {
                return -1;
            }
            int value = 0;
            for (int k = start + 2; k < start + ESCAPE_LENGTH; k++) {
                // Character.digit rejects signs and other non-digits by returning -1.
                int digit = Character.digit(text.charAt(k), 16);
                if (digit < 0) {
                    return -1;
                }
                value = (value << 4) | digit;
            }
            return value;
        }

        /**
         * Prints to {@code out} {@link #indent} spaces followed by the escaped
         * form of the given text, followed by a newline.
         *
         * @param out the writer to print to
         * @param text the text to escape and print; null is printed the way
         *        {@link PrintWriter#println(String)} prints a null string
         */
        public void println(PrintWriter out, String text) {
            printIndent(out);
            out.println(escape(text));
        }

        /**
         * Prints to {@code out} {@link #indent} spaces followed by the escaped
         * form of the given text, without a trailing newline.
         *
         * @param out the writer to print to
         * @param text the text to escape and print; null is printed the way
         *        {@link PrintWriter#print(String)} prints a null string
         */
        public void print(PrintWriter out, String text) {
            printIndent(out);
            out.print(escape(text));
        }

        /** Writes {@link #indent} space characters to {@code out}. */
        private void printIndent(PrintWriter out) {
            for (int i = 0; i < indent; i++) {
                out.print(" ");
            }
        }

        /**
         * Returns a copy of the input in which every character that is not
         * allowed through is replaced by its escaped form {@code \\uNNNN},
         * where NNNN is the four-digit lowercase hexadecimal value of the
         * character.
         * <p>
         * A character is copied unchanged when it is tab, line feed, carriage
         * return, in the range 0x20-0xd7ff, or in the range 0xe000-0xfffd (the
         * single-{@code char} part of the character set in the
         * <a href="http://www.w3.org/TR/REC-xml/">XML 1.0 specification</a>,
         * section 2.2). Everything else is escaped, including each half of a
         * surrogate pair. In addition, a backslash is escaped when it appears
         * immediately before a lowercase u, so that text which already looks
         * like an escaped form survives a round trip through
         * {@link #unescape(String)}.
         * <p>
         * This escaping mechanism is not standard XML markup; it is
         * application-level data. No generic XML processor will unescape it,
         * so consumers of the data must call {@link #unescape(String)}
         * themselves.
         *
         * @param text the input string to check for illegal characters; may be null
         * @return the escaped string, or null if {@code text} is null
         */
        static String escape(String text) {
            if (text == null) {
                return null;
            }
            if (text.isEmpty()) {
                return "";
            }

            final int n = text.length();
            // arbitrary amount of extra space for a few escaped characters
            StringBuilder sb = new StringBuilder(n + 10);

            for (int i = 0; i < n; i++) {
                char ch = text.charAt(i);
                // 0 stands in for "no next character" at the end of the input.
                char nextch = (i == n - 1) ? 0 : text.charAt(i + 1);

                if (isCopiedVerbatim(ch, nextch)) {
                    sb.append(ch);
                } else {
                    appendEscapedForm(sb, ch);
                }
            }
            return sb.toString();
        }

        /**
         * Tells whether {@code ch} may be written to the output as-is.
         *
         * @param ch the character under consideration
         * @param nextch the character following {@code ch}, or 0 if none
         */
        private static boolean isCopiedVerbatim(char ch, char nextch) {
            if (ch == '\\') {
                // A backslash is only a problem when it would read as the
                // start of an escaped form.
                return nextch != 'u';
            }
            return ch == 0x09 || ch == 0x0a || ch == 0x0d
                    || (ch >= 0x20 && ch <= 0xd7ff)
                    || (ch >= 0xe000 && ch <= 0xfffd);
        }

        /** Appends backslash, 'u', and the four lowercase hex digits of {@code ch}. */
        private static void appendEscapedForm(StringBuilder sb, char ch) {
            sb.append('\\').append('u');
            for (int shift = 12; shift >= 0; shift -= 4) {
                sb.append(Character.forDigit((ch >> shift) & 0xf, 16));
            }
        }
    }

Related Tutorials