Java String Unescape unescapeText(String s)

Here you can find the source of unescapeText(String s)

Description

Replaces HTML entities (such as &amp;lt; or &amp;eacute;) with the characters they represent (adapted from http://www.rgagnon.com/howto.html)

License

Open Source License

Parameter

Parameter Description
s string to unescape

Return

new string in which each recognized HTML entity has been replaced by the character it represents

Declaration

public static String unescapeText(String s) 

Method Source Code

//package com.java2s;
/**//from   w  w  w.  j  a v a  2  s. c o m
 * *****************************************************************************
 * Copyright (c) 2009 Andrey Loskutov. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse
 * Public License v1.0 which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html Contributor: Andrey Loskutov -
 * initial API and implementation
 * *****************************************************************************
 */

public class Main {
    /**
     * Lookup table used for unescaping. Each row is a pair:
     * index 0 is the HTML entity exactly as it appears in text (including the
     * leading '&amp;' and the trailing ';'), index 1 is the replacement text.
     * Matching is case-sensitive. Note that the non-breaking-space entity is
     * mapped to a plain space character, and both the named and the numeric
     * euro entity map to the EURO SIGN (U+20AC).
     */
    private static final String[][] HTML_ESCAPE_CHARS = { { "&lt;", "<" }, { "&gt;", ">" }, { "&amp;", "&" },
            { "&quot;", "\"" }, { "&agrave;", "\u00e0" }, { "&Agrave;", "\u00c0" }, { "&acirc;", "\u00e2" },
            { "&auml;", "\u00e4" }, { "&Auml;", "\u00c4" }, { "&Acirc;", "\u00c2" }, { "&aring;", "\u00e5" },
            { "&Aring;", "\u00c5" }, { "&aelig;", "\u00e6" }, { "&AElig;", "\u00c6" }, { "&ccedil;", "\u00e7" },
            { "&Ccedil;", "\u00c7" }, { "&eacute;", "\u00e9" }, { "&Eacute;", "\u00c9" }, { "&aacute;", "\u00e1" },
            { "&Aacute;", "\u00c1" }, { "&egrave;", "\u00e8" }, { "&Egrave;", "\u00c8" }, { "&ecirc;", "\u00ea" },
            { "&Ecirc;", "\u00ca" }, { "&euml;", "\u00eb" }, { "&Euml;", "\u00cb" }, { "&iuml;", "\u00ef" },
            { "&Iuml;", "\u00cf" }, { "&iacute;", "\u00ed" }, { "&Iacute;", "\u00cd" }, { "&atilde;", "\u00e3" },
            { "&Atilde;", "\u00c3" }, { "&otilde;", "\u00f5" }, { "&Otilde;", "\u00d5" }, { "&oacute;", "\u00f3" },
            { "&Oacute;", "\u00d3" }, { "&ocirc;", "\u00f4" }, { "&Ocirc;", "\u00d4" }, { "&ouml;", "\u00f6" },
            { "&Ouml;", "\u00d6" }, { "&oslash;", "\u00f8" }, { "&Oslash;", "\u00d8" }, { "&szlig;", "\u00df" },
            { "&ugrave;", "\u00f9" }, { "&Ugrave;", "\u00d9" }, { "&uacute;", "\u00fa" }, { "&Uacute;", "\u00da" },
            { "&ucirc;", "\u00fb" }, { "&Ucirc;", "\u00db" }, { "&uuml;", "\u00fc" }, { "&Uuml;", "\u00dc" },
            { "&nbsp;", " " }, { "&reg;", "\u00AE" }, { "&copy;", "\u00A9" }, { "&euro;", "\u20AC" },
            { "&#8364;", "\u20AC" }

    };

    /**
     * Replaces every HTML entity listed in {@link #HTML_ESCAPE_CHARS} with the
     * text it stands for (adapted from http://www.rgagnon.com/howto.html).
     * <p>
     * The input is scanned once from left to right. Text produced by a
     * replacement is never scanned again, so an escaped ampersand followed by
     * "lt;" yields the literal four characters of the "lt" entity rather than
     * a '&lt;' sign. Ampersands that do not start a known entity, and unknown
     * entities, are copied through unchanged.
     *
     * @param s string to unescape, may be {@code null}
     * @return {@code null} if {@code s} is {@code null}; otherwise a string in
     *         which each known entity has been replaced (the same instance as
     *         {@code s} when it contains no '&amp;' at all)
     */
    public static String unescapeText(String s) {
        if (s == null) {
            return null;
        }
        int amp = s.indexOf('&');
        if (amp < 0) {
            // fast path: nothing that could be an entity
            return s;
        }

        StringBuilder out = new StringBuilder(s.length());
        // index of the first input character not yet copied to 'out'
        int copiedUpTo = 0;
        while (amp >= 0) {
            String[] entry = findEntityAt(s, amp);
            if (entry != null) {
                out.append(s, copiedUpTo, amp).append(entry[1]);
                // resume AFTER the entity so the replacement is not re-decoded
                copiedUpTo = amp + entry[0].length();
                amp = s.indexOf('&', copiedUpTo);
            } else {
                // plain ampersand: leave it in place, it is copied with the next chunk
                amp = s.indexOf('&', amp + 1);
            }
        }
        return out.append(s, copiedUpTo, s.length()).toString();
    }

    /**
     * Finds the table row whose entity occurs in {@code text} starting exactly
     * at {@code offset}.
     *
     * @param text   text being scanned
     * @param offset index of an '&amp;' character in {@code text}
     * @return the matching {entity, replacement} row, or {@code null} if no
     *         known entity starts at {@code offset}
     */
    private static String[] findEntityAt(String text, int offset) {
        for (int k = 0; k < HTML_ESCAPE_CHARS.length; k++) {
            if (text.startsWith(HTML_ESCAPE_CHARS[k][0], offset)) {
                return HTML_ESCAPE_CHARS[k];
            }
        }
        return null;
    }
}

Related

  1. unescapeString(String text)
  2. unescapeString(String text)
  3. unescapeString(String txt)
  4. unescapeText(String s)
  5. unescapeText(String s)
  6. unescapeText(String s)