Java HTML Unescape unescapeHTML(String s)

Here you can find the source of unescapeHTML(String s)

Description

Turn any HTML escape entities in the string into characters and return the resulting string.

License

Open Source License

Parameter

Parameter Description
s String to be un-escaped.

Exception

Exception Description
NullPointerException if s is null.

Return

un-escaped String.

Declaration

public static String unescapeHTML(String s) 

Method Source Code

//package com.java2s;
/*
 * Copyright (C) 2001-2011 Stephen Ostermiller
 * http://ostermiller.org/contact.pl?regarding=Java+Utilities
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * See LICENSE.txt for details.
 */

import java.util.HashMap;

public class Main {
    /**
     * Named HTML entities understood by {@link #unescapeHTML(String)}, keyed by
     * entity name (without the leading '&amp;' and trailing ';') and mapped to
     * the code point the entity stands for.
     */
    private static final HashMap<String, Integer> htmlEntities = new HashMap<String, Integer>();

    static {
        // Only the core entities are registered here; add further names to
        // this table to have them recognised by unescapeHTML.
        htmlEntities.put("quot", Integer.valueOf(34));
        htmlEntities.put("amp", Integer.valueOf(38));
        htmlEntities.put("apos", Integer.valueOf(39));
        htmlEntities.put("lt", Integer.valueOf(60));
        htmlEntities.put("gt", Integer.valueOf(62));
        htmlEntities.put("nbsp", Integer.valueOf(160));
    }

    /**
     * Turn any HTML escape entities in the string into
     * characters and return the resulting string.
     * <p>
     * Decimal (<code>&amp;#65;</code>) and hexadecimal (<code>&amp;#x41;</code>)
     * numeric references are decoded, including code points above 0xFFFF, as
     * are the named entities registered in this class.  Anything that cannot
     * be decoded is copied to the output verbatim.
     *
     * @param s String to be un-escaped.
     * @return un-escaped String.
     * @throws NullPointerException if s is null.
     *
     * @since ostermillerutils 1.00.00
     */
    public static String unescapeHTML(String s) {
        StringBuilder result = new StringBuilder(s.length());
        int ampInd = s.indexOf('&');
        int lastEnd = 0;
        while (ampInd >= 0) {
            int nextAmp = s.indexOf('&', ampInd + 1);
            int nextSemi = s.indexOf(';', ampInd + 1);
            // Only "&...;" with no other '&' in between is a candidate entity;
            // a bare '&' is left for the final copy of the remaining text.
            if (nextSemi != -1 && (nextAmp == -1 || nextSemi < nextAmp)) {
                String escape = s.substring(ampInd + 1, nextSemi);
                int value = decodeEntity(escape);
                result.append(s, lastEnd, ampInd);
                lastEnd = nextSemi + 1;
                if (value >= 0) {
                    result.appendCodePoint(value);
                } else {
                    // Unknown or malformed entity: output it verbatim.
                    result.append('&').append(escape).append(';');
                }
            }
            ampInd = nextAmp;
        }
        result.append(s, lastEnd, s.length());
        return result.toString();
    }

    /**
     * Resolve the text between '&amp;' and ';' to a Unicode code point.
     *
     * @param escape entity body, e.g. "amp", "#65" or "#x41".
     * @return the code point, or -1 if the entity is unknown, malformed or
     *         outside the valid code point range.
     */
    private static int decodeEntity(String escape) {
        if (escape.startsWith("#")) {
            String digits = escape.substring(1);
            int radix = 10;
            if (digits.startsWith("x") || digits.startsWith("X")) {
                radix = 16;
                digits = digits.substring(1);
            }
            try {
                int value = Integer.parseInt(digits, radix);
                if (value >= 0 && value <= Character.MAX_CODE_POINT) {
                    return value;
                }
            } catch (NumberFormatException ignored) {
                // Not a number: fall through and report "cannot decode".
            }
            return -1;
        }
        Integer named = htmlEntities.get(escape);
        return named == null ? -1 : named.intValue();
    }

    /**
     * Liberal parse method for integer values.  If the input string is a representation of
     * an integer, that value will be returned.  Otherwise the default value is returned.
     * Surrounding white space is NOT significant.
     * <p>
     * If the number starts with a base prefix ("0x" for hex, "0b" for binary, "0c" for
     * octal), it will be parsed with that radix.  Otherwise, the number will be parsed in
     * base 10 radix.
     * <p>
     * This method does NOT throw number format exceptions.
     *
     * @param s String containing a integer value to be parsed
     * @param defaultValue value returned when s is null or cannot be parsed
     * @return parsed integer value or the default value
     * @since ostermillerutils 1.07.01
     */
    public static int parseInt(String s, int defaultValue) {
        Integer integer = parseInteger(s);
        return integer != null ? integer.intValue() : defaultValue;
    }

    /**
     * Liberal parse method for integer values.  If the input string is a representation of
     * an integer, that value will be returned.  Otherwise the default value is returned.
     * Surrounding white space is NOT significant.
     * <p>
     * This method does NOT throw number format exceptions.
     *
     * @param s String containing a integer value to be parsed
     * @param radix number base used during parsing
     * @param defaultValue value returned when s is null or cannot be parsed
     * @return parsed integer value or the default value
     * @since ostermillerutils 1.07.01
     */
    public static int parseInt(String s, int radix, int defaultValue) {
        Integer integer = parseInteger(s, radix);
        return integer != null ? integer.intValue() : defaultValue;
    }

    /**
     * Liberal parse method for integer values.  If the input string is a representation of
     * an integer, that value will be returned.  Otherwise null is returned.
     * Surrounding white space is NOT significant.
     * <p>
     * If the number starts with a base prefix ("0x" for hex, "0b" for binary, "0c" for
     * octal), it will be parsed with that radix.  Otherwise, the number will be parsed in
     * base 10 radix.  At most one prefix is consumed, so the digits following a
     * hex prefix are never mistaken for another prefix.
     * <p>
     * This method does NOT throw number format exceptions.
     *
     * @param s String containing a integer value to be parsed
     * @return parsed integer value, or null if s is null or cannot be parsed
     * @since ostermillerutils 1.07.01
     */
    public static Integer parseInteger(String s) {
        if (s == null) {
            return null;
        }
        s = s.trim();
        int radix = 10;
        // The branches are mutually exclusive: once a prefix is stripped the
        // remainder is pure digits (e.g. "0x0b1" is hex 0b1, not binary 1).
        if (s.startsWith("0x") || s.startsWith("0X")) {
            radix = 16;
            s = s.substring(2);
        } else if (s.startsWith("0c") || s.startsWith("0C")) {
            radix = 8;
            s = s.substring(2);
        } else if (s.startsWith("0b") || s.startsWith("0B")) {
            radix = 2;
            s = s.substring(2);
        }
        return parseInteger(s, radix);
    }

    /**
     * Liberal parse method for integer values.  If the input string is a representation of
     * an integer, that value will be returned.  Otherwise null is returned.
     * Surrounding white space is NOT significant.
     * <p>
     * This method does NOT throw number format exceptions.
     *
     * @param s String containing a integer value to be parsed
     * @param radix number base used during parsing
     * @return parsed integer value, or null if s is null or cannot be parsed
     * @since ostermillerutils 1.07.01
     */
    public static Integer parseInteger(String s, int radix) {
        if (s == null) {
            return null;
        }
        try {
            return Integer.valueOf(s.trim(), radix);
        } catch (NumberFormatException nfx) {
            return null;
        }
    }

    /**
     * Trim any of the characters contained in the second
     * string from the beginning and end of the first.
     *
     * @param s String to be trimmed.
     * @param c list of characters to trim from s; if null or empty, s is
     *        returned unchanged.
     * @return trimmed String, or the empty string if every character of s
     *         is trim-able.
     * @throws NullPointerException if s is null.
     *
     * @since ostermillerutils 1.00.00
     */
    public static String trim(String s, String c) {
        // Dereference s first so a null s fails even when c is null.
        int length = s.length();
        if (c == null || c.length() == 0) {
            return s;
        }
        // Skip leading trim-able characters.
        int start = 0;
        while (start < length && c.indexOf(s.charAt(start)) >= 0) {
            start++;
        }
        // Every character was trim-able (or s was empty).
        if (start == length) {
            return "";
        }
        // Skip trailing trim-able characters; the character at 'start' is
        // known to be kept, so this loop cannot run past it.
        int end = length;
        while (c.indexOf(s.charAt(end - 1)) >= 0) {
            end--;
        }
        return s.substring(start, end);
    }
}

Related

  1. unEscapeHTML(final String escapedHTML)
  2. unescapeHtml(final String input)
  3. unescapeHTML(String comment)
  4. unescapeHTML(String html)
  5. unescapeHtml(String s)
  6. unescapeHtml(String s)
  7. unescapeHTML(String s)
  8. unescapeHTML(String s)
  9. unescapeHTML(String source)