A collection of all character entities defined in the HTML4 standard. : HTML Output « Servlets « Java






A collection of all character entities defined in the HTML4 standard.

     
/**
 * 
 * LibXML : a free Java layouting library
 * 
 *
 * Project Info:  http://reporting.pentaho.org/libxml/
 *
 * (C) Copyright 2006-2008, by Object Refinery Ltd, Pentaho Corporation and Contributors.
 *
 * This library is free software; you can redistribute it and/or modify it under the terms
 * of the GNU Lesser General Public License as published by the Free Software Foundation;
 * either version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
 * in the United States and other countries.]
 *
 *
 * ------------
 * HtmlCharacterEntities.java
 * ------------
 */

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;

/**
 * A collection of all character entities defined in the HTML4 standard. The
 * property key is the entity name (without the surrounding ampersand and
 * semicolon), the property value is the decoded string, which is always
 * exactly one character long.
 *
 * @author Thomas Morgner
 */
public class HtmlCharacterEntities extends Properties
{
  /**
   * The shared instance for this entity-parser implementation; created on
   * first use by {@link #getEntityParser()}.
   */
  private static CharacterEntityParser entityParser;
  private static final long serialVersionUID = 5118172339379209383L;

  /**
   * Gets the character entity parser for HTML content. The CharacterEntity
   * parser translates known characters into predefined entities and back.
   * <p>
   * The parser is created lazily on the first call and cached in a static
   * field afterwards. The creation is not synchronized.
   *
   * @return the character entity parser instance.
   */
  public static CharacterEntityParser getEntityParser()
  {
    if (entityParser == null)
    {
      entityParser = new CharacterEntityParser(new HtmlCharacterEntities());
    }
    return entityParser;
  }

  /**
   * Creates an instance that is populated with the HTML4 entity definitions.
   * The table is split over this constructor and {@link #fillMoreEntities()}.
   */
  public HtmlCharacterEntities()
  {
    setProperty("ang", "\u2220");
    setProperty("spades", "\u2660");
    setProperty("frasl", "\u2044");
    setProperty("copy", "\u00a9");
    setProperty("Upsilon", "\u03a5");
    setProperty("rsquo", "\u2019");
    setProperty("sdot", "\u22c5");
    setProperty("beta", "\u03b2");
    setProperty("egrave", "\u00e8");
    setProperty("Pi", "\u03a0");
    setProperty("micro", "\u00b5");
    setProperty("lArr", "\u21d0");
    setProperty("Beta", "\u0392");
    setProperty("eacute", "\u00e9");
    setProperty("agrave", "\u00e0");
    setProperty("sbquo", "\u201a");
    setProperty("ucirc", "\u00fb");
    setProperty("mdash", "\u2014");
    setProperty("rho", "\u03c1");
    setProperty("Nu", "\u039d");
    setProperty("ne", "\u2260");
    setProperty("nsub", "\u2284");
    setProperty("AElig", "\u00c6");
    setProperty("raquo", "\u00bb");
    setProperty("aacute", "\u00e1");
    setProperty("le", "\u2264");
    setProperty("harr", "\u2194");
    setProperty("frac34", "\u00be");
    setProperty("bdquo", "\u201e");
    setProperty("cup", "\u222a");
    setProperty("frac14", "\u00bc");
    setProperty("exist", "\u2203");
    setProperty("Ccedil", "\u00c7");
    setProperty("phi", "\u03c6");
    setProperty("Lambda", "\u039b");
    setProperty("alpha", "\u03b1");
    setProperty("sigma", "\u03c3");
    setProperty("thetasym", "\u03d1");
    setProperty("Rho", "\u03a1");
    setProperty("hArr", "\u21d4");
    setProperty("Dagger", "\u2021");
    setProperty("otilde", "\u00f5");
    setProperty("Epsilon", "\u0395");
    setProperty("iuml", "\u00ef");
    setProperty("Phi", "\u03a6");
    setProperty("prod", "\u220f");
    setProperty("Aring", "\u00c5");
    setProperty("rlm", "\u200f");
    setProperty("yen", "\u00a5");
    setProperty("emsp", "\u2003");
    setProperty("rang", "\u232a");
    setProperty("Atilde", "\u00c3");
    setProperty("Iuml", "\u00cf");
    setProperty("iota", "\u03b9");
    setProperty("deg", "\u00b0");
    setProperty("prop", "\u221d");
    setProperty("and", "\u2227");
    setProperty("para", "\u00b6");
    setProperty("darr", "\u2193");
    setProperty("curren", "\u00a4");
    setProperty("crarr", "\u21b5");
    setProperty("not", "\u00ac");
    setProperty("Iota", "\u0399");
    setProperty("aelig", "\u00e6");
    setProperty("rdquo", "\u201d");
    setProperty("Ocirc", "\u00d4");
    setProperty("ntilde", "\u00f1");
    setProperty("reg", "\u00ae");
    setProperty("zeta", "\u03b6");
    setProperty("middot", "\u00b7");
    setProperty("cent", "\u00a2");
    setProperty("quot", "\"");
    setProperty("hellip", "\u2026");
    setProperty("Zeta", "\u0396");
    setProperty("rceil", "\u2309");
    setProperty("eta", "\u03b7");
    setProperty("nbsp", "\u00a0");
    setProperty("rarr", "\u2192");
    setProperty("frac12", "\u00bd");
    setProperty("real", "\u211c");
    setProperty("mu", "\u03bc");
    setProperty("dArr", "\u21d3");
    setProperty("divide", "\u00f7");
    setProperty("cap", "\u2229");
    setProperty("chi", "\u03c7");
    setProperty("times", "\u00d7");
    setProperty("euml", "\u00eb");
    setProperty("Gamma", "\u0393");
    setProperty("loz", "\u25ca");
    setProperty("acute", "\u00b4");
    setProperty("Omega", "\u03a9");
    setProperty("ndash", "\u2013");
    setProperty("clubs", "\u2663");
    setProperty("macr", "\u00af");
    setProperty("Yacute", "\u00dd");
    setProperty("Ugrave", "\u00d9");
    setProperty("Euml", "\u00cb");
    setProperty("Eta", "\u0397");
    setProperty("sect", "\u00a7");
    setProperty("asymp", "\u2248");
    setProperty("ordm", "\u00ba");
    setProperty("rArr", "\u21d2");
    setProperty("radic", "\u221a");
    setProperty("Uacute", "\u00da");
    setProperty("omicron", "\u03bf");
    setProperty("Chi", "\u03a7");
    setProperty("aring", "\u00e5");
    setProperty("Theta", "\u0398");
    setProperty("supe", "\u2287");
    setProperty("ensp", "\u2002");
    setProperty("uml", "\u00a8");
    setProperty("ccedil", "\u00e7");
    setProperty("lambda", "\u03bb");
    setProperty("gt", "\u003e");
    setProperty("uarr", "\u2191");
    setProperty("alefsym", "\u2135");
    setProperty("auml", "\u00e4");
    setProperty("sup3", "\u00b3");
    setProperty("circ", "\u02c6");
    setProperty("lsquo", "\u2018");
    setProperty("Auml", "\u00c4");
    setProperty("dagger", "\u2020");
    setProperty("Kappa", "\u039a");
    setProperty("cong", "\u2245");
    setProperty("zwnj", "\u200c");
    setProperty("shy", "\u00ad");
    setProperty("ouml", "\u00f6");
    setProperty("diams", "\u2666");
    setProperty("uArr", "\u21d1");
    setProperty("atilde", "\u00e3");
    setProperty("THORN", "\u00de");
    setProperty("or", "\u2228");
    setProperty("Ograve", "\u00d2");
    setProperty("ocirc", "\u00f4");
    // the HTML4 name of the plus-minus sign is "plusmn"
    setProperty("plusmn", "\u00b1");
    setProperty("Ouml", "\u00d6");
    setProperty("nabla", "\u2207");
    setProperty("psi", "\u03c8");
    setProperty("sigmaf", "\u03c2");
    setProperty("euro", "\u20ac");
    setProperty("sube", "\u2286");
    setProperty("sup2", "\u00b2");
    setProperty("laquo", "\u00ab");
    setProperty("forall", "\u2200");
    setProperty("Oacute", "\u00d3");
    setProperty("iexcl", "\u00a1");
    fillMoreEntities();
  }

  /**
   * Externalized initialization method to make CheckStyle happy. Registers
   * the second half of the entity table.
   */
  private void fillMoreEntities()
  {
    setProperty("piv", "\u03d6");
    setProperty("minus", "\u2212");
    setProperty("zwj", "\u200d");
    setProperty("tau", "\u03c4");
    setProperty("Mu", "\u039c");
    setProperty("gamma", "\u03b3");
    setProperty("sup", "\u2283");
    setProperty("Psi", "\u03a8");
    setProperty("omega", "\u03c9");
    setProperty("Oslash", "\u00d8");
    setProperty("weierp", "\u2118");
    setProperty("Igrave", "\u00cc");
    setProperty("OElig", "\u0152");
    setProperty("sup1", "\u00b9");
    setProperty("cedil", "\u00b8");
    setProperty("upsilon", "\u03c5");
    setProperty("equiv", "\u2261");
    setProperty("isin", "\u2208");
    setProperty("Delta", "\u0394");
    setProperty("yacute", "\u00fd");
    setProperty("ugrave", "\u00f9");
    setProperty("ge", "\u2265");
    setProperty("Iacute", "\u00cd");
    setProperty("brvbar", "\u00a6");
    setProperty("Tau", "\u03a4");
    setProperty("Prime", "\u2033");
    // right floor is U+230B in the HTML4 symbol entity set
    setProperty("rfloor", "\u230b");
    setProperty("Ecirc", "\u00ca");
    setProperty("ETH", "\u00d0");
    setProperty("int", "\u222b");
    setProperty("xi", "\u03be");
    setProperty("uacute", "\u00fa");
    setProperty("bull", "\u2022");
    setProperty("Scaron", "\u0160");
    setProperty("theta", "\u03b8");
    setProperty("yuml", "\u00ff");
    setProperty("oplus", "\u2295");
    setProperty("part", "\u2202");
    setProperty("ldquo", "\u201c");
    setProperty("Icirc", "\u00ce");
    setProperty("Yuml", "\u0178");
    setProperty("eth", "\u00f0");
    setProperty("Acirc", "\u00c2");
    setProperty("sub", "\u2282");
    setProperty("lceil", "\u2308");
    setProperty("Egrave", "\u00c8");
    setProperty("tilde", "\u02dc");
    setProperty("pi", "\u03c0");
    setProperty("rsaquo", "\u203a");
    setProperty("kappa", "\u03ba");
    setProperty("upsih", "\u03d2");
    setProperty("Omicron", "\u039f");
    setProperty("otimes", "\u2297");
    setProperty("ni", "\u220b");
    setProperty("amp", "\u0026");
    setProperty("Eacute", "\u00c9");
    setProperty("nu", "\u03bd");
    setProperty("Ucirc", "\u00db");
    setProperty("uuml", "\u00fc");
    setProperty("oslash", "\u00f8");
    setProperty("thorn", "\u00fe");
    setProperty("trade", "\u2122");
    setProperty("epsilon", "\u03b5");
    setProperty("ograve", "\u00f2");
    setProperty("hearts", "\u2665");
    setProperty("iquest", "\u00bf");
    setProperty("Uuml", "\u00dc");
    setProperty("empty", "\u2205");
    setProperty("lowast", "\u2217");
    setProperty("sum", "\u2211");
    // left floor is U+230A in the HTML4 symbol entity set
    setProperty("lfloor", "\u230a");
    setProperty("lrm", "\u200e");
    setProperty("oacute", "\u00f3");
    setProperty("image", "\u2111");
    setProperty("Agrave", "\u00c0");
    setProperty("oline", "\u203e");
    setProperty("oelig", "\u0153");
    setProperty("Sigma", "\u03a3");
    setProperty("permil", "\u2030");
    setProperty("perp", "\u22a5");
    setProperty("lt", "\u003c");
    setProperty("Aacute", "\u00c1");
    setProperty("acirc", "\u00e2");
    setProperty("lang", "\u2329");
    setProperty("delta", "\u03b4");
    setProperty("infin", "\u221e");
    setProperty("igrave", "\u00ec");
    setProperty("ordf", "\u00aa");
    setProperty("lsaquo", "\u2039");
    setProperty("prime", "\u2032");
    setProperty("ecirc", "\u00ea");
    setProperty("there4", "\u2234");
    setProperty("iacute", "\u00ed");
    setProperty("sim", "\u223c");
    setProperty("Alpha", "\u0391");
    setProperty("pound", "\u00a3");
    setProperty("notin", "\u2209");
    setProperty("Ntilde", "\u00d1");
    setProperty("Xi", "\u039e");
    setProperty("thinsp", "\u2009");
    setProperty("Otilde", "\u00d5");
    setProperty("icirc", "\u00ee");
    setProperty("scaron", "\u0161");
    setProperty("szlig", "\u00df");
    setProperty("larr", "\u2190");
    // latin small f with hook (function sign), part of the HTML4 symbol set
    setProperty("fnof", "\u0192");
  }
}

/**
 * 
 * LibXML : a free Java layouting library
 * 
 *
 * Project Info:  http://reporting.pentaho.org/libxml/
 *
 * (C) Copyright 2006-2008, by Object Refinery Ltd, Pentaho Corporation and Contributors.
 *
 * This library is free software; you can redistribute it and/or modify it under the terms
 * of the GNU Lesser General Public License as published by the Free Software Foundation;
 * either version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
 * in the United States and other countries.]
 *
 *
 * ------------
 * CharacterEntityParser.java
 * ------------
 */


/**
 * The character entity parser replaces all known occurrences of an entity in
 * the format &entityname;.
 *
 * @author Thomas Morgner
 */
 class CharacterEntityParser
{
  private String[] charMap;
  
  /**
   * the entities, keyed by entity name.
   */
  private final HashMap entities;

  /**
   * Creates a new CharacterEntityParser and initializes the parser with the
   * given set of entities.
   *
   * @param characterEntities the entities used for the parser
   */
  public CharacterEntityParser(final Properties characterEntities)
  {
    if (characterEntities == null)
    {
      throw new NullPointerException("CharacterEntities must not be null");
    }

    entities = new HashMap(characterEntities);
    charMap = new String[65536];
    
    final Iterator entries = entities.entrySet().iterator();
    while (entries.hasNext())
    {
      final Map.Entry entry = (Map.Entry) entries.next();
      final String value = (String) entry.getValue();
      final String entityName = (String) entry.getKey();
      if (value.length() != 1)
      {
        throw new IllegalStateException();
      }
      charMap[value.charAt(0)] = entityName;
    }
  }

  /**
   * Creates a new CharacterEntityParser and initializes the parser with the
   * given set of entities.
   *
   * @param characterEntities the entities used for the parser
   */
  public CharacterEntityParser(final HashMap characterEntities)
  {
    if (characterEntities == null)
    {
      throw new NullPointerException("CharacterEntities must not be null");
    }

    entities = (HashMap) characterEntities.clone();
    charMap = new String[65536];

    final Iterator entries = entities.entrySet().iterator();
    while (entries.hasNext())
    {
      final Map.Entry entry = (Map.Entry) entries.next();
      final String value = (String) entry.getValue();
      final String entityName = (String) entry.getKey();
      if (value.length() != 1)
      {
        throw new IllegalStateException();
      }
      charMap[value.charAt(0)] = entityName;
    }
  }

  /**
   * create a new Character entity parser and initializes the parser with the
   * entities defined in the XML standard.
   *
   * @return the CharacterEntityParser initialized with XML entities.
   */
  public static CharacterEntityParser createXMLEntityParser()
  {
    final HashMap entities = new HashMap();
    entities.put("amp", "&");
    entities.put("quot", "\"");
    entities.put("lt", "<");
    entities.put("gt", ">");
    entities.put("apos", "\u0027");
    return new CharacterEntityParser(entities);
  }

  /**
   * returns the entities used in the parser.
   *
   * @return the properties for this parser.
   */
  private HashMap getEntities()
  {
    return entities;
  }

  /**
   * Looks up the character for the entity name specified in <code>key</code>.
   *
   * @param key the entity name
   * @return the character as string with a length of 1
   */
  private String lookupCharacter(final String key)
  {
    return (String) getEntities().get(key);
  }

  /**
   * Encode the given String, so that all known entites are encoded. All
   * characters represented by these entites are now removed from the string.
   *
   * @param value the original string
   * @return the encoded string.
   */
  public String encodeEntities(final String value)
  {
    if (value == null)
    {
      throw new NullPointerException();
    }

    final int length = value.length();
    final StringBuffer writer = new StringBuffer(length);
    for (int i = 0; i < length; i++)
    {
      final char character = value.charAt(i);
      final String lookup = charMap[character];
      if (lookup == null)
      {
        writer.append(character);
      }
      else
      {
        writer.append('&');
        writer.append(lookup);
        writer.append(';');
      }
    }
    return writer.toString();
  }

  /**
   * Decode the string, all known entities are replaced by their resolved
   * characters.
   *
   * @param value the string that should be decoded.
   * @return the decoded string.
   */
  public String decodeEntities(final String value)
  {
    if (value == null)
    {
      throw new NullPointerException();
    }

    int parserIndex = 0;
    int subStart = value.indexOf('&', parserIndex);
    if (subStart == -1)
    {
      return value;
    }
    int subEnd = value.indexOf(';', subStart);
    if (subEnd == -1)
    {
      return value;
    }

    final StringBuffer bufValue = new StringBuffer(value.substring(0, subStart));
    do
    {
      // at this point we know, that there is at least one entity ..
      if (value.charAt(subStart + 1) == '#')
      {
        final int subValue = parseInt(value.substring(subStart + 2, subEnd), 0);
        if ((subValue >= 1) && (subValue <= 65536))
        {
          final char[] chr = new char[1];
          chr[0] = (char) subValue;
          bufValue.append(chr);
        }
        else
        {
          // invalid entity, do not decode ..
          bufValue.append(value.substring(subStart, subEnd));
        }
      }
      else
      {
        final String entity = value.substring(subStart + 1, subEnd);
        final String replaceString = lookupCharacter(entity);
        if (replaceString != null)
        {
          bufValue.append(decodeEntities(replaceString));
        }
        else
        {
          bufValue.append('&');
          bufValue.append(entity);
          bufValue.append(';');
        }
      }
      parserIndex = subEnd + 1;
      subStart = value.indexOf('&', parserIndex);
      if (subStart == -1)
      {
        bufValue.append(value.substring(parserIndex));
        subEnd = -1;
      }
      else
      {
        subEnd = value.indexOf(';', subStart);
        if (subEnd == -1)
        {
          bufValue.append(value.substring(parserIndex));
        }
        else
        {
          bufValue.append(value.substring(parserIndex, subStart));
        }
      }
    }
    while (subStart != -1 && subEnd != -1);

    return bufValue.toString();
  }

  /**
   * Parses the given string into an int-value. On errors the default value
   * is returned.
   *
   * @param s          the string
   * @param defaultVal the default value that should be used in case of errors
   * @return the parsed int or the default value.
   */
  private int parseInt(final String s, final int defaultVal)
  {
    if (s == null)
    {
      return defaultVal;
    }
    try
    {
      return Integer.parseInt(s);
    }
    catch (Exception e)
    {
      // ignored ..
    }
    return defaultVal;
  }
}

   
    
    
    
    
  








Related examples in the same category

1.Servlet Output HTML Demo
2.Servlet Display Static HTML
3.Prints a conversion table of miles per gallon to kilometers per liter
4.Servlet: Print Table
5.Html utilities
6.Html Parse Servlet
7.Escape and unescape string
8.Escapes newlines, tabs, backslashes, and quotes in the specified string
9.Web Calendar
10.HTML Helper
11.Escape HTML
12.Convert HTML to text
13.Text To HTML
14.Unescape HTML
15.Java object representations of the HTML table structure
16.Entity Decoder
17.Format a color to HTML RGB color format (e.g. #FF0000 for Color.red)
18.Definitions of HTML character entities and conversions between unicode characters and HTML character entities
19.Encode special characters and do formatting for HTML output
20.HTML color names
21.Utility methods for dealing with HTML
22.Filter the specified message string for characters that are sensitive in HTML
23.Decode an HTML color string like '#F567BA;' into a Color
24.Normalize Post Data
25.Get HTML Color String from Java Color object
26.HTML Decoder
27.HTML Parser
28.HTML color and Java Color
29.HTML form Utilites
30.Html Dimensions
31.break Lines with HTML
32.insert HTML block dynamically
33.Convert an integer to an HTML RGB value
34.Convert to HTML string