Java Regex String Replace HTML replaceHtmlEntities(String aText, boolean preserveFormatting)

Here you can find the source of replaceHtmlEntities(String aText, boolean preserveFormatting)

Description

Replaces all HTML entities (e.g. `&lt;`, `&amp;`) with their Unicode characters.

License

Apache License

Parameter

Parameter Description
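aText text to replace entities in
preserveFormatting if true, only entities are replaced; if false, each tab, each newline, and each run of spaces is additionally replaced by a single space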

Return

the text with entities replaced.

Declaration

public static String replaceHtmlEntities(String aText, boolean preserveFormatting) 

Method Source Code


//package com.java2s;
//License from project: Apache License 

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Main {
    /**
     * Matches, one at a time: a single tab, a run of spaces, a single newline,
     * a lowercase named entity ({@code &name;}) or a decimal numeric entity
     * ({@code &#123;}).
     */
    private static final Pattern SPECIAL_CHAR_WHITESPACE = Pattern.compile("(\t| +|&[a-z]*;|&#[0-9]*;|\n)");

    /** Matches only lowercase named entities and decimal numeric entities. */
    private static final Pattern SPECIAL_CHAR_NO_WHITESPACE = Pattern.compile("(&[a-z]*;|&#[0-9]*;)");

    /**
     * Named entity (including the leading {@code &} and trailing {@code ;})
     * mapped to its replacement text. Only a minimal core set is registered;
     * it is populated once and never modified afterwards.
     */
    private static final Map<String, String> REPLACEMENTS = new HashMap<String, String>();

    static {
        REPLACEMENTS.put("&lt;", "<");
        REPLACEMENTS.put("&gt;", ">");
        REPLACEMENTS.put("&amp;", "&");
        REPLACEMENTS.put("&quot;", "\"");
        REPLACEMENTS.put("&apos;", "'");
        REPLACEMENTS.put("&nbsp;", "\u00A0");
    }

    /**
     * Replaces HTML entities (for example {@code &lt;}, {@code &amp;} or
     * {@code &#65;}) with their Unicode characters.
     * <p>
     * Named entities that are not in the replacement table, and numeric
     * entities that are empty, out of {@code int} range or not a valid Unicode
     * code point, are removed from the text.
     *
     * @param aText
     *            text to replace entities in; may be {@code null}
     * @param preserveFormatting
     *            if {@code true}, only entities are replaced; if {@code false},
     *            each tab, each newline and each run of spaces is additionally
     *            replaced by a single space
     * @return the text with entities replaced, or {@code null} if
     *         {@code aText} is {@code null}.
     */
    public static String replaceHtmlEntities(String aText, boolean preserveFormatting) {
        if (aText == null) {
            return null;
        }

        Pattern pattern = preserveFormatting ? SPECIAL_CHAR_NO_WHITESPACE : SPECIAL_CHAR_WHITESPACE;
        Matcher matcher = pattern.matcher(aText);

        // StringBuffer (not StringBuilder) keeps this compatible with pre-Java 9 Matcher APIs.
        StringBuffer result = new StringBuffer(aText.length());
        while (matcher.find()) {
            // appendReplacement interprets '$' and '\' in the replacement string, so the
            // decoded text (which may legitimately be "$" or "\") must be quoted first.
            matcher.appendReplacement(result, Matcher.quoteReplacement(getReplacement(matcher.group())));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Computes the replacement text for a single pattern match.
     *
     * @param aMatch
     *            the matched text; never empty because every pattern alternative
     *            consumes at least one character
     * @return the replacement text, possibly empty
     */
    private static String getReplacement(String aMatch) {
        // Anything that does not start with '&' is a whitespace match (tab, spaces or
        // newline), which only the whitespace-collapsing pattern can produce.
        if (aMatch.charAt(0) != '&') {
            return " ";
        }

        String named = REPLACEMENTS.get(aMatch);
        if (named != null) {
            return named;
        }
        if (aMatch.startsWith("&#")) {
            return decodeNumericEntity(aMatch);
        }
        // Unknown named entity: dropped.
        return "";
    }

    /**
     * Decodes a decimal numeric entity such as {@code &#65;}.
     *
     * @param aEntity
     *            the entity text including the leading {@code &#} and the
     *            trailing {@code ;}
     * @return the character(s) for the code point, or an empty string if the
     *         number is missing, too large for an {@code int} or not a valid
     *         Unicode code point
     */
    private static String decodeNumericEntity(String aEntity) {
        try {
            int codePoint = Integer.parseInt(aEntity.substring(2, aEntity.length() - 1));
            if (!Character.isValidCodePoint(codePoint)) {
                return "";
            }
            // toChars yields a surrogate pair for supplementary code points instead of
            // truncating them to 16 bits.
            return new String(Character.toChars(codePoint));
        } catch (NumberFormatException ignored) {
            // Empty digits ("&#;") or a value beyond int range: not decodable, drop it.
            return "";
        }
    }
}

Related

  1. removeTags(String input, List knownTagList)
  2. removeTags(String string)
  3. replaceHtml(String html)
  4. replaceHtml(String html)
  5. replaceHtmlEntities(final String text)
  6. replaceTags(String payload, Map tags)
  7. replaceTags(String str, Map tags)
  8. replaceTagsUnlessAnchorTagFound(String wikiString, String searchPattern, String replacementPattern)