A method to get Unicode characters in decimal representation (HTML escape codes) - Java XML

Java examples for XML:XML String Escape

Description

A method to get Unicode characters in decimal representation (HTML escape codes)

Demo Code


//package com.java2s;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

public class Main {
    /**
     * Demo entry point: prints the HTML-escaped form of a sample string.
     *
     * @param argv command-line arguments (unused)
     * @throws Exception declared to keep the original signature; nothing here throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
        String render = "java2s.com";
        System.out.println(getDecimalChar(render));
    }

    /**
     * Replaces every non-ASCII character of a string with its decimal HTML
     * numeric character reference ({@code &#NNNN;}) and leaves ASCII
     * characters untouched.
     *
     * <p>The string is encoded to UTF-8 first and each multi-byte sequence is
     * decoded back to its code point, so a surrogate pair produces a single
     * reference for the supplementary character.
     *
     * @param render the text to escape; must not be {@code null}
     * @return the text with every non-ASCII character written as {@code &#<decimal code point>;}
     * @throws NullPointerException if {@code render} is {@code null}
     */
    public static String getDecimalChar(String render) {
        if (render == null) {
            throw new NullPointerException("render");
        }
        // Using the Charset constant removes the checked exception that the
        // name-based getBytes("UTF-8") overload forces callers to handle.
        byte[] utf8 = render.getBytes(StandardCharsets.UTF_8);
        StringBuilder out = new StringBuilder(utf8.length);
        int i = 0;
        while (i < utf8.length) {
            byte lead = utf8[i];
            if ((lead & 0x80) == 0) {
                // ASCII byte: its value is the character itself.
                out.append((char) lead);
                i++;
            } else {
                out.append("&#").append(getUCSfromUTF8(utf8, i)).append(';');
                // Skip the whole multi-byte sequence that was just decoded.
                i += getBytesforUTF8(lead);
            }
        }
        return out.toString();
    }

    /**
     * Decodes the UTF-8 sequence that starts at {@code offset} into its
     * Unicode code point.
     *
     * @param utf8 array holding UTF-8 encoded bytes
     * @param offset index of the first (lead) byte of the sequence to decode
     * @return the code point encoded by the sequence
     * @throws IllegalArgumentException if the byte at {@code offset} is not a valid lead byte
     * @throws ArrayIndexOutOfBoundsException if {@code offset} is out of range or the
     *         sequence runs past the end of the array
     */
    public static int getUCSfromUTF8(byte[] utf8, int offset) {
        byte lead = utf8[offset];
        int numBytes = getBytesforUTF8(lead);
        if (numBytes == 1) {
            // Single-byte (ASCII) sequence: the byte is the code point.
            return lead;
        }
        // A lead byte of an n-byte sequence has n one-bits and a zero-bit on
        // top, leaving (7 - n) payload bits: 0x1f for n=2, 0x0f for n=3, ...
        int codePoint = lead & (0x7f >> numBytes);
        for (int i = 1; i < numBytes; ++i) {
            // Each continuation byte contributes its low six bits.
            codePoint = (codePoint << 6) | (utf8[offset + i] & 0x3f);
        }
        return codePoint;
    }

    /**
     * Returns the length in bytes of the UTF-8 sequence introduced by the
     * given lead byte.
     *
     * <p>ASCII bytes yield 1. Lead bytes with two to six leading one-bits
     * yield that count (the legacy 5- and 6-byte forms are still reported).
     *
     * @param b the first byte of a UTF-8 sequence
     * @return the number of bytes in the sequence, from 1 to 6
     * @throws IllegalArgumentException if {@code b} is a continuation byte
     *         ({@code 10xxxxxx}) or {@code 0xFE}/{@code 0xFF}, none of which
     *         can start a sequence
     */
    public static int getBytesforUTF8(byte b) {
        // Move the byte into the top eight bits of an int and invert it, so
        // that its leading one-bits become leading zeros that can be counted.
        int leadingOnes = Integer.numberOfLeadingZeros(~(b << 24));
        if (leadingOnes == 0) {
            return 1;
        }
        if (leadingOnes == 1 || leadingOnes > 6) {
            throw new IllegalArgumentException(
                    String.format("Not a UTF-8 lead byte: 0x%02X", b & 0xff));
        }
        return leadingOnes;
    }
}

Related Tutorials