Example usage for java.lang Character getType

List of usage examples for java.lang Character getType

Introduction

On this page you can find example usages of java.lang.Character.getType.

Prototype

public static int getType(int codePoint) 

Source Link

Document

Returns a value indicating a character's general category.

Usage

From source file:Main.java

/**
 * Indicates whether a code point is classified as "Alphabetic", judged by its Unicode
 * general category.
 *
 * <p>The Unicode "Alphabetic" property is generated from
 * {@code Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl}. Only the general-category part
 * is checked here; code points that are alphabetic solely through {@code Other_Alphabetic}
 * are reported as not alphabetic.
 *
 * @param c
 *     the character, given as a Unicode code point (supplementary code points included)
 * @return true if the general category of the code point is Lu, Ll, Lt, Lm, Lo or Nl
 */
public static boolean isAlphabetic(int c) {
    //http://www.unicode.org/Public/UNIDATA/UCD.html#Alphabetic
    //Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl

    // Use the int overload. Narrowing to char would keep only the low 16 bits, so a
    // supplementary code point such as U+10000 would be classified as U+0000.
    int generalCategory = Character.getType(c);
    switch (generalCategory) {
    case Character.UPPERCASE_LETTER: //Lu
    case Character.LOWERCASE_LETTER: //Ll
    case Character.TITLECASE_LETTER: //Lt
    case Character.MODIFIER_LETTER: //Lm
    case Character.OTHER_LETTER: //Lo
    case Character.LETTER_NUMBER: //Nl
        return true;
    default:
        //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that)
        //Other_Alphabetic contains mostly more exotic characters
        return false;
    }
}

From source file:Main.java

/**
 * Rewrites every decimal digit as its ASCII digit and every dash character as the ASCII
 * hyphen-minus. All other characters are left untouched.
 *
 * @param inputString the text to normalize
 * @return a string with the same number of chars as the input, with digits and dashes
 *         replaced by their ASCII forms
 */
static public String normalizeNumbersAndPunctuation(String inputString) {
    final char[] buffer = inputString.toCharArray();

    int index = 0;
    while (index < buffer.length) {
        final char original = buffer[index];

        if (Character.isDigit(original)) {
            // Map the digit to its numeric value and back to '0'..'9'.
            final int value = Character.digit(original, 10);
            buffer[index] = Character.forDigit(value, 10);
        }

        // 8722 is U+2212 MINUS SIGN, which is not in the dash-punctuation category.
        final boolean dashLike = original == 8722
                || Character.getType(original) == Character.DASH_PUNCTUATION;
        if (dashLike) {
            buffer[index] = '-';
        }

        index++;
    }

    return new String(buffer);
}

From source file:org.flowr.utils.NamingStrategy.java

/**
 * Builds an abbreviation from the given name parts.
 *
 * <p>Each part is passed through {@code camelCase} and the results are concatenated. The
 * concatenation is then split into words by
 * {@code StringUtilsExt.splitByCharacterTypeCamelCase}. A word that starts with an
 * upper-case letter contributes the whole word when its second character is also an
 * upper-case letter, and only its first letter otherwise. A word that starts with a
 * lower-case letter contributes its first letter. Words starting with any other kind of
 * character contribute nothing.
 *
 * @param names
 *        the name element parts.
 * @return the abbreviation
 */
public static String initials(String... names) {
    StringBuilder joined = new StringBuilder();
    for (String part : names) {
        joined.append(camelCase(part, CLASSNAME_IGNORE_CHARS, null));
    }

    StringBuilder abbreviation = new StringBuilder();
    for (String word : StringUtilsExt.splitByCharacterTypeCamelCase(joined.toString())) {
        char first = word.charAt(0);
        int firstType = Character.getType(first);

        boolean startsUpper = firstType == Character.UPPERCASE_LETTER;
        if (startsUpper && word.length() > 1
                && Character.getType(word.charAt(1)) == Character.UPPERCASE_LETTER) {
            // Two leading capitals: keep the whole word.
            abbreviation.append(word);
        } else if (startsUpper || firstType == Character.LOWERCASE_LETTER) {
            abbreviation.append(first);
        }
    }
    return abbreviation.toString();
}

From source file:de.fau.cs.osr.utils.StringUtils.java

/**
 * Escapes a string for use in HTML text or in an HTML attribute value.
 *
 * <p>{@code <}, {@code &} and {@code '} are always replaced by {@code &lt;},
 * {@code &amp;} and {@code &#39;}. {@code >} and {@code "} are replaced by
 * {@code &gt;} and {@code &quot;} only when {@code forAttribute} is true. Space, newline
 * and tab are copied unchanged. Other chars below 0x20, the char 0xFE, and supplementary
 * code points of type CONTROL, PRIVATE_USE or UNASSIGNED are handed to {@code hexCharRef}
 * (defined elsewhere; presumably it appends a hexadecimal character reference). Everything
 * else is copied unchanged.
 *
 * @param text the text to escape, may be null
 * @param forAttribute whether {@code >} and {@code "} are escaped as well
 * @return the escaped text, or the empty string if {@code text} is null
 * @throws IllegalArgumentException if the text contains a high surrogate that is not
 *         followed by a low surrogate, or a low surrogate that is not preceded by a high
 *         surrogate
 */
public static String escHtml(String text, boolean forAttribute) {
    // StringEscapeUtils.escapeHtml(in) does not escape '\'' but a lot of
    // other stuff that doesn't need escaping.

    if (text == null)
        return "";

    int n = text.length();
    // Start with one third more capacity than the input length to leave room for
    // the longer entity references.
    StringBuilder sb = new StringBuilder(n * 4 / 3);
    for (int i = 0; i < n; i++) {
        char ch = text.charAt(i);
        switch (ch) {
        case ' ':
        case '\n':
        case '\t':
            // Listed explicitly so that newline and tab do not reach the "< 0x20"
            // branch below; they are copied unchanged.
            sb.append(ch);
            break;
        case '<':
            sb.append("&lt;");
            break;
        case '>':
            // Only escaped when the output is destined for an attribute value.
            sb.append(forAttribute ? "&gt;" : ">");
            break;
        case '&':
            sb.append("&amp;");
            break;
        case '\'':
            // &apos; cannot safely be used, see wikipedia
            sb.append("&#39;");
            break;
        case '"':
            // Only escaped when the output is destined for an attribute value.
            sb.append(forAttribute ? "&quot;" : "\"");
            break;
        default:
            // NOTE(review): "ch >= 0" is always true for a char. The reason 0xFE is
            // singled out is not visible here; confirm the constant is intended.
            if ((ch >= 0 && ch < 0x20) || (ch == 0xFE)) {
                hexCharRef(sb, ch);
                // Leaves the outer switch; the loop moves on to the next char.
                break;
            } else if (Character.isHighSurrogate(ch)) {
                // Consume the following char as the second half of the pair.
                ++i;
                if (i < n) {
                    char ch2 = text.charAt(i);
                    if (Character.isLowSurrogate(ch2)) {
                        int codePoint = Character.toCodePoint(ch, ch2);
                        switch (Character.getType(codePoint)) {
                        case Character.CONTROL:
                        case Character.PRIVATE_USE:
                        case Character.UNASSIGNED:
                            hexCharRef(sb, codePoint);
                            break;

                        default:
                            // Any other supplementary code point is copied as its
                            // original surrogate pair.
                            sb.append(ch);
                            sb.append(ch2);
                            break;
                        }

                        // Valid pair handled; skip the throw below.
                        continue;
                    }
                }
                // Falls through to the throw: the high surrogate was the last char or
                // was not followed by a low surrogate.
            } else if (!Character.isLowSurrogate(ch)) {
                // Ordinary BMP char that needs no escaping.
                sb.append(ch);
                continue;
            }

            // No low surrogate followed or only low surrogate
            throw new IllegalArgumentException("String contains isolated surrogates!");
        }
    }

    return sb.toString();
}

From source file:Main.java

/**
 * <p>/*from   w  w  w . ja  v a2s  .com*/
 * Splits a String by Character type as returned by
 * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
 * characters of the same type are returned as complete tokens, with the
 * following exception: if <code>camelCase</code> is <code>true</code>,
 * the character of type <code>Character.UPPERCASE_LETTER</code>, if any,
 * immediately preceding a token of type
 * <code>Character.LOWERCASE_LETTER</code> will belong to the following
 * token rather than to the preceding, if any,
 * <code>Character.UPPERCASE_LETTER</code> token.
 * 
 * @param str
 *          the String to split, may be <code>null</code>
 * @param camelCase
 *          whether to use so-called "camel-case" for letter types
 * @return an array of parsed Strings, <code>null</code> if null String
 *         input
 * @since 2.4
 */
private static String[] splitByCharacterType(String str, boolean camelCase) {
    if (str == null) {
        return null;
    }
    if (str.length() == 0) {
        return new String[0];
    }
    char[] c = str.toCharArray();
    List list = new ArrayList();
    int tokenStart = 0;
    int currentType = Character.getType(c[tokenStart]);
    for (int pos = tokenStart + 1; pos < c.length; pos++) {
        int type = Character.getType(c[pos]);
        if (type == currentType) {
            continue;
        }
        if (camelCase && type == Character.LOWERCASE_LETTER && currentType == Character.UPPERCASE_LETTER) {
            int newTokenStart = pos - 1;
            if (newTokenStart != tokenStart) {
                list.add(new String(c, tokenStart, newTokenStart - tokenStart));
                tokenStart = newTokenStart;
            }
        } else {
            list.add(new String(c, tokenStart, pos - tokenStart));
            tokenStart = pos;
        }
        currentType = type;
    }
    list.add(new String(c, tokenStart, c.length - tokenStart));
    return (String[]) list.toArray(new String[list.size()]);
}

From source file:ORG.oclc.os.SRW.Utilities.java

/**
 * Formats a range of a byte array as a hex dump with 16 bytes per row.
 *
 * <p>Each byte is written as a space followed by two lower-case hex digits. Each row ends
 * with two spaces, the bytes of that row as text, and {@code newLine}. In the text column,
 * bytes outside the range 0x20..0x7e are shown as '.'. A final partial row is padded with
 * blanks so that its text column lines up with the full rows. When the length is a
 * multiple of 16 (including 0), the final row consists only of the two-space separator.
 * The dump ends with one extra {@code newLine}.
 *
 * @param array the bytes to dump
 * @param offset index of the first byte to dump
 * @param length number of bytes to dump
 * @return the formatted dump
 */
public static String byteArrayToString(byte[] array, int offset, int length) {
    StringBuilder str = new StringBuilder();
    StringBuilder alpha = new StringBuilder();
    int stopat = length + offset;
    int i;

    // i is the 1-based count of bytes written so far; it drives the row breaks.
    for (i = 1; offset < stopat; offset++, i++) {
        int unsigned = array[offset] & 0xff;
        str.append(unsigned < 16 ? " 0" : " ");
        str.append(Integer.toString(unsigned, 16));

        // Negative bytes sign-extend to chars >= 0xff80, so the ">= 0x7f" test below
        // also maps them to '.'.
        char c = (char) array[offset];
        if (c < ' ' || c >= 0x7f) {
            alpha.append('.');
        } else {
            alpha.append(c);
        }

        if ((i % 16) == 0) {
            str.append("  ").append(alpha).append(newLine);
            alpha.setLength(0);
        }
    }

    // Pad the hex column of the last row: three blanks for every missing byte.
    while (i++ % 16 != 1) {
        str.append("   ");
    }

    str.append("  ").append(alpha).append(newLine);
    str.append(newLine);

    return str.toString();
}

From source file:org.apache.accumulo.monitor.rest.logs.LogResource.java

/**
 * Masks selected characters with '?' and escapes HTML metacharacters.
 *
 * <p>Chars whose Unicode type is UNASSIGNED, LINE_SEPARATOR, NON_SPACING_MARK or
 * PRIVATE_USE are replaced by '?'. Afterwards {@code &}, {@code <} and {@code >} are
 * replaced by {@code &amp;}, {@code &lt;} and {@code &gt;}, in that order.
 *
 * @param s the text to sanitize
 * @return the sanitized text
 */
private String sanitize(String s) {
    final StringBuilder masked = new StringBuilder();
    for (char current : s.toCharArray()) {
        switch (Character.getType(current)) {
        case Character.UNASSIGNED:
        case Character.LINE_SEPARATOR:
        case Character.NON_SPACING_MARK:
        case Character.PRIVATE_USE:
            masked.append('?');
            break;
        default:
            masked.append(current);
            break;
        }
    }

    // The ampersand must be escaped first so the entities added afterwards stay intact.
    String escaped = masked.toString();
    escaped = escaped.replace("&", "&amp;");
    escaped = escaped.replace("<", "&lt;");
    escaped = escaped.replace(">", "&gt;");
    return escaped;
}

From source file:ru.caffeineim.protocols.icq.tool.Dumper.java

/**
 * This method filters all non-displayable characters and replace them
 * with a '.' in the resulting String./*from   w w w.j  a  v  a  2 s  .  c o m*/
 *
 * @param array The receive byte array.
 * @return The representation of all displayable characters.
 */
private static String stringTranslation(byte[] array) {
    String ent = new String(array);
    String res = new String();

    for (int i = 0; i < ent.length(); i++) {
        if (Character.getType(ent.charAt(i)) == Character.CONTROL)
            res += ".";
        else
            res += ent.charAt(i);
    }

    return res;
}

From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java

/**
 * Removes diacritics from a text./* w  w  w .  j  av  a 2  s  .  c om*/
 *
 * @param text Text to process.
 * @return Text without diacritics.
 */
public static String removeDiacritics(String text) {
    if (text == null) {
        return null;
    }

    String tmp = Normalizer.normalize(text, Normalizer.Form.NFKD);

    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < tmp.length(); i++) {
        Character ch = tmp.charAt(i);
        if (Character.getType(ch) == Character.NON_SPACING_MARK) {
            continue;
        }

        if (lookup.containsKey(ch)) {
            builder.append(lookup.get(ch));
        } else {
            builder.append(ch);
        }
    }

    return builder.toString();
}

From source file:XmlChars.java

/**
 * Tells whether the character is an XML "letter". XML Names have to begin with a
 * Letter (or one of a few other characters), whereas the remaining characters of a
 * name only need to satisfy the <em>isNameChar</em> predicate.
 *
 * @see #isNameChar
 * @see #isNCNameChar
 */
public static boolean isLetter(char c) {
    // [84] Letter ::= BaseChar | Ideographic
    // [85] BaseChar ::= ... too much to repeat
    // [86] Ideographic ::= ... too much to repeat

    // Fast path for the typical ASCII input.
    boolean asciiLower = 'a' <= c && c <= 'z';
    boolean asciiUpper = 'A' <= c && c <= 'Z';
    if (asciiLower || asciiUpper) {
        return true;
    }
    if (c == '/') {
        return false;
    }

    // The tables in the XML spec are too large to reproduce in code, so the
    // footnotes drive the test instead.
    final int category = Character.getType(c);

    // app. B footnote says these are 'name start' chars ...
    final boolean nameStartCategory = category == Character.LOWERCASE_LETTER // Ll
            || category == Character.UPPERCASE_LETTER // Lu
            || category == Character.OTHER_LETTER // Lo
            || category == Character.TITLECASE_LETTER // Lt
            || category == Character.LETTER_NUMBER; // Nl

    if (!nameStartCategory) {
        // A few characters outside those categories still count as "alphabetic".
        boolean modifierRange = 0x02bb <= c && c <= 0x02c1;
        return modifierRange || c == 0x0559 || c == 0x06e5 || c == 0x06e6;
    }

    // Exceptions within the letter categories: compatibility characters ...
    if (isCompatibilityChar(c)) {
        return false;
    }
    // ... and, per "5.14 of Unicode", some combiners.
    return c < 0x20dd || c > 0x20e0;
}