Example usage for java.lang Character OTHER

Introduction

In this page you can find the example usage for java.lang Character OTHER_LETTER.

Prototype

byte OTHER_LETTER

To view the source code for java.lang Character OTHER_LETTER.

Click Source Link

Document

General category "Lo" in the Unicode specification.

Usage

From source file:Main.java

public static void main(String[] args) {
    for (char ch = Character.MIN_VALUE; ch < Character.MAX_VALUE; ch++) {
        if (Character.OTHER_LETTER == Character.getType(ch)) {

            String s = String.format("\\u%04x", (int) ch);
            System.out.println(s);
        }//  w  ww.  j  a  v  a  2s  . c  o  m
    }
}

From source file:Main.java

public static void main(String[] args) throws java.io.IOException {
    char c = 'a';

    System.out.println("Character = " + (int) c);
    System.out.println("Defined = " + Character.isDefined(c));
    System.out.println("Digit = " + Character.isDigit(c));
    System.out.println("Ignorable = " + Character.isIdentifierIgnorable(c));
    System.out.println("ISO control = " + Character.isISOControl(c));
    System.out.println("Java identifier part = " + Character.isJavaIdentifierPart(c));
    System.out.println("Java identifier start = " + Character.isJavaIdentifierStart(c));
    System.out.println("Letter = " + Character.isLetter(c));
    System.out.println("Letter or digit = " + Character.isLetterOrDigit(c));
    System.out.println("Lowercase = " + Character.isLowerCase(c));
    System.out.println("Space = " + Character.isSpaceChar(c));
    System.out.println("Titlecase = " + Character.isTitleCase(c));
    System.out.println("Unicode identifier part = " + Character.isUnicodeIdentifierPart(c));
    System.out.println("Unicode identifier start = " + Character.isUnicodeIdentifierStart(c));
    System.out.println("Uppercase = " + Character.isUpperCase(c));
    System.out.println("White space = " + Character.isWhitespace(c));

    byte[] types = { Character.COMBINING_SPACING_MARK, Character.CONNECTOR_PUNCTUATION, Character.CONTROL,
            Character.CURRENCY_SYMBOL, Character.DASH_PUNCTUATION, Character.DECIMAL_DIGIT_NUMBER,
            Character.ENCLOSING_MARK, Character.END_PUNCTUATION, Character.FORMAT, Character.LETTER_NUMBER,
            Character.LINE_SEPARATOR, Character.LOWERCASE_LETTER, Character.MATH_SYMBOL,
            Character.MODIFIER_SYMBOL, Character.NON_SPACING_MARK, Character.OTHER_LETTER,
            Character.OTHER_NUMBER, Character.OTHER_PUNCTUATION, Character.OTHER_SYMBOL,
            Character.PARAGRAPH_SEPARATOR, Character.PRIVATE_USE, Character.SPACE_SEPARATOR,
            Character.START_PUNCTUATION, Character.SURROGATE, Character.TITLECASE_LETTER, Character.UNASSIGNED,
            Character.UPPERCASE_LETTER };

    String[] typeNames = { "Combining spacing mark", "Connector punctuation", "Control", "Currency symbol",
            "Dash punctuation", "Decimal digit number", "Enclosing mark", "End punctuation", "Format",
            "Letter number", "Line separator", "Lowercase letter", "Math symbol", "Modifier symbol",
            "Non spacing mark", "Other letter", "Other number", "Other punctuation", "Other symbol",
            "Paragraph separator", "Private use", "Space separator", "Start punctuation", "Surrogate",
            "Titlecase letter", "Unassigned", "Uppercase letter" };

    int type = Character.getType(c);

    for (int i = 0; i < types.length; i++)
        if (type == types[i]) {
            System.out.println("Type name = " + typeNames[i]);
            break;
        }/*  www . jav  a  2  s.c  o m*/

    System.out.println("Unicode block = " + Character.UnicodeBlock.of(c));
}

From source file:Classify.java

public static void main(String[] args) throws java.io.IOException {
    char c = '\u0beb'; // Tamil digit.

    System.out.println("Character = " + (int) c);
    System.out.println("Defined = " + Character.isDefined(c));
    System.out.println("Digit = " + Character.isDigit(c));
    System.out.println("Ignorable = " + Character.isIdentifierIgnorable(c));
    System.out.println("ISO control = " + Character.isISOControl(c));
    System.out.println("Java identifier part = " + Character.isJavaIdentifierPart(c));
    System.out.println("Java identifier start = " + Character.isJavaIdentifierStart(c));
    System.out.println("Letter = " + Character.isLetter(c));
    System.out.println("Letter or digit = " + Character.isLetterOrDigit(c));
    System.out.println("Lowercase = " + Character.isLowerCase(c));
    System.out.println("Space = " + Character.isSpaceChar(c));
    System.out.println("Titlecase = " + Character.isTitleCase(c));
    System.out.println("Unicode identifier part = " + Character.isUnicodeIdentifierPart(c));
    System.out.println("Unicode identifier start = " + Character.isUnicodeIdentifierStart(c));
    System.out.println("Uppercase = " + Character.isUpperCase(c));
    System.out.println("White space = " + Character.isWhitespace(c));

    byte[] types = { Character.COMBINING_SPACING_MARK, Character.CONNECTOR_PUNCTUATION, Character.CONTROL,
            Character.CURRENCY_SYMBOL, Character.DASH_PUNCTUATION, Character.DECIMAL_DIGIT_NUMBER,
            Character.ENCLOSING_MARK, Character.END_PUNCTUATION, Character.FORMAT, Character.LETTER_NUMBER,
            Character.LINE_SEPARATOR, Character.LOWERCASE_LETTER, Character.MATH_SYMBOL,
            Character.MODIFIER_SYMBOL, Character.NON_SPACING_MARK, Character.OTHER_LETTER,
            Character.OTHER_NUMBER, Character.OTHER_PUNCTUATION, Character.OTHER_SYMBOL,
            Character.PARAGRAPH_SEPARATOR, Character.PRIVATE_USE, Character.SPACE_SEPARATOR,
            Character.START_PUNCTUATION, Character.SURROGATE, Character.TITLECASE_LETTER, Character.UNASSIGNED,
            Character.UPPERCASE_LETTER };

    String[] typeNames = { "Combining spacing mark", "Connector punctuation", "Control", "Currency symbol",
            "Dash punctuation", "Decimal digit number", "Enclosing mark", "End punctuation", "Format",
            "Letter number", "Line separator", "Lowercase letter", "Math symbol", "Modifier symbol",
            "Non spacing mark", "Other letter", "Other number", "Other punctuation", "Other symbol",
            "Paragraph separator", "Private use", "Space separator", "Start punctuation", "Surrogate",
            "Titlecase letter", "Unassigned", "Uppercase letter" };

    int type = Character.getType(c);

    for (int i = 0; i < types.length; i++)
        if (type == types[i]) {
            System.out.println("Type name = " + typeNames[i]);
            break;
        }/*from  ww w .  j a va 2s.  c  o m*/

    System.out.println("Unicode block = " + Character.UnicodeBlock.of(c));
}

From source file:Main.java

/**
 * Indicates whether a character is classified as "Alphabetic" by the Unicode standard.
 *
 * @param c/*from   w  ww  .j a v a  2s.co m*/
 *     the character
 * @return true if the character is "Alphabetic"
 */
public static boolean isAlphabetic(int c) {
    //http://www.unicode.org/Public/UNIDATA/UCD.html#Alphabetic
    //Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl
    int generalCategory = Character.getType((char) c);
    switch (generalCategory) {
    case Character.UPPERCASE_LETTER: //Lu
    case Character.LOWERCASE_LETTER: //Ll
    case Character.TITLECASE_LETTER: //Lt
    case Character.MODIFIER_LETTER: //Lm
    case Character.OTHER_LETTER: //Lo
    case Character.LETTER_NUMBER: //Nl
        return true;
    default:
        //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that)
        //Other_Alphabetic contains mostly more exotic characters
        return false;
    }
}

From source file:XmlChars.java

/**
 * Returns true if the character is an XML "letter". XML Names must start with
 * Letters or a few other characters, but other characters in names must only
 * satisfy the <em>isNameChar</em> predicate.
 * //from ww  w .j  a va 2 s.  c o  m
 * @see #isNameChar
 * @see #isNCNameChar
 */
public static boolean isLetter(char c) {
    // [84] Letter ::= BaseChar | Ideographic
    // [85] BaseChar ::= ... too much to repeat
    // [86] Ideographic ::= ... too much to repeat

    //
    // Optimize the typical case.
    //
    if (c >= 'a' && c <= 'z')
        return true;
    if (c == '/')
        return false;
    if (c >= 'A' && c <= 'Z')
        return true;

    //
    // Since the tables are too ridiculous to use in code,
    // we're using the footnotes here to drive this test.
    //
    switch (Character.getType(c)) {
    // app. B footnote says these are 'name start'
    // chars' ...
    case Character.LOWERCASE_LETTER: // Ll
    case Character.UPPERCASE_LETTER: // Lu
    case Character.OTHER_LETTER: // Lo
    case Character.TITLECASE_LETTER: // Lt
    case Character.LETTER_NUMBER: // Nl

        // OK, here we just have some exceptions to check...
        return !isCompatibilityChar(c)
                // per "5.14 of Unicode", rule out some combiners
                && !(c >= 0x20dd && c <= 0x20e0);

    default:
        // check for some exceptions: these are "alphabetic"
        return ((c >= 0x02bb && c <= 0x02c1) || c == 0x0559 || c == 0x06e5 || c == 0x06e6);
    }
}

From source file:XmlChars.java

/**
 * Returns true if the character is an XML "letter".  XML Names must
 * start with Letters or a few other characters, but other characters
 * in names must only satisfy the <em>isNameChar</em> predicate.
 *
 * @see #isNameChar// w ww  .  j  av  a2s  .  c o m
 * @see #isNCNameChar
 */
public static boolean isLetter(char c) {
    // [84] Letter ::= BaseChar | Ideographic
    // [85] BaseChar ::= ... too much to repeat
    // [86] Ideographic ::= ... too much to repeat

    //
    // Optimize the typical case.
    //
    if (c >= 'a' && c <= 'z')
        return true;
    if (c == '/')
        return false;
    if (c >= 'A' && c <= 'Z')
        return true;

    //
    // Since the tables are too ridiculous to use in code,
    // we're using the footnotes here to drive this test.
    //
    switch (Character.getType(c)) {
    // app. B footnote says these are 'name start'
    // chars' ...
    case Character.LOWERCASE_LETTER: // Ll
    case Character.UPPERCASE_LETTER: // Lu
    case Character.OTHER_LETTER: // Lo
    case Character.TITLECASE_LETTER: // Lt
    case Character.LETTER_NUMBER: // Nl

        // OK, here we just have some exceptions to check...
        return !isCompatibilityChar(c)
                // per "5.14 of Unicode", rule out some combiners
                && !(c >= 0x20dd && c <= 0x20e0);

    default:
        // check for some exceptions:  these are "alphabetic"
        return ((c >= 0x02bb && c <= 0x02c1) || c == 0x0559 || c == 0x06e5 || c == 0x06e6);
    }
}

From source file:gov.va.vinci.leo.ae.ExampleWhitespaceTokenizer.java

/**
 * Given a character c return the type definition from the
 * list of public static type definitions in this class.
 *
 * @param c// w ww  . j  av  a  2s  .c  o  m
 * @return type definition for the character c
 */
private static int characterType(char c) {
    switch (Character.getType(c)) {
    //letters
    case Character.UPPERCASE_LETTER:
    case Character.LOWERCASE_LETTER:
    case Character.TITLECASE_LETTER:
    case Character.MODIFIER_LETTER:
    case Character.OTHER_LETTER:
    case Character.NON_SPACING_MARK:
    case Character.ENCLOSING_MARK:
    case Character.COMBINING_SPACING_MARK:
    case Character.PRIVATE_USE:
    case Character.SURROGATE:
    case Character.MODIFIER_SYMBOL:
        return TK_LETTER;
    //numbers
    case Character.DECIMAL_DIGIT_NUMBER:
    case Character.LETTER_NUMBER:
    case Character.OTHER_NUMBER:
        return TK_NUMBER;
    //Regular Whitespace
    case Character.SPACE_SEPARATOR:
        return TK_WHITESPACE;
    //Punctuation
    case Character.DASH_PUNCTUATION:
    case Character.START_PUNCTUATION:
    case Character.END_PUNCTUATION:
    case Character.OTHER_PUNCTUATION:
        return TK_PUNCTUATION;
    //Simple NewLine
    case Character.LINE_SEPARATOR:
    case Character.PARAGRAPH_SEPARATOR:
        return TK_NEWLINE;
    //Other types of "control" characters
    case Character.CONTROL:
        if (c == '\n' || c == '\r')
            return TK_NEWLINE;
        if (Character.isWhitespace(c)) //Tab char is a "Control" character
            return TK_WHITESPACE;
        return TK_CONTROL;
    default:
        if (Character.isWhitespace(c)) {
            return TK_WHITESPACE;
        } //if
        return TK_UNKNOWN;
    }//switch
}

From source file:XmlChars.java

private static boolean isLetter2(char c) {
    // [84] Letter ::= BaseChar | Ideographic
    // [85] BaseChar ::= ... too much to repeat
    // [86] Ideographic ::= ... too much to repeat
    // [87] CombiningChar ::= ... too much to repeat

    ///*from   w w w.  ja v a2 s  .c o m*/
    // Optimize the typical case.
    //
    if (c >= 'a' && c <= 'z')
        return true;
    if (c == '>')
        return false;
    if (c >= 'A' && c <= 'Z')
        return true;

    //
    // Since the tables are too ridiculous to use in code,
    // we're using the footnotes here to drive this test.
    //
    switch (Character.getType(c)) {
    // app. B footnote says these are 'name start'
    // chars' ...
    case Character.LOWERCASE_LETTER: // Ll
    case Character.UPPERCASE_LETTER: // Lu
    case Character.OTHER_LETTER: // Lo
    case Character.TITLECASE_LETTER: // Lt
    case Character.LETTER_NUMBER: // Nl
        // ... and these are name characters 'other
        // than name start characters'
    case Character.COMBINING_SPACING_MARK: // Mc
    case Character.ENCLOSING_MARK: // Me
    case Character.NON_SPACING_MARK: // Mn
    case Character.MODIFIER_LETTER: // Lm
    case Character.DECIMAL_DIGIT_NUMBER: // Nd

        // OK, here we just have some exceptions to check...
        return !isCompatibilityChar(c)
                // per "5.14 of Unicode", rule out some combiners
                && !(c >= 0x20dd && c <= 0x20e0);

    default:
        // added a character ...
        return c == 0x0387;
    }
}

From source file:org.grails.datastore.bson.json.JsonWriter.java

private void writeStringHelper(final String str) throws IOException {
    writer.write('"');
    for (final char c : str.toCharArray()) {
        switch (c) {
        case '"':
            writer.write("\\\"");
            break;
        case '\\':
            writer.write("\\\\");
            break;
        case '\b':
            writer.write("\\b");
            break;
        case '\f':
            writer.write("\\f");
            break;
        case '\n':
            writer.write("\\n");
            break;
        case '\r':
            writer.write("\\r");
            break;
        case '\t':
            writer.write("\\t");
            break;
        default:/* ww w  .j a  v  a  2  s .c o m*/
            switch (Character.getType(c)) {
            case Character.UPPERCASE_LETTER:
            case Character.LOWERCASE_LETTER:
            case Character.TITLECASE_LETTER:
            case Character.OTHER_LETTER:
            case Character.DECIMAL_DIGIT_NUMBER:
            case Character.LETTER_NUMBER:
            case Character.OTHER_NUMBER:
            case Character.SPACE_SEPARATOR:
            case Character.CONNECTOR_PUNCTUATION:
            case Character.DASH_PUNCTUATION:
            case Character.START_PUNCTUATION:
            case Character.END_PUNCTUATION:
            case Character.INITIAL_QUOTE_PUNCTUATION:
            case Character.FINAL_QUOTE_PUNCTUATION:
            case Character.OTHER_PUNCTUATION:
            case Character.MATH_SYMBOL:
            case Character.CURRENCY_SYMBOL:
            case Character.MODIFIER_SYMBOL:
            case Character.OTHER_SYMBOL:
                writer.write(c);
                break;
            default:
                writer.write("\\u");
                writer.write(Integer.toHexString((c & 0xf000) >> 12));
                writer.write(Integer.toHexString((c & 0x0f00) >> 8));
                writer.write(Integer.toHexString((c & 0x00f0) >> 4));
                writer.write(Integer.toHexString(c & 0x000f));
                break;
            }
            break;
        }
    }
    writer.write('"');
}

From source file:marytts.util.string.StringUtils.java

/**
 * Determine whether the given codepoint is either a letter or
 * a modifier according to the Unicode standard. More precisely,
 * this returns true if codepoint belongs to one of the following categories
 * as defined at http://unicode.org/Public/UNIDATA/UCD.html#General_Category_Values:
 * <ul>/*from   w w w  . ja v a2 s  . c o m*/
 * <li>Lu   Letter, Uppercase</li>
 * <li>Ll  Letter, Lowercase</li>
 * <li>Lt  Letter, Titlecase</li>
 * <li>Lm  Letter, Modifier</li>
 * <li>Lo  Letter, Other</li>
 * <li>Mn  Mark, Nonspacing</li>
 * <li>Mc  Mark, Spacing Combining</li>
 * <li>Me  Mark, Enclosing</li>
 * </ul>
 * Whether a given character is associated with this category can be looked up
 * at http://unicode.org/Public/UNIDATA/UnicodeData.txt
 * @param codePoint the unicode codepoint as determined e.g. by String.codePointAt().
 * @return true if the above condition is met, false otherwise
 */
public static boolean isLetterOrModifier(int codePoint) {
    int type = Character.getType(codePoint);
    return type == Character.UPPERCASE_LETTER || type == Character.LOWERCASE_LETTER
            || type == Character.TITLECASE_LETTER || type == Character.MODIFIER_LETTER
            || type == Character.OTHER_LETTER || type == Character.NON_SPACING_MARK
            || type == Character.COMBINING_SPACING_MARK || type == Character.ENCLOSING_MARK;
}

Example usage for java.lang Character OTHER_LETTER

Introduction

Prototype

Document

Usage