Example usage for java.lang.Character.getType

Introduction

On this page you can find example usages of java.lang.Character.getType.

Prototype

public static int getType(int codePoint)

Source Link

Document

Returns a value indicating a character's general category.

Usage

From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java

/**
 * Generates a sort key for a given text. This key is useful in environments
 * where only basic Latin characters are reliably sorted (for example, a
 * RDBMS with unknown collation settings).
 *
 * @param text Text to process./*from  ww w .  java2 s  .com*/
 * @param idempotent Whether the conversion should be idempotent. This is
 * guaranteed to be true:
 * <code>alphaSortable(s, true).equals(alphaSortable(alphaSortable(s, true), true)</code>,
 * while this is not necessarily true:
 * <code>alphaSortable(s, false).equals(alphaSortable(alphaSortable(s, false), false)</code>.
 * @return
 */
public static String alphaSortable(String text, boolean idempotent) {
    if (text == null) {
        return null;
    }

    if (idempotent && text.startsWith(MAGIC)) {
        return text;
    }

    String tmp = text.toLowerCase(Locale.ENGLISH);
    tmp = Normalizer.normalize(tmp, Normalizer.Form.NFKD);

    StringBuilder builder = new StringBuilder();
    if (idempotent) {
        builder.append(MAGIC);
    }

    boolean wasSpaceSeparator = false;
    for (int i = 0; i < tmp.length(); i++) {
        Character ch = tmp.charAt(i);
        if (!ArrayUtils.contains(INTERESTING_TYPES, Character.getType(ch))
                && !ArrayUtils.contains(INTERESTING_CHARACTERS, ch)) {
            continue;
        }

        String s;

        // TODO quick fix of mantis 3231
        if (isSpaceSeparator(ch)) {
            if (wasSpaceSeparator) {
                continue;
            }
            wasSpaceSeparator = true;
        } else {
            wasSpaceSeparator = false;
        }

        if (alphaSortableMapping.containsKey(ch)) {
            s = alphaSortableMapping.get(ch);
        } else if (lookup.containsKey(ch)) {
            s = lookup.get(ch);
        } else {
            s = ch.toString();
        }

        for (int j = 0; j < s.length(); j++) {
            Character c = s.charAt(j);
            // TODO Very ugly workaround of the problem described in 0002643
            if (ArrayUtils.contains(INTERESTING_CHARACTERS, c)) {
                builder.append(c);
            } else {
                builder.append(StringUtils.leftPad(Integer.toHexString(c.charValue()), 4, '0'));
            }
        }
    }

    return builder.toString();
}

From source file:info.magnolia.cms.taglibs.util.BaseImageTag.java

/**
 * Replace any special characters that are not letters or numbers with a replacement string. The two exceptions are
 * '-' and '_', which are allowed./*from   w  w w  .  j  a v a 2  s  .  c  om*/
 */
public String convertToSimpleString(String string) {

    final StringBuffer result = new StringBuffer();

    final StringCharacterIterator iterator = new StringCharacterIterator(string);
    char character = iterator.current();
    while (character != CharacterIterator.DONE) {
        int charType = Character.getType(character);
        if (charType == Character.SPACE_SEPARATOR) {
            result.append("-");
        } else if ((charType != Character.UPPERCASE_LETTER) && (charType != Character.LOWERCASE_LETTER)
                && (charType != Character.DECIMAL_DIGIT_NUMBER) && (charType != Character.CONNECTOR_PUNCTUATION)
                && (charType != Character.DASH_PUNCTUATION)) {
            result.append("u" + (int) character);

        } else {
            // the char is not a special one
            // add it to the result as is
            result.append(character);
        }
        character = iterator.next();
    }
    return result.toString();
}

From source file:XmlChars.java

/**
 * Returns true if the character is an XML "letter".  XML Names must
 * start with Letters or a few other characters, but other characters
 * in names must only satisfy the <em>isNameChar</em> predicate.
 *
 * @see #isNameChar/*w w w.j  av a  2s .  com*/
 * @see #isNCNameChar
 */
public static boolean isLetter(char c) {
    // [84] Letter ::= BaseChar | Ideographic
    // [85] BaseChar ::= ... too much to repeat
    // [86] Ideographic ::= ... too much to repeat

    //
    // Optimize the typical case.
    //
    if (c >= 'a' && c <= 'z')
        return true;
    if (c == '/')
        return false;
    if (c >= 'A' && c <= 'Z')
        return true;

    //
    // Since the tables are too ridiculous to use in code,
    // we're using the footnotes here to drive this test.
    //
    switch (Character.getType(c)) {
    // app. B footnote says these are 'name start'
    // chars' ...
    case Character.LOWERCASE_LETTER: // Ll
    case Character.UPPERCASE_LETTER: // Lu
    case Character.OTHER_LETTER: // Lo
    case Character.TITLECASE_LETTER: // Lt
    case Character.LETTER_NUMBER: // Nl

        // OK, here we just have some exceptions to check...
        return !isCompatibilityChar(c)
                // per "5.14 of Unicode", rule out some combiners
                && !(c >= 0x20dd && c <= 0x20e0);

    default:
        // check for some exceptions:  these are "alphabetic"
        return ((c >= 0x02bb && c <= 0x02c1) || c == 0x0559 || c == 0x06e5 || c == 0x06e6);
    }
}

From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java

/**
 * Tells whether the character's Unicode general category is {@code SPACE_SEPARATOR} (Zs).
 *
 * @param ch the character to test
 * @return true if {@link Character#getType(char)} reports {@link Character#SPACE_SEPARATOR}
 */
private static boolean isSpaceSeparator(char ch) {
    final int category = Character.getType(ch);
    return category == Character.SPACE_SEPARATOR;
}

From source file:jef.tools.string.CharUtils.java

 /**
 * Tells whether a character belongs to one of the Unicode general categories
 * that this utility treats as punctuation: dash, start, end, connector and
 * other punctuation, plus math symbols.
 * <p>
 * Note that initial and final quote punctuation are not included, whereas
 * math symbols (such as '+') are.
 *
 * @param c the character to test
 * @return true if the character's general category is one of those listed above
 */
public static boolean isPunctuation(char c) {
   switch (Character.getType(c)) {
   case Character.DASH_PUNCTUATION: // 20
   case Character.START_PUNCTUATION: // 21
   case Character.END_PUNCTUATION: // 22
   case Character.CONNECTOR_PUNCTUATION: // 23
   case Character.OTHER_PUNCTUATION: // 24
   case Character.MATH_SYMBOL: // 25
      return true;
   default:
      return false;
   }
}

From source file:org.mule.transport.legstar.tcp.LegstarTcpSocketFactory.java

/**
 * Expecting an ACK reply from the socket server.
 * @param socket the opened socket/* ww w . j  a  v  a  2 s  .  co  m*/
 * @throws IOException if ACK is not received
 */
private void receiveAck(final Socket socket) throws IOException {
    byte[] response = read(socket, MAX_PROT_REPLY_LEN);
    if (response == null) {
        throw new IOException(I18N.noResponseFromHostMessage().getMessage());
    }
    String ackString = (new String(response, HostCodec.HEADER_CODE_PAGE)).trim();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Socket server reply is: " + ackString);
    }
    /* If this is not a valid ACK, it could be an error report*/
    if (REPLY_ACK_MSG_EC.compareTo(ackString.substring(0, REPLY_ACK_MSG_EC.length())) != 0) {
        /* Sanity check for characters being displayable. We expect
         * the host error reply to start with an error code in
         * uppercase characters. */
        if (Character.getType(ackString.charAt(0)) == Character.UPPERCASE_LETTER) {
            throw (new IOException(ackString));
        } else {
            throw (new IOException(I18N.unrecognizedResponseFromHostMessage().getMessage()));
        }
    }
}

From source file:ome.services.blitz.test.utests.FilePathRestrictionsTest.java

/**
 * Test that two complex sets of rules combined as expected.
 * (On a rainy day this test could be broken up into several smaller tests.)
 *
 * Two rule sets, X and Y, are built by hand together with the result XY that
 * combining them is expected to produce. The expectations encoded below are:
 * a code point mapped by both X and Y keeps only the targets common to both,
 * a code point mapped by only one of them keeps that rule set's targets,
 * unsafe prefixes, suffixes and names are the union of both sides, and
 * safe characters are those present on both sides.
 *
 * The literal targets 65, 66, 67 and 68 are the code points of 'A', 'B', 'C' and 'D'.
 */
@Test
public void testCombineRules() {
    /* these variables define the X set of rules to combine */

    final SetMultimap<Integer, Integer> transformationMatrixX = HashMultimap.create();
    final Set<String> unsafePrefixesX = new HashSet<String>();
    final Set<String> unsafeSuffixesX = new HashSet<String>();
    final Set<String> unsafeNamesX = new HashSet<String>();
    final Set<Character> safeCharactersX = new HashSet<Character>();

    /* these variables define the Y set of rules to combine */

    final SetMultimap<Integer, Integer> transformationMatrixY = HashMultimap.create();
    final Set<String> unsafePrefixesY = new HashSet<String>();
    final Set<String> unsafeSuffixesY = new HashSet<String>();
    final Set<String> unsafeNamesY = new HashSet<String>();
    final Set<Character> safeCharactersY = new HashSet<Character>();

    /* these variables define the expected result of combining X and Y */

    final SetMultimap<Integer, Integer> transformationMatrixXY = HashMultimap.create();
    final Set<String> unsafePrefixesXY = new HashSet<String>();
    final Set<String> unsafeSuffixesXY = new HashSet<String>();
    final Set<String> unsafeNamesXY = new HashSet<String>();
    final Set<Character> safeCharactersXY = new HashSet<Character>();

    /* automatically map control characters to the safe characters;
     * we will remove and replace any that are to be tested specially */

    /* only code points 0x00 to 0xFF are scanned; each control character found is mapped to 65 ('A') */
    for (int codePoint = 0; codePoint < 0x100; codePoint++) {
        if (Character.getType(codePoint) == Character.CONTROL) {
            transformationMatrixXY.put(codePoint, 65);
        }
    }

    /* choose four control characters and remove them from the transformation matrix */

    /* all four are taken from the key-set iterator before any removal is made */
    final Iterator<Integer> controlCodePointIterator = transformationMatrixXY.keySet().iterator();
    final int controlCharacterP = controlCodePointIterator.next();
    final int controlCharacterQ = controlCodePointIterator.next();
    final int controlCharacterR = controlCodePointIterator.next();
    final int controlCharacterS = controlCodePointIterator.next();

    transformationMatrixXY.removeAll(controlCharacterP);
    transformationMatrixXY.removeAll(controlCharacterQ);
    transformationMatrixXY.removeAll(controlCharacterR);
    transformationMatrixXY.removeAll(controlCharacterS);

    /* set up test case for combining control character mappings */

    /* P is mapped only by X, Q and R by both X and Y, S only by Y */
    transformationMatrixX.put(controlCharacterP, 65);
    transformationMatrixX.put(controlCharacterP, 67);
    transformationMatrixX.put(controlCharacterQ, 65);
    transformationMatrixX.put(controlCharacterQ, 66);
    transformationMatrixX.put(controlCharacterR, 66);

    transformationMatrixY.put(controlCharacterQ, 66);
    transformationMatrixY.put(controlCharacterR, 66);
    transformationMatrixY.put(controlCharacterS, 68);

    /* expected: P keeps both of X's targets, Q keeps only the target shared with Y */
    transformationMatrixXY.put(controlCharacterP, 65);
    transformationMatrixXY.put(controlCharacterP, 67);
    transformationMatrixXY.put(controlCharacterQ, 66);
    transformationMatrixXY.put(controlCharacterR, 66);
    transformationMatrixXY.put(controlCharacterS, 68);

    /* choose four non-control characters and remove them from the transformation matrix */

    /* these are the first four code points, counting up from 0, that are not control characters */
    int[] normalCodePoints = new int[4];
    int index = 0;
    int codePoint = 0;
    while (index < normalCodePoints.length) {
        if (Character.getType(codePoint) != Character.CONTROL) {
            normalCodePoints[index++] = codePoint;
            transformationMatrixXY.removeAll(codePoint);
        }
        codePoint++;
    }
    int normalCharacterP = normalCodePoints[0];
    int normalCharacterQ = normalCodePoints[1];
    int normalCharacterR = normalCodePoints[2];
    int normalCharacterS = normalCodePoints[3];

    /* set up test case for combining non-control character mappings */

    /* same P/Q/R/S pattern as for the control characters above */
    transformationMatrixX.put(normalCharacterP, 65);
    transformationMatrixX.put(normalCharacterP, 67);
    transformationMatrixX.put(normalCharacterQ, 65);
    transformationMatrixX.put(normalCharacterQ, 66);
    transformationMatrixX.put(normalCharacterR, 66);

    transformationMatrixY.put(normalCharacterQ, 66);
    transformationMatrixY.put(normalCharacterR, 66);
    transformationMatrixY.put(normalCharacterS, 68);

    transformationMatrixXY.put(normalCharacterP, 65);
    transformationMatrixXY.put(normalCharacterP, 67);
    transformationMatrixXY.put(normalCharacterQ, 66);
    transformationMatrixXY.put(normalCharacterR, 66);
    transformationMatrixXY.put(normalCharacterS, 68);

    /* set up test cases for combining proscribed strings */

    /* for prefixes, suffixes and names the expected result is the union of X and Y */
    unsafePrefixesX.add("XP");
    unsafePrefixesX.add("YP");

    unsafePrefixesY.add("YP");
    unsafePrefixesY.add("ZP");

    unsafePrefixesXY.add("XP");
    unsafePrefixesXY.add("YP");
    unsafePrefixesXY.add("ZP");

    unsafeSuffixesX.add("XS");
    unsafeSuffixesX.add("YS");

    unsafeSuffixesY.add("YS");
    unsafeSuffixesY.add("ZS");

    unsafeSuffixesXY.add("XS");
    unsafeSuffixesXY.add("YS");
    unsafeSuffixesXY.add("ZS");

    unsafeNamesX.add("XN");
    unsafeNamesX.add("YN");

    unsafeNamesY.add("YN");
    unsafeNamesY.add("ZN");

    unsafeNamesXY.add("XN");
    unsafeNamesXY.add("YN");
    unsafeNamesXY.add("ZN");

    /* set up test case for combining safe characters */

    /* only 'A' is safe on both sides, so only 'A' is expected to remain safe */
    safeCharactersX.add('A');
    safeCharactersX.add('B');

    safeCharactersY.add('A');

    safeCharactersXY.add('A');

    /* perform the combination */

    final FilePathRestrictions rulesX = new FilePathRestrictions(transformationMatrixX, unsafePrefixesX,
            unsafeSuffixesX, unsafeNamesX, safeCharactersX);
    final FilePathRestrictions rulesY = new FilePathRestrictions(transformationMatrixY, unsafePrefixesY,
            unsafeSuffixesY, unsafeNamesY, safeCharactersY);
    final FilePathRestrictions rulesXY = FilePathRestrictions.combineFilePathRestrictions(rulesX, rulesY);

    /* test that the combination is as expected in all respects */

    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.safeCharacters, safeCharactersXY));
    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.unsafePrefixes, unsafePrefixesXY));
    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.unsafeSuffixes, unsafeSuffixesXY));
    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.unsafeNames, unsafeNamesXY));
    assertEqualMultimaps(rulesXY.transformationMatrix, transformationMatrixXY);

    /* given a mapping choice, prefer the safe character */

    /* P may map to 65 ('A') or 67 ('C'); 'A' is the one in the expected safe character set */
    Assert.assertEquals((int) rulesXY.transformationMap.get(controlCharacterP), 65);
    Assert.assertEquals((int) rulesXY.transformationMap.get(normalCharacterP), 65);
}

From source file:CharUtils.java

/**
 * True if character is punctuation.
 * <p>
 * A character counts as punctuation when its Unicode general category (as
 * reported by {@link Character#getType(char)}) is contained in
 * {@code punctuationSet}, or when it is the backtick character or equal to
 * {@code UNKNOWN_PUNC}.
 *
 * @param ch the character to test
 * @return true if the character is considered punctuation
 */
public static boolean isPunctuation(char ch) {
    // Integer.valueOf replaces the deprecated Integer(int) constructor and reuses cached instances.
    return punctuationSet.contains(Integer.valueOf(Character.getType(ch))) || (ch == '`') || (ch == UNKNOWN_PUNC);
}

From source file:CharUtils.java

/**
 * True if character is symbol./*ww w  .  j  av a  2 s  .  c  o  m*/
 */

public static boolean isSymbol(char ch) {
    return symbolSet.contains(new Integer(Character.getType(ch)));
}

From source file:gov.va.vinci.leo.ae.ExampleWhitespaceTokenizer.java

/**
 * Maps a character to one of the token type constants of this class, based
 * on the character's Unicode general category.
 * <p>
 * Letters, marks, private-use, surrogate and modifier-symbol characters are
 * reported as letters; all number categories as numbers; dash, start, end
 * and other punctuation as punctuation. Line and paragraph separators, as
 * well as the control characters '\n' and '\r', are reported as newlines.
 * Any remaining character is reported as whitespace when
 * {@link Character#isWhitespace(char)} accepts it, and otherwise as a
 * control character (for the control category) or as unknown.
 *
 * @param c the character to classify
 * @return type definition for the character c
 */
private static int characterType(char c) {
    final int category = Character.getType(c);
    switch (category) {
    // Whitespace and line breaks that have a category of their own
    case Character.SPACE_SEPARATOR:
        return TK_WHITESPACE;
    case Character.LINE_SEPARATOR:
    case Character.PARAGRAPH_SEPARATOR:
        return TK_NEWLINE;

    // Numbers
    case Character.DECIMAL_DIGIT_NUMBER:
    case Character.LETTER_NUMBER:
    case Character.OTHER_NUMBER:
        return TK_NUMBER;

    // Punctuation
    case Character.DASH_PUNCTUATION:
    case Character.START_PUNCTUATION:
    case Character.END_PUNCTUATION:
    case Character.OTHER_PUNCTUATION:
        return TK_PUNCTUATION;

    // Letters, and everything that is tokenized together with letters
    case Character.UPPERCASE_LETTER:
    case Character.LOWERCASE_LETTER:
    case Character.TITLECASE_LETTER:
    case Character.MODIFIER_LETTER:
    case Character.OTHER_LETTER:
    case Character.NON_SPACING_MARK:
    case Character.ENCLOSING_MARK:
    case Character.COMBINING_SPACING_MARK:
    case Character.PRIVATE_USE:
    case Character.SURROGATE:
    case Character.MODIFIER_SYMBOL:
        return TK_LETTER;

    // Control characters: line breaks first, then whitespace such as tab
    case Character.CONTROL:
        if (c == '\n' || c == '\r') {
            return TK_NEWLINE;
        }
        return Character.isWhitespace(c) ? TK_WHITESPACE : TK_CONTROL;

    // Every other category
    default:
        return Character.isWhitespace(c) ? TK_WHITESPACE : TK_UNKNOWN;
    }
}