Example usage for java.lang Character getType

List of usage examples for java.lang Character getType

Introduction

On this page you can find example usages of java.lang.Character.getType.

Prototype

public static int getType(int codePoint) 

Source Link

Document

Returns a value indicating a character's general category.

Usage

From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java

/**
 * Generates a sort key for a given text. This key is useful in environments
 * where only basic Latin characters are reliably sorted (for example, a
 * RDBMS with unknown collation settings).
 *
 * @param text Text to process./*from  ww w .  java2 s  .com*/
 * @param idempotent Whether the conversion should be idempotent. This is
 * guaranteed to be true:
 * <code>alphaSortable(s, true).equals(alphaSortable(alphaSortable(s, true), true)</code>,
 * while this is not necessarily true:
 * <code>alphaSortable(s, false).equals(alphaSortable(alphaSortable(s, false), false)</code>.
 * @return
 */
public static String alphaSortable(String text, boolean idempotent) {
    if (text == null) {
        return null;
    }

    if (idempotent && text.startsWith(MAGIC)) {
        return text;
    }

    String tmp = text.toLowerCase(Locale.ENGLISH);
    tmp = Normalizer.normalize(tmp, Normalizer.Form.NFKD);

    StringBuilder builder = new StringBuilder();
    if (idempotent) {
        builder.append(MAGIC);
    }

    boolean wasSpaceSeparator = false;
    for (int i = 0; i < tmp.length(); i++) {
        Character ch = tmp.charAt(i);
        if (!ArrayUtils.contains(INTERESTING_TYPES, Character.getType(ch))
                && !ArrayUtils.contains(INTERESTING_CHARACTERS, ch)) {
            continue;
        }

        String s;

        // TODO quick fix of mantis 3231
        if (isSpaceSeparator(ch)) {
            if (wasSpaceSeparator) {
                continue;
            }
            wasSpaceSeparator = true;
        } else {
            wasSpaceSeparator = false;
        }

        if (alphaSortableMapping.containsKey(ch)) {
            s = alphaSortableMapping.get(ch);
        } else if (lookup.containsKey(ch)) {
            s = lookup.get(ch);
        } else {
            s = ch.toString();
        }

        for (int j = 0; j < s.length(); j++) {
            Character c = s.charAt(j);
            // TODO Very ugly workaround of the problem described in 0002643
            if (ArrayUtils.contains(INTERESTING_CHARACTERS, c)) {
                builder.append(c);
            } else {
                builder.append(StringUtils.leftPad(Integer.toHexString(c.charValue()), 4, '0'));
            }
        }
    }

    return builder.toString();
}

From source file:info.magnolia.cms.taglibs.util.BaseImageTag.java

/**
 * Replace any special characters that are not letters or numbers with a replacement string. The two exceptions are
 * '-' and '_', which are allowed./*from   w  w w  .  j  a v a 2  s  .  c  om*/
 */
public String convertToSimpleString(String string) {

    final StringBuffer result = new StringBuffer();

    final StringCharacterIterator iterator = new StringCharacterIterator(string);
    char character = iterator.current();
    while (character != CharacterIterator.DONE) {
        int charType = Character.getType(character);
        if (charType == Character.SPACE_SEPARATOR) {
            result.append("-");
        } else if ((charType != Character.UPPERCASE_LETTER) && (charType != Character.LOWERCASE_LETTER)
                && (charType != Character.DECIMAL_DIGIT_NUMBER) && (charType != Character.CONNECTOR_PUNCTUATION)
                && (charType != Character.DASH_PUNCTUATION)) {
            result.append("u" + (int) character);

        } else {
            // the char is not a special one
            // add it to the result as is
            result.append(character);
        }
        character = iterator.next();
    }
    return result.toString();
}

From source file:XmlChars.java

/**
 * Returns true if the character is an XML "letter".  XML Names must
 * start with Letters or a few other characters, but other characters
 * in names must only satisfy the <em>isNameChar</em> predicate.
 *
 * @see #isNameChar/*w w w.j  av a  2s .  com*/
 * @see #isNCNameChar
 */
public static boolean isLetter(char c) {
    // [84] Letter ::= BaseChar | Ideographic
    // [85] BaseChar ::= ... too much to repeat
    // [86] Ideographic ::= ... too much to repeat

    //
    // Optimize the typical case.
    //
    if (c >= 'a' && c <= 'z')
        return true;
    if (c == '/')
        return false;
    if (c >= 'A' && c <= 'Z')
        return true;

    //
    // Since the tables are too ridiculous to use in code,
    // we're using the footnotes here to drive this test.
    //
    switch (Character.getType(c)) {
    // app. B footnote says these are 'name start'
    // chars' ...
    case Character.LOWERCASE_LETTER: // Ll
    case Character.UPPERCASE_LETTER: // Lu
    case Character.OTHER_LETTER: // Lo
    case Character.TITLECASE_LETTER: // Lt
    case Character.LETTER_NUMBER: // Nl

        // OK, here we just have some exceptions to check...
        return !isCompatibilityChar(c)
                // per "5.14 of Unicode", rule out some combiners
                && !(c >= 0x20dd && c <= 0x20e0);

    default:
        // check for some exceptions:  these are "alphabetic"
        return ((c >= 0x02bb && c <= 0x02c1) || c == 0x0559 || c == 0x06e5 || c == 0x06e6);
    }
}

From source file:pl.edu.icm.coansys.commons.java.DiacriticsRemover.java

/**
 * Checks whether a character belongs to the Unicode "space separator" general category.
 *
 * @param ch the character to test
 * @return {@code true} if {@link Character#getType(char)} reports
 * {@link Character#SPACE_SEPARATOR} for {@code ch}
 */
private static boolean isSpaceSeparator(char ch) {
    final int category = Character.getType(ch);
    return category == Character.SPACE_SEPARATOR;
}

From source file:jef.tools.string.CharUtils.java

 /**
 * Tests whether a character counts as punctuation, judged by its Unicode general category.
 * <p>
 * The accepted categories are exactly those whose {@link Character#getType(char)} value lies in
 * the range 20 to 25: dash, start, end, connector and other punctuation, plus math symbols.
 * <p>
 * NOTE(review): math symbols (such as '+') are accepted while initial and final quote
 * punctuation are not; confirm whether that is intended.
 *
 * @param c the character to test
 * @return {@code true} if the category of {@code c} is one of the accepted ones
 */
public static boolean isPunctuation(char c) {
   switch (Character.getType(c)) {
   case Character.DASH_PUNCTUATION: // 20
   case Character.START_PUNCTUATION: // 21
   case Character.END_PUNCTUATION: // 22
   case Character.CONNECTOR_PUNCTUATION: // 23
   case Character.OTHER_PUNCTUATION: // 24
   case Character.MATH_SYMBOL: // 25
      return true;
   default:
      return false;
   }
}

From source file:org.mule.transport.legstar.tcp.LegstarTcpSocketFactory.java

/**
 * Expecting an ACK reply from the socket server.
 * @param socket the opened socket/* ww w . j  a  v  a  2 s  .  co  m*/
 * @throws IOException if ACK is not received
 */
private void receiveAck(final Socket socket) throws IOException {
    byte[] response = read(socket, MAX_PROT_REPLY_LEN);
    if (response == null) {
        throw new IOException(I18N.noResponseFromHostMessage().getMessage());
    }
    String ackString = (new String(response, HostCodec.HEADER_CODE_PAGE)).trim();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Socket server reply is: " + ackString);
    }
    /* If this is not a valid ACK, it could be an error report*/
    if (REPLY_ACK_MSG_EC.compareTo(ackString.substring(0, REPLY_ACK_MSG_EC.length())) != 0) {
        /* Sanity check for characters being displayable. We expect
         * the host error reply to start with an error code in
         * uppercase characters. */
        if (Character.getType(ackString.charAt(0)) == Character.UPPERCASE_LETTER) {
            throw (new IOException(ackString));
        } else {
            throw (new IOException(I18N.unrecognizedResponseFromHostMessage().getMessage()));
        }
    }
}

From source file:ome.services.blitz.test.utests.FilePathRestrictionsTest.java

/**
 * Test that two complex sets of rules combine as expected.
 * (On a rainy day this test could be broken up into several smaller tests.)
 *
 * <p>Two rule sets, X and Y, are assembled together with the result XY that their combination
 * is expected to produce. The combination is then performed and compared against XY component
 * by component.
 */
@Test
public void testCombineRules() {
    /* these variables define the X set of rules to combine */

    final SetMultimap<Integer, Integer> transformationMatrixX = HashMultimap.create();
    final Set<String> unsafePrefixesX = new HashSet<String>();
    final Set<String> unsafeSuffixesX = new HashSet<String>();
    final Set<String> unsafeNamesX = new HashSet<String>();
    final Set<Character> safeCharactersX = new HashSet<Character>();

    /* these variables define the Y set of rules to combine */

    final SetMultimap<Integer, Integer> transformationMatrixY = HashMultimap.create();
    final Set<String> unsafePrefixesY = new HashSet<String>();
    final Set<String> unsafeSuffixesY = new HashSet<String>();
    final Set<String> unsafeNamesY = new HashSet<String>();
    final Set<Character> safeCharactersY = new HashSet<Character>();

    /* these variables define the expected result of combining X and Y */

    final SetMultimap<Integer, Integer> transformationMatrixXY = HashMultimap.create();
    final Set<String> unsafePrefixesXY = new HashSet<String>();
    final Set<String> unsafeSuffixesXY = new HashSet<String>();
    final Set<String> unsafeNamesXY = new HashSet<String>();
    final Set<Character> safeCharactersXY = new HashSet<Character>();

    /* automatically map control characters to the safe characters;
     * we will remove and replace any that are to be tested specially */

    /* 65 is the code point of 'A', the only safe character expected in XY (see below) */
    for (int codePoint = 0; codePoint < 0x100; codePoint++) {
        if (Character.getType(codePoint) == Character.CONTROL) {
            transformationMatrixXY.put(codePoint, 65);
        }
    }

    /* choose four control characters and remove them from the transformation matrix */

    final Iterator<Integer> controlCodePointIterator = transformationMatrixXY.keySet().iterator();
    final int controlCharacterP = controlCodePointIterator.next();
    final int controlCharacterQ = controlCodePointIterator.next();
    final int controlCharacterR = controlCodePointIterator.next();
    final int controlCharacterS = controlCodePointIterator.next();

    transformationMatrixXY.removeAll(controlCharacterP);
    transformationMatrixXY.removeAll(controlCharacterQ);
    transformationMatrixXY.removeAll(controlCharacterR);
    transformationMatrixXY.removeAll(controlCharacterS);

    /* set up test case for combining control character mappings */

    /* P is mapped only by X, S only by Y; Q and R are mapped by both.
     * The mapping targets 65 to 68 are the code points of 'A' to 'D'. */
    transformationMatrixX.put(controlCharacterP, 65);
    transformationMatrixX.put(controlCharacterP, 67);
    transformationMatrixX.put(controlCharacterQ, 65);
    transformationMatrixX.put(controlCharacterQ, 66);
    transformationMatrixX.put(controlCharacterR, 66);

    transformationMatrixY.put(controlCharacterQ, 66);
    transformationMatrixY.put(controlCharacterR, 66);
    transformationMatrixY.put(controlCharacterS, 68);

    /* expected: P and S keep the targets of the one set that maps them,
     * Q and R keep only the targets common to X and Y */
    transformationMatrixXY.put(controlCharacterP, 65);
    transformationMatrixXY.put(controlCharacterP, 67);
    transformationMatrixXY.put(controlCharacterQ, 66);
    transformationMatrixXY.put(controlCharacterR, 66);
    transformationMatrixXY.put(controlCharacterS, 68);

    /* choose four non-control characters and remove them from the transformation matrix */

    /* scans upward from code point 0 and takes the first four that are not control characters */
    int[] normalCodePoints = new int[4];
    int index = 0;
    int codePoint = 0;
    while (index < normalCodePoints.length) {
        if (Character.getType(codePoint) != Character.CONTROL) {
            normalCodePoints[index++] = codePoint;
            transformationMatrixXY.removeAll(codePoint);
        }
        codePoint++;
    }
    int normalCharacterP = normalCodePoints[0];
    int normalCharacterQ = normalCodePoints[1];
    int normalCharacterR = normalCodePoints[2];
    int normalCharacterS = normalCodePoints[3];

    /* set up test case for combining non-control character mappings */

    /* same pattern of mappings as for the control characters above */
    transformationMatrixX.put(normalCharacterP, 65);
    transformationMatrixX.put(normalCharacterP, 67);
    transformationMatrixX.put(normalCharacterQ, 65);
    transformationMatrixX.put(normalCharacterQ, 66);
    transformationMatrixX.put(normalCharacterR, 66);

    transformationMatrixY.put(normalCharacterQ, 66);
    transformationMatrixY.put(normalCharacterR, 66);
    transformationMatrixY.put(normalCharacterS, 68);

    transformationMatrixXY.put(normalCharacterP, 65);
    transformationMatrixXY.put(normalCharacterP, 67);
    transformationMatrixXY.put(normalCharacterQ, 66);
    transformationMatrixXY.put(normalCharacterR, 66);
    transformationMatrixXY.put(normalCharacterS, 68);

    /* set up test cases for combining proscribed strings */

    /* for prefixes, suffixes and names alike, the expected XY set is the union of X and Y */
    unsafePrefixesX.add("XP");
    unsafePrefixesX.add("YP");

    unsafePrefixesY.add("YP");
    unsafePrefixesY.add("ZP");

    unsafePrefixesXY.add("XP");
    unsafePrefixesXY.add("YP");
    unsafePrefixesXY.add("ZP");

    unsafeSuffixesX.add("XS");
    unsafeSuffixesX.add("YS");

    unsafeSuffixesY.add("YS");
    unsafeSuffixesY.add("ZS");

    unsafeSuffixesXY.add("XS");
    unsafeSuffixesXY.add("YS");
    unsafeSuffixesXY.add("ZS");

    unsafeNamesX.add("XN");
    unsafeNamesX.add("YN");

    unsafeNamesY.add("YN");
    unsafeNamesY.add("ZN");

    unsafeNamesXY.add("XN");
    unsafeNamesXY.add("YN");
    unsafeNamesXY.add("ZN");

    /* set up test case for combining safe characters */

    /* only 'A', which is safe in both X and Y, is expected to remain safe in XY */
    safeCharactersX.add('A');
    safeCharactersX.add('B');

    safeCharactersY.add('A');

    safeCharactersXY.add('A');

    /* perform the combination */

    final FilePathRestrictions rulesX = new FilePathRestrictions(transformationMatrixX, unsafePrefixesX,
            unsafeSuffixesX, unsafeNamesX, safeCharactersX);
    final FilePathRestrictions rulesY = new FilePathRestrictions(transformationMatrixY, unsafePrefixesY,
            unsafeSuffixesY, unsafeNamesY, safeCharactersY);
    final FilePathRestrictions rulesXY = FilePathRestrictions.combineFilePathRestrictions(rulesX, rulesY);

    /* test that the combination is as expected in all respects */

    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.safeCharacters, safeCharactersXY));
    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.unsafePrefixes, unsafePrefixesXY));
    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.unsafeSuffixes, unsafeSuffixesXY));
    Assert.assertTrue(CollectionUtils.isEqualCollection(rulesXY.unsafeNames, unsafeNamesXY));
    assertEqualMultimaps(rulesXY.transformationMatrix, transformationMatrixXY);

    /* given a mapping choice, prefer the safe character */

    /* P may map to 65 or 67; 65 ('A') is the one in the expected safe character set */
    Assert.assertEquals((int) rulesXY.transformationMap.get(controlCharacterP), 65);
    Assert.assertEquals((int) rulesXY.transformationMap.get(normalCharacterP), 65);
}

From source file:CharUtils.java

/**
 * True if character is punctuation.
 *
 * <p>A character qualifies when its Unicode general category, as reported by
 * {@link Character#getType(char)}, is contained in {@code punctuationSet}, or when it is the
 * backquote character or {@code UNKNOWN_PUNC}.
 *
 * @param ch the character to test
 * @return {@code true} if {@code ch} is treated as punctuation
 */
public static boolean isPunctuation(char ch) {
    // Integer.valueOf replaces the deprecated Integer(int) constructor and reuses cached instances.
    return punctuationSet.contains(Integer.valueOf(Character.getType(ch))) || (ch == '`') || (ch == UNKNOWN_PUNC);
}

From source file:CharUtils.java

/**
 * True if character is symbol./*ww w  .  j  av a  2 s  .  c  o  m*/
 */

public static boolean isSymbol(char ch) {
    return symbolSet.contains(new Integer(Character.getType(ch)));
}

From source file:gov.va.vinci.leo.ae.ExampleWhitespaceTokenizer.java

/**
 * Maps a character to one of the {@code TK_*} token type constants, based on its Unicode
 * general category as reported by {@link Character#getType(char)}.
 *
 * <p>Letters, combining and enclosing marks, private-use and surrogate characters and modifier
 * symbols are classed as letters. Line and paragraph separators, as well as the control
 * characters line feed and carriage return, are classed as newlines. A category that is not
 * listed explicitly yields whitespace when {@link Character#isWhitespace(char)} accepts the
 * character and unknown otherwise.
 *
 * @param c the character to classify
 * @return type definition for the character c
 */
private static int characterType(char c) {
    final int tokenType;
    switch (Character.getType(c)) {
    // numbers
    case Character.DECIMAL_DIGIT_NUMBER:
    case Character.LETTER_NUMBER:
    case Character.OTHER_NUMBER:
        tokenType = TK_NUMBER;
        break;
    // letters, marks and other characters treated as part of a word
    case Character.UPPERCASE_LETTER:
    case Character.LOWERCASE_LETTER:
    case Character.TITLECASE_LETTER:
    case Character.MODIFIER_LETTER:
    case Character.OTHER_LETTER:
    case Character.NON_SPACING_MARK:
    case Character.ENCLOSING_MARK:
    case Character.COMBINING_SPACING_MARK:
    case Character.PRIVATE_USE:
    case Character.SURROGATE:
    case Character.MODIFIER_SYMBOL:
        tokenType = TK_LETTER;
        break;
    // punctuation
    case Character.DASH_PUNCTUATION:
    case Character.START_PUNCTUATION:
    case Character.END_PUNCTUATION:
    case Character.OTHER_PUNCTUATION:
        tokenType = TK_PUNCTUATION;
        break;
    // regular whitespace
    case Character.SPACE_SEPARATOR:
        tokenType = TK_WHITESPACE;
        break;
    // simple newline
    case Character.LINE_SEPARATOR:
    case Character.PARAGRAPH_SEPARATOR:
        tokenType = TK_NEWLINE;
        break;
    // control characters: line breaks first, then whitespace such as the tab character
    case Character.CONTROL:
        if (c == '\n' || c == '\r') {
            tokenType = TK_NEWLINE;
        } else {
            tokenType = Character.isWhitespace(c) ? TK_WHITESPACE : TK_CONTROL;
        }
        break;
    // every remaining category
    default:
        tokenType = Character.isWhitespace(c) ? TK_WHITESPACE : TK_UNKNOWN;
        break;
    }
    return tokenType;
}