Example usage of java.lang.Character.isLowSurrogate

List of usage examples for java.lang.Character.isLowSurrogate

Introduction

On this page you can find example usages of java.lang.Character.isLowSurrogate.

Prototype

public static boolean isLowSurrogate(char ch) 

Source Link

Document

Determines if the given char value is a <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> Unicode low-surrogate code unit</a> (also known as <i>trailing-surrogate code unit</i>).

Usage

From source file:Main.java

/**
 * Prints the result of {@link Character#isLowSurrogate(char)} for a low-surrogate
 * code unit and for an ordinary letter, one result per line.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final char lowSurrogate = '\udc28';
    final char plainLetter = 'a';

    // U+DC28 lies inside the low-surrogate range U+DC00..U+DFFF, so this prints "true".
    System.out.println(Character.isLowSurrogate(lowSurrogate));
    // 'a' is not a surrogate code unit at all, so this prints "false".
    System.out.println(Character.isLowSurrogate(plainLetter));
}

From source file:Main.java

/**
 * Replaces instances of Emoji unicode characters with their Emoji-Cheat sheet key
 *
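 * @param s the string to scan for Emoji characters
 * @return the string with every known Emoji replaced by its cheat-sheet key, or an
 *         empty string when {@code TextUtils.isEmpty(s)} is true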
 */
public static String replaceUnicodeEmojis(String s) {
    if (TextUtils.isEmpty(s)) {
        return "";
    }
    // Build the result in a separate buffer instead of rewriting s while scanning it:
    // that way replacement text is never re-scanned, and the scan index cannot drift
    // when a replacement has a different length than the key it replaces.
    final int length = s.length();
    final StringBuilder result = new StringBuilder(length);
    int i = 0;
    while (i < length) {
        // codePointAt yields a supplementary code point only for a well-formed
        // high+low surrogate pair; an unpaired surrogate is returned on its own, so a
        // low surrogate is never mistaken for the start of a two-char key.
        final int width = Character.charCount(s.codePointAt(i));
        final String key = s.substring(i, i + width);
        final String emoji = UNICODE_TO_CHEAT_SHEET.get(key);
        // Unknown characters are copied through unchanged.
        result.append(null != emoji ? emoji : key);
        i += width;
    }
    return result.toString();
}

From source file:Main.java

/**
 * Scans {@code chars} from the start and reports the index of the first UTF-16 unit
 * that is rejected.
 *
 * <p>A unit is rejected when it is a high surrogate that is the last element, a high
 * surrogate whose successor is not a low surrogate, a surrogate pair whose code point
 * fails {@code isValidCodePoint}, a non-high-surrogate char that fails
 * {@code isValidCodePoint}, or a non-high-surrogate char equal to any entry of
 * {@code encodings}.
 *
 * @param chars     the characters to scan
 * @param encodings characters that are rejected even when valid; may be {@code null}
 * @return the index of the first rejected unit (for a pair, the index of its high
 *         surrogate), or {@code chars.length} when nothing is rejected
 */
protected static int testEscape(char[] chars, char[] encodings) {
    final int total = chars.length;
    int pos = 0;
    while (pos < total) {
        // Every failure below reports the index where the current unit started.
        final int start = pos;
        final char lead = chars[pos++];

        if (!Character.isHighSurrogate(lead)) {
            if (!isValidCodePoint(lead)) {
                return start;
            }
            if (encodings != null) {
                for (int k = 0; k < encodings.length; k++) {
                    if (encodings[k] == lead) {
                        return start;
                    }
                }
            }
            continue;
        }

        // High surrogate with nothing after it.
        if (pos >= total) {
            return start;
        }

        final char trail = chars[pos++];
        if (!Character.isLowSurrogate(trail) || !isValidCodePoint(Character.toCodePoint(lead, trail))) {
            return start;
        }
    }
    return total;
}

From source file:Main.java

/**
 * Truncates {@code str} to at most {@code maxLen} UTF-16 code units, optionally
 * ending the truncated text with {@code "..."}.
 *
 * <p>When an ellipsis is requested and {@code maxLen} is greater than 3, the three
 * ellipsis characters count towards {@code maxLen}; when {@code maxLen} is 3 or less
 * the ellipsis is dropped. The cut position is moved back by one unit if it would
 * otherwise fall between the two halves of a surrogate pair.
 *
 * @param str           the string to truncate
 * @param maxLen        the maximum length of the result, in UTF-16 code units
 * @param doAddEllipsis whether to append {@code "..."} to a truncated result
 * @return {@code str} itself when it already fits, otherwise the truncated string
 */
public static String $$truncate(String str, int maxLen, boolean doAddEllipsis) {

    if (str.length() <= maxLen) {
        return str; // no need to truncate
    }

    // If doAddEllipsis, either reduce maxLen to compensate, or else if maxLen is too small, just
    // turn off doAddEllipsis.
    if (doAddEllipsis) {
        if (maxLen > 3) {
            maxLen -= 3;
        } else {
            doAddEllipsis = false;
        }
    }

    // Make sure truncating at maxLen doesn't cut up a unicode surrogate pair.
    // The maxLen > 0 guard matters: with maxLen == 0 there is no character before the
    // cut, and charAt(-1) would throw instead of yielding an empty result.
    if (maxLen > 0
            && Character.isHighSurrogate(str.charAt(maxLen - 1))
            && Character.isLowSurrogate(str.charAt(maxLen))) {
        maxLen -= 1;
    }

    // Truncate.
    final String truncated = str.substring(0, maxLen);

    // Add ellipsis.
    return doAddEllipsis ? truncated + "..." : truncated;
}

From source file:Main.java

/**
 * Gets the index of the longest NCName that is the suffix of a character
 * sequence.
 * 
 * @param cs
 *           The character sequence.
 * @return Returns the index of the longest suffix of the specified character
 *         sequence <code>cs</code> that is an NCName, or -1 if the character
 *         sequence <code>cs</code> does not have a suffix that is an NCName.
 */
public static int getNCNameSuffixIndex(CharSequence cs) {
    int suffixStart = -1;
    // Walk backwards from the last char; stop at the first code point that cannot be
    // part of an NCName.
    for (int pos = cs.length() - 1; pos >= 0; pos--) {
        if (Character.isLowSurrogate(cs.charAt(pos))) {
            // Second half of a pair: the code point is read when the preceding
            // position is visited.
            continue;
        }
        final int codePoint = Character.codePointAt(cs, pos);
        if (isNCNameStartChar(codePoint)) {
            // Remember the leftmost position seen so far that may begin an NCName.
            suffixStart = pos;
        }
        if (!isNCNameChar(codePoint)) {
            return suffixStart;
        }
    }
    return suffixStart;
}

From source file:Main.java

/**
 * Anything other than letter and numbers are considered delimiters.  Remove start and end
 * delimiters since they are not relevant to search.
 *
 * @param query The query string to clean.
 * @return The cleaned query. Empty string if all characters are cleaned out.
 */
public static String cleanStartAndEndOfSearchQuery(String query) {
    final int length = query.length();

    // Skip leading delimiters one code point at a time, so a supplementary character
    // is always examined (and skipped) as a whole.
    int start = 0;
    while (start < length) {
        final int codePoint = query.codePointAt(start);
        if (Character.isLetterOrDigit(codePoint)) {
            break;
        }
        start += Character.charCount(codePoint);
    }

    if (start == length) {
        // All characters are delimiters.
        return "";
    }

    // 'end' is an exclusive bound. Walking back with codePointBefore keeps both halves
    // of a surrogate pair together, so a trailing supplementary letter or digit is
    // retained whole rather than being cut after its high surrogate.
    // The loop always stops at or before 'start + 1' because the code point at 'start'
    // is known to be a letter or digit.
    int end = length;
    while (end > start) {
        final int codePoint = query.codePointBefore(end);
        if (Character.isLetterOrDigit(codePoint)) {
            break;
        }
        end -= Character.charCount(codePoint);
    }

    // The code point ending just before 'end' is a letter or digit.
    return query.substring(start, end);
}

From source file:Main.java

/**
 * This method ensures that the output String has only valid XML unicode
 * characters as specified by the XML 1.0 standard. For reference, please
 * see
 * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
 * standard</a>. If the input is null or empty, it is returned unchanged.
 *
 * @param text The String whose non-valid characters we want to remove.
 * @return The text String, stripped of non-valid characters.
 */
public static String cleanInvalidXmlChars(String text) {
    if (text == null || text.isEmpty()) {
        return text;
    }

    final StringBuilder kept = new StringBuilder();
    int pos = 0;
    while (pos < text.length()) {
        // A well-formed surrogate pair is read as one supplementary code point; an
        // unpaired surrogate is read as itself and falls outside every range below.
        final int cp = text.codePointAt(pos);

        final boolean allowed = cp == 0x9 || cp == 0xA || cp == 0xD
                || (cp >= 0x20 && cp <= 0xD7FF)
                || (cp >= 0xE000 && cp <= 0xFFFD)
                || (cp >= 0x10000 && cp <= 0x10FFFF);
        if (allowed) {
            kept.appendCodePoint(cp);
        }
        // Disallowed code points are dropped silently.

        pos += Character.charCount(cp);
    }

    return kept.toString();
}

From source file:Strings.java

/**
 * Returns {@code true} if the specified character sequence is a
 * valid sequence of UTF-16 {@code char} values.  A sequence is
 * legal if each high surrogate {@code char} value is followed by
 * a low surrogate value (as defined by {@link
 * Character#isHighSurrogate(char)} and {@link
 * Character#isLowSurrogate(char)}).
 *
 * <p>This method does <b>not</b> check to see if the sequence of
 * code points defined by the UTF-16 consists only of code points
 * defined in the latest Unicode standard.  The method only tests
 * the validity of the UTF-16 encoding sequence.
 * 
 * @param cs Character sequence to test.
 * @return {@code true} if the sequence of characters is
 * legal in UTF-16.
 */
public static boolean isLegalUtf16(CharSequence cs) {
    int pos = 0;
    while (pos < cs.length()) {
        final char lead = cs.charAt(pos);

        if (Character.isLowSurrogate(lead)) {
            // A low surrogate that was not consumed as the second half of a pair.
            return false;
        }

        if (!Character.isHighSurrogate(lead)) {
            // Ordinary char: nothing further to verify.
            pos++;
            continue;
        }

        // High surrogate: the very next char must exist and be a low surrogate.
        final int trailPos = pos + 1;
        if (trailPos >= cs.length()) {
            return false;
        }
        final char trail = cs.charAt(trailPos);
        if (!Character.isLowSurrogate(trail)) {
            return false;
        }
        if (!Character.isValidCodePoint(Character.toCodePoint(lead, trail))) {
            return false;
        }
        pos = trailPos + 1;
    }
    return true;
}

From source file:cherry.foundation.validator.CharTypeValidator.java

/**
 * Converts a string into the array of Unicode code points it contains, in order.
 *
 * <p>A well-formed surrogate pair yields one supplementary code point. An unpaired
 * surrogate yields its own value, which is how {@link String#codePointCount(int, int)}
 * counts it; this keeps the array exactly filled, with no unused trailing zero slot.
 *
 * @param acceptable the characters to convert
 * @return the code points of {@code acceptable}; never contains padding entries
 */
private int[] createAcceptable(String acceptable) {
    final int length = acceptable.length();
    int[] result = new int[acceptable.codePointCount(0, length)];
    int i = 0;
    int j = 0;
    while (i < length) {
        final int codePoint = acceptable.codePointAt(i);
        result[j++] = codePoint;
        // Advance by one char for BMP and unpaired surrogates, by two for a pair.
        i += Character.charCount(codePoint);
    }
    return result;
}

From source file:com.microsoft.windowsazure.mobileservices.zumoe2etestapp.framework.Util.java

/**
 * Builds a string of {@code size} random chars drawn from the range
 * {@code [minChar, maxChar)}, never containing a surrogate code unit.
 *
 * <p>Each candidate is {@code minChar + rndGen.nextInt(maxChar - minChar)} cast to
 * {@code char}; candidates that are high or low surrogates are discarded and redrawn.
 *
 * @param rndGen  the source of randomness
 * @param size    the number of chars to generate
 * @param minChar the smallest char value that may be produced (inclusive)
 * @param maxChar the upper bound of produced char values (exclusive)
 * @return the generated string
 */
public static String createSimpleRandomString(Random rndGen, int size, int minChar, int maxChar) {
    final StringBuilder out = new StringBuilder();
    for (int produced = 0; produced < size; produced++) {
        char candidate;
        while (true) {
            candidate = (char) (minChar + rndGen.nextInt(maxChar - minChar));
            // Keep drawing until the candidate is not half of a surrogate pair.
            if (!Character.isLowSurrogate(candidate) && !Character.isHighSurrogate(candidate)) {
                break;
            }
        }
        out.append(candidate);
    }
    return out.toString();
}