Example usage for java.lang.Character.isHighSurrogate

List of usage examples for java.lang.Character.isHighSurrogate

Introduction

On this page you can find example usages of java.lang.Character.isHighSurrogate.

Prototype

public static boolean isHighSurrogate(char ch) 

Source Link

Document

Determines if the given char value is a <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> Unicode high-surrogate code unit</a> (also known as <i>leading-surrogate code unit</i>).

Usage

From source file:org.eclipse.rdf4j.rio.ntriples.NTriplesParser.java

/**
 * Reads the next Unicode code point./*from   ww w.j  av a 2s .c  o m*/
 * 
 * @return the next Unicode code point, or -1 if the end of the stream has been reached.
 * @throws IOException
 */
protected int readCodePoint() throws IOException {
    int next = reader.read();
    if (Character.isHighSurrogate((char) next)) {
        next = Character.toCodePoint((char) next, (char) reader.read());
    }
    return next;
}

From source file:nl.tue.ddss.ifcrdf.model.IfcStepSerializer.java

/**
 * Prints the textual form of a single primitive value via {@code print(...)}.
 * <p>
 * The kind of value is determined by the {@code isLogical}/{@code isReal}/{@code isNumber}/
 * {@code isInteger}/{@code isBoolean}/{@code isString}/{@code isEnumeration} checks, tried in
 * that order; anything else is printed with {@code toString()}.
 * <p>
 * String values are wrapped in {@code SINGLE_QUOTE}; quotes and backslashes are doubled,
 * characters in the range 32..126 are printed as-is, and all other characters are written with
 * the {@code \X\}, {@code \S\}, {@code \X2\...\X0\} or {@code \X4\...\X0\} directives
 * (presumably the ISO 10303-21 string encodings - confirm against the serializer's spec).
 *
 * @param val the resource holding the primitive value
 * @throws IOException declared by this method; presumably raised by {@code print}
 * @throws SerializerException if a string contains an unpaired surrogate, or if the UTF-32
 *         charset is unavailable
 */
private void writePrimitive(Resource val) throws IOException, SerializerException {
    if (isLogical(val)) {
        // Nothing is printed when none of the three logical values is attached.
        if (val.hasProperty(HASLOGICAL, EXPRESS_TRUE)) {
            print(BOOLEAN_TRUE);
        } else if (val.hasProperty(HASLOGICAL, EXPRESS_FALSE)) {
            print(BOOLEAN_FALSE);
        } else if (val.hasProperty(HASLOGICAL, EXPRESS_UNDEFINED)) {
            print(BOOLEAN_UNDEFINED);
        }
    } else if (isReal(val) || isNumber(val)) {
        Double valDouble = val.getProperty(HASDOUBLE).getObject().asLiteral().getDouble();
        if ((valDouble).isInfinite() || ((valDouble).isNaN())) {
            LOGGER.info("Serializing infinite or NaN double as 0.0");
            print("0.0");
        } else {
            String string = valDouble.toString();
            if (string.endsWith(DOT_0)) {
                // Drops only the last character, so the decimal point itself is kept
                // (assuming DOT_0 is ".0", "1.0" is written as "1.").
                print(string.substring(0, string.length() - 1));
            } else {
                print(string);
            }
        }
    } else if (isInteger(val)) {
        Integer valInteger = val.getProperty(HASINTEGER).getObject().asLiteral().getInt();
        String string = valInteger.toString();
        // NOTE(review): Integer.toString() never contains a decimal point, so this trimming
        // branch looks unreachable if DOT_0 is ".0" - confirm before removing.
        if (string.endsWith(DOT_0)) {
            print(string.substring(0, string.length() - 2));
        } else {
            print(string);
        }
    } else if (isBoolean(val)) {
        if (val.hasLiteral(HASBOOLEAN, true)) {
            print(BOOLEAN_TRUE);
        } else if (val.hasLiteral(HASBOOLEAN, false)) {
            print(BOOLEAN_FALSE);
        }
    } else if (isString(val)) {
        print(SINGLE_QUOTE);
        String stringVal = val.getProperty(HASSTRING).getObject().asLiteral().getString();
        for (int i = 0; i < stringVal.length(); i++) {
            char c = stringVal.charAt(i);
            if (c == '\'') {
                // A quote inside the quoted string is escaped by doubling it.
                print("\'\'");
            } else if (c == '\\') {
                print("\\\\");
            } else if (c >= 32 && c <= 126) {
                // ISO 8859-1
                print("" + c);
            } else if (c < 255) {
                // ISO 10646 and ISO 8859-1 are the same < 255 , using
                // ISO_8859_1
                print("\\X\\" + new String(Hex.encodeHex(
                        Charsets.ISO_8859_1.encode(CharBuffer.wrap(new char[] { (char) c })).array()))
                                .toUpperCase());
            } else {
                if (useIso8859_1) {
                    // ISO 8859-1 with -128 offset
                    ByteBuffer encode = Charsets.ISO_8859_1.encode(new String(new char[] { (char) (c - 128) }));
                    print("\\S\\" + (char) encode.get());
                } else {
                    // The following code has not been tested (2012-04-25)
                    // Use UCS-2 or UCS-4

                    // TODO when multiple sequential characters should be
                    // encoded in UCS-2 or UCS-4, we don't really need to
                    // add all those \X0\ \X2\ and \X4\ chars
                    if (Character.isLowSurrogate(c)) {
                        throw new SerializerException("Unexpected low surrogate range char");
                    } else if (Character.isHighSurrogate(c)) {
                        // We need UCS-4, this is probably never happening
                        if (i + 1 < stringVal.length()) {
                            char low = stringVal.charAt(i + 1);
                            if (!Character.isLowSurrogate(low)) {
                                throw new SerializerException(
                                        "High surrogate char should be followed by char in low surrogate range");
                            }
                            try {
                                // Hex-encode only the bytes actually produced. array() exposes
                                // the whole backing array, which may be longer than the encoded
                                // content and would then add trailing zero bytes to the output.
                                ByteBuffer utf32 = Charset.forName("UTF-32")
                                        .encode(new String(new char[] { c, low }));
                                byte[] utf32Bytes = new byte[utf32.remaining()];
                                utf32.get(utf32Bytes);
                                print("\\X4\\" + new String(Hex.encodeHex(utf32Bytes)).toUpperCase()
                                        + "\\X0\\");
                            } catch (UnsupportedCharsetException e) {
                                throw new SerializerException(e);
                            }
                            // The low surrogate was consumed together with the high one.
                            i++;
                        } else {
                            throw new SerializerException(
                                    "High surrogate char should be followed by char in low surrogate range, but end of string reached");
                        }
                    } else {
                        // UCS-2 will do
                        print("\\X2\\" + new String(Hex
                                .encodeHex(Charsets.UTF_16BE.encode(CharBuffer.wrap(new char[] { c })).array()))
                                        .toUpperCase()
                                + "\\X0\\");
                    }
                }
            }
        }
        print(SINGLE_QUOTE);
    } else if (isEnumeration(val)) {
        String enumVal = val.getLocalName();
        print("." + enumVal + ".");
    } else {
        // NOTE(review): val has already been passed to every is...() check above, so the null
        // case only gets here if those helpers tolerate null - confirm.
        print(val == null ? "$" : val.toString());
    }
}

From source file:it.geosdi.era.server.servlet.HTTPProxy.java

/**
 * Filters a single code point: returns it unchanged when it is considered safe to emit, or
 * {@code -1} when it must be left out of the output.
 * <p>
 * Despite the name, nothing is escaped here; markup characters such as {@code '<'} are defined
 * characters and are returned as-is.
 * <ul>
 * <li>ASCII letters and digits are kept.</li>
 * <li>Whitespace is kept, except line feed, carriage return and tab, which are dropped.</li>
 * <li>ISO control characters that are not whitespace are dropped.</li>
 * <li>Surrogate code units (U+D800..U+DFFF) are dropped.</li>
 * <li>Any other defined code point is kept; undefined or invalid values are dropped.</li>
 * </ul>
 *
 * @param ch the code point to check
 * @return {@code ch} if it is kept, otherwise {@code -1}
 */
public static int escapeHtmlFull(int ch) {
    if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9') {
        // safe
        return ch;
    }
    if (Character.isWhitespace(ch)) {
        // Line breaks and tabs are removed; every other whitespace character is safe.
        return (ch == '\n' || ch == '\r' || ch == '\t') ? -1 : ch;
    }
    // The control and surrogate checks must run before isDefined(): both groups are "defined"
    // characters, so testing isDefined() first made these checks unreachable.
    if (Character.isISOControl(ch)) {
        // paranoid version: ISO control characters which are not whitespace are removed
        return -1;
    }
    // Compare on the int value; casting to char would truncate supplementary code points and
    // could misclassify them as surrogates.
    if (ch >= Character.MIN_SURROGATE && ch <= Character.MAX_SURROGATE) {
        // lone surrogate code unit, do not include in output
        return -1;
    }
    if (Character.isDefined(ch)) {
        // safe
        return ch;
    }
    return -1;
}

From source file:adept.io.Reader.java

/**
 * Removes surrogate pairs/* w w  w. jav  a  2  s . c o  m*/
 *
 * @param text
 * @return
 */
public static String checkSurrogates(String text) {
    StringBuffer buffer = new StringBuffer();
    char[] chars = text.toCharArray();
    for (Character c : chars) {
        if (Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
            System.out.println("WARNING -- invalid xml character " + c + " removed");
        } else {
            buffer.append(c);
        }
    }

    return buffer.toString();
}

From source file:com.sjdf.platform.xss.StringUtils.java

/**
 * <p>/*from  ww  w. j ava 2 s  . co  m*/
 * Search a CharSequence to find the first index of any character in the
 * given set of characters.
 * </p>
 * <p/>
 * <p>
 * A {@code null} String will return {@code -1}. A {@code null} or zero
 * length search array will return {@code -1}.
 * </p>
 * <p/>
 * <pre>
 * StringUtils.indexOfAny(null, *)                = -1
 * StringUtils.indexOfAny("", *)                  = -1
 * StringUtils.indexOfAny(*, null)                = -1
 * StringUtils.indexOfAny(*, [])                  = -1
 * StringUtils.indexOfAny("zzabyycdxx",['z','a']) = 0
 * StringUtils.indexOfAny("zzabyycdxx",['b','y']) = 3
 * StringUtils.indexOfAny("aba", ['z'])           = -1
 * </pre>
 *
 * @param cs          the CharSequence to check, may be null
 * @param searchChars the chars to search for, may be null
 * @return the index of any of the chars, -1 if no match or null input
 * @since 3.0 Changed signature from indexOfAny(String, char[]) to
 * indexOfAny(CharSequence, char...)
 */
public static int indexOfAny(CharSequence cs, char... searchChars) {
    if (isEmpty(cs) || searchChars == null) {
        return INDEX_NOT_FOUND;
    }
    int csLen = cs.length();
    int csLast = csLen - 1;
    int searchLen = searchChars.length;
    int searchLast = searchLen - 1;
    for (int i = 0; i < csLen; i++) {
        char ch = cs.charAt(i);
        for (int j = 0; j < searchLen; j++) {
            if (searchChars[j] == ch) {
                if (i < csLast && j < searchLast && Character.isHighSurrogate(ch)) {
                    // ch is a supplementary character
                    if (searchChars[j + 1] == cs.charAt(i + 1)) {
                        return i;
                    }
                } else {
                    return i;
                }
            }
        }
    }
    return INDEX_NOT_FOUND;
}

From source file:com.sjdf.platform.xss.StringUtils.java

/**
 * <p>/*  w ww.  j a v a  2 s.c  o  m*/
 * Checks if the CharSequence contains any character in the given set of
 * characters.
 * </p>
 * <p/>
 * <p>
 * A {@code null} CharSequence will return {@code false}. A {@code null} or
 * zero length search array will return {@code false}.
 * </p>
 * <p/>
 * <pre>
 * StringUtils.containsAny(null, *)                = false
 * StringUtils.containsAny("", *)                  = false
 * StringUtils.containsAny(*, null)                = false
 * StringUtils.containsAny(*, [])                  = false
 * StringUtils.containsAny("zzabyycdxx",['z','a']) = true
 * StringUtils.containsAny("zzabyycdxx",['b','y']) = true
 * StringUtils.containsAny("aba", ['z'])           = false
 * </pre>
 *
 * @param cs          the CharSequence to check, may be null
 * @param searchChars the chars to search for, may be null
 * @return the {@code true} if any of the chars are found, {@code false} if
 * no match or null input
 * @since 3.0 Changed signature from containsAny(String, char[]) to
 * containsAny(CharSequence, char...)
 */
public static boolean containsAny(CharSequence cs, char... searchChars) {
    if (isEmpty(cs) || searchChars == null) {
        return false;
    }
    int csLength = cs.length();
    int searchLength = searchChars.length;
    int csLast = csLength - 1;
    int searchLast = searchLength - 1;
    for (int i = 0; i < csLength; i++) {
        char ch = cs.charAt(i);
        for (int j = 0; j < searchLength; j++) {
            if (searchChars[j] == ch) {
                if (Character.isHighSurrogate(ch)) {
                    if (j == searchLast) {
                        // missing low surrogate, fine, like
                        // String.indexOf(String)
                        return true;
                    }
                    if (i < csLast && searchChars[j + 1] == cs.charAt(i + 1)) {
                        return true;
                    }
                } else {
                    // ch is in the Basic Multilingual Plane
                    return true;
                }
            }
        }
    }
    return false;
}

From source file:org.apache.commons.lang3.StringUtils.java

/**
 * <p>Returns the index of the first character in a CharSequence that is
 * one of the given search characters.</p>
 *
 * <p>{@code INDEX_NOT_FOUND} is returned when {@code isEmpty(cs)} or
 * {@code ArrayUtils.isEmpty(searchChars)} holds, or when no character matches.
 * A matching high surrogate that has a successor in both inputs is only accepted
 * when those successors are equal as well.</p>
 *
 * <pre>
 * StringUtils.indexOfAny(null, *)                = -1
 * StringUtils.indexOfAny("", *)                  = -1
 * StringUtils.indexOfAny(*, null)                = -1
 * StringUtils.indexOfAny(*, [])                  = -1
 * StringUtils.indexOfAny("zzabyycdxx",['z','a']) = 0
 * StringUtils.indexOfAny("zzabyycdxx",['b','y']) = 3
 * StringUtils.indexOfAny("aba", ['z'])           = -1
 * </pre>
 *
 * @param cs  the CharSequence to check, may be null
 * @param searchChars  the chars to search for, may be null
 * @return the index of the first matching char, -1 if no match or null input
 * @since 2.0
 * @since 3.0 Changed signature from indexOfAny(String, char[]) to indexOfAny(CharSequence, char...)
 */
public static int indexOfAny(CharSequence cs, char... searchChars) {
    if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) {
        return INDEX_NOT_FOUND;
    }
    final int lastInText = cs.length() - 1;
    final int lastInSearch = searchChars.length - 1;
    for (int textIdx = 0; textIdx <= lastInText; textIdx++) {
        final char candidate = cs.charAt(textIdx);
        for (int searchIdx = 0; searchIdx <= lastInSearch; searchIdx++) {
            if (searchChars[searchIdx] != candidate) {
                continue;
            }
            if (textIdx < lastInText && searchIdx < lastInSearch && Character.isHighSurrogate(candidate)) {
                // candidate starts a supplementary character: the next units must agree too
                if (searchChars[searchIdx + 1] != cs.charAt(textIdx + 1)) {
                    continue;
                }
            }
            return textIdx;
        }
    }
    return INDEX_NOT_FOUND;
}

From source file:com.sjdf.platform.xss.StringUtils.java

/**
 * Finds the position of the first character of {@code cs} that does not occur in
 * {@code searchChars}.
 *
 * <p>
 * {@code INDEX_NOT_FOUND} is returned when {@code isEmpty(cs)} holds, when {@code searchChars}
 * is {@code null}, or when every character of {@code cs} is covered by {@code searchChars}. A
 * high surrogate that has a successor both in {@code cs} and in {@code searchChars} is only
 * treated as covered when those successors are equal too.
 * </p>
 *
 * <pre>
 * StringUtils.indexOfAnyBut(null, *)                              = -1
 * StringUtils.indexOfAnyBut("", *)                                = -1
 * StringUtils.indexOfAnyBut(*, null)                              = -1
 * StringUtils.indexOfAnyBut(*, [])                                = -1
 * StringUtils.indexOfAnyBut("zzabyycdxx", new char[] {'z', 'a'} ) = 3
 * StringUtils.indexOfAnyBut("aba", new char[] {'z'} )             = 0
 * StringUtils.indexOfAnyBut("aba", new char[] {'a', 'b'} )        = -1
 * </pre>
 *
 * @param cs          the CharSequence to scan, may be null
 * @param searchChars the characters to skip over, may be null
 * @return the index of the first character not in {@code searchChars}, or
 * {@code INDEX_NOT_FOUND} if there is none
 * @since 3.0 Changed signature from indexOfAnyBut(String, char[]) to
 * indexOfAnyBut(CharSequence, char...)
 */
public static int indexOfAnyBut(CharSequence cs, char... searchChars) {
    if (isEmpty(cs) || searchChars == null) {
        return INDEX_NOT_FOUND;
    }
    final int textLength = cs.length();
    final int lastTextIndex = textLength - 1;
    final int candidateCount = searchChars.length;
    final int lastCandidateIndex = candidateCount - 1;
    for (int pos = 0; pos < textLength; pos++) {
        final char current = cs.charAt(pos);
        boolean covered = false;
        // Stop scanning the search characters as soon as one of them covers the current one.
        for (int k = 0; k < candidateCount && !covered; k++) {
            if (searchChars[k] == current) {
                final boolean pairCheckNeeded = pos < lastTextIndex && k < lastCandidateIndex
                        && Character.isHighSurrogate(current);
                covered = !pairCheckNeeded || searchChars[k + 1] == cs.charAt(pos + 1);
            }
        }
        if (!covered) {
            return pos;
        }
    }
    return INDEX_NOT_FOUND;
}

From source file:org.apache.commons.lang3.StringUtils.java

/**
 * <p>Checks if the CharSequence contains any character in the given
 * set of characters.</p>/*ww  w  .j a  v a2s  .  com*/
 *
 * <p>A {@code null} CharSequence will return {@code false}.
 * A {@code null} or zero length search array will return {@code false}.</p>
 *
 * <pre>
 * StringUtils.containsAny(null, *)                = false
 * StringUtils.containsAny("", *)                  = false
 * StringUtils.containsAny(*, null)                = false
 * StringUtils.containsAny(*, [])                  = false
 * StringUtils.containsAny("zzabyycdxx",['z','a']) = true
 * StringUtils.containsAny("zzabyycdxx",['b','y']) = true
 * StringUtils.containsAny("aba", ['z'])           = false
 * </pre>
 *
 * @param cs  the CharSequence to check, may be null
 * @param searchChars  the chars to search for, may be null
 * @return the {@code true} if any of the chars are found,
 * {@code false} if no match or null input
 * @since 2.4
 * @since 3.0 Changed signature from containsAny(String, char[]) to containsAny(CharSequence, char...)
 */
public static boolean containsAny(CharSequence cs, char... searchChars) {
    if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) {
        return false;
    }
    int csLength = cs.length();
    int searchLength = searchChars.length;
    int csLast = csLength - 1;
    int searchLast = searchLength - 1;
    for (int i = 0; i < csLength; i++) {
        char ch = cs.charAt(i);
        for (int j = 0; j < searchLength; j++) {
            if (searchChars[j] == ch) {
                if (Character.isHighSurrogate(ch)) {
                    if (j == searchLast) {
                        // missing low surrogate, fine, like String.indexOf(String)
                        return true;
                    }
                    if (i < csLast && searchChars[j + 1] == cs.charAt(i + 1)) {
                        return true;
                    }
                } else {
                    // ch is in the Basic Multilingual Plane
                    return true;
                }
            }
        }
    }
    return false;
}

From source file:org.apache.commons.lang3.StringUtils.java

/**
 * <p>Returns the index of the first character in a CharSequence that is
 * not one of the given search characters.</p>
 *
 * <p>{@code INDEX_NOT_FOUND} is returned when {@code isEmpty(cs)} or
 * {@code ArrayUtils.isEmpty(searchChars)} holds, or when every character of
 * {@code cs} is among the search characters. A high surrogate that has a successor
 * in both inputs is only treated as present when those successors are equal as well.</p>
 *
 * <pre>
 * StringUtils.indexOfAnyBut(null, *)                              = -1
 * StringUtils.indexOfAnyBut("", *)                                = -1
 * StringUtils.indexOfAnyBut(*, null)                              = -1
 * StringUtils.indexOfAnyBut(*, [])                                = -1
 * StringUtils.indexOfAnyBut("zzabyycdxx", new char[] {'z', 'a'} ) = 3
 * StringUtils.indexOfAnyBut("aba", new char[] {'z'} )             = 0
 * StringUtils.indexOfAnyBut("aba", new char[] {'a', 'b'} )        = -1
 * </pre>
 *
 * @param cs  the CharSequence to check, may be null
 * @param searchChars  the chars to skip over, may be null
 * @return the index of the first char not in {@code searchChars}, -1 if there is none or on null input
 * @since 2.0
 * @since 3.0 Changed signature from indexOfAnyBut(String, char[]) to indexOfAnyBut(CharSequence, char...)
 */
public static int indexOfAnyBut(CharSequence cs, char... searchChars) {
    if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) {
        return INDEX_NOT_FOUND;
    }
    final int lastInText = cs.length() - 1;
    final int lastInSearch = searchChars.length - 1;
    for (int textIdx = 0; textIdx <= lastInText; textIdx++) {
        final char candidate = cs.charAt(textIdx);
        boolean present = false;
        int searchIdx = 0;
        // Walk the search characters until one of them accounts for the candidate.
        while (!present && searchIdx <= lastInSearch) {
            if (searchChars[searchIdx] == candidate) {
                if (textIdx < lastInText && searchIdx < lastInSearch && Character.isHighSurrogate(candidate)) {
                    present = searchChars[searchIdx + 1] == cs.charAt(textIdx + 1);
                } else {
                    present = true;
                }
            }
            searchIdx++;
        }
        if (!present) {
            return textIdx;
        }
    }
    return INDEX_NOT_FOUND;
}