Example usage for java.lang String codePointCount

List of usage examples for java.lang String codePointCount


On this page you can find example usages of java.lang.String.codePointCount.


public int codePointCount(int beginIndex, int endIndex) 

Source Link


Returns the number of Unicode code points in the specified text range of this String.


From source file:Main.java

public static void main(String[] args) {

    String str = "java2s.com";
    System.out.println("String = " + str);

    // codepoint from index 1 to index 8
    int retval = str.codePointCount(1, 8);

    // prints character from index 1 to index 8
    System.out.println("Codepoint count = " + retval);

From source file:Main.java

/**
 * Builds a filtered copy of {@code str} by walking it one Unicode code point at a time.
 *
 * <p>Inputs for which {@code TextUtils.isEmpty} is true, and inputs whose code point count
 * equals their {@code length()} (i.e. no surrogate pairs), are returned unchanged.
 *
 * <p>NOTE(review): this excerpt is truncated. The body of the supplementary-code-point branch,
 * the statement that appends to {@code sb}, and several closing braces are not visible, so the
 * exact filtering rule cannot be confirmed from here. The name suggests supplementary
 * (4-byte) characters are dropped; verify against the complete source.
 *
 * @param str the text to filter
 * @return {@code str} itself when no filtering is needed, otherwise the contents of the builder
 */
public static final String filterUCS4(String str) {
    if (TextUtils.isEmpty(str)) {
        return str;
    }

    // fast path: as many code points as chars means there is no surrogate pair to inspect
    if (str.codePointCount(0, str.length()) == str.length()) {
        return str;

    StringBuilder sb = new StringBuilder();

    int index = 0;
    while (index < str.length()) {
        int codePoint = str.codePointAt(index);
        // step over one or two chars, depending on how many this code point occupies
        index += Character.charCount(codePoint);
        if (Character.isSupplementaryCodePoint(codePoint)) {


    return sb.toString();

From source file:SpinnerTest.java

private static int[] toCodePointArray(String str) {
    int[] codePoints = new int[str.codePointCount(0, str.length())];
    for (int i = 0, j = 0; i < str.length(); i++, j++) {
        int cp = str.codePointAt(i);
        if (Character.isSupplementaryCodePoint(cp))
            i++;//from  w  ww .j a v a2  s .c  o m
        codePoints[j] = cp;
    return codePoints;

From source file:com.careerly.utils.TextUtils.java

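/**
 * Removes double-byte characters from the given text (original description was lost to a
 * character-encoding error).
 *
 * @param text the text to filter
 * @return the filtered text
 */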
// Returns StringUtils.EMPTY for blank input; otherwise inspects the text code point by code
// point and returns whatever was accumulated in the builder.
// NOTE(review): this excerpt is truncated. The body of the chars.length == 1 branch and the
// closing braces are not visible; presumably single-char code points are appended. Confirm
// against the complete source.
public static String removeDoubleByte(String text) {

    if (StringUtils.isBlank(text)) {
        return StringUtils.EMPTY;

    StringBuilder stringBuilder = new StringBuilder();
    // NOTE(review): i counts code points here, but codePointAt(i) below takes a char index.
    // The two diverge once the text contains a surrogate pair, so later characters would be
    // read from the wrong position and the tail of the text never visited.
    for (int i = 0; i < text.codePointCount(0, text.length()); i++) {
        char[] chars = Character.toChars(text.codePointAt(i));
        // toChars yields one char for BMP code points and two for supplementary ones
        if (chars.length == 1) {
    return stringBuilder.toString();

From source file:cherry.foundation.validator.CharTypeValidator.java

private int[] createAcceptable(String acceptable) {
    int[] result = new int[acceptable.codePointCount(0, acceptable.length())];
    for (int i = 0, j = 0; i < acceptable.length(); i++) {
        if (Character.isLowSurrogate(acceptable.charAt(i))) {
        }/*from w  ww .  j  a v  a2 s  .  com*/
        result[j++] = Character.codePointAt(acceptable, i);
    return result;

From source file:com.github.fge.jsonschema.keyword.validator.common.MaxLengthValidator.java

public void validate(final Processor<FullData, FullData> processor, final ProcessingReport report,
        final MessageBundle bundle, final FullData data) throws ProcessingException {
    final String value = data.getInstance().getNode().textValue();
    final int size = value.codePointCount(0, value.length());

    if (size > intValue)
        report.error(newMsg(data, bundle, "err.common.maxLength.tooLong").putArgument("value", value)
                .putArgument("found", size).putArgument(keyword, intValue));

From source file:com.github.fge.jsonschema.keyword.validator.common.MinLengthValidator.java

public void validate(final Processor<FullData, FullData> processor, final ProcessingReport report,
        final MessageBundle bundle, final FullData data) throws ProcessingException {
    final String value = data.getInstance().getNode().textValue();
    final int size = value.codePointCount(0, value.length());

    if (size < intValue)
        report.error(newMsg(data, bundle, "err.common.minLength.tooShort").putArgument("value", value)
                .putArgument("found", size).putArgument(keyword, intValue));

From source file:com.ebuddy.cassandra.cql.dao.CqlStructuredDataSupport.java

private String getFinishString(String start) {
    int startCodePointCount = start.codePointCount(0, start.length());
    int finishCodePointCount = startCodePointCount + 1;
    int[] finishCodePoints = new int[finishCodePointCount];
    for (int i = 0; i < startCodePointCount; i++) {
        finishCodePoints[i] = start.codePointAt(i);
    }/*from   w  w  w  .  j  ava  2  s . com*/
    finishCodePoints[finishCodePointCount - 1] = MAX_CODE_POINT;
    return new String(finishCodePoints, 0, finishCodePointCount);

From source file:StreamFlusher.java

/**
 * Handles a testTokensTextFile statement: evaluates its eleven child arguments, reads tokens
 * one per line from a text file, applies the test Fst to each token, and writes the results
 * as XML to an output file.
 *
 * <p>Child 0 is the Fst under test. Children 1-10 are each evaluated to an Fst, popped from
 * the stack, and converted to a single string with {@code lib.GetSingleString}; an empty
 * string triggers a {@code KleeneArgException}.
 *
 * <p>NOTE(review): this excerpt is truncated; the closing braces of most inner blocks are not
 * visible, so the exact nesting below is inferred from indentation.
 * NOTE(review): no close() or flush() on {@code in} or {@code out} is visible in this
 * excerpt; confirm that both streams are closed in the complete source.
 *
 * @param node the statement node whose children supply the arguments
 * @param data visitor pass-through data, handed to each child and returned unchanged
 * @return the {@code data} argument
 */
public Object visit(ASTtestTokensTextFile_statement node, Object data) {
    // Total: 11 regexp arguments, syntactically constrained
    // 0.  the Fst to test

    node.jjtGetChild(0).jjtAccept(this, data);
    Fst testFst = (Fst) (stack.pop());

    // 1.  path of the input file

    node.jjtGetChild(1).jjtAccept(this, data);
    Fst tempFst = (Fst) (stack.pop());

    String inputFilePath = lib.GetSingleString(tempFst,
            "Second arg to testTokensTextFile must denote a language of exactly one string.");

    if (inputFilePath.length() == 0) {
        throw new KleeneArgException(
                "Second arg to testTokensTextFile must denote a language of exactly one non-empty string");

    // 2.  encoding of the input file

    node.jjtGetChild(2).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String inputFileEncoding = lib.GetSingleString(tempFst,
            "Third arg to testTokensTextFile must denote a language of exactly one string.");

    if (inputFileEncoding.length() == 0) {
        throw new KleeneArgException("Third arg to testTokensTextFile must denote one non-empty string");

    // 3.  path of the output file

    node.jjtGetChild(3).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputFilePath = lib.GetSingleString(tempFst,
            "Fourth arg to testTokensTextFile must denote a language of exactly one string.");

    if (outputFilePath.length() == 0) {
        throw new KleeneArgException("Fourth arg to testTokensTextFile must denote one non-empty string");

    // 4.  encoding of the output file

    node.jjtGetChild(4).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputFileEncoding = lib.GetSingleString(tempFst,
            "Fifth arg to testTokensTextFile must denote a language of exactly one string.");

    if (outputFileEncoding.length() == 0) {
        throw new KleeneArgException("Fifth arg to testTokensTextFile must denote one non-empty string");

    //          And for the XML output

    // 5.  name of the root element

    node.jjtGetChild(5).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String rootElmtName = lib.GetSingleString(tempFst,
            "Sixth arg to testTokensTextFile must denote a language of exactly one string.");

    if (rootElmtName.length() == 0) {
        throw new KleeneArgException("Sixth arg to testTokensTextFile must denote one non-empty string");

    // 6.  name of the token element

    node.jjtGetChild(6).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String tokenElmtName = lib.GetSingleString(tempFst,
            "Seventh arg to testTokensTextFile must denote a language of exactly one string.");

    if (tokenElmtName.length() == 0) {
        throw new KleeneArgException("Seventh arg to testTokensTextFile must denote one non-empty string");

    // 7.  name of the input element

    node.jjtGetChild(7).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String inputElmtName = lib.GetSingleString(tempFst,
            "Eighth arg to testTokensTextFile must denote a language of exactly one string.");

    if (inputElmtName.length() == 0) {
        throw new KleeneArgException("Eighth arg to testTokensTextFile must denote one non-empty string");

    // 8.  name of the outputs element (N.B. plural)

    node.jjtGetChild(8).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputsElmtName = lib.GetSingleString(tempFst,
            "Ninth arg to testTokensTextFile must denote a language of exactly one string.");

    if (outputsElmtName.length() == 0) {
        throw new KleeneArgException("Ninth arg to testTokensTextFile must denote one non-empty string");

    // 9.  name of the output element  (N.B. singular)

    node.jjtGetChild(9).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputElmtName = lib.GetSingleString(tempFst,
            "Tenth arg to testTokensTextFile must denote a language of exactly one string.");

    if (outputElmtName.length() == 0) {
        throw new KleeneArgException("Tenth arg to testTokensTextFile must denote one non-empty string");

    // 10.  name of the weight attr in the output elmt

    node.jjtGetChild(10).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String weightAttrName = lib.GetSingleString(tempFst,
            "Eleventh arg to testTokensTextFile must denote a language of exactly one string.");

    if (weightAttrName.length() == 0) {
        throw new KleeneArgException("Eleventh arg to testTokensTextFile must denote one non-empty string");

    String fullpath = getFullpath(inputFilePath);

    // build a transliterator from the test Fst's sigma, used below to tokenize each input line
    TranslitTokenizerBuilder ttb = new TranslitTokenizerBuilder(symmap, testFst.getSigma(), lib);
    lib.Iterate4mcs(testFst, ttb, symmap.getStartPuaCpv());
    Transliterator trInput = ttb.getTranslitTokenizer(true); // true for input side

    try {
        BufferedReader in = null;
        if (inputFileEncoding.equals("default") || inputFileEncoding.equals("-")) {
            // get the current default encoding of the operating system
            inputFileEncoding = System.getProperty("file.encoding");
        // UTF-8 input is wrapped in UTF8BOMStripperInputStream; other encodings are read directly
        if (inputFileEncoding.equals("UTF-8")) {
            in = new BufferedReader(new InputStreamReader(
                    new UTF8BOMStripperInputStream(new FileInputStream(fullpath)), inputFileEncoding));
        } else {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(fullpath), inputFileEncoding));

        // now try to open the output file 
        fullpath = getFullpath(outputFilePath);

        BufferedWriter out = null;
        if (outputFileEncoding.equals("default") || outputFileEncoding.equals("-")) {
            // get the current default encoding of the operating system
            outputFileEncoding = System.getProperty("file.encoding");
        out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fullpath), outputFileEncoding));

        out.write("<?xml version=\"1.0\" encoding=\"" + outputFileEncoding + "\"?>");
        out.write("<" + rootElmtName + ">");

        // read the input string/words, one per line, from the input file, write output to the output file

        XMLOutputLister xmlOutputLister = new XMLOutputLister(symmap, out, outputElmtName, weightAttrName);

        String token; // one per line in the input file

        // NOTE(review): modifiedTestFst is declared but never used in the visible code
        Fst modifiedTestFst;

        while ((token = in.readLine()) != null) {
            String cpvstr = trInput.transliterate(token);
            // converts cpvstr to a sequence of code pt values, and
            // each one could fill one or two 16-bit code units;
            // this is where multichar symbols are reduced to their
            // code point values

            // get length in Unicode characters (not code units)
            int inputlen = cpvstr.codePointCount(0, cpvstr.length());
            // allocate an int array to hold those code-point values,
            //    one int per code point value
            int[] cpvArray = new int[inputlen];

            // UCharacterIterator knows how to iterate over a String and
            // return the Unicode-Character code point values
            UCharacterIterator iter = UCharacterIterator.getInstance(cpvstr);

            // we need to build each input string into a one-path Fst

            // store the codepoints in the int array (which will be passed to
            //    oneStringNativeFst(), a native method
            int codepoint;
            int index = 0;
            while ((codepoint = iter.nextCodePoint()) != UCharacterIterator.DONE) {
                // any multichar symbols will already be in the
                // symmap, or they wouldn't have been identified;
                // but BMP characters may not yet be in the symmap
                if (Character.charCount(codepoint) == 1) {
                    symmap.putsym(String.valueOf((char) codepoint));
                cpvArray[index++] = codepoint;

            // 0 arg means generate
            Fst compFst = lib.ApplyToOneString(testFst, cpvArray, 0);

            // prepare to list the output strings (and their weights)
            long stringCount = lib.NumPaths(compFst);

            // XML output for this input token

            out.write("  <" + tokenElmtName + ">");

            // be careful to escape XML special chars in line; 
            // N.B. escapeXml also escapes non-ASCII Unicode letters
            //out.write("    <" + inputElmtName + ">" + 
            //  StringEscapeUtils.escapeXml(token) + "</" + 
            //  inputElmtName + ">") ;

            out.write("    <" + inputElmtName + ">" + EscapeXML.escapeXML(token) + "</" + inputElmtName + ">");

            out.write("    <" + outputsElmtName + ">");

            if (stringCount == 0) {
                // output nothing
            } else if (stringCount == -1) {
                // means that the composedFstPtr has loops, 
                //   denotes an infinite language
                out.write("      <infinite/>");
            } else {
                // native function listAllStrings will find all 
                //      strings in the Fst
                // and make callbacks to xmlOutputLister, 
                //      which knows how to output them as XML elements
                lib.ListAllStrings(compFst, 1, xmlOutputLister);

            out.write("    </" + outputsElmtName + ">");

            out.write("  </" + tokenElmtName + ">");

        out.write("</" + rootElmtName + ">");
    } catch (Exception e) {
        // NOTE(review): the exception is discarded; only a fixed message is printed, so the
        // cause (missing file, bad encoding, write failure) is lost to the caller
        System.out.println("Exception found while testing input from file.");
    return data;

From source file:StreamFlusher.java

/**
 * Handles a testTokensXMLFile statement: evaluates its eleven child arguments, reads input
 * tokens from the named elements of an XML file, applies the test Fst to each token, and
 * writes the results as XML to an output file.
 *
 * <p>Child 0 is the Fst under test. Children 1-10 are each evaluated to an Fst, popped from
 * the stack, and converted to a single string with {@code lib.GetSingleString}; an empty
 * string triggers a {@code KleeneArgException}.
 *
 * <p>NOTE(review): this excerpt is truncated; the closing braces of most inner blocks are not
 * visible, so the exact nesting below is inferred from indentation.
 * NOTE(review): no close() or flush() on {@code out} is visible in this excerpt; confirm
 * that the writer is closed in the complete source.
 *
 * @param node the statement node whose children supply the arguments
 * @param data visitor pass-through data, handed to each child and returned unchanged
 * @return the {@code data} argument
 */
public Object visit(ASTtestTokensXMLFile_statement node, Object data) {
    // Total: 11 regexp arguments, syntactically constrained
    // 0.  the Fst to test

    node.jjtGetChild(0).jjtAccept(this, data);
    Fst testFst = (Fst) (stack.pop());

    // 1.  path of the input file

    node.jjtGetChild(1).jjtAccept(this, data);
    Fst tempFst = (Fst) (stack.pop());

    String inputFilePath = lib.GetSingleString(tempFst,
            "Second arg to testTokensXMLFile must denote a language of exactly one string.");

    if (inputFilePath.length() == 0) {
        throw new KleeneArgException(
                "Second arg to testTokensXMLFile must denote exactly one non-empty string");

    // 2. argument supplying the name of the element holding
    //      the input strings, by default, "input", i.e.
    //      <input>...</input>
    // N.B. in testTokensTextFile, this argument specifies the
    // encoding of the input file, which is not needed for XML,
    // which either has an explicit "encoding" specification, or
    // is UTF-8 by default

    node.jjtGetChild(2).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String srcInputElmtName = lib.GetSingleString(tempFst,
            "Third arg to testTokensXMLFile must denote a language of exactly one string.");

    if (srcInputElmtName.length() == 0) {
        throw new KleeneArgException("Third arg to testTokensXMLFile must denote one non-empty string");

    // 3.  path of the output file

    node.jjtGetChild(3).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputFilePath = lib.GetSingleString(tempFst,
            "Fourth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (outputFilePath.length() == 0) {
        throw new KleeneArgException("Fourth arg to testTokensXMLFile must denote one non-empty string");

    // 4.  encoding of the output file

    node.jjtGetChild(4).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputFileEncoding = lib.GetSingleString(tempFst,
            "Fifth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (outputFileEncoding.length() == 0) {
        throw new KleeneArgException("Fifth arg to testTokensXMLFile must denote one non-empty string");

    //          And for the XML output

    // 5.  name of the root element

    node.jjtGetChild(5).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String rootElmtName = lib.GetSingleString(tempFst,
            "Sixth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (rootElmtName.length() == 0) {
        throw new KleeneArgException("Sixth arg to testTokensXMLFile must denote one non-empty string");

    // 6.  name of the token element

    node.jjtGetChild(6).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String tokenElmtName = lib.GetSingleString(tempFst,
            "Seventh arg to testTokensXMLFile must denote a language of exactly one string.");

    if (tokenElmtName.length() == 0) {
        throw new KleeneArgException("Seventh arg to testTokensXMLFile must denote one non-empty string");

    // 7.  name of the input element

    node.jjtGetChild(7).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String inputElmtName = lib.GetSingleString(tempFst,
            "Eighth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (inputElmtName.length() == 0) {
        throw new KleeneArgException("Eighth arg to testTokensXMLFile must denote one non-empty string");

    // 8.  name of the outputs element (N.B. plural)

    node.jjtGetChild(8).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputsElmtName = lib.GetSingleString(tempFst,
            "Ninth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (outputsElmtName.length() == 0) {
        throw new KleeneArgException("Ninth arg to testTokensXMLFile must denote one non-empty string");

    // 9.  name of the output element  (N.B. singular)

    node.jjtGetChild(9).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputElmtName = lib.GetSingleString(tempFst,
            "Tenth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (outputElmtName.length() == 0) {
        throw new KleeneArgException("Tenth arg to testTokensXMLFile must denote one non-empty string");

    // 10.  name of the weight attr in the output elmt

    node.jjtGetChild(10).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String weightAttrName = lib.GetSingleString(tempFst,
            "Eleventh arg to testTokensXMLFile must denote a language of exactly one string.");

    if (weightAttrName.length() == 0) {
        throw new KleeneArgException("Eleventh arg to testTokensXMLFile must denote one non-empty string");

    String fullpath = getFullpath(inputFilePath);

    // build a transliterator from the test Fst's sigma, used below to tokenize each input string
    TranslitTokenizerBuilder ttb = new TranslitTokenizerBuilder(symmap, testFst.getSigma(), lib);
    lib.Iterate4mcs(testFst, ttb, symmap.getStartPuaCpv());
    Transliterator trInput = ttb.getTranslitTokenizer(true); // true for input side

    try {
        // try to read/parse the XML input file

        Document doc = null;

        doc = parseXML(fullpath); // dom4j

        // Read all the <input></input> elements into a list
        // N.B. by default, the name of the element is "input",
        // but in general it is specified in arg srcInputElmtName
        List list = doc.selectNodes("//" + srcInputElmtName);

        // now try to open the output file 

        fullpath = getFullpath(outputFilePath);

        BufferedWriter out = null;
        if (outputFileEncoding.equals("default") || outputFileEncoding.equals("-")) {
            // get the current default encoding of the operating system
            outputFileEncoding = System.getProperty("file.encoding");
        out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fullpath), outputFileEncoding));

        out.write("<?xml version=\"1.0\" encoding=\"" + outputFileEncoding + "\"?>");
        out.write("<" + rootElmtName + ">");

        XMLOutputLister xmlOutputLister = new XMLOutputLister(symmap, out, outputElmtName, weightAttrName);

        // Loop through the <input></input> elements, extracting and
        //   running the text string from each one; write output to
        //   the output file

        String token;

        // NOTE(review): modifiedTestFst is declared but never used in the visible code
        Fst modifiedTestFst;

        for (Iterator it = list.iterator(); it.hasNext();) {
            Element inputElmt = (Element) it.next();
            token = inputElmt.getText();

            String cpvstr = trInput.transliterate(token);
            // converts cpvstr to a sequence of code pt values, and
            // each one could fill one or two 16-bit code units;
            // this is where multichar symbols are reduced to their
            // code point values

            // get length in Unicode characters (not code units)
            int inputlen = cpvstr.codePointCount(0, cpvstr.length());
            // allocate an int array to hold those code-point values,
            //    one int per code point value
            int[] cpvArray = new int[inputlen];

            // UCharacterIterator knows how to iterate over a 
            //   String and
            // return the Unicode-Character code point values
            UCharacterIterator iter = UCharacterIterator.getInstance(cpvstr);

            // we need to build each input string into a one-path Fst

            // store the codepoints in the int array 
            //      (which will be passed to
            //    oneStringNativeFst(), a native method
            int codepoint;
            int index = 0;
            while ((codepoint = iter.nextCodePoint()) != UCharacterIterator.DONE) {
                // any multichar symbols will already be in the
                // symmap, or they wouldn't have been identified;
                // but BMP characters may not yet be in the symmap
                if (Character.charCount(codepoint) == 1) {
                    symmap.putsym(String.valueOf((char) codepoint));
                cpvArray[index++] = codepoint;

            // 0 arg for generation, apply the inputFst to the "input"
            // side of testFst
            Fst compFst = lib.ApplyToOneString(testFst, cpvArray, 0);

            // prepare to list the output strings (and their weights)
            long stringCount = lib.NumPaths(compFst);

            // XML output for this input token

            out.write("  <" + tokenElmtName + ">");

            // be careful to escape XML special chars in line; 
            // N.B. escapeXml also escapes non-ASCII Unicode letters
            //out.write("    <" + inputElmtName + ">" + 
            //          StringEscapeUtils.escapeXml(token) + 
            //          "</" + inputElmtName + ">") ;

            out.write("    <" + inputElmtName + ">" + EscapeXML.escapeXML(token) + "</" + inputElmtName + ">");

            out.write("    <" + outputsElmtName + ">");

            if (stringCount == 0) {
                // output nothing
            } else if (stringCount == -1) {
                // means that the compFstPtr has loops, 
                //      denotes an infinite language
                out.write("      <infinite/>");
            } else {
                // native function listAllStrings will find all 
                //      strings in the Fst
                // and make callbacks to xmlOutputLister, 
                //      which knows how to output
                // them as XML elements
                lib.ListAllStrings(compFst, 1, xmlOutputLister);

            out.write("    </" + outputsElmtName + ">");

            out.write("  </" + tokenElmtName + ">");

        out.write("</" + rootElmtName + ">");

    } catch (Exception e) {
        // KRB:  review this
        // NOTE(review): the exception is discarded; only a fixed message is printed, so the
        // cause (parse error, bad encoding, write failure) is lost to the caller
        System.out.println("Exception found while testing input from file.");
    return data;