Compares two strings in Unicode code point order. - Java java.lang

Java examples for java.lang:String Unicode

Description

Compares two strings in Unicode code point order.

Demo Code

/* From www.java2s.com
 Written in 2013 by Peter O.
 Any copyright is dedicated to the Public Domain.
 http://creativecommons.org/publicdomain/zero/1.0/
 If you like this, you should donate to Peter O.
 at: http://upokecenter.dreamhosters.com/articles/donate-now-2/
 */
//package com.java2s;

public class Main {
    /**
     * Compares two strings in Unicode code point order.
     *
     * <p>A high surrogate (U+D800..U+DBFF) immediately followed by a low
     * surrogate (U+DC00..U+DFFF) is compared as the single supplementary code
     * point the pair encodes. Any other surrogate code unit is unpaired and is
     * compared as an individual code point with its own value; in particular,
     * a low surrogate never starts a pair, even when another low surrogate
     * follows it.</p>
     *
     * @param strA The first string. Can be null.
     * @param strB The second string. Can be null.
     * @return A value indicating which string is "less" or "greater".
     * 0: both strings are equal or both are null. Less than 0: strA is null
     * and strB isn't; or the first code point that's different is less in
     * strA than in strB; or strB starts with strA and is longer than strA.
     * Greater than 0: strB is null and strA isn't; or the first code point
     * that's different is greater in strA than in strB; or strA starts with
     * strB and is longer than strB. When the strings differ at a code point,
     * the result is the difference of the two code point values.
     */
    public static int CodePointCompare(String strA, String strB) {
        if (strA == null) {
            return (strB == null) ? 0 : -1;
        }
        if (strB == null) {
            return 1;
        }
        int len = Math.min(strA.length(), strB.length());
        int i = 0;
        while (i < len) {
            // codePointAt joins a high surrogate with a following low
            // surrogate and returns every other code unit unchanged, which
            // is exactly the "unpaired surrogates are individual code
            // points" rule this method documents.
            int cpA = strA.codePointAt(i);
            int cpB = strB.codePointAt(i);
            if (cpA != cpB) {
                // Both values are in 0..0x10FFFF, so the subtraction
                // cannot overflow.
                return cpA - cpB;
            }
            // The strings agree up to and including this code point, so
            // both indices advance by the same number of code units (1 or 2)
            // and stay on a code point boundary in both strings.
            i += Character.charCount(cpA);
        }
        // One string is a prefix of the other (or they are equal): the
        // shorter one sorts first.
        return (strA.length() == strB.length()) ? 0
                : ((strA.length() < strB.length()) ? -1 : 1);
    }
}

Related Tutorials