Android String Sub String Get unicodePreservingSubstring(String str, int begin, int end)

Here you can find the source of unicodePreservingSubstring(String str, int begin, int end)

Description

Returns a substring of str that respects Unicode character boundaries.

License

Apache License

Parameter

Parameter Description
str the original String
begin the beginning index, inclusive
end the ending index, exclusive

Exception

Parameter Description
IndexOutOfBoundsException if begin is negative, or end is larger than the length of str, or begin is larger than end

Return

the specified substring, possibly adjusted in order to not split unicode surrogate pairs

Declaration

public static String unicodePreservingSubstring(String str, int begin,
        int end) 

Method Source Code

//package com.java2s;
/*
 * Copyright (c) 2000, Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class Main {
    /**
     * Returns a substring of {@code str} that respects Unicode character
     * boundaries.
     *
     * <p>The string will never be split between a [high, low] surrogate pair,
     * as defined by {@link Character#isHighSurrogate(char)} and
     * {@link Character#isLowSurrogate(char)}.
     *
     * <p>If {@code begin} or {@code end} points at the low surrogate of a
     * well-formed surrogate pair, that index is offset by -1 so that it points
     * at the start of the pair instead.
     *
     * <p>This behavior guarantees that
     * {@code str.equals(unicodePreservingSubstring(str, 0, n) +
     *     unicodePreservingSubstring(str, n, str.length()))} is
     * true for all {@code n} between 0 and {@code str.length()}, inclusive.
     *
     * <p>This means that unlike {@link String#substring(int, int)}, the length
     * of the returned substring may not necessarily be equivalent to
     * {@code end - begin}.
     *
     * @param str the original String
     * @param begin the beginning index, inclusive
     * @param end the ending index, exclusive
     * @return the specified substring, possibly adjusted in order to not
     *   split unicode surrogate pairs
     * @throws IndexOutOfBoundsException if {@code begin} is negative,
     *   or {@code end} is larger than the length of {@code str}, or
     *   {@code begin} is larger than {@code end}
     */
    public static String unicodePreservingSubstring(String str, int begin,
            int end) {
        // The ordering check must use the caller's indices: normalization can
        // move begin down by one, which would otherwise let begin == end + 1
        // slip through as an empty substring instead of being rejected.
        if (begin > end) {
            throw new StringIndexOutOfBoundsException("begin " + begin
                    + ", end " + end + ", length " + str.length());
        }
        // Remaining range errors (negative begin, end past the length) are
        // reported by substring itself, since out-of-range indices are left
        // untouched by unicodePreservingIndex.
        return str.substring(unicodePreservingIndex(str, begin),
                unicodePreservingIndex(str, end));
    }

    /**
     * Returns the Unicode-preserving substring of {@code str} that starts at
     * {@code begin} and runs to the end of the string.
     *
     * <p>Equivalent to:
     *
     * <pre>
     * {@link #unicodePreservingSubstring(String, int, int)}(
     *     str, begin, str.length())
     * </pre>
     *
     * @param str the original String
     * @param begin the beginning index, inclusive
     * @return the substring from the (possibly adjusted) {@code begin} to the
     *   end of {@code str}
     * @throws IndexOutOfBoundsException if {@code begin} is negative or
     *   larger than the length of {@code str}
     */
    public static String unicodePreservingSubstring(String str, int begin) {
        return unicodePreservingSubstring(str, begin, str.length());
    }

    /**
     * Normalizes {@code index} such that it respects Unicode character
     * boundaries in {@code str}.
     *
     * <p>If {@code index} is the low surrogate of a unicode character,
     * the method returns {@code index - 1}. Otherwise, {@code index} is
     * returned.
     *
     * <p>In the case in which {@code index} falls in an invalid surrogate pair
     * (e.g. consecutive low surrogates, consecutive high surrogates), or
     * if it is not a valid index into {@code str}, the original value of
     * {@code index} is returned.
     *
     * @param str the String
     * @param index the index to be normalized
     * @return a normalized index that does not split a Unicode character
     */
    public static int unicodePreservingIndex(String str, int index) {
        // Only interior positions can split a pair: index 0 has no preceding
        // char, and index == length (or beyond) has no char at the index.
        if (index > 0 && index < str.length()) {
            if (Character.isHighSurrogate(str.charAt(index - 1))
                    && Character.isLowSurrogate(str.charAt(index))) {
                return index - 1;
            }
        }
        return index;
    }
}

Related

  1. substringAfter(String str, String separator)
  2. substringAfterLast(String str, String separator)
  3. substringBefore(String str, String separator)
  4. substringBeforeLast(String str, String separator)
  5. unicodePreservingSubstring(String paramString, int paramInt1, int paramInt2)
  6. unicodePreservingSubstring(String str, int begin)
  7. substringAfter(String str, String separator)
  8. substringBetween(String str, String open, String close)
  9. subStringEndString(String sourceStr, String endString)