Java String: split a String by character type with camel-case handling

Introduction

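Splits a String by Character type as returned by java.lang.Character.getType(char). Groups of contiguous characters of the same type are returned as complete tokens, except that an uppercase letter immediately preceding a lowercase token is moved to the start of that lowercase token (camel case).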

splitByCharacterTypeCamelCase(null)         = null
splitByCharacterTypeCamelCase("")           = []
splitByCharacterTypeCamelCase("ab de fg")   = ["ab", " ", "de", " ", "fg"]
splitByCharacterTypeCamelCase("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
splitByCharacterTypeCamelCase("ab:cd:ef")   = ["ab", ":", "cd", ":", "ef"]
splitByCharacterTypeCamelCase("number5")    = ["number", "5"]
splitByCharacterTypeCamelCase("fooBar")     = ["foo", "Bar"]
splitByCharacterTypeCamelCase("foo200Bar")  = ["foo", "200", "Bar"]
splitByCharacterTypeCamelCase("ASFRules")   = ["ASF", "Rules"]
import java.util.ArrayList;
import java.util.List;

/**
 * Demonstrates splitting a String into tokens of contiguous characters that
 * share the same {@link Character#getType(char)} value, with optional
 * camel-case handling.
 */
public class Main {

  /** Shared zero-length result returned for empty input. */
  public static final String[] EMPTY_STRING_ARRAY = new String[0];

  /**
   * Prints the camel-case split of a sample String to standard output.
   *
   * @param argv command-line arguments (unused)
   */
  public static void main(String[] argv) throws Exception {
    String str = "ThisIsATestHTMLJava";
    System.out.println(java.util.Arrays.toString(splitByCharacterTypeCamelCase(str)));
  }

  /**
   * Splits a String by character type, treating an uppercase letter that
   * immediately precedes a lowercase run as the start of that run.
   *
   * <pre>
   * splitByCharacterTypeCamelCase(null)        = null
   * splitByCharacterTypeCamelCase("")          = []
   * splitByCharacterTypeCamelCase("ab de fg")  = ["ab", " ", "de", " ", "fg"]
   * splitByCharacterTypeCamelCase("number5")   = ["number", "5"]
   * splitByCharacterTypeCamelCase("fooBar")    = ["foo", "Bar"]
   * splitByCharacterTypeCamelCase("foo200Bar") = ["foo", "200", "Bar"]
   * splitByCharacterTypeCamelCase("ASFRules")  = ["ASF", "Rules"]
   * </pre>
   *
   * @param str the String to split, may be {@code null}
   * @return an array of tokens, or {@code null} if {@code str} is {@code null}
   */
  public static String[] splitByCharacterTypeCamelCase(String str) {
    return splitByCharacterType(str, true);
  }

  /**
   * Splits a String by character type without any camel-case adjustment, so
   * {@code "ASFRules"} yields {@code ["ASFR", "ules"]}.
   *
   * @param str the String to split, may be {@code null}
   * @return an array of tokens, or {@code null} if {@code str} is {@code null}
   */
  public static String[] splitByCharacterType(String str) {
    return splitByCharacterType(str, false);
  }

  /**
   * Splits a String by character type as returned by
   * {@link Character#getType(char)}. Groups of contiguous characters of the
   * same type are returned as complete tokens, with the following exception:
   * if {@code camelCase} is {@code true}, the character of type
   * {@link Character#UPPERCASE_LETTER}, if any, immediately preceding a token
   * of type {@link Character#LOWERCASE_LETTER} belongs to the following token
   * rather than to the preceding uppercase token.
   *
   * @param str the String to split, may be {@code null}
   * @param camelCase whether to apply the camel-case exception described above
   * @return an array of tokens; {@code null} for {@code null} input and
   *         {@link #EMPTY_STRING_ARRAY} for empty input
   */
  private static String[] splitByCharacterType(String str, boolean camelCase) {
    if (str == null) {
      return null;
    }
    if (str.length() == 0) {
      return EMPTY_STRING_ARRAY;
    }
    char[] c = str.toCharArray();
    List<String> list = new ArrayList<String>();
    int tokenStart = 0;
    int currentType = Character.getType(c[tokenStart]);
    for (int pos = tokenStart + 1; pos < c.length; pos++) {
      int type = Character.getType(c[pos]);
      if (type == currentType) {
        continue;
      }
      if (camelCase && type == Character.LOWERCASE_LETTER && currentType == Character.UPPERCASE_LETTER) {
        // Upper -> lower transition: the last uppercase char starts the new
        // token. Only emit the preceding uppercase run if it is non-empty,
        // i.e. the run held more than that single capital.
        int newTokenStart = pos - 1;
        if (newTokenStart != tokenStart) {
          list.add(new String(c, tokenStart, newTokenStart - tokenStart));
          tokenStart = newTokenStart;
        }
      } else {
        list.add(new String(c, tokenStart, pos - tokenStart));
        tokenStart = pos;
      }
      currentType = type;
    }
    // Flush the trailing token, which the loop never emits.
    list.add(new String(c, tokenStart, c.length - tokenStart));
    return list.toArray(new String[list.size()]);
  }
}



/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */



PreviousNext

Related