Java String Split by Line split(String aLine, char delimiter, char beginQuoteChar, char endQuoteChar, boolean retainQuotes, boolean trimTokens)

Here you can find the source of split(String aLine, char delimiter, char beginQuoteChar, char endQuoteChar, boolean retainQuotes, boolean trimTokens)

Description

Splits the specified delimited String into tokens, supporting quoted tokens so that quoted strings themselves won't be tokenized.

License

Apache License

Parameter

Parameter Description
aLine the String to parse
delimiter the delimiter by which the <tt>line</tt> argument is to be split
beginQuoteChar the character signifying the start of quoted text (so the quoted text will not be split)
endQuoteChar the character signifying the end of quoted text
retainQuotes if the quotes themselves should be retained when constructing the corresponding token
trimTokens if leading and trailing whitespace should be trimmed from discovered tokens.

Return

the tokens discovered from parsing the given delimited line.

Declaration

public static String[] split(String aLine, char delimiter, char beginQuoteChar, char endQuoteChar,
        boolean retainQuotes, boolean trimTokens) 

Method Source Code

//package com.java2s;
/*//  w  w w. ja v  a 2 s .  co  m
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.util.ArrayList;

import java.util.List;

public class Main {
    /** Constant representing the empty string, equal to &quot;&quot; */
    public static final String EMPTY_STRING = "";
    /** Constant representing the default delimiter character (comma), equal to <code>','</code> */
    public static final char DEFAULT_DELIMITER_CHAR = ',';
    /** Constant representing the default quote character (double quote), equal to '&quot;'</code> */
    public static final char DEFAULT_QUOTE_CHAR = '"';

    public static String[] split(String line) {
        return split(line, DEFAULT_DELIMITER_CHAR);
    }

    public static String[] split(String line, char delimiter) {
        return split(line, delimiter, DEFAULT_QUOTE_CHAR);
    }

    public static String[] split(String line, char delimiter, char quoteChar) {
        return split(line, delimiter, quoteChar, quoteChar);
    }

    public static String[] split(String line, char delimiter, char beginQuoteChar, char endQuoteChar) {
        return split(line, delimiter, beginQuoteChar, endQuoteChar, false, true);
    }

    /**
     * Splits the specified delimited String into tokens, supporting quoted tokens so that quoted strings themselves
     * won't be tokenized.
     *
     * <p>This method's implementation is very loosely based (with significant modifications) on
     * <a href="http://blogs.bytecode.com.au/glen">Glen Smith</a>'s open-source
     * <a href="http://opencsv.svn.sourceforge.net/viewvc/opencsv/trunk/src/au/com/bytecode/opencsv/CSVReader.java?&view=markup">CSVReader.java</a>
     * file.
     *
     * <p>That file is Apache 2.0 licensed as well, making Glen's code a great starting point for us to modify to
     * our needs.
     *
     * @param aLine          the String to parse
     * @param delimiter      the delimiter by which the <tt>line</tt> argument is to be split
     * @param beginQuoteChar the character signifying the start of quoted text (so the quoted text will not be split)
     * @param endQuoteChar   the character signifying the end of quoted text
     * @param retainQuotes   if the quotes themselves should be retained when constructing the corresponding token
     * @param trimTokens     if leading and trailing whitespace should be trimmed from discovered tokens.
     * @return the tokens discovered from parsing the given delimited <tt>line</tt>.
     */
    public static String[] split(String aLine, char delimiter, char beginQuoteChar, char endQuoteChar,
            boolean retainQuotes, boolean trimTokens) {
        String line = clean(aLine);
        if (line == null) {
            return null;
        }

        List<String> tokens = new ArrayList<String>();
        StringBuilder sb = new StringBuilder();
        boolean inQuotes = false;

        for (int i = 0; i < line.length(); i++) {

            char c = line.charAt(i);
            if (c == beginQuoteChar) {
                // this gets complex... the quote may end a quoted block, or escape another quote.
                // do a 1-char lookahead:
                if (inQuotes // we are in quotes, therefore there can be escaped quotes in here.
                        && line.length() > (i + 1) // there is indeed another character to check.
                        && line.charAt(i + 1) == beginQuoteChar) { // ..and that char. is a quote also.
                    // we have two quote chars in a row == one quote char, so consume them both and
                    // put one on the token. we do *not* exit the quoted text.
                    sb.append(line.charAt(i + 1));
                    i++;
                } else {
                    inQuotes = !inQuotes;
                    if (retainQuotes) {
                        sb.append(c);
                    }
                }
            } else if (c == endQuoteChar) {
                inQuotes = !inQuotes;
                if (retainQuotes) {
                    sb.append(c);
                }
            } else if (c == delimiter && !inQuotes) {
                String s = sb.toString();
                if (trimTokens) {
                    s = s.trim();
                }
                tokens.add(s);
                sb = new StringBuilder(); // start work on next token
            } else {
                sb.append(c);
            }
        }
        String s = sb.toString();
        if (trimTokens) {
            s = s.trim();
        }
        tokens.add(s);
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Returns a 'cleaned' representation of the specified argument.  'Cleaned' is defined as the following:
     *
     * <ol>
     * <li>If the specified <code>String</code> is <code>null</code>, return <code>null</code></li>
     * <li>If not <code>null</code>, {@link String#trim() trim()} it.</li>
     * <li>If the trimmed string is equal to the empty String (i.e. &quot;&quot;), return <code>null</code></li>
     * <li>If the trimmed string is not the empty string, return the trimmed version</li>.
     * </ol>
     *
     * Therefore this method always ensures that any given string has trimmed text, and if it doesn't, <code>null</code>
     * is returned.
     *
     * @param in the input String to clean.
     * @return a populated-but-trimmed String or <code>null</code> otherwise
     */
    public static String clean(String in) {
        String out = in;

        if (in != null) {
            out = in.trim();
            if (out.equals(EMPTY_STRING)) {
                out = null;
            }
        }

        return out;
    }

    /**
     * Returns the specified array as a comma-delimited (',') string.
     * @param array the array whose contents will be converted to a string.
     * @return the array's contents as a comma-delimited (',') string.
     * @since 1.0
     */
    public static String toString(Object[] array) {
        return toDelimitedString(array, ",");
    }

    /**
     * Returns the array's contents as a string, with each element delimited by the specified
     * {@code delimiter} argument.  Useful for {@code toString()} implementations and log messages.
     *
     * @param array the array whose contents will be converted to a string
     * @param delimiter the delimiter to use between each element
     * @return a single string, delimited by the specified {@code delimiter}.
     * @since 1.0
     */
    public static String toDelimitedString(Object[] array, String delimiter) {
        if (array == null || array.length == 0) {
            return EMPTY_STRING;
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < array.length; i++) {
            if (i > 0) {
                sb.append(delimiter);
            }
            sb.append(array[i]);
        }
        return sb.toString();
    }
}

Related

  1. fastSplit(final String line, final char c)
  2. split(String line)
  3. split(String line)
  4. split(String line)
  5. split(String line)