Java String Tokenize tokenize(String sentence)

Description

Takes a string and tokenizes it into an array of strings, using " " (space) and "--" (double dash) as delimiters.

License

Open Source License

Parameter

Parameter	Description
sentence	The string to parse.

Return

The string array

Declaration

public static String[] tokenize(String sentence)

Method Source Code

//package com.java2s;
/**
 * Distribution License:
 * JSword is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License, version 2.1 as published by
 * the Free Software Foundation. This program is distributed in the hope
 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * The License is available on the internet at:
 *       http://www.gnu.org/copyleft/lgpl.html
 * or by writing to:
 *      Free Software Foundation, Inc.
 *      59 Temple Place - Suite 330
 *      Boston, MA 02111-1307, USA
 *
 * Copyright: 2005
 *     The copyright to this program is held by its authors.
 *
 * ID: $Id$
 */

import java.util.ArrayList;
import java.util.List;

public class Main {
    /** Single-character delimiter that separates ordinary words. */
    private static final char SPACE = ' ';

    /** Two-character delimiter that also ends a token. */
    private static final String DOUBLE_DASH = "--";

    /**
     * Splits a sentence into tokens, using {@code " "} and {@code "--"} as
     * delimiters.
     *
     * <p>Each delimiter is kept at the end of the token it terminates, so
     * {@code "hello world"} yields {@code {"hello ", "world"}} and
     * {@code "a--b"} yields {@code {"a--", "b"}}. When a double dash and a
     * space are adjacent ({@code "-- "} or {@code " --"}) they are treated as
     * one combined delimiter and both stay attached to the preceding token.
     *
     * <p>Tokens that consist only of whitespace (for example those produced by
     * leading, trailing or repeated spaces) are discarded.
     *
     * @param sentence
     *            The string to parse. May be {@code null}, in which case an
     *            empty array is returned.
     * @return The tokens in order of appearance; never {@code null}
     */
    public static String[] tokenize(String sentence) {
        // Be lenient with a missing sentence instead of failing with a bare
        // NullPointerException from indexOf().
        if (sentence == null) {
            return new String[0];
        }

        List<String> tokens = new ArrayList<String>();
        int pos = 0;

        while (true) {
            // Find the next space and double dash
            int nextSpace = sentence.indexOf(SPACE, pos);
            int nextDDash = sentence.indexOf(DOUBLE_DASH, pos);

            // "-- ": the space directly follows the ddash, so let the space
            // end the token (the ddash stays inside it). The explicit -1
            // guard keeps the "not found" sentinel out of the arithmetic.
            if (nextDDash != -1 && nextSpace == nextDDash + DOUBLE_DASH.length()) {
                nextDDash = -1;
            }

            // " --": the ddash directly follows the space, so let the ddash
            // end the token (the space stays inside it).
            if (nextSpace != -1 && nextDDash == nextSpace + 1) {
                nextSpace = -1;
            }

            // No more delimiters: whatever is left is the final token.
            if (nextSpace == -1 && nextDDash == -1) {
                addIfNotBlank(tokens, sentence.substring(pos));
                break;
            }

            String token;
            if (nextDDash == -1 || (nextSpace != -1 && nextSpace < nextDDash)) {
                // The next separator is a space
                token = sentence.substring(pos, nextSpace) + SPACE;
                pos = nextSpace + 1;
            } else {
                // The next separator is a ddash
                token = sentence.substring(pos, nextDDash) + DOUBLE_DASH;
                pos = nextDDash + DOUBLE_DASH.length();
            }
            addIfNotBlank(tokens, token);
        }

        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Appends the token to the list unless it is empty or whitespace only.
     */
    private static void addIfNotBlank(List<String> tokens, String token) {
        if (token.trim().length() > 0) {
            tokens.add(token);
        }
    }
}

Java String Tokenize tokenize(String sentence)

Description

License

Parameter

Return

Declaration

Method Source Code

Related