Java String Tokenize tokenize(String sentence)

Here you can find the source of tokenize(String sentence)

Description

Take a string and tokenize it using " " and "--" as delimiters into an Array of Strings.

License

Open Source License

Parameter

Parameter Description
sentence The string to parse.

Return

The string array

Declaration

public static String[] tokenize(String sentence) 

Method Source Code

//package com.java2s;
/** (from www.java2s.com)
 * Distribution License:
 * JSword is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License, version 2.1 as published by
 * the Free Software Foundation. This program is distributed in the hope
 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * The License is available on the internet at:
 *       http://www.gnu.org/copyleft/lgpl.html
 * or by writing to:
 *      Free Software Foundation, Inc.
 *      59 Temple Place - Suite 330
 *      Boston, MA 02111-1307, USA
 *
 * Copyright: 2005
 *     The copyright to this program is held by its authors.
 *
 * ID: $Id$
 */

import java.util.ArrayList;
import java.util.List;

public class Main {
    /** Single-character delimiter. */
    private static final char SPACE = ' ';

    /** Two-character delimiter. */
    private static final String DOUBLE_DASH = "--";

    /**
     * Splits a sentence into tokens using {@code " "} and {@code "--"} as
     * delimiters.
     * <p>
     * Each delimiter is kept at the end of the token it terminates, so
     * {@code "hello world"} becomes {@code {"hello ", "world"}} and
     * {@code "a--b"} becomes {@code {"a--", "b"}}. When the two delimiters
     * are adjacent they are treated as a single break:
     * <ul>
     * <li>{@code "a-- b"} yields {@code {"a-- ", "b"}} (the dash is ignored
     * and the following space ends the token);</li>
     * <li>{@code "a --b"} yields {@code {"a --", "b"}} (the space is ignored
     * and the following dash ends the token).</li>
     * </ul>
     * Tokens that are empty or consist only of whitespace (as judged by
     * {@link String#trim()}) are discarded, which means leading spaces and
     * runs of spaces do not produce tokens of their own.
     *
     * @param sentence
     *            The string to parse. A {@code null} sentence is treated as
     *            having no tokens.
     * @return The tokens in order of appearance; never {@code null}, empty
     *         when the sentence is {@code null} or blank
     */
    public static String[] tokenize(String sentence) {
        // Nothing to split: report "no tokens" instead of failing on indexOf.
        if (sentence == null) {
            return new String[0];
        }

        List<String> tokens = new ArrayList<String>();
        int pos = 0;
        boolean done = false;

        while (!done) {
            // Locate the next occurrence of each delimiter from the cursor
            int nextSpace = sentence.indexOf(SPACE, pos);
            int nextDDash = sentence.indexOf(DOUBLE_DASH, pos);

            // "-- ": the space directly follows the dash, so let the space
            // terminate the token and leave the dash inside it.
            if (nextDDash != -1 && nextSpace == nextDDash + 2) {
                nextDDash = -1;
            }

            // " --": the dash directly follows the space, so let the dash
            // terminate the token and leave the space inside it.
            if (nextSpace != -1 && nextDDash == nextSpace + 1) {
                nextSpace = -1;
            }

            String token;
            if (nextSpace == -1 && nextDDash == -1) {
                // No delimiter left: the remainder is the final token.
                token = sentence.substring(pos);
                done = true;
            } else if (nextDDash == -1 || (nextSpace != -1 && nextSpace < nextDDash)) {
                // The space comes first (or is the only delimiter left)
                token = sentence.substring(pos, nextSpace) + SPACE;
                pos = nextSpace + 1;
            } else {
                // The double dash comes first (or is the only delimiter left)
                token = sentence.substring(pos, nextDDash) + DOUBLE_DASH;
                pos = nextDDash + DOUBLE_DASH.length();
            }

            // Skip tokens that carry nothing but whitespace
            if (token.trim().length() > 0) {
                tokens.add(token);
            }
        }

        return tokens.toArray(new String[0]);
    }
}

Related

  1. tokenize(String s, char separator, int maxTokens)
  2. tokenize(String s, char separator, int maxTokens)
  3. tokenize(String s, String delimiters)
  4. tokenize(String s1)
  5. Tokenize(String sent)
  6. tokenize(String source, char separator)
  7. tokenize(String source, char separator)
  8. tokenize(String str, char delim)
  9. tokenize(String str, String delims)