Java String Tokenize tokenizeString(String inString, char delimiter, String enclosures)

Here you can find the source of tokenizeString(String inString, char delimiter, String enclosures)

Description

Returns an array of tokenized Strings based on the delimiter and enclosures passed in.

License

LGPL

Parameter

Parameter Description
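inString the string to split into tokens
delimiter the character that separates tokens
enclosures pairs of opening/closing characters (e.g. "[]{}XX") between which the delimiter is ignored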

Declaration

public static String[] tokenizeString(String inString, char delimiter, String enclosures) 

Method Source Code

//package com.java2s;
/*******************************************************************************
 * CogTool Copyright Notice and Distribution Terms
 * CogTool 1.3, Copyright (c) 2005-2013 Carnegie Mellon University
 * This software is distributed under the terms of the FSF Lesser
 * Gnu Public License (see LGPL.txt).
 * 
 * CogTool is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 * 
 * CogTool is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with CogTool; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 * 
 * CogTool makes use of several third-party components, with the 
 * following notices:
 * 
 * Eclipse SWT version 3.448
 * Eclipse GEF Draw2D version 3.2.1
 * 
 * Unless otherwise indicated, all Content made available by the Eclipse 
 * Foundation is provided to you under the terms and conditions of the Eclipse 
 * Public License Version 1.0 ("EPL"). A copy of the EPL is provided with this 
 * Content and is also available at http://www.eclipse.org/legal/epl-v10.html.
 * 
 * CLISP version 2.38
 * 
 * Copyright (c) Sam Steingold, Bruno Haible 2001-2006
 * This software is distributed under the terms of the FSF Gnu Public License.
 * See COPYRIGHT file in clisp installation folder for more information.
 * 
 * ACT-R 6.0
 * 
 * Copyright (c) 1998-2007 Dan Bothell, Mike Byrne, Christian Lebiere & 
 *                         John R Anderson. 
 * This software is distributed under the terms of the FSF Lesser
 * Gnu Public License (see LGPL.txt).
 * 
 * Apache Jakarta Commons-Lang 2.1
 * 
 * This product contains software developed by the Apache Software Foundation
 * (http://www.apache.org/)
 * 
 * jopt-simple version 1.0
 * 
 * Copyright (c) 2004-2013 Paul R. Holser, Jr.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 * Mozilla XULRunner 1.9.0.5
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/.
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 * License for the specific language governing rights and limitations
 * under the License.
 * 
 * The J2SE(TM) Java Runtime Environment version 5.0
 * 
 * Copyright 2009 Sun Microsystems, Inc., 4150
 * Network Circle, Santa Clara, California 95054, U.S.A.  All
 * rights reserved. U.S.  
 * See the LICENSE file in the jre folder for more information.
 ******************************************************************************/

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class Main {
    /**
     * Returns an array of tokenized Strings based on the delimiter and
     * enclosures passed in.
     *
     * Enclosures are passed in a string of character pairs that cause the
     * parser to ignore the delimiter if it occurs between them.  For example,
     * if the string "[]{}XX" is passed in, any delimiter occurring between
     * '[' and ']', '{' and '}', or 'X' and 'X' is ignored.  If the enclosure
     * string has an odd length, the final character acts as both the opening
     * and the closing character of its pair.
     *
     * Tokenizing rules:
     * <ul>
     * <li>An opening enclosure character ends the token being built (if any)
     *     and starts a new one; the matching closing character ends that
     *     token.  Both enclosure characters are kept in the token.</li>
     * <li>Runs of delimiters do not produce empty tokens; the only empty
     *     token ever produced is a leading one, when the input starts with
     *     the delimiter.</li>
     * <li>Text remaining after the last delimiter (including the contents of
     *     an enclosure that is never closed) is returned as the final
     *     token.</li>
     * </ul>
     *
     * @param inString the string to tokenize; {@code null} or empty yields an
     *                 empty array
     * @param delimiter the character separating tokens
     * @param enclosures pairs of opening/closing characters between which the
     *                   delimiter is ignored; may be {@code null} or empty
     * @return the tokens in order of appearance; never {@code null}
     */
    public static String[] tokenizeString(String inString, char delimiter, String enclosures) {
        if ((inString == null) || (inString.length() < 1)) {
            return new String[0];
        }

        Map<Character, Character> closures = parseEnclosures(enclosures);
        List<String> tokens = new ArrayList<String>();
        StringBuilder charBucket = new StringBuilder();

        // Non-null while we are inside an enclosure, waiting for its closer
        Character pendingClosure = null;

        // if the string starts with the delimiter, add a "" to the list
        if (inString.charAt(0) == delimiter) {
            tokens.add("");
        }

        for (int indx = 0; indx < inString.length(); indx++) {
            char current = inString.charAt(indx);

            if (pendingClosure != null) {
                // Inside an enclosure: everything (delimiters included) is
                // kept verbatim; the closing character completes the token.
                charBucket.append(current);
                if (current == pendingClosure.charValue()) {
                    tokens.add(charBucket.toString());
                    charBucket.setLength(0);
                    pendingClosure = null;
                }
                continue;
            }

            // An opening character terminates whatever token was in progress
            // and begins an enclosed token.
            Character closer = closures.get(Character.valueOf(current));
            if (closer != null) {
                pendingClosure = closer;
                flushBucket(charBucket, tokens);
            }

            if (current == delimiter) {
                flushBucket(charBucket, tokens);
            } else {
                // As long as the character is not a delimiter,
                // add it to the bucket.
                charBucket.append(current);
            }
        }

        // Emit the trailing token; without this, text following the last
        // delimiter (or inside an unterminated enclosure) would be lost.
        flushBucket(charBucket, tokens);

        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Builds the opener-to-closer lookup from a string of character pairs.
     * A trailing unpaired character is mapped to itself.
     */
    private static Map<Character, Character> parseEnclosures(String enclosures) {
        Map<Character, Character> closures = new HashMap<Character, Character>();

        if (enclosures == null) {
            return closures;
        }

        for (int indx = 0; indx < enclosures.length(); indx += 2) {
            char open = enclosures.charAt(indx);
            char close = (indx + 1 < enclosures.length()) ? enclosures.charAt(indx + 1) : open;

            closures.put(Character.valueOf(open), Character.valueOf(close));
        }

        return closures;
    }

    /**
     * Appends the bucket's contents to the token list as one token and
     * empties the bucket; does nothing if the bucket is empty.
     */
    private static void flushBucket(StringBuilder charBucket, List<String> tokens) {
        if (charBucket.length() > 0) {
            tokens.add(charBucket.toString());
            charBucket.setLength(0);
        }
    }
}

Related

  1. tokenizePattern(String pattern)
  2. tokenizeQuotedStrings(final String aInput, final String aDelimiters)
  3. tokenizeQuotes(String f1)
  4. tokenizeStatement(String statement)
  5. tokenizeString(final String inputString, final String seperator)
  6. tokenizeStringArray(String[] array, String token)
  7. tokenizeStringWithQuotes(String line, String quoteStyle)
  8. tokenizeToStringArray(String str, String delimiters)
  9. tokenizeToStringArray(String str, String delimiters)