Behaves similarly to BreakIterator.getWordInstance() but handles line break delimiters as simple whitespace. : BreakIterator « I18N « Java






Behaves similarly to BreakIterator.getWordInstance() but handles line break delimiters as simple whitespace.

  
/**
 * 
 * JFreeReport : a free Java reporting library
 * 
 *
 * Project Info:  http://reporting.pentaho.org/
 *
 * (C) Copyright 2001-2007, by Object Refinery Ltd, Pentaho Corporation and Contributors.
 *
 * This library is free software; you can redistribute it and/or modify it under the terms
 * of the GNU Lesser General Public License as published by the Free Software Foundation;
 * either version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
 * in the United States and other countries.]
 *
 * ------------
 * WordBreakIterator.java
 * ------------
 * (C) Copyright 2001-2007, by Object Refinery Ltd, Pentaho Corporation and Contributors.
 */

/**
 * Splits a text into alternating runs of whitespace and non-whitespace
 * characters. Behaves similarly to BreakIterator.getWordInstance(), but handles
 * line break delimiters as simple whitespace: a boundary is reported wherever
 * the result of {@link Character#isWhitespace(char)} changes between two
 * adjacent characters, and at the end of the text.
 * <p>
 * A <code>null</code> text is treated as an empty text.
 * <p>
 * This class is not synchronized.
 *
 * @author Thomas Morgner.
 */
public class WordBreakIterator {
  /**
   * Returned by {@link #next()} and {@link #nextWithEnd()} when the iterator
   * already stands at the end of the text and no further boundary exists.
   */
  public static final int DONE = -1;

  /**
   * Shared empty buffer used while no text (or a <code>null</code> text) is
   * set, so that <code>text</code> is never <code>null</code>.
   */
  private static final char[] NO_TEXT = new char[0];

  /**
   * The current position; always within <code>0 .. text.length</code>.
   */
  private int position;

  /**
   * Storage for characters; never <code>null</code>.
   */
  private char[] text = NO_TEXT;

  /**
   * Creates a new iterator positioned at the start of the given text.
   *
   * @param text
   *          the text to break; <code>null</code> is treated as an empty
   *          text.
   */
  public WordBreakIterator(final String text) {
    setText(text);
  }

  /**
   * Advances to the next word boundary. Starting at the current position,
   * skips the whole run of characters that share the whitespace state of the
   * current character and stops on the first character of the other kind, or
   * at the end of the text.
   *
   * @return the index of the next word boundary (which becomes the current
   *         position), or {@link #DONE} if the current position already is
   *         the end of the text.
   */
  public int next() {
    if (position == text.length) {
      return DONE;
    }

    // Whether the run we are standing in is whitespace or word characters;
    // the boundary is where this state flips.
    final boolean inWhitespace = Character.isWhitespace(text[position]);
    while ((position < text.length)
        && (Character.isWhitespace(text[position]) == inWhitespace)) {
      position++;
    }
    return position;
  }

  /**
   * Same as {@link #next()}, but maps a {@link #DONE} result of
   * <code>next()</code> to the end-of-text index. If the current position
   * already is the end of the text, {@link #DONE} is returned without calling
   * <code>next()</code>.
   *
   * @return the next position, or {@link #DONE} if the iterator already
   *         stands at the end of the text.
   */
  public int nextWithEnd() {
    if (position == text.length) {
      return DONE;
    }
    final int boundary = next();
    return (boundary == DONE) ? text.length : boundary;
  }

  /**
   * Moves back to the previous word boundary. Starting with the character
   * directly before the current position, skips backwards over the whole run
   * of characters that share its whitespace state.
   *
   * @return the index of the previous word boundary (which becomes the
   *         current position); <code>0</code> if the iterator already stands
   *         at the start of the text. This method never returns
   *         {@link #DONE}.
   */
  public int previous() {
    if (position == 0) {
      return 0;
    }

    // State of the run that ends directly before the current position.
    final boolean inWhitespace = Character.isWhitespace(text[position - 1]);
    while ((position > 0)
        && (Character.isWhitespace(text[position - 1]) == inWhitespace)) {
      position--;
    }
    return position;
  }

  /**
   * Returns the text to be broken up.
   *
   * @return the text; an empty string if <code>null</code> was set.
   */
  public String getText() {
    return new String(text);
  }

  /**
   * Sets the text to be broken up and resets the current position to the
   * start of that text.
   *
   * @param text
   *          the text; <code>null</code> is treated as an empty text.
   */
  public void setText(final String text) {
    position = 0;
    this.text = (text == null) ? NO_TEXT : text.toCharArray();
  }

  /**
   * Returns the current parsing position of this iterator.
   *
   * @return the current parsing position of this iterator.
   */
  public int getPosition() {
    return position;
  }

  /**
   * Defines the current parse position for the word break iterator. The
   * position must not be negative and must not exceed the length of the
   * current text (the end-of-text index itself is allowed).
   *
   * @param position
   *          the position.
   * @throws IndexOutOfBoundsException
   *           if the position is negative or greater than the text length.
   */
  public void setPosition(final int position) {
    if (position < 0) {
      throw new IndexOutOfBoundsException("Position < 0");
    }
    if (position > text.length) {
      throw new IndexOutOfBoundsException("Position > text.length");
    }
    this.position = position;
  }
}

   
    
  








Related examples in the same category

1.BreakIterator for different locales
2.BreakIterator Demo
3.Determining the Character Boundaries in a Unicode String
4.Determining the Word Boundaries in a Unicode String
5.Determining the Sentence Boundaries in a Unicode String
6.Determining Potential Line Breaks in a Unicode String
7.Searching, Sorting, and Text Boundary Detection: Detecting Text Boundaries
8.Wrap multi-line strings (and get the individual lines)