Example usage for java.text BreakIterator setText

List of usage examples for java.text BreakIterator setText

Introduction

On this page you can find example usages of java.text.BreakIterator.setText.

Prototype

public abstract void setText(CharacterIterator newText);

Source Link

Document

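Sets a new text to be scanned. Note that the examples below all pass a String, i.e. they call the setText(String) overload rather than the CharacterIterator prototype shown above.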

Usage

From source file:Main.java

/**
 * Prints the offset of every word boundary found in a fixed sample text,
 * one offset per line, using the word break rules for {@link Locale#CANADA}.
 *
 * @param argv command-line arguments (unused)
 */
public static void main(String[] argv) throws Exception {
    final String sample = "a sentence";

    BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.CANADA);
    wordBreaks.setText(sample);

    // Walk forward from the first boundary until the iterator is exhausted.
    int offset = wordBreaks.first();
    while (offset != BreakIterator.DONE) {
        System.out.println(offset);
        offset = wordBreaks.next();
    }
}

From source file:Main.java

/**
 * Prints the offset of every line-break boundary found in a fixed two-line
 * sample text, one offset per line, using the rules for {@link Locale#CANADA}.
 *
 * @param argv command-line arguments (unused)
 */
public static void main(String[] argv) throws Exception {
    final String sample = "line1\nline2";

    BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.CANADA);
    lineBreaks.setText(sample);

    // Walk forward from the first boundary until the iterator is exhausted.
    int offset = lineBreaks.first();
    while (offset != BreakIterator.DONE) {
        System.out.println(offset);
        offset = lineBreaks.next();
    }
}

From source file:Main.java

/**
 * Prints the offset of every sentence boundary found in a fixed sample text,
 * one offset per line, using the rules for {@link Locale#CANADA}.
 *
 * @param argv command-line arguments (unused)
 */
public static void main(String[] argv) throws Exception {
    final String sample = "this is a test.";

    BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.CANADA);
    sentenceBreaks.setText(sample);

    // Walk forward from the first boundary until the iterator is exhausted.
    int offset = sentenceBreaks.first();
    while (offset != BreakIterator.DONE) {
        System.out.println(offset);
        offset = sentenceBreaks.next();
    }
}

From source file:Main.java

/**
 * Prints the offset of every character boundary found in a fixed sample
 * text, one offset per line, using the rules for {@link Locale#CANADA}.
 *
 * @param argv command-line arguments (unused)
 */
public static void main(String[] argv) throws Exception {
    final String sample = "aString";

    BreakIterator charBreaks = BreakIterator.getCharacterInstance(Locale.CANADA);
    charBreaks.setText(sample);

    // Walk forward from the first boundary until the iterator is exhausted.
    int offset = charBreaks.first();
    while (offset != BreakIterator.DONE) {
        System.out.println(offset);
        offset = charBreaks.next();
    }
}

From source file:HangulTextBoundaryDetection.java

/**
 * Writes every boundary offset that {@code bi} reports for {@code source}
 * to standard output, each followed by a single space and with no trailing
 * newline.
 *
 * @param source the text to scan
 * @param bi the break iterator to run over the text; its text is replaced
 */
static void printBoundaries(String source, BreakIterator bi) {
    bi.setText(source);

    for (int offset = bi.first(); offset != BreakIterator.DONE; offset = bi.next()) {
        System.out.print(offset + " ");
    }
}

From source file:Main.java

/**
 * Prints each word of {@code target} on its own line of standard output.
 * <p>
 * The text is split at the boundaries reported by {@code wordIterator}; a
 * segment is printed only when its first code point is a letter or a digit,
 * which filters out the whitespace and punctuation segments between words.
 *
 * @param target the text to split
 * @param wordIterator the break iterator used to find segment boundaries;
 * its text is replaced
 */
static void extractWords(String target, BreakIterator wordIterator) {
    wordIterator.setText(target);
    int start = wordIterator.first();

    for (int end = wordIterator.next(); end != BreakIterator.DONE; end = wordIterator.next()) {
        String word = target.substring(start, end);
        // Test the first code point rather than the first char: a word that
        // begins with a supplementary character starts with a surrogate, which
        // on its own is never classified as a letter or digit.
        if (Character.isLetterOrDigit(word.codePointAt(0))) {
            System.out.println(word);
        }
        start = end;
    }
}

From source file:Main.java

/**
 * Inserts a zero-width space (U+200B) after every segment that the Thai
 * ("th") line break iterator finds in {@code source}.
 *
 * @param source the text to segment
 * @return the segments of {@code source} in order, each followed by U+200B;
 * an empty string when {@code source} is empty
 */
public static String wordSpace(String source) {
    BreakIterator boundary = BreakIterator.getLineInstance(new Locale("th"));
    boundary.setText(source);

    // StringBuilder: the buffer never leaves this method, so the synchronized
    // StringBuffer is unnecessary. Appending the range directly also avoids a
    // temporary String per segment.
    StringBuilder wordbuffer = new StringBuilder();
    int start = boundary.first();
    for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
        wordbuffer.append(source, start, end).append('\u200b');
    }
    return wordbuffer.toString();
}

From source file:org.yamj.common.tools.StringTools.java

/**
 * Check that the passed string is not longer than the required length and
 * trim it if necessary/*from w  w  w .ja va2  s  .  c o m*/
 *
 * @param sourceString The string to check
 * @param requiredLength The required length (Maximum)
 * @param trimToWord Trim the source string to the last space to avoid
 * partial words
 * @param endingSuffix The ending to append if the string is longer than the
 * required length
 * @return
 */
public static String trimToLength(String sourceString, int requiredLength, boolean trimToWord,
        String endingSuffix) {
    String changedString = sourceString.trim();

    if (StringUtils.isNotBlank(changedString)) {
        if (changedString.length() <= requiredLength) {
            // No need to do anything
            return changedString;
        } else if (trimToWord) {
            BreakIterator bi = BreakIterator.getWordInstance();
            bi.setText(changedString);
            int biLength = bi.preceding(requiredLength - endingSuffix.length());
            return changedString.substring(0, biLength).trim() + endingSuffix;
        } else {
            // We know that the source string is longer that the required length, so trim it to size
            return changedString.substring(0, requiredLength - endingSuffix.length()).trim() + endingSuffix;
        }
    }

    return changedString;
}

From source file:com.cotrino.knowledgemap.db.Question.java

/**
 * http://stackoverflow.com/questions/2103598/java-simple-sentence-parser
 * @param text/*w  w  w .  j  a va  2 s .  c  om*/
 * @param language
 * @param country
 * @return
 */
public static List<String> tokenize(String text, String language, String country) {
    List<String> sentences = new ArrayList<String>();
    Locale currentLocale = new Locale(language, country);
    BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(currentLocale);
    sentenceIterator.setText(text);
    int boundary = sentenceIterator.first();
    int lastBoundary = 0;
    while (boundary != BreakIterator.DONE) {
        boundary = sentenceIterator.next();
        if (boundary != BreakIterator.DONE) {
            sentences.add(text.substring(lastBoundary, boundary));
        }
        lastBoundary = boundary;
    }
    return sentences;
}

From source file:StringUtils.java

/**
 * Reformats a string where lines that are longer than <tt>width</tt>
 * are split apart at the earliest wordbreak or at maxLength, whichever is
 * sooner. If the width specified is less than 5 or greater than the input
 * Strings length the string will be returned as is.
 * <p/>/*  w  w w  .jav  a2 s. co m*/
 * Please note that this method can be lossy - trailing spaces on wrapped
 * lines may be trimmed.
 *
 * @param input the String to reformat.
 * @param width the maximum length of any one line.
 * @return a new String with reformatted as needed.
 */
public static String wordWrap(String input, int width, Locale locale) {
    // protect ourselves
    if (input == null) {
        return "";
    } else if (width < 5) {
        return input;
    } else if (width >= input.length()) {
        return input;
    }

    StringBuilder buf = new StringBuilder(input);
    boolean endOfLine = false;
    int lineStart = 0;

    for (int i = 0; i < buf.length(); i++) {
        if (buf.charAt(i) == '\n') {
            lineStart = i + 1;
            endOfLine = true;
        }

        // handle splitting at width character
        if (i > lineStart + width - 1) {
            if (!endOfLine) {
                int limit = i - lineStart - 1;
                BreakIterator breaks = BreakIterator.getLineInstance(locale);
                breaks.setText(buf.substring(lineStart, i));
                int end = breaks.last();

                // if the last character in the search string isn't a space,
                // we can't split on it (looks bad). Search for a previous
                // break character
                if (end == limit + 1) {
                    if (!Character.isWhitespace(buf.charAt(lineStart + end))) {
                        end = breaks.preceding(end - 1);
                    }
                }

                // if the last character is a space, replace it with a \n
                if (end != BreakIterator.DONE && end == limit + 1) {
                    buf.replace(lineStart + end, lineStart + end + 1, "\n");
                    lineStart = lineStart + end;
                }
                // otherwise, just insert a \n
                else if (end != BreakIterator.DONE && end != 0) {
                    buf.insert(lineStart + end, '\n');
                    lineStart = lineStart + end + 1;
                } else {
                    buf.insert(i, '\n');
                    lineStart = i + 1;
                }
            } else {
                buf.insert(i, '\n');
                lineStart = i + 1;
                endOfLine = false;
            }
        }
    }

    return buf.toString();
}