Example usage for java.text BreakIterator getWordInstance

List of usage examples for java.text BreakIterator getWordInstance

Introduction

On this page you can find example usages of java.text.BreakIterator.getWordInstance.

Prototype

public static BreakIterator getWordInstance(Locale locale) 

Source Link

Document

Returns a new BreakIterator instance for word breaks for the given locale.

Usage

From source file:Main.java

/**
 * Prints, one per line, every boundary offset that the word iterator for
 * Locale.CANADA reports for the text "a sentence".
 */
public static void main(String[] argv) throws Exception {
    BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.CANADA);
    wordBreaks.setText("a sentence");

    // first() yields the initial boundary; next() returns DONE once the text is exhausted.
    int boundary = wordBreaks.first();
    while (boundary != BreakIterator.DONE) {
        System.out.println(boundary);
        boundary = wordBreaks.next();
    }
}

From source file:Main.java

/**
 * Hands a fixed sample sentence and a word iterator for the default locale
 * to extractWords.
 */
public static void main(String[] args) {
    final BreakIterator defaultLocaleWords = BreakIterator.getWordInstance(Locale.getDefault());
    final String sample = "this is a test(this is a test).";
    extractWords(sample, defaultLocaleWords);
}

From source file:HangulTextBoundaryDetection.java

/**
 * Prints the character boundaries and then the word boundaries of a Hangul
 * sample string, both computed with Locale.KOREAN iterators.
 */
public static void main(String[] s) {
    final String hangul = "\u1112\u1161\u11ab\u1100\u1173\u11af";
    final BreakIterator charBreaks = BreakIterator.getCharacterInstance(Locale.KOREAN);
    final BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.KOREAN);

    System.out.print("Character Boundaries: ");
    printBoundaries(hangul, charBreaks);

    System.out.print("\nWord Boundaries:");
    printBoundaries(hangul, wordBreaks);
}

From source file:Main.java

/**
 * Builds the frame content: puts the output text area into a scroll pane and
 * fills it with a sample text in which each segment reported by the default
 * locale's word iterator is followed by a "|" marker.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    BreakIterator boundaries = BreakIterator.getWordInstance(Locale.getDefault());

    String text = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    boundaries.setText(text);
    outputText.setText("");

    int segmentStart = boundaries.first();
    for (int segmentEnd = boundaries.next(); segmentEnd != BreakIterator.DONE; segmentEnd = boundaries.next()) {
        outputText.append(text.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
    }
    // Anything after the last reported boundary is appended without a marker.
    outputText.append(text.substring(segmentStart));
}

From source file:pt.ua.ri.tokenizer.WordTokenizer.java

private WordTokenizer(String locale) {
    checkNotNull(locale);/*from  w  w  w  .j  a va  2 s .  c om*/
    bi = BreakIterator.getWordInstance(Locale.forLanguageTag(locale));
    tokens = new PatriciaTrie<>();
    bf = new StringBuilder();
}

From source file:nl.gridline.free.taalserver.TokenizeMap.java

/**
 * Reads the task configuration and initialises the word splitter and the
 * description/title/keywords switches.
 *
 * <p>The splitter locale comes from {@code TaskConfig.TEXT_LANGUAGE}; when that
 * parameter is absent, Dutch (language "nl", country "NL") is used.
 *
 * @param context task context whose configuration is read
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {

    String loc = context.getConfiguration().get(TaskConfig.TEXT_LANGUAGE);
    if (loc == null) {
        // Locale(String) takes a bare language code; "nl_NL" must be given as language + country.
        splitter = BreakIterator.getWordInstance(new Locale("nl", "NL"));
    } else {
        splitter = BreakIterator.getWordInstance(parseLocale(loc));
    }

    String useDescStr = context.getConfiguration().get(TaskConfig.TEXT_ANALYSIS_USE_DESCRIPTION);
    if (useDescStr != null && !useDescStr.isEmpty()) {
        useDescription = Boolean.parseBoolean(useDescStr);
    } else {
        LOG.warn("parameter missing <" + TaskConfig.TEXT_ANALYSIS_USE_DESCRIPTION + "> defaulting to: true");
        useDescription = true;
    }

    String useTitleStr = context.getConfiguration().get(TaskConfig.TEXT_ANALYSIS_USE_TITLE);
    if (useTitleStr != null && !useTitleStr.isEmpty()) {
        useTitle = Boolean.parseBoolean(useTitleStr);
    } else {
        LOG.warn("parameter missing <" + TaskConfig.TEXT_ANALYSIS_USE_TITLE + "> defaulting to: true");
        useTitle = true;
    }

    // Unlike the two switches above, a missing keywords parameter is not logged and
    // leaves useKeywords at whatever value it already holds.
    String useKeyWordsStr = context.getConfiguration().get(TaskConfig.TEXT_ANALYSIS_USE_KEYWORDS);
    if (useKeyWordsStr != null && !useKeyWordsStr.isEmpty()) {
        useKeywords = Boolean.parseBoolean(useKeyWordsStr);
    }

}

/**
 * Turns a locale code such as "nl", "nl_NL" or "nl-NL" into a Locale by splitting
 * it into language, country and variant. A bare language code yields the same
 * Locale as {@code new Locale(code)}.
 */
private Locale parseLocale(String code) {
    String[] parts = code.trim().split("[_-]", 3);
    String language = parts[0];
    String country = parts.length > 1 ? parts[1] : "";
    String variant = parts.length > 2 ? parts[2] : "";
    return new Locale(language, country, variant);
}

From source file:IteratorTest.java

/**
 * Re-splits the text area's content with the boundary type and locale currently
 * selected in the UI and shows the resulting pieces in the item list.
 *
 * <p>If none of the boundary-type buttons is selected, the list is cleared
 * instead of failing on a missing iterator.
 */
protected void refreshDisplay() {
    Vector<String> items = new Vector<String>();
    String msgText = textArea.getText();
    Locale locale = (Locale) (localeButton.getSelectedItem());

    BreakIterator iterator = null;
    if (charButton.isSelected()) {
        iterator = BreakIterator.getCharacterInstance(locale);
    } else if (wordButton.isSelected()) {
        iterator = BreakIterator.getWordInstance(locale);
    } else if (lineButton.isSelected()) {
        iterator = BreakIterator.getLineInstance(locale);
    } else if (sentButton.isSelected()) {
        iterator = BreakIterator.getSentenceInstance(locale);
    }

    // No button selected: leave items empty rather than dereferencing null.
    if (iterator != null) {
        iterator.setText(msgText);
        int startIndex = iterator.first();
        for (int nextIndex = iterator.next(); nextIndex != BreakIterator.DONE; nextIndex = iterator.next()) {
            items.addElement(msgText.substring(startIndex, nextIndex));
            startIndex = nextIndex;
        }
    }
    itemList.setListData(items);
}

From source file:forge.view.arcane.util.OutlinedLabel.java

/**
 * {@inheritDoc}
 *
 * <p>Draws the label text with an outline: each line is drawn four times in the
 * outline colour, offset diagonally by the outline size, and then once in the
 * foreground colour on top. Lines are broken at word boundaries unless that
 * would need more than two lines, in which case character boundaries are used.
 * Nothing is drawn when the text is null or empty.
 */
@Override
public final void paint(final Graphics g) {
    final String text = getText();
    // Null-safe guard: covers both "no text set" and the empty string in one place.
    if (StringUtils.isEmpty(text)) {
        return;
    }

    Dimension size = getSize();

    Graphics2D g2d = (Graphics2D) g;

    g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
    g2d.setRenderingHint(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON);

    int textX = outlineSize, textY = 0;
    // When wrapping is off the width is effectively unbounded; never negative.
    int wrapWidth = Math.max(0, wrap ? size.width - outlineSize * 2 : Integer.MAX_VALUE);

    AttributedString attributedString = new AttributedString(text);
    attributedString.addAttribute(TextAttribute.FONT, getFont());
    AttributedCharacterIterator charIterator = attributedString.getIterator();
    FontRenderContext fontContext = g2d.getFontRenderContext();

    // Dry run with word wrapping, only to count lines (stops counting after the third).
    LineBreakMeasurer measurer = new LineBreakMeasurer(charIterator,
            BreakIterator.getWordInstance(Locale.ENGLISH), fontContext);
    int lineCount = 0;
    while (measurer.getPosition() < charIterator.getEndIndex()) {
        measurer.nextLayout(wrapWidth);
        lineCount++;
        if (lineCount > 2) {
            break;
        }
    }
    charIterator.first();
    // Use char wrap if word wrap would cause more than two lines of text.
    if (lineCount > 2) {
        measurer = new LineBreakMeasurer(charIterator, BreakIterator.getCharacterInstance(Locale.ENGLISH),
                fontContext);
    } else {
        // Reuse the word-wrapping measurer, rewound to the start of the text.
        measurer.setPosition(0);
    }
    while (measurer.getPosition() < charIterator.getEndIndex()) {
        TextLayout textLayout = measurer.nextLayout(wrapWidth);
        float ascent = textLayout.getAscent();
        textY += ascent; // Move down to baseline.

        // Outline pass: four diagonal offsets, slightly transparent.
        g2d.setColor(outlineColor);
        g2d.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 0.8f));

        textLayout.draw(g2d, textX + outlineSize, textY - outlineSize);
        textLayout.draw(g2d, textX + outlineSize, textY + outlineSize);
        textLayout.draw(g2d, textX - outlineSize, textY - outlineSize);
        textLayout.draw(g2d, textX - outlineSize, textY + outlineSize);

        // Foreground pass: fully opaque text on top of the outline.
        g2d.setColor(getForeground());
        g2d.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 1.0f));
        textLayout.draw(g2d, textX, textY);

        // Move down to top of next line.
        textY += textLayout.getDescent() + textLayout.getLeading();
    }
}

From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java

/**
 * Splits a sentence into tokens at the word boundaries reported by the
 * BreakIterator for the given language.
 *
 * <p>Each segment is passed through trim_and_replace_emptyspace with "_" as the
 * replacement; segments that come back empty are dropped.
 *
 * @param sentence      text to tokenize; leading and trailing whitespace is ignored
 * @param language_code locale code converted with LocaleUtils.toLocale
 * @return the non-empty tokens in order of appearance
 */
@Override
public List<String> tokenizeSentence_intern(String sentence, String language_code) {
    // The boundaries index into the trimmed text, so the tokens must be cut from
    // that same string; cutting from the untrimmed sentence shifts every token
    // whenever the sentence starts with whitespace.
    final String text = sentence.trim();

    List<String> tokens = new ArrayList<String>();
    BreakIterator token_bounds = BreakIterator.getWordInstance(LocaleUtils.toLocale(language_code));
    token_bounds.setText(text);

    int begin_t = token_bounds.first();
    for (int end_t = token_bounds.next(); end_t != BreakIterator.DONE; begin_t = end_t, end_t = token_bounds
            .next()) {
        String token = de.tudarmstadt.lt.utilities.StringUtils
                .trim_and_replace_emptyspace(text.substring(begin_t, end_t), "_");
        if (!token.isEmpty()) { // add token iff token is not empty
            tokens.add(token);
        }
    }
    return tokens;
}