Example usage for java.text BreakIterator first

List of usage examples for java.text BreakIterator first

Introduction

On this page you can find example usages of the java.text.BreakIterator.first() method.

Prototype

public abstract int first();

Source Link

Document

Returns the first boundary.

Usage

From source file:Main.java

/**
 * Builds the frame content: a scroll pane around the output area, which is then
 * filled with a sample text split at character boundaries (default locale), each
 * piece followed by a '|' marker.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final BreakIterator boundaries = BreakIterator.getCharacterInstance(Locale.getDefault());

    final String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    boundaries.setText(sample);
    outputText.setText("");

    int pieceStart = boundaries.first();
    for (int pieceEnd = boundaries.next(); pieceEnd != BreakIterator.DONE; pieceEnd = boundaries.next()) {
        outputText.append(sample.substring(pieceStart, pieceEnd) + "|");
        pieceStart = pieceEnd;
    }
    // Whatever follows the last boundary handled by the loop is appended without
    // a marker.
    outputText.append(sample.substring(pieceStart));
}

From source file:be.idamediafoundry.sofa.livecycle.dsc.util.AbstractQDoxComponentInfoExtractor.java

/**
 * Extracts the first sentence of the given text, using the sentence
 * {@link BreakIterator} of the default locale.
 *
 * @param text the text to inspect, may be {@code null}
 * @return the first sentence with surrounding whitespace trimmed; the text itself,
 *         unchanged, when it is {@code null} or when the iterator reports no
 *         boundary after the first one
 */
final protected String getFirstSentence(String text) {
    if (text == null) {
        return null;
    }
    BreakIterator sentences = BreakIterator.getSentenceInstance();
    sentences.setText(text);
    int sentenceStart = sentences.first();
    int sentenceEnd = sentences.next();
    return sentenceEnd == BreakIterator.DONE ? text : text.substring(sentenceStart, sentenceEnd).trim();
}

From source file:com.norconex.importer.handler.tagger.impl.TextStatisticsTagger.java

/**
 * Computes character, word, sentence and paragraph statistics for the document
 * text and adds them to the metadata under {@code document.stat.*} keys.
 * <p>
 * Every non-blank line read from {@code input} counts as one paragraph. Lines are
 * trimmed before being measured and blank lines are skipped entirely. Words are
 * the matches of {@code PATTERN_WORD}; sentences are delimited by the
 * default-locale sentence {@link BreakIterator}. When {@code fieldName} is not
 * blank, its trimmed value plus a dot is inserted into each key after the
 * {@code document.stat.} prefix.
 *
 * @param reference document reference (not read by this method)
 * @param input     reader supplying the document text
 * @param metadata  metadata that receives the statistics fields
 * @param parsed    not read by this method
 * @throws ImporterHandlerException declared by the signature; nothing in this
 *         body throws it directly
 */
@Override
protected void tagTextDocument(String reference, Reader input, ImporterMetadata metadata, boolean parsed)
        throws ImporterHandlerException {
    long charCount = 0;
    long wordCharCount = 0;
    long wordCount = 0;
    long sentenceCount = 0;
    long sentenceCharCount = 0;
    long paragraphCount = 0;

    // One iterator serves every line: setText() re-targets it, so there is no
    // need to obtain a fresh instance per line.
    final BreakIterator boundary = BreakIterator.getSentenceInstance();

    //TODO make this more efficient, by doing all this in one pass.
    LineIterator it = IOUtils.lineIterator(input);
    while (it.hasNext()) {
        String line = it.nextLine().trim();
        if (StringUtils.isBlank(line)) {
            continue;
        }

        // Paragraph: each non-blank line is treated as one paragraph.
        paragraphCount++;

        // Character: length of the trimmed line.
        charCount += line.length();

        // Word
        Matcher matcher = PATTERN_WORD.matcher(line);
        while (matcher.find()) {
            int wordLength = matcher.end() - matcher.start();
            wordCount++;
            wordCharCount += wordLength;
        }

        // Sentence: walk consecutive boundary pairs of the current line.
        boundary.setText(line);
        int start = boundary.first();
        for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
            sentenceCharCount += (end - start);
            sentenceCount++;
        }
    }

    String field = StringUtils.EMPTY;
    if (StringUtils.isNotBlank(fieldName)) {
        field = fieldName.trim() + ".";
    }

    //--- Add fields ---
    metadata.addLong("document.stat." + field + "characterCount", charCount);
    metadata.addLong("document.stat." + field + "wordCount", wordCount);
    metadata.addLong("document.stat." + field + "sentenceCount", sentenceCount);
    metadata.addLong("document.stat." + field + "paragraphCount", paragraphCount);
    metadata.addString("document.stat." + field + "averageWordCharacterCount",
            divide(wordCharCount, wordCount));
    metadata.addString("document.stat." + field + "averageSentenceCharacterCount",
            divide(sentenceCharCount, sentenceCount));
    metadata.addString("document.stat." + field + "averageSentenceWordCount", divide(wordCount, sentenceCount));
    metadata.addString("document.stat." + field + "averageParagraphCharacterCount",
            divide(charCount, paragraphCount));
    metadata.addString("document.stat." + field + "averageParagraphSentenceCount",
            divide(sentenceCount, paragraphCount));
    metadata.addString("document.stat." + field + "averageParagraphWordCount",
            divide(wordCount, paragraphCount));

}

From source file:com.redhat.rcm.version.Cli.java

/**
 * Wraps {@code line} at line-break opportunities and prints each resulting row,
 * prefixed with {@code indent} and left-justified/padded to {@code max} columns.
 * <p>
 * A single segment longer than {@code max} is never split; it ends up on a row of
 * its own. Segment lengths are taken as delivered by the line
 * {@link BreakIterator}, i.e. including any whitespace that belongs to the
 * segment.
 *
 * @param line   the text to wrap; nothing is printed when it yields no segments
 * @param indent prefix written before every row
 * @param max    maximum row width in characters, also used as the padding width
 * @param pw     destination writer
 */
private static void printTextLine(final String line, final String indent, final int max, final PrintWriter pw) {
    final String fmt = "%s%-" + max + "s\n";

    final List<String> lines = new ArrayList<String>();

    final BreakIterator iter = BreakIterator.getLineInstance();
    iter.setText(line);

    final StringBuilder currentLine = new StringBuilder();
    int start = iter.first();
    for (int end = iter.next(); end != BreakIterator.DONE; start = end, end = iter.next()) {
        final String seg = line.substring(start, end);
        // Flush only a non-empty row: when the pending row is still empty (e.g. the
        // very first segment is already wider than max) there is nothing to flush,
        // and flushing anyway would print a spurious blank row.
        if (currentLine.length() > 0 && currentLine.length() + seg.length() > max) {
            lines.add(currentLine.toString());
            currentLine.setLength(0);
        }

        currentLine.append(seg);
    }

    if (currentLine.length() > 0) {
        lines.add(currentLine.toString());
    }

    for (final String ln : lines) {
        pw.printf(fmt, indent, ln);
    }
}

From source file:com.redhat.rcm.version.Cli.java

/**
 * Prints a key/value pair, wrapping the value at line-break opportunities so that
 * each row of the value stays within {@code valMax} characters where possible.
 * <p>
 * The first row is printed with the key; every continuation row is printed with
 * an empty key through the same format. A single segment longer than
 * {@code valMax} is never split. When the value yields no segments, one row with
 * the key and an empty value is printed.
 *
 * @param key    the key, printed on the first row only
 * @param value  the value text to wrap
 * @param fmt    format string taking two arguments: key and value row
 * @param valMax maximum width of a value row in characters
 * @param pw     destination writer
 */
private static void printKVLine(final String key, final String value, final String fmt, final int valMax,
        final PrintWriter pw) {
    final List<String> lines = new ArrayList<String>();

    final BreakIterator iter = BreakIterator.getLineInstance();
    iter.setText(value);

    final StringBuilder currentLine = new StringBuilder();
    int start = iter.first();
    for (int end = iter.next(); end != BreakIterator.DONE; start = end, end = iter.next()) {
        final String seg = value.substring(start, end);
        // Flush only a non-empty row: flushing an empty pending row (first segment
        // already wider than valMax) would put the key on a row with no value.
        if (currentLine.length() > 0 && currentLine.length() + seg.length() > valMax) {
            lines.add(currentLine.toString());
            currentLine.setLength(0);
        }

        currentLine.append(seg);
    }

    if (currentLine.length() > 0) {
        lines.add(currentLine.toString());
    }

    pw.printf(fmt, key, lines.isEmpty() ? "" : lines.get(0));
    for (int i = 1; i < lines.size(); i++) {
        // blank string to serve for indentation in format with two fields.
        pw.printf(fmt, "", lines.get(i));
    }
}

From source file:IteratorTest.java

/**
 * Re-splits the text area's content with the kind of {@link BreakIterator} chosen
 * by the selected button (character, word, line or sentence) for the selected
 * locale, and shows the resulting pieces in the item list.
 * <p>
 * When none of the four buttons is selected there is no iterator to split with,
 * so the list is set to an empty data set instead of failing with a
 * {@code NullPointerException}.
 */
protected void refreshDisplay() {
    String msgText = textArea.getText();
    Locale locale = (Locale) (localeButton.getSelectedItem());

    BreakIterator iterator = null;
    if (charButton.isSelected()) {
        iterator = BreakIterator.getCharacterInstance(locale);
    } else if (wordButton.isSelected()) {
        iterator = BreakIterator.getWordInstance(locale);
    } else if (lineButton.isSelected()) {
        iterator = BreakIterator.getLineInstance(locale);
    } else if (sentButton.isSelected()) {
        iterator = BreakIterator.getSentenceInstance(locale);
    }

    Vector<String> items = new Vector<String>();
    if (iterator != null) {
        iterator.setText(msgText);
        int startIndex = iterator.first();
        for (int nextIndex = iterator.next(); nextIndex != BreakIterator.DONE; nextIndex = iterator.next()) {
            items.addElement(msgText.substring(startIndex, nextIndex));
            startIndex = nextIndex;
        }
    }
    itemList.setListData(items);
}

From source file:org.cloudgraph.examples.test.model.NLPWikiParseTest.java

/**
 * Walks the buffer sentence by sentence (US-locale sentence boundaries) and logs
 * every sentence together with the time measured around the per-sentence step,
 * which is currently commented out.
 *
 * @param buf the text to split into sentences
 * @throws IOException declared by the signature; not thrown by the visible body
 */
private void parse(StringBuilder buf) throws IOException {
    final BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);

    final String text = buf.toString();
    sentences.setText(text);

    int handled = 0;
    int sentenceStart = sentences.first();
    for (int sentenceEnd = sentences.next(); sentenceEnd != BreakIterator.DONE; sentenceEnd = sentences.next()) {
        final String sentence = text.substring(sentenceStart, sentenceEnd);

        final long before = System.currentTimeMillis();
        //parse(sentence);
        final long after = System.currentTimeMillis();

        log.info("time4: " + (after - before) + ": " + sentence);
        handled++;
        sentenceStart = sentenceEnd;
    }

}

From source file:org.cloudgraph.examples.test.model.StanfordCoreNLPTest.java

/**
 * Walks the buffer sentence by sentence (US-locale sentence boundaries), hands
 * each sentence to the single-sentence {@code parse} overload and logs the
 * sentence together with the time that call took.
 *
 * @param buf the text to split into sentences
 * @throws IOException declared by the signature; the visible body itself does not
 *         throw it
 */
private void parse(StringBuilder buf) throws IOException {
    final BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);

    final String text = buf.toString();
    sentences.setText(text);

    int handled = 0;
    int sentenceStart = sentences.first();
    for (int sentenceEnd = sentences.next(); sentenceEnd != BreakIterator.DONE; sentenceEnd = sentences.next()) {
        final String sentence = text.substring(sentenceStart, sentenceEnd);

        final long before = System.currentTimeMillis();
        parse(sentence);
        final long after = System.currentTimeMillis();

        log.info("time4: " + (after - before) + ": " + sentence);
        handled++;
        sentenceStart = sentenceEnd;
    }

}

From source file:org.lnicholls.galleon.util.Tools.java

/**
 * Breaks {@code text} into display lines no wider than {@code width}.
 * <p>
 * The text is walked segment by segment using the default-locale word
 * {@link BreakIterator}. A segment that, with spaces removed, is exactly a line
 * terminator ({@code "\n"}, {@code "\r"} or {@code "\r\n"}) ends the current line
 * (an empty line is kept in that case). A segment that would make the current
 * line wider than {@code width} starts a new line. Every returned line is trimmed.
 *
 * @param width   maximum line width, in the unit reported by {@code metrics}
 * @param metrics font metrics used to measure candidate lines; when {@code null},
 *                each character is counted as 20 units wide
 * @param text    the text to lay out, may be {@code null}
 * @return the laid-out lines; an empty array when {@code text} is {@code null}
 */
public static String[] layout(int width, FontMetrics metrics, String text) {
    ArrayList<String> lines = new ArrayList<String>();

    if (text != null) {
        String line = "";
        BreakIterator boundary = BreakIterator.getWordInstance();
        boundary.setText(text);
        int start = boundary.first();
        for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
            String word = text.substring(start, end);
            String trimmed = word.replace(" ", "");
            String candidate = line + word;
            int metricsWidth = (metrics != null) ? metrics.stringWidth(candidate) : candidate.length() * 20;

            if (trimmed.equals("\n") || trimmed.equals("\r") || trimmed.equals("\r\n")) {
                // Explicit line break: emit the pending line even when it is empty.
                lines.add(line.trim());
                line = "";
            } else if (metricsWidth > width) {
                // Overflow: emit the pending line only if it has content. Without
                // this check a word that is too wide on its own (or that follows
                // only whitespace) produced a spurious empty line before it.
                String finished = line.trim();
                if (!finished.isEmpty()) {
                    lines.add(finished);
                }
                line = word;
            } else {
                line = candidate;
            }
        }
        if (line.trim().length() > 0) {
            lines.add(line.trim());
        }
    }

    return lines.toArray(new String[0]);
}

From source file:pl.edu.icm.coansys.kwdextraction.RakeExtractor.java

/**
 * Finds keyword candidates: words or word sequences separated by stopwords,
 * punctuation marks etc.
 * <p>
 * Walks {@code content} with a default-locale word {@link BreakIterator}. A
 * candidate is closed when a separator word is met (a stopword for {@code lang},
 * a run of non-word characters, something {@code isNum} accepts, or a word that
 * contains {@code ILLEGAL_CHARS} matches) or when the text ends. Distinct
 * candidate strings are collected in a map; a repeated candidate only has its
 * counter incremented. The collected candidates are stored in
 * {@code keywordCandidates}.
 */
private void extractKeywordCandidates() {

    // Candidate string -> candidate object; used to detect repeated candidates.
    Map<String, KeywordCandidate> candidatesMap = new HashMap<String, KeywordCandidate>();

    BreakIterator wordIterator = BreakIterator.getWordInstance();

    wordIterator.setText(content);
    int wordStart = wordIterator.first();

    // Start offset (in content) of the candidate currently being assembled.
    int candidateStart = wordStart;
    // Non-null only while a finished candidate is waiting to be registered.
    String candidateStr = null;
    KeywordCandidate kwdCand = new KeywordCandidate();

    for (int wordEnd = wordIterator
            .next(); wordEnd != BreakIterator.DONE; wordStart = wordEnd, wordEnd = wordIterator.next()) {

        String word = content.substring(wordStart, wordEnd).trim().toLowerCase();
        // The word with every ILLEGAL_CHARS match removed; differs from word when
        // the word contains such characters.
        String alpha = word.replaceAll(ILLEGAL_CHARS, "");

        // Segments that are empty after trimming (pure whitespace) are skipped:
        // they neither extend nor close the current candidate.
        if (!word.isEmpty()) {

            if (stopwords.get(lang).contains(word) || word.matches("\\W+") || isNum(word)
                    || !word.equals(alpha)) {
                // Separator: the candidate is everything from its start up to
                // (not including) this word.
                candidateStr = content.substring(candidateStart, wordStart);
            } else {
                kwdCand.addWord(word);
                // Last word of the text: close the candidate including this word.
                if (wordEnd == content.length()) {
                    candidateStr = content.substring(candidateStart, wordEnd);
                }
            }
            if (candidateStr != null) {
                // Normalize: trim, lower-case, drop ILLEGAL_CHARS matches and
                // collapse whitespace runs to a single space.
                candidateStr = candidateStr.trim().toLowerCase().replaceAll(ILLEGAL_CHARS, "")
                        .replaceAll("\\s+", " ");
                if (!candidateStr.isEmpty()) {
                    if (candidatesMap.containsKey(candidateStr)) {
                        // Seen before: count the occurrence on the stored candidate;
                        // the freshly assembled kwdCand is discarded below.
                        candidatesMap.get(candidateStr).incCounter();
                    } else {
                        kwdCand.setKeyword(candidateStr);
                        candidatesMap.put(candidateStr, kwdCand);
                    }
                }
                // Reset state: the next candidate starts right after this word.
                candidateStr = null;
                candidateStart = wordEnd;
                kwdCand = new KeywordCandidate();
            }
        }
    }

    // Publish the distinct candidates as a list.
    keywordCandidates = new ArrayList<KeywordCandidate>();
    for (Entry<String, KeywordCandidate> e : candidatesMap.entrySet()) {
        keywordCandidates.add(e.getValue());
    }
}