Example usage for org.apache.commons.lang.text StrTokenizer getTSVInstance

List of usage examples for org.apache.commons.lang.text StrTokenizer getTSVInstance

Introduction

On this page you can find example usages of org.apache.commons.lang.text.StrTokenizer.getTSVInstance().

Prototype

public static StrTokenizer getTSVInstance() 

Source Link

Document

Gets a new tokenizer instance which parses Tab Separated Value strings.

Usage

From source file:it.drwolf.ridire.session.async.Mapper.java

/**
 * Counts the lines of a PoS-tagged resource file that carry a valid PoS tag.
 *
 * <p>Every line is split as Tab Separated Values. A line contributes to the
 * result only when it yields exactly three tokens and its second token, once
 * trimmed, is accepted by {@code Mapper.isValidPos}.
 *
 * @param posTagResourceFileName path of the file to read
 * @return the number of lines that satisfied both conditions
 * @throws IOException if reading the file fails
 */
@SuppressWarnings("unchecked")
public static Integer countWordsFromPoSTagResource(String posTagResourceFileName) throws IOException {
    List<String> rows = FileUtils.readLines(new File(posTagResourceFileName));
    StrTokenizer tsv = StrTokenizer.getTSVInstance();
    // Primitive counter; it is boxed once when returned.
    int validWords = 0;
    for (String row : rows) {
        // The same tokenizer instance is re-armed for each line.
        String[] fields = tsv.reset(row).getTokenArray();
        if (fields.length == 3 && Mapper.isValidPos(fields[1].trim())) {
            validWords++;
        }
    }
    return validWords;
}

From source file:it.drwolf.ridire.session.async.WordCounter.java

/**
 * Counts the lines of a PoS-tagged resource file that carry a valid PoS tag.
 *
 * <p>Every line is split as Tab Separated Values. Lines that do not yield
 * exactly three tokens are skipped; of the remaining ones, a line is counted
 * when its second token, once trimmed, is accepted by {@code isValidPos}.
 *
 * @param posTagResourceFile the file to read
 * @return the number of counted lines
 * @throws IOException if reading the file fails
 */
public Integer countWordsFromPoSTagResource(File posTagResourceFile) throws IOException {
    List<String> rows = FileUtils.readLines(posTagResourceFile);
    StrTokenizer tsv = StrTokenizer.getTSVInstance();
    // Primitive counter; it is boxed once when returned.
    int validWords = 0;
    for (String row : rows) {
        String[] fields = tsv.reset(row).getTokenArray();
        if (fields.length != 3) {
            // Not a three-column line: nothing to inspect.
            continue;
        }
        String posTag = fields[1].trim();
        if (this.isValidPos(posTag)) {
            validWords++;
        }
    }
    return validWords;
}

From source file:it.drwolf.ridire.session.JobManager.java

/**
 * Loads the PoS-tagged text belonging to a crawled resource and stores it via
 * {@code setPosText}.
 *
 * <p>The file is looked up as {@code <digest>.txt.pos} inside the resources
 * directory derived from the resource's ARC file path (with any
 * {@code __<digits>} sequence removed). Each line is split as Tab Separated
 * Values; only lines with exactly three tokens are kept, mapped in order to
 * form, PoS tag and lemma (each trimmed).
 *
 * @param cr the crawled resource whose PoS text is loaded
 */
public void retrievePoSText(CrawledResource cr) {
    String arcPath = cr.getArcFile().replaceAll("__\\d+", "");
    File resourceDir = new File(FilenameUtils.getFullPath(arcPath) + JobManager.RESOURCESDIR);
    File posTextFile = new File(resourceDir, cr.getDigest() + ".txt.pos");
    List<PoSLine> parsed = new ArrayList<PoSLine>();
    try {
        List<String> rows = FileUtils.readLines(posTextFile);
        StrTokenizer tsv = StrTokenizer.getTSVInstance();
        for (String row : rows) {
            String[] fields = tsv.reset(row).getTokenArray();
            if (fields.length != 3) {
                // Skip lines that are not form / tag / lemma triples.
                continue;
            }
            PoSLine entry = new PoSLine();
            entry.setForm(fields[0].trim());
            entry.setPosTag(fields[1].trim());
            entry.setLemma(fields[2].trim());
            parsed.add(entry);
        }
    } catch (IOException ignored) {
        // NOTE(review): the read failure is swallowed, so whatever was collected
        // before the failure (normally nothing) is still passed to setPosText.
        // Confirm this best-effort behaviour is intended; consider logging here.
    }
    this.setPosText(parsed);
}