List of usage examples for edu.stanford.nlp.ie.machinereading.common.SimpleTokenize.tokenize
public static ArrayList<String> tokenize(String line)
From source file: edu.washington.phrasal.feature.SentenceIdPhrasalVerbId.java
public SentenceIdPhrasalVerbId(Integer sentence_id, Integer phrasal_verb_id, FeatureGenerator fg) { this.sentence_id = sentence_id; this.sentence = fg.sentenceList.getSentence(this.sentence_id); this.phrasal_verb_id = phrasal_verb_id; this.fg = fg; this.sentenceTokens = SimpleTokenize.tokenize(sentence); this.phrasalVerbTokens = SimpleTokenize.tokenize(fg.getPhrasalVerbById(phrasal_verb_id)); sentencePOS = tagWordTokens(sentenceTokens); phraseVerbPOS = tagWordTokens(phrasalVerbTokens); /* start index of phrase in sentence, first occurance */ pvStartIndex = Collections.indexOfSubList(sentenceTokens, phrasalVerbTokens); /* end index of phrase in sentence */ pvEndIndex = pvStartIndex + phrasalVerbTokens.size(); /*Compute character based offsets*/ characterOffsets = new ArrayList<Integer>(); int offset = 0; for (String token : sentenceTokens) { characterOffsets.add(offset);//from www. j a v a2s.com offset = offset + token.length() + 1; } }