List of usage examples for edu.stanford.nlp.ling CoreLabel setSentIndex
@Override public void setSentIndex(int sentIndex)
From source file:edu.cmu.ml.rtw.users.ssrivastava.RegexExtractor.java
public static CoreMap getStanfordSentence(DocumentNLP document, int sentIdx) { List<String> words = document.getSentenceTokenStrs(sentIdx); List<PoSTag> posTags = document.getSentencePoSTags(sentIdx); List<CoreLabel> tokenList = new ArrayList<CoreLabel>(); for (int i = 0; i < words.size(); i++) { /*Re-create Stanford tokens*/ CoreLabel token = new CoreLabel(); token.setWord(words.get(i));//from w w w . java2 s. co m token.setTag(posTags.get(i).toString()); token.setNER("O"); token.setDocID(document.getName()); token.setSentIndex(sentIdx); token.setBeginPosition(document.getToken(sentIdx, i).getCharSpanStart()); token.setEndPosition(document.getToken(sentIdx, i).getCharSpanEnd()); //System.out.println(token.word()+" "+token.beginPosition()+" "+token.endPosition()); tokenList.add(token); } //Add NER labels for sentence List<Pair<TokenSpan, String>> ners = document.getNer(sentIdx); for (Pair<TokenSpan, String> p : ners) { for (int k = p.getFirst().getStartTokenIndex(); k < p.getFirst().getEndTokenIndex(); k++) { tokenList.get(k).setNER(p.getSecond()); } } //Convert to Stanford Sentence CoreMap sentence = new ArrayCoreMap(); sentence.set(TokensAnnotation.class, tokenList); sentence.set(CharacterOffsetBeginAnnotation.class, tokenList.get(0).beginPosition()); sentence.set(CharacterOffsetEndAnnotation.class, tokenList.get(words.size() - 1).endPosition()); return sentence; }
From source file:edu.jhu.hlt.concrete.stanford.ConcreteToStanfordMapper.java
License:Open Source License
private static List<CoreLabel> tokenizationToCoreLabelList(final Tokenization tkz, int sentIdx, int offset) { List<CoreLabel> clList = new ArrayList<CoreLabel>(); TokenList tl = tkz.getTokenList();/* ww w . j a va2 s. c om*/ List<Token> tokList = tl.getTokenList(); for (Token tok : tokList) { final TextSpan ts = tok.getTextSpan(); final int idx = tok.getTokenIndex(); final int idxPlusOne = idx + 1; final int begin = ts.getStart() - offset; final int length = ts.getEnding() - ts.getStart(); CoreLabel cl = factory.makeToken(tok.getText(), begin, length); cl.setIndex(idxPlusOne); cl.setSentIndex(sentIdx); // cl.setOriginalText(tok.getText()); // cl.set(OriginalTextAnnotation.class, tok.getText()); clList.add(cl); } return clList; }