Example usage for edu.stanford.nlp.ling CoreLabel setSentIndex

List of usage examples for edu.stanford.nlp.ling CoreLabel setSentIndex

Introduction

On this page you can find example usages of the setSentIndex method of edu.stanford.nlp.ling.CoreLabel.

Prototype

@Override
public void setSentIndex(int sentIndex) 

Source Link

Usage

From source file:edu.cmu.ml.rtw.users.ssrivastava.RegexExtractor.java

/**
 * Re-creates a Stanford sentence ({@link CoreMap}) from one sentence of a document.
 *
 * <p>One {@link CoreLabel} is built per token string returned by
 * {@code document.getSentenceTokenStrs(sentIdx)}. Each label receives its word, its
 * part-of-speech tag, the document name as doc ID, the sentence index, and the character
 * span of the corresponding document token. Every token starts with the NER label
 * {@code "O"}; tokens covered by a span from {@code document.getNer(sentIdx)} are then
 * relabelled with that span's label.
 *
 * <p>The returned sentence always carries the token list. The sentence-level character
 * offsets are taken from the first and last token, so they are only set when the sentence
 * has at least one token; an empty sentence yields a map without offset annotations
 * instead of failing with an {@link IndexOutOfBoundsException}.
 *
 * @param document the document to read tokens, tags and NER spans from
 * @param sentIdx  index of the sentence within {@code document}
 * @return a new sentence map holding the rebuilt tokens
 */
public static CoreMap getStanfordSentence(DocumentNLP document, int sentIdx) {
    List<String> words = document.getSentenceTokenStrs(sentIdx);
    List<PoSTag> posTags = document.getSentencePoSTags(sentIdx);

    List<CoreLabel> tokenList = new ArrayList<CoreLabel>();
    for (int i = 0; i < words.size(); i++) {
        // Re-create the Stanford token from the document's own annotations.
        CoreLabel token = new CoreLabel();
        token.setWord(words.get(i));
        token.setTag(posTags.get(i).toString());
        // Default label; overwritten below for tokens inside an NER span.
        token.setNER("O");
        token.setDocID(document.getName());
        token.setSentIndex(sentIdx);
        token.setBeginPosition(document.getToken(sentIdx, i).getCharSpanStart());
        token.setEndPosition(document.getToken(sentIdx, i).getCharSpanEnd());
        tokenList.add(token);
    }

    // Add NER labels for the sentence; the span's end token index is treated as exclusive.
    List<Pair<TokenSpan, String>> ners = document.getNer(sentIdx);
    for (Pair<TokenSpan, String> p : ners) {
        for (int k = p.getFirst().getStartTokenIndex(); k < p.getFirst().getEndTokenIndex(); k++) {
            tokenList.get(k).setNER(p.getSecond());
        }
    }

    // Convert to a Stanford sentence.
    CoreMap sentence = new ArrayCoreMap();
    sentence.set(TokensAnnotation.class, tokenList);
    // Offsets come from the first/last token, so they only exist for non-empty sentences.
    if (!tokenList.isEmpty()) {
        sentence.set(CharacterOffsetBeginAnnotation.class, tokenList.get(0).beginPosition());
        sentence.set(CharacterOffsetEndAnnotation.class, tokenList.get(tokenList.size() - 1).endPosition());
    }
    return sentence;
}

From source file:edu.jhu.hlt.concrete.stanford.ConcreteToStanfordMapper.java

License:Open Source License

/**
 * Converts every token of a tokenization into a {@link CoreLabel}.
 *
 * <p>Each label is produced by {@code factory.makeToken} from the token text, the token's
 * text-span start shifted back by {@code offset}, and the span length (ending minus
 * start). The label's index is set to the token index plus one and its sentence index to
 * {@code sentIdx}.
 *
 * @param tkz     the tokenization whose token list is converted
 * @param sentIdx sentence index stored on every produced label
 * @param offset  amount subtracted from each token's span start
 * @return the labels, in the iteration order of the tokenization's token list
 */
private static List<CoreLabel> tokenizationToCoreLabelList(final Tokenization tkz, int sentIdx, int offset) {
    final List<CoreLabel> labels = new ArrayList<CoreLabel>();
    final List<Token> concreteTokens = tkz.getTokenList().getTokenList();

    for (final Token concreteToken : concreteTokens) {
        final TextSpan span = concreteToken.getTextSpan();
        // The label index is the token index shifted up by one.
        final int shiftedIndex = concreteToken.getTokenIndex() + 1;

        final int relativeStart = span.getStart() - offset;
        final int spanLength = span.getEnding() - span.getStart();

        final CoreLabel label = factory.makeToken(concreteToken.getText(), relativeStart, spanLength);
        label.setIndex(shiftedIndex);
        label.setSentIndex(sentIdx);
        labels.add(label);
    }

    return labels;
}