Example usage for edu.stanford.nlp.process CoreLabelTokenFactory makeToken

List of usage examples for edu.stanford.nlp.process CoreLabelTokenFactory makeToken

Introduction

In this page you can find the example usage for edu.stanford.nlp.process CoreLabelTokenFactory makeToken.

Prototype

@Override
public CoreLabel makeToken(String tokenText, int begin, int length) 

Source Link

Document

Constructs a CoreLabel from a String, with the BEGIN and END positions corresponding to the given begin offset and length.

Usage

From source file: edu.illinois.cs.cogcomp.pipeline.handlers.StanfordParseHandler.java

License:Open Source License

/**
 * Converts the tokens and sentences of a {@code TextAnnotation} into a list of
 * sentence-level {@code CoreMap}s.
 *
 * <p>Every constituent of the {@code TOKENS} view becomes a {@code CoreLabel} created by
 * {@link CoreLabelTokenFactory#makeToken(String, int, int)} from the token's surface form,
 * its start character offset and its character length. Tokens are grouped by the
 * constituents of the {@code SENTENCE} view: a sentence is closed as soon as a token's
 * start span is no longer below the current sentence's end span.
 *
 * <p>The token index set on each {@code CoreLabel} starts at 0 and keeps counting across
 * sentences; it is not reset at sentence boundaries.
 *
 * <p>NOTE(review): the method reads the first sentence unconditionally and fetches the
 * next sentence by index whenever a boundary is crossed, so it assumes the
 * {@code SENTENCE} view is non-empty and covers every token — confirm against callers.
 *
 * @param ta the annotation providing the raw text and the {@code TOKENS} and
 *           {@code SENTENCE} views
 * @return one {@code CoreMap} per sentence, in the order the sentences were encountered
 */
static List<CoreMap> buildStanfordSentences(TextAnnotation ta) {
    View tokenView = ta.getView(ViewNames.TOKENS);
    View sentenceView = ta.getView(ViewNames.SENTENCE);
    String text = ta.getText();

    List<CoreMap> result = new LinkedList<>();
    List<CoreLabel> pendingTokens = new LinkedList<>();
    int nextTokenIndex = 0;
    int sentenceNo = 0;

    Constituent sentence = sentenceView.getConstituents().get(0);
    String sentenceText = text.substring(sentence.getStartCharOffset(), sentence.getEndCharOffset());

    CoreLabelTokenFactory factory = new CoreLabelTokenFactory();

    for (Constituent token : tokenView.getConstituents()) {
        boolean pastSentenceEnd = !(token.getStartSpan() < sentence.getEndSpan());
        if (pastSentenceEnd) {
            // Flush the tokens collected so far as a finished sentence ...
            result.add(buildStanfordSentence(sentence, sentenceText, sentenceNo, pendingTokens));
            sentenceNo += 1;
            pendingTokens = new LinkedList<>();

            // ... and move on to the sentence that follows it.
            sentence = sentenceView.getConstituents().get(sentenceNo);
            sentenceText = text.substring(sentence.getStartCharOffset(), sentence.getEndCharOffset());
        }

        int begin = token.getStartCharOffset();
        int end = token.getEndCharOffset();
        String surface = text.substring(begin, end);

        // makeToken takes (text, begin, length), not (text, begin, end).
        CoreLabel label = factory.makeToken(surface, begin, end - begin);
        label.setIndex(nextTokenIndex);
        nextTokenIndex += 1;
        pendingTokens.add(label);
    }

    // The tokens of the final sentence are still pending once the loop ends.
    result.add(buildStanfordSentence(sentence, sentenceText, sentenceNo, pendingTokens));
    return result;
}