List of usage examples for the edu.stanford.nlp.process.CoreLabelTokenFactory method makeToken
@Override public CoreLabel makeToken(String tokenText, int begin, int length)
From source file: edu.illinois.cs.cogcomp.pipeline.handlers.StanfordParseHandler.java
License: Open Source License
static List<CoreMap> buildStanfordSentences(TextAnnotation ta) { View tokens = ta.getView(ViewNames.TOKENS); View sentences = ta.getView(ViewNames.SENTENCE); String rawText = ta.getText(); List<CoreMap> stanfordSentences = new LinkedList<>(); List<CoreLabel> stanfordTokens = new LinkedList<>(); int tokIndex = 0; int sentIndex = 0; Constituent currentSentence = sentences.getConstituents().get(0); String sentText = rawText.substring(currentSentence.getStartCharOffset(), currentSentence.getEndCharOffset()); CoreLabelTokenFactory tf = new CoreLabelTokenFactory(); for (Constituent tok : tokens.getConstituents()) { if (tok.getStartSpan() >= currentSentence.getEndSpan()) { CoreMap stanfordSentence = buildStanfordSentence(currentSentence, sentText, sentIndex++, stanfordTokens);/* ww w .ja v a2 s .c o m*/ stanfordSentences.add(stanfordSentence); stanfordTokens = new LinkedList<>(); currentSentence = sentences.getConstituents().get(sentIndex); sentText = rawText.substring(currentSentence.getStartCharOffset(), currentSentence.getEndCharOffset()); } int tokStart = tok.getStartCharOffset(); int tokLength = tok.getEndCharOffset() - tokStart; String form = rawText.substring(tokStart, tok.getEndCharOffset()); CoreLabel stanfordTok = tf.makeToken(form, tokStart, tokLength); stanfordTok.setIndex(tokIndex++); stanfordTokens.add(stanfordTok); } // should be one last sentence CoreMap stanfordSentence = buildStanfordSentence(currentSentence, sentText, sentIndex, stanfordTokens); stanfordSentences.add(stanfordSentence); return stanfordSentences; }