Usage examples for the edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory constructor
public EnglishGrammaticalStructureFactory()
From source file: org.ets.research.nlp.stanford_thrift.general.CoreNLPThriftUtil.java
License:Open Source License
/**
 * Builds a document-level {@link Annotation} from bracketed parse-tree strings,
 * treating each string as one sentence.
 *
 * <p>For every tree the leaf words are recovered directly from the bracketed text,
 * turned into {@link CoreLabel}s, and wrapped in a sentence-level annotation that
 * carries its tokens, its token begin/end indices within the whole document, and
 * the parse annotations filled in by {@link ParserAnnotatorUtils}. The sentences
 * are then passed through {@code adjustCharacterOffsets(sentences, true)} before
 * being attached to the returned document annotation.
 *
 * <p>NOTE(review): the word extraction assumes every leaf is written as
 * {@code (TAG word)} and that sibling constituents are separated by {@code ") "};
 * a fragment without a space-separated second field fails with
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param parseTrees bracketed parse trees, one string per sentence
 * @return an annotation whose text is the space-joined words of all sentences and
 *         whose {@code SentencesAnnotation} holds one entry per input tree
 */
public static Annotation getAnnotationFromParseTrees(List<String> parseTrees) {
    List<CoreMap> sentenceMaps = new ArrayList<CoreMap>();
    List<String> documentWords = new ArrayList<String>();
    int nextTokenIndex = 0;

    for (String tree : parseTrees) {
        // Pull the leaf words out of the raw bracketing: cut after each ") ", keep
        // what follows the last "(", strip trailing ")" characters, and take the
        // second space-separated field (the first one is the tag).
        List<String> words = new ArrayList<String>();
        for (String fragment : tree.split("\\) ")) {
            String[] openParenParts = fragment.split("\\(");
            String tagAndWord = openParenParts[openParenParts.length - 1].trim().replaceAll("\\)+$", "");
            words.add(tagAndWord.split(" ")[1]);
        }
        documentWords.addAll(words);

        List<CoreLabel> labels = Sentence.toCoreLabelList(words.toArray(new String[words.size()]));
        CoreMap sentence = new Annotation(Sentence.listToString(words));

        // Provisional character offsets: begin at 0, end at the length of the last
        // token's text. The list is handed to adjustCharacterOffsets(...) below.
        sentence.set(CharacterOffsetBeginAnnotation.class, 0);
        CoreLabel lastLabel = labels.get(labels.size() - 1);
        sentence.set(CharacterOffsetEndAnnotation.class, lastLabel.get(TextAnnotation.class).length());

        sentence.set(CoreAnnotations.TokensAnnotation.class, labels);

        // Token span of this sentence within the whole document: [begin, end).
        int sentenceTokenEnd = nextTokenIndex + labels.size();
        sentence.set(CoreAnnotations.TokenBeginAnnotation.class, nextTokenIndex);
        sentence.set(CoreAnnotations.TokenEndAnnotation.class, sentenceTokenEnd);
        nextTokenIndex = sentenceTokenEnd;

        // NOTE(review): the meaning of the (false, true) flags is defined by
        // ParserAnnotatorUtils.fillInParseAnnotations — confirm against its Javadoc.
        EnglishGrammaticalStructureFactory structureFactory = new EnglishGrammaticalStructureFactory();
        Tree parsedTree = Tree.valueOf(tree);
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, structureFactory, sentence, parsedTree);

        sentenceMaps.add(sentence);
    }

    Annotation document = new Annotation(Sentence.listToString(documentWords));
    document.set(CoreAnnotations.SentencesAnnotation.class, adjustCharacterOffsets(sentenceMaps, true));
    return document;
}