Example usage for the edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory constructor

List of usage examples for the edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory constructor

Introduction

On this page you can find example usage for the no-argument constructor of edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory.

Prototype

public EnglishGrammaticalStructureFactory() 

Source Link

Usage

From source file: org.ets.research.nlp.stanford_thrift.general.CoreNLPThriftUtil.java

License:Open Source License

/**
 * Builds a document-level {@link Annotation} from bracketed parse-tree strings,
 * treating each string as one sentence.
 *
 * <p>For every tree the leaf words are recovered by plain string splitting (not by
 * walking a parsed tree), wrapped in {@code CoreLabel}s, and stored on a per-sentence
 * annotation together with token-index bounds that run continuously across the whole
 * input. The tree itself is then parsed with {@code Tree.valueOf} and attached through
 * {@code ParserAnnotatorUtils.fillInParseAnnotations}.
 *
 * @param parseTrees bracketed parse trees, one per sentence, in document order
 * @return an annotation whose text is all words of all sentences joined by
 *         {@code Sentence.listToString}, carrying the sentence list returned by
 *         {@code adjustCharacterOffsets}
 * @throws ArrayIndexOutOfBoundsException if a chunk of a tree string does not end in a
 *         space-separated {@code tag word} pair
 */
public static Annotation getAnnotationFromParseTrees(List<String> parseTrees) {
    List<CoreMap> sentenceMaps = new ArrayList<CoreMap>();
    List<String> documentWords = new ArrayList<String>();
    int nextTokenIndex = 0;

    for (String treeText : parseTrees) {
        // Each ") "-delimited chunk is expected to end with one "(TAG word" leaf:
        // take the text after the last "(", drop trailing ")"s, keep the second field.
        List<String> words = new ArrayList<String>();
        for (String chunk : treeText.split("\\) ")) {
            String[] openParenPieces = chunk.split("\\(");
            String leafText = openParenPieces[openParenPieces.length - 1].trim();
            String[] tagAndWord = leafText.replaceAll("\\)+$", "").split(" ");
            words.add(tagAndWord[1]);
        }
        documentWords.addAll(words);

        String[] wordArray = words.toArray(new String[words.size()]);
        List<CoreLabel> labels = Sentence.toCoreLabelList(wordArray);
        String sentenceText = Sentence.listToString(words);

        CoreMap sentence = new Annotation(sentenceText);

        // Provisional character span: 0 .. length of the last token's text.
        // NOTE(review): presumably adjustCharacterOffsets (called below) replaces
        // these with real offsets - confirm against that method.
        CoreLabel lastLabel = labels.get(labels.size() - 1);
        sentence.set(CharacterOffsetBeginAnnotation.class, 0);
        sentence.set(CharacterOffsetEndAnnotation.class, lastLabel.get(TextAnnotation.class).length());

        // Token span of this sentence within the whole document: [first, end).
        int firstTokenIndex = nextTokenIndex;
        int endTokenIndex = firstTokenIndex + labels.size();
        sentence.set(CoreAnnotations.TokensAnnotation.class, labels);
        sentence.set(CoreAnnotations.TokenBeginAnnotation.class, firstTokenIndex);
        sentence.set(CoreAnnotations.TokenEndAnnotation.class, endTokenIndex);
        nextTokenIndex = endTokenIndex;

        // Attach the parsed tree (and whatever else fillInParseAnnotations derives
        // from it using the English grammatical-structure factory) to the sentence.
        Tree parsedTree = Tree.valueOf(treeText);
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, new EnglishGrammaticalStructureFactory(),
                sentence, parsedTree);

        sentenceMaps.add(sentence);
    }

    Annotation document = new Annotation(Sentence.listToString(documentWords));
    List<CoreMap> adjustedSentences = adjustCharacterOffsets(sentenceMaps, true);
    document.set(CoreAnnotations.SentencesAnnotation.class, adjustedSentences);
    return document;
}