Example usage for the edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory constructor

Introduction

On this page you can find example usage of the edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory no-argument constructor.

Prototype

public EnglishGrammaticalStructureFactory()

Source Link

Usage

From source file: CoreNLPThriftUtil.java (class org.ets.research.nlp.stanford_thrift.general.CoreNLPThriftUtil)

License:Open Source License

/**
 * Builds a document-level {@link Annotation} from bracketed parse-tree strings, one string per
 * sentence.
 *
 * <p>For every tree string the tokens are recovered textually: the string is split on
 * {@code ") "}, and within each piece the text after the last {@code "("} is taken as a
 * space-separated "tag token" pair (trailing {@code ")"} characters stripped). The second element
 * of that pair is the token. A sentence-level annotation is then created from the space-joined
 * tokens, given token begin/end indices that run continuously across sentences, and filled in
 * with parse annotations via
 * {@code ParserAnnotatorUtils.fillInParseAnnotations} using the tree parsed from the same string.
 *
 * <p>The tokenisation relies on the exact {@code ") "} separator, so it presumably expects
 * single-line trees with single spaces between constituents — TODO confirm against callers.
 *
 * @param parseTrees bracketed parse trees, one per sentence, in document order
 * @return an annotation whose text is all tokens joined by spaces and whose
 *         {@code SentencesAnnotation} is the per-sentence list after it has been passed through
 *         {@code adjustCharacterOffsets(sentences, true)}
 * @throws ArrayIndexOutOfBoundsException if a piece of a tree string does not yield a
 *         "tag token" pair (for example an empty tree string)
 */
public static Annotation getAnnotationFromParseTrees(List<String> parseTrees) {
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<String> allTokens = new ArrayList<String>();

    // Loop-invariant: build the factory once instead of once per sentence.
    EnglishGrammaticalStructureFactory grammaticalStructureFactory = new EnglishGrammaticalStructureFactory();

    int tokenOffset = 0;
    for (String tree : parseTrees) {
        List<String> tokens = new ArrayList<String>();
        // Each piece produced by this split ends in (at most) one preterminal, e.g. "(NP (DT The".
        String[] firstSplit = tree.split("\\) ");
        for (String f : firstSplit) {
            String[] secondSplit = f.split("\\(");
            // The text after the last "(" is "TAG token", possibly followed by closing brackets.
            String[] tagAndToken = secondSplit[secondSplit.length - 1].trim().replaceAll("\\)+$", "")
                    .split(" ");
            tokens.add(tagAndToken[1]);
        }
        allTokens.addAll(tokens);
        String[] tokensArr = tokens.toArray(new String[tokens.size()]);
        List<CoreLabel> sentenceTokens = Sentence.toCoreLabelList(tokensArr);
        String originalText = Sentence.listToString(tokens);

        CoreMap sentence = new Annotation(originalText);
        sentence.set(CharacterOffsetBeginAnnotation.class, 0);
        // NOTE(review): the end offset is the length of the LAST TOKEN's text, not of the whole
        // sentence text. Presumably adjustCharacterOffsets(...) below recomputes the offsets;
        // confirm before relying on this value.
        sentence.set(CharacterOffsetEndAnnotation.class,
                sentenceTokens.get(sentenceTokens.size() - 1).get(TextAnnotation.class).length());
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
        // Token indices are document-wide: each sentence starts where the previous one ended.
        sentence.set(CoreAnnotations.TokenBeginAnnotation.class, tokenOffset);
        tokenOffset += sentenceTokens.size();
        sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenOffset);
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, grammaticalStructureFactory,
                sentence, Tree.valueOf(tree));

        sentences.add(sentence);
    }

    Annotation allSentences = new Annotation(Sentence.listToString(allTokens));
    allSentences.set(CoreAnnotations.SentencesAnnotation.class, adjustCharacterOffsets(sentences, true));

    return allSentences;
}