Example usage of the edu.stanford.nlp.trees.MemoryTreebank constructor

List of usage examples for the edu.stanford.nlp.trees.MemoryTreebank constructor

Introduction

On this page you can find example usage of the edu.stanford.nlp.trees.MemoryTreebank constructor.

Prototype

public MemoryTreebank(int initialCapacity) 

Source Link

Document

Create a new Treebank.

Usage

From source file: opennlp.tools.parse_thicket.opinion_processor.DefaultSentimentProcessor.java

License: Apache License

/**
 * Builds a list of annotations from the contents of {@code filename}, interpreted according to
 * {@code inputFormat}.
 *
 * <ul>
 *   <li>{@code TEXT}: the file is slurped into a string, run through {@code tokenizer}, and each
 *       sentence found under {@code SentencesAnnotation} is wrapped in its own annotation whose
 *       text is that sentence's {@code TextAnnotation} and whose sentence list contains only
 *       that sentence.</li>
 *   <li>{@code TREES}: trees are read from the file, and each tree is wrapped in an annotation
 *       with empty text whose single sentence carries the tree under {@code TreeAnnotation}.</li>
 * </ul>
 *
 * @param tokenizer pipeline used to annotate the raw text; only used for {@code TEXT}
 * @param inputFormat how the contents of the file should be interpreted
 * @param filename path handed to the file/tree readers
 * @param filterUnknown only consulted for {@code TREES}: when true the trees come from
 *        {@code SentimentUtils.readTreesWithGoldLabels} followed by
 *        {@code SentimentUtils.filterUnknownRoots}; when false they are loaded through a
 *        {@code MemoryTreebank}
 * @return one annotation per sentence ({@code TEXT}) or per tree ({@code TREES})
 * @throws IllegalArgumentException if {@code inputFormat} is neither {@code TEXT} nor {@code TREES}
 */
public static List<Annotation> getAnnotations(StanfordCoreNLP tokenizer, Input inputFormat, String filename,
        boolean filterUnknown) {
    switch (inputFormat) {
    case TEXT: {
        // Annotate the whole document once, then split it into per-sentence annotations.
        Annotation document = new Annotation(IOUtils.slurpFileNoExceptions(filename));
        tokenizer.annotate(document);

        List<Annotation> perSentence = Generics.newArrayList();
        for (CoreMap docSentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            String sentenceText = docSentence.get(CoreAnnotations.TextAnnotation.class);
            List<CoreMap> onlyThisSentence = Collections.singletonList(docSentence);

            Annotation wrapper = new Annotation(sentenceText);
            wrapper.set(CoreAnnotations.SentencesAnnotation.class, onlyThisSentence);
            perSentence.add(wrapper);
        }
        return perSentence;
    }
    case TREES: {
        List<Tree> parsed;
        if (!filterUnknown) {
            // Plain load: copy every tree of the treebank into a list.
            // "utf-8" is presumably the file encoding expected by MemoryTreebank -- confirm against its API.
            parsed = Generics.newArrayList();
            MemoryTreebank bank = new MemoryTreebank("utf-8");
            bank.loadPath(filename, null);
            for (Tree loaded : bank) {
                parsed.add(loaded);
            }
        } else {
            parsed = SentimentUtils.filterUnknownRoots(SentimentUtils.readTreesWithGoldLabels(filename));
        }

        List<Annotation> perTree = Generics.newArrayList();
        for (Tree parse : parsed) {
            // The sentence text is rebuilt from the tree's yield; the tree itself rides along.
            CoreMap treeSentence = new Annotation(listToString(parse.yield()));
            treeSentence.set(TreeCoreAnnotations.TreeAnnotation.class, parse);

            Annotation wrapper = new Annotation("");
            wrapper.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(treeSentence));
            perTree.add(wrapper);
        }
        return perTree;
    }
    default:
        throw new IllegalArgumentException("Unknown format " + inputFormat);
    }
}