Example usage for edu.stanford.nlp.trees TreebankLanguagePack grammaticalStructureFactory

List of usage examples for edu.stanford.nlp.trees TreebankLanguagePack grammaticalStructureFactory

Introduction

On this page you can find example usage of edu.stanford.nlp.trees TreebankLanguagePack grammaticalStructureFactory.

Prototype

GrammaticalStructureFactory grammaticalStructureFactory(Predicate<String> puncFilter,
        HeadFinder typedDependencyHF);

Source Link

Document

Return a GrammaticalStructureFactory suitable for this language/treebank.

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java

License: Open Source License

/**
 * Runs Stanford CoreNLP coreference resolution over the sentences of the given CAS and
 * writes the resulting mention chains back as {@code CoreferenceChain}/{@code CoreferenceLink}
 * annotations.
 *
 * @param aJCas
 *            the CAS to process; must contain {@code ROOT} (sentence parse) and {@code Token}
 *            annotations.
 * @throws AnalysisEngineProcessException
 *             if the underlying coreference system fails.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    // Hoisted out of the sentence loop: the language pack, the grammatical structure
    // factory, and the tree factory are all sentence-independent, so there is no need
    // to re-create them for every sentence.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(
            tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder());

    // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
    // it with PRN to avoid NPEs.
    TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
        @Override
        public Tree newTreeNode(String aParent, List<Tree> aChildren) {
            String parent = aParent;
            if ("PRN0".equals(parent)) {
                parent = "PRN";
            }
            return super.newTreeNode(parent, aChildren);
        }
    };

    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();
    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // Deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
        // We currently do not copy over dependencies from the CAS. This is supposed to fill
        // in the dependencies so we do not get NPEs.
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy,
                GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // merge the new CoreLabels with the tree leaves
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // extract all possible mentions
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            // CoreNLP mention indices are 1-based; endIndex points one past the last token,
            // hence the -1 / -2 adjustments when mapping back to the token lists.
            CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.startIndex - 1);
            CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.endIndex - 2);
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());

            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }

            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;

            link.addToIndexes();
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordDependencyConverter.java

License: Open Source License

/**
 * Extracts typed dependencies from the given parse tree (according to the configured
 * {@code mode}) and creates one DKPro dependency annotation per relation.
 *
 * @param aJCas
 *            the CAS to which the dependency annotations are added.
 * @param aLP
 *            the language pack providing the grammatical structure factory; if it does not
 *            support dependencies, this method silently does nothing.
 * @param parseTree
 *            the constituent parse tree to convert.
 * @param tokens
 *            the sentence tokens, aligned 1:1 (1-based) with the tree's word indices.
 */
protected void doCreateDependencyTags(JCas aJCas, TreebankLanguagePack aLP, Tree parseTree,
        List<Token> tokens) {
    GrammaticalStructure gs;
    try {
        gs = aLP.grammaticalStructureFactory(aLP.punctuationWordRejectFilter(), aLP.typedDependencyHeadFinder())
                .newGrammaticalStructure(parseTree);
    } catch (UnsupportedOperationException e) {
        // We already warned in the model provider if dependencies are not supported, so here
        // we just do nothing and skip the dependencies.
        return;
    }

    Collection<TypedDependency> dependencies;
    switch (mode) {
    case BASIC:
        dependencies = gs.typedDependencies(); // gs.typedDependencies(false);
        break;
    case NON_COLLAPSED:
        dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true);
        break;
    case COLLAPSED_WITH_EXTRA:
        dependencies = gs.typedDependenciesCollapsed(true);
        break;
    case COLLAPSED:
        dependencies = gs.typedDependenciesCollapsed(false);
        break;
    case CC_PROPAGATED:
        dependencies = gs.typedDependenciesCCprocessed(true);
        break;
    case CC_PROPAGATED_NO_EXTRA:
        dependencies = gs.typedDependenciesCCprocessed(false);
        break;
    case TREE:
        dependencies = gs.typedDependenciesCollapsedTree();
        break;
    default:
        // Fail fast instead of NPEing in the loop below if a new mode value is added
        // without extending this switch.
        throw new IllegalStateException("Unknown dependency mode: " + mode);
    }

    for (TypedDependency currTypedDep : dependencies) {
        int govIndex = currTypedDep.gov().index();
        int depIndex = currTypedDep.dep().index();
        if (govIndex != 0) {
            // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which
            // is not a token at all! Index 0 therefore marks the artificial root and is skipped.
            Token govToken = tokens.get(govIndex - 1);
            Token depToken = tokens.get(depIndex - 1);

            StanfordAnnotator.createDependencyAnnotation(aJCas, currTypedDep.reln(), govToken, depToken);
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java

License: Open Source License

/**
 * Extracts typed dependencies from the given parse tree (according to the configured
 * {@code mode}) and creates one dependency annotation per relation via the given annotator.
 *
 * @param aParser
 *            the parser whose language pack provides the grammatical structure factory; if
 *            dependencies are not supported, this method silently does nothing.
 * @param sfAnnotator
 *            the annotator used to create the dependency annotations.
 * @param parseTree
 *            the constituent parse tree to convert.
 * @param tokens
 *            the sentence tokens, aligned 1:1 (1-based) with the tree's word indices.
 */
protected void doCreateDependencyTags(ParserGrammar aParser, StanfordAnnotator sfAnnotator, Tree parseTree,
        List<Token> tokens) {
    GrammaticalStructure gs;
    try {
        TreebankLanguagePack tlp = aParser.getTLPParams().treebankLanguagePack();
        gs = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder())
                .newGrammaticalStructure(parseTree);
    } catch (UnsupportedOperationException e) {
        // We already warned in the model provider if dependencies are not supported, so here
        // we just do nothing and skip the dependencies.
        return;
    }

    Collection<TypedDependency> dependencies;
    switch (mode) {
    case BASIC:
        dependencies = gs.typedDependencies(); // gs.typedDependencies(false);
        break;
    case NON_COLLAPSED:
        dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true);
        break;
    case COLLAPSED_WITH_EXTRA:
        dependencies = gs.typedDependenciesCollapsed(true);
        break;
    case COLLAPSED:
        dependencies = gs.typedDependenciesCollapsed(false);
        break;
    case CC_PROPAGATED:
        dependencies = gs.typedDependenciesCCprocessed(true);
        break;
    case CC_PROPAGATED_NO_EXTRA:
        dependencies = gs.typedDependenciesCCprocessed(false);
        break;
    case TREE:
        dependencies = gs.typedDependenciesCollapsedTree();
        break;
    default:
        // Fail fast instead of NPEing in the loop below if a new mode value is added
        // without extending this switch.
        throw new IllegalStateException("Unknown dependency mode: " + mode);
    }

    for (TypedDependency currTypedDep : dependencies) {
        int govIndex = currTypedDep.gov().index();
        int depIndex = currTypedDep.dep().index();
        if (govIndex != 0) {
            // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which
            // is not a token at all! Index 0 therefore marks the artificial root and is skipped.
            Token govToken = tokens.get(govIndex - 1);
            Token depToken = tokens.get(depIndex - 1);

            sfAnnotator.createDependencyAnnotation(currTypedDep.reln(), govToken, depToken);
        }
    }
}