Example usage for edu.stanford.nlp.trees TreebankLanguagePack grammaticalStructureFactory

List of usage examples for edu.stanford.nlp.trees TreebankLanguagePack grammaticalStructureFactory

Introduction

On this page you can find example usage of edu.stanford.nlp.trees TreebankLanguagePack grammaticalStructureFactory.

Prototype

GrammaticalStructureFactory grammaticalStructureFactory(Predicate<String> puncFilter,
        HeadFinder typedDependencyHF);

Source Link

Document

Return a GrammaticalStructureFactory suitable for this language/treebank.

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java

License: Open Source License

/**
 * Runs Stanford CoreNLP coreference resolution over the sentences of the given CAS and
 * writes the resulting mention chains back as {@code CoreferenceChain}/{@code CoreferenceLink}
 * annotations.
 *
 * @param aJCas
 *            the CAS to process; must contain {@code ROOT} (sentence parse) and {@code Token}
 *            annotations.
 * @throws AnalysisEngineProcessException
 *             if the underlying coreference system fails.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    // Hoisted out of the sentence loop: the language pack, the grammatical structure
    // factory, and the tree factory are all sentence-independent, so there is no need
    // to re-create them for every sentence.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(
            tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder());

    // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
    // it with PRN to avoid NPEs.
    TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
        @Override
        public Tree newTreeNode(String aParent, List<Tree> aChildren) {
            String parent = aParent;
            if ("PRN0".equals(parent)) {
                parent = "PRN";
            }
            return super.newTreeNode(parent, aChildren);
        }
    };

    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();
    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // Deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
        // We currently do not copy over dependencies from the CAS. This is supposed to fill
        // in the dependencies so we do not get NPEs.
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy,
                GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // merge the new CoreLabels with the tree leaves
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // extract all possible mentions
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            // CoreNLP mention indices are 1-based; endIndex points one past the last token,
            // hence the -1 / -2 adjustments when mapping back to the token lists.
            CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.startIndex - 1);
            CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.endIndex - 2);
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());

            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }

            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;

            link.addToIndexes();
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordDependencyConverter.java

License: Open Source License

/**
 * Extracts typed dependencies from the given parse tree (according to the configured
 * {@code mode}) and creates one DKPro dependency annotation per relation.
 *
 * @param aJCas
 *            the CAS to which the dependency annotations are added.
 * @param aLP
 *            the language pack providing the grammatical structure factory; if it does not
 *            support dependencies, this method silently does nothing.
 * @param parseTree
 *            the constituent parse tree to convert.
 * @param tokens
 *            the sentence tokens, aligned 1:1 (1-based) with the tree's word indices.
 */
protected void doCreateDependencyTags(JCas aJCas, TreebankLanguagePack aLP, Tree parseTree,
        List<Token> tokens) {
    GrammaticalStructure gs;
    try {
        gs = aLP.grammaticalStructureFactory(aLP.punctuationWordRejectFilter(), aLP.typedDependencyHeadFinder())
                .newGrammaticalStructure(parseTree);
    } catch (UnsupportedOperationException e) {
        // We already warned in the model provider if dependencies are not supported, so here
        // we just do nothing and skip the dependencies.
        return;
    }

    Collection<TypedDependency> dependencies;
    switch (mode) {
    case BASIC:
        dependencies = gs.typedDependencies(); // gs.typedDependencies(false);
        break;
    case NON_COLLAPSED:
        dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true);
        break;
    case COLLAPSED_WITH_EXTRA:
        dependencies = gs.typedDependenciesCollapsed(true);
        break;
    case COLLAPSED:
        dependencies = gs.typedDependenciesCollapsed(false);
        break;
    case CC_PROPAGATED:
        dependencies = gs.typedDependenciesCCprocessed(true);
        break;
    case CC_PROPAGATED_NO_EXTRA:
        dependencies = gs.typedDependenciesCCprocessed(false);
        break;
    case TREE:
        dependencies = gs.typedDependenciesCollapsedTree();
        break;
    default:
        // Fail fast instead of NPEing in the loop below if a new mode value is added
        // without extending this switch.
        throw new IllegalStateException("Unknown dependency mode: " + mode);
    }

    for (TypedDependency currTypedDep : dependencies) {
        int govIndex = currTypedDep.gov().index();
        int depIndex = currTypedDep.dep().index();
        if (govIndex != 0) {
            // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which
            // is not a token at all! Index 0 therefore marks the artificial root and is skipped.
            Token govToken = tokens.get(govIndex - 1);
            Token depToken = tokens.get(depIndex - 1);

            StanfordAnnotator.createDependencyAnnotation(aJCas, currTypedDep.reln(), govToken, depToken);
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java

License: Open Source License

/**
 * Extracts typed dependencies from the given parse tree (according to the configured
 * {@code mode}) and creates one dependency annotation per relation via the given annotator.
 *
 * @param aParser
 *            the parser whose language pack provides the grammatical structure factory; if
 *            dependencies are not supported, this method silently does nothing.
 * @param sfAnnotator
 *            the annotator used to create the dependency annotations.
 * @param parseTree
 *            the constituent parse tree to convert.
 * @param tokens
 *            the sentence tokens, aligned 1:1 (1-based) with the tree's word indices.
 */
protected void doCreateDependencyTags(ParserGrammar aParser, StanfordAnnotator sfAnnotator, Tree parseTree,
        List<Token> tokens) {
    GrammaticalStructure gs;
    try {
        TreebankLanguagePack tlp = aParser.getTLPParams().treebankLanguagePack();
        gs = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder())
                .newGrammaticalStructure(parseTree);
    } catch (UnsupportedOperationException e) {
        // We already warned in the model provider if dependencies are not supported, so here
        // we just do nothing and skip the dependencies.
        return;
    }

    Collection<TypedDependency> dependencies;
    switch (mode) {
    case BASIC:
        dependencies = gs.typedDependencies(); // gs.typedDependencies(false);
        break;
    case NON_COLLAPSED:
        dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true);
        break;
    case COLLAPSED_WITH_EXTRA:
        dependencies = gs.typedDependenciesCollapsed(true);
        break;
    case COLLAPSED:
        dependencies = gs.typedDependenciesCollapsed(false);
        break;
    case CC_PROPAGATED:
        dependencies = gs.typedDependenciesCCprocessed(true);
        break;
    case CC_PROPAGATED_NO_EXTRA:
        dependencies = gs.typedDependenciesCCprocessed(false);
        break;
    case TREE:
        dependencies = gs.typedDependenciesCollapsedTree();
        break;
    default:
        // Fail fast instead of NPEing in the loop below if a new mode value is added
        // without extending this switch.
        throw new IllegalStateException("Unknown dependency mode: " + mode);
    }

    for (TypedDependency currTypedDep : dependencies) {
        int govIndex = currTypedDep.gov().index();
        int depIndex = currTypedDep.dep().index();
        if (govIndex != 0) {
            // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which
            // is not a token at all! Index 0 therefore marks the artificial root and is skipped.
            Token govToken = tokens.get(govIndex - 1);
            Token depToken = tokens.get(depIndex - 1);

            sfAnnotator.createDependencyAnnotation(currTypedDep.reln(), govToken, depToken);
        }
    }
}