Example usage for edu.stanford.nlp.trees GrammaticalStructure typedDependenciesCollapsedTree

List of usage examples for edu.stanford.nlp.trees GrammaticalStructure typedDependenciesCollapsedTree

Introduction

On this page you can find example usages of edu.stanford.nlp.trees.GrammaticalStructure.typedDependenciesCollapsedTree().

Prototype

public Collection<TypedDependency> typedDependenciesCollapsedTree() 

Source Link

Document

Get the typed dependencies after mostly collapsing them, but keep a tree structure.

Usage

From source file:Dependency.java

/**
 * Reads one line from standard input, POS-tags and dependency-parses every sentence in it, and
 * prints word groups picked out by a set of hand-written rules over the collapsed-tree typed
 * dependencies ({@code nsubj}, {@code compound}, {@code conj:and}, {@code xcomp}, {@code dobj},
 * {@code ccomp}, {@code acomp}, {@code amod}, {@code neg}).
 *
 * <p>For each sentence the tagged tokens and every dependency are echoed before the rule output.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    Scanner sc = new Scanner(System.in);

    String text = sc.nextLine();

    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

    // The patterns assume a dependency prints as "rel(governor-i, dependent-j)" and pull out the
    // relation name, the governor word and the dependent word respectively.
    // Compiled once here rather than once per dependency.
    Pattern relPattern = Pattern.compile("(.*)(?=\\()");
    Pattern governorPattern = Pattern.compile("(?<=\\()(.*?)(?=-)");
    Pattern dependentPattern = Pattern.compile("(?<=, )(.*)(?=-)");

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {

        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);

        Collection<TypedDependency> s = gs.typedDependenciesCollapsedTree();

        System.out.println(tagged.toString());

        // One slot per tagged word. These arrays used to be sized by the number of dependencies,
        // which overflows as soon as a sentence has more words than dependencies.
        String token[] = new String[tagged.size()];
        String pos[] = new String[tagged.size()];
        int k = 0;
        for (TaggedWord taggedWord : tagged) {
            // Split "word/TAG" on the last slash so that words containing '/' stay intact.
            String str = taggedWord.toString();
            int index = str.lastIndexOf('/');
            token[k] = str.substring(0, index);
            pos[k] = str.substring(index + 1);
            k++;
        }

        // Parallel arrays: relation name, governor word and dependent word of each dependency.
        // An entry stays null when its pattern does not match.
        String rels[] = new String[s.size()];
        String word1[] = new String[s.size()];
        String word2[] = new String[s.size()];
        int j = 0;
        for (TypedDependency dependency : s) {
            System.out.println(dependency);
            String temp = dependency.toString();
            Matcher m0 = relPattern.matcher(temp);
            Matcher m1 = governorPattern.matcher(temp);
            Matcher m2 = dependentPattern.matcher(temp);
            if (m0.find()) {
                rels[j] = m0.group(0);
            }
            if (m1.find()) {
                word1[j] = m1.group(0);
            }
            if (m2.find()) {
                word2[j] = m2.group(0);
            }
            j++;
        }

        // Rules for feature extraction, driven by the first nsubj dependency (if any).
        int[] q = toIntArray(grRecog(rels, "nsubj"));
        if (q.length != 0) {
            // RULE 1: the nsubj dependent is tagged NN.
            if (posrecog(token, pos, word2[q[0]]).equals("NN")) {
                int[] w = toIntArray(grRecog(rels, "compound"));
                if (w.length != 0) {
                    // NOTE(review): "6" looks like a leftover debug marker; kept so the output
                    // stays unchanged - confirm whether it can be dropped.
                    System.out.println("6");
                    System.out.println(word1[q[0]] + "," + word2[q[0]] + "," + word2[w[0]]);
                } else {
                    int conj_and_index = compgrRecog(rels, word1, word2, "conj:and", word2[q[0]]);
                    if (conj_and_index != -1) {
                        System.out.println(
                                word1[conj_and_index] + "," + word2[conj_and_index] + "," + word2[q[0]]);
                    } else {
                        System.out.println(word1[q[0]] + "," + word2[q[0]]);
                    }
                }
            }
            // RULE 2: the nsubj governor is tagged JJ; follow xcomp -> dobj -> compound.
            else if (posrecog(token, pos, word1[q[0]]).equals("JJ")) {
                int a = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]);
                if (a != -1) {
                    int b = compgrRecog(rels, word1, word2, "dobj", word2[a]);
                    if (b != -1) {
                        int c = compgrRecog(rels, word1, word2, "compound", word2[b]);
                        if (c != -1) {
                            System.out.println(word1[q[0]] + "," + word1[c] + "," + word2[c]);
                        }
                    }
                }
                // RULE 3: no xcomp; use the second nsubj together with the first ccomp.
                else {
                    int b[] = toIntArray(grRecog(rels, "ccomp"));
                    // q is only known to be non-empty here, so check that a second nsubj exists
                    // before reading q[1].
                    if (b.length != 0 && q.length > 1) {
                        System.out.println(word1[q[1]] + "," + word2[q[1]] + "," + word1[b[0]]);
                    }

                }
            }
            // RULE 4: the nsubj governor is tagged VBZ; try dobj, then xcomp, then acomp.
            else if (posrecog(token, pos, word1[q[0]]).equals("VBZ")) {
                int vbp_dobj_index = compgrRecog(rels, word1, word2, "dobj", word2[q[0]]);
                if (vbp_dobj_index != -1) {
                    System.out.println(word1[vbp_dobj_index] + "," + word2[vbp_dobj_index]);
                } else {
                    int vbp_xcomp_index = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]);
                    if (vbp_xcomp_index != -1) {
                        System.out.println(word1[vbp_xcomp_index] + "," + word2[vbp_xcomp_index]);
                    } else {
                        int vbp_acomp_index = compgrRecog(rels, word1, word2, "acomp", word1[q[0]]);
                        if (vbp_acomp_index != -1) {
                            System.out.println(
                                    word1[q[0]] + "," + word1[vbp_acomp_index] + "," + word2[vbp_acomp_index]);
                        } else {
                            System.out.println(word1[q[0]]);
                        }
                    }
                }
            }

            // Independently of the rule above: print every amod pair, and the conj:and pairs
            // only when at least one amod exists.
            int[] f = toIntArray(grRecog(rels, "amod"));
            if (f.length != 0) {
                for (int i : f) {
                    System.out.println(word1[i] + "," + word2[i]);
                }
                int cj[] = toIntArray(grRecog(rels, "conj:and"));
                if (cj.length != 0) {
                    for (int i : cj) {
                        System.out.println(word1[i] + "," + word2[i]);
                    }
                }
            }
            int[] neg = toIntArray(grRecog(rels, "neg"));
            if (neg.length != 0) {
                for (int i : neg) {
                    System.out.println(word1[i] + "," + word2[i]);
                }
            }

        } else {
            // No nsubj at all: fall back to amod / conj:and / neg only. The amod pairs are
            // printed without a line break; the conj:and dependents that follow end the line.
            int[] f = toIntArray(grRecog(rels, "amod"));
            if (f.length != 0) {
                for (int i : f) {
                    System.out.print(word1[i] + "," + word2[i]);
                }
                int cj[] = toIntArray(grRecog(rels, "conj:and"));
                if (cj.length != 0) {
                    for (int i : cj) {
                        System.out.println(word2[i]);
                    }
                }
            }
            int[] neg = toIntArray(grRecog(rels, "neg"));
            if (neg.length != 0) {
                for (int i : neg) {
                    System.out.println(word1[i] + "," + word2[i]);
                }
            }
        }
    }
}

From source file:Dependency2.java

/**
 * Loads the feature data via {@code readCsv()}, reads one line from standard input and, when
 * {@code multifeatures(text)} reports several features, dependency-parses each sentence and
 * prints the feature found on either end of every {@code amod} dependency.
 *
 * <p>Every dependency is echoed to standard output as it is inspected.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    Scanner sc = new Scanner(System.in);

    readCsv();
    String text = sc.nextLine();
    if (!multifeatures(text)) {
        // sentence score is feature score
        return;
    }

    System.out.println("Multiple features present");
    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

    // The patterns assume a dependency prints as "rel(governor-i, dependent-j)".
    // Compiled once here rather than once per dependency.
    Pattern relPattern = Pattern.compile("(.*)(?=\\()");
    Pattern governorPattern = Pattern.compile("(?<=\\()(.*?)(?=-)");
    // NOTE(review): the look-behind is "," with no following space, so for "rel(gov-i, dep-j)"
    // the captured dependent starts with a space - confirm getFeature tolerates that.
    Pattern dependentPattern = Pattern.compile("(?<=,)(.*)(?=-)");

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);

        for (TypedDependency dependency : gs.typedDependenciesCollapsedTree()) {
            String temp = dependency.toString();
            System.out.println(temp);

            Matcher m0 = relPattern.matcher(temp);
            Matcher m1 = governorPattern.matcher(temp);
            Matcher m2 = dependentPattern.matcher(temp);
            // Each part stays null when its pattern does not match.
            String rel = m0.find() ? m0.group(0) : null;
            String governor = m1.find() ? m1.group(0) : null;
            String dependent = m2.find() ? m2.group(0) : null;

            // Constant-first equals: an unparsable dependency is skipped instead of throwing.
            if ("amod".equals(rel)) {
                String f1 = getFeature(governor);
                String f2 = getFeature(dependent);
                // Prefer the governor's feature, fall back to the dependent's.
                String f = f1 != null ? f1 : f2;
                if (f != null) {
                    System.out.println("Feature: " + f);
                }
            }
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordDependencyConverter.java

License:Open Source License

/**
 * Builds the grammatical structure for {@code parseTree}, selects the typed-dependency variant
 * that corresponds to the configured {@code mode}, and creates one dependency annotation per
 * dependency whose governor is a real token.
 *
 * <p>If the language pack does not support grammatical structures
 * ({@link UnsupportedOperationException}), the method returns without creating anything.
 *
 * @param aJCas     the CAS handed to {@code StanfordAnnotator.createDependencyAnnotation}
 * @param aLP       language pack supplying the grammatical-structure factory, punctuation
 *                  filter and head finder
 * @param parseTree the tree to derive dependencies from
 * @param tokens    the sentence tokens; dependency indices are 1-based into this list
 * @throws IllegalStateException if {@code mode} is a value this method does not handle
 */
protected void doCreateDependencyTags(JCas aJCas, TreebankLanguagePack aLP, Tree parseTree,
        List<Token> tokens) {
    GrammaticalStructure gs;
    try {
        gs = aLP.grammaticalStructureFactory(aLP.punctuationWordRejectFilter(), aLP.typedDependencyHeadFinder())
                .newGrammaticalStructure(parseTree);
    } catch (UnsupportedOperationException e) {
        // We already warned in the model provider if dependencies are not supported, so here
        // we just do nothing and skip the dependencies.
        return;
    }

    Collection<TypedDependency> dependencies;
    switch (mode) {
    case BASIC:
        dependencies = gs.typedDependencies(); // gs.typedDependencies(false);
        break;
    case NON_COLLAPSED:
        dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true);
        break;
    case COLLAPSED_WITH_EXTRA:
        dependencies = gs.typedDependenciesCollapsed(true);
        break;
    case COLLAPSED:
        dependencies = gs.typedDependenciesCollapsed(false);
        break;
    case CC_PROPAGATED:
        dependencies = gs.typedDependenciesCCprocessed(true);
        break;
    case CC_PROPAGATED_NO_EXTRA:
        dependencies = gs.typedDependenciesCCprocessed(false);
        break;
    case TREE:
        dependencies = gs.typedDependenciesCollapsedTree();
        break;
    default:
        // Fail with a clear message instead of iterating over a null collection below.
        throw new IllegalStateException("Unsupported dependencies mode: " + mode);
    }

    for (TypedDependency currTypedDep : dependencies) {
        int govIndex = currTypedDep.gov().index();
        int depIndex = currTypedDep.dep().index();
        if (govIndex != 0) {
            // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which
            // is not token at all!
            Token govToken = tokens.get(govIndex - 1);
            Token depToken = tokens.get(depIndex - 1);

            StanfordAnnotator.createDependencyAnnotation(aJCas, currTypedDep.reln(), govToken, depToken);
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java

License:Open Source License

/**
 * Builds the grammatical structure for {@code parseTree} using the parser's treebank language
 * pack, selects the typed-dependency variant that corresponds to the configured {@code mode},
 * and creates one dependency annotation per dependency whose governor is a real token.
 *
 * <p>If the language pack does not support grammatical structures
 * ({@link UnsupportedOperationException}), the method returns without creating anything.
 *
 * @param aParser     parser whose language pack supplies the grammatical-structure factory,
 *                    punctuation filter and head finder
 * @param sfAnnotator receives each dependency via {@code createDependencyAnnotation}
 * @param parseTree   the tree to derive dependencies from
 * @param tokens      the sentence tokens; dependency indices are 1-based into this list
 * @throws IllegalStateException if {@code mode} is a value this method does not handle
 */
protected void doCreateDependencyTags(ParserGrammar aParser, StanfordAnnotator sfAnnotator, Tree parseTree,
        List<Token> tokens) {
    GrammaticalStructure gs;
    try {
        TreebankLanguagePack tlp = aParser.getTLPParams().treebankLanguagePack();
        gs = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder())
                .newGrammaticalStructure(parseTree);
    } catch (UnsupportedOperationException e) {
        // We already warned in the model provider if dependencies are not supported, so here
        // we just do nothing and skip the dependencies.
        return;
    }

    Collection<TypedDependency> dependencies;
    switch (mode) {
    case BASIC:
        dependencies = gs.typedDependencies(); // gs.typedDependencies(false);
        break;
    case NON_COLLAPSED:
        dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true);
        break;
    case COLLAPSED_WITH_EXTRA:
        dependencies = gs.typedDependenciesCollapsed(true);
        break;
    case COLLAPSED:
        dependencies = gs.typedDependenciesCollapsed(false);
        break;
    case CC_PROPAGATED:
        dependencies = gs.typedDependenciesCCprocessed(true);
        break;
    case CC_PROPAGATED_NO_EXTRA:
        dependencies = gs.typedDependenciesCCprocessed(false);
        break;
    case TREE:
        dependencies = gs.typedDependenciesCollapsedTree();
        break;
    default:
        // Fail with a clear message instead of iterating over a null collection below.
        throw new IllegalStateException("Unsupported dependencies mode: " + mode);
    }

    for (TypedDependency currTypedDep : dependencies) {
        int govIndex = currTypedDep.gov().index();
        int depIndex = currTypedDep.dep().index();
        if (govIndex != 0) {
            // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which
            // is not token at all!
            Token govToken = tokens.get(govIndex - 1);
            Token depToken = tokens.get(depIndex - 1);

            sfAnnotator.createDependencyAnnotation(currTypedDep.reln(), govToken, depToken);
        }
    }
}

From source file:ie.pars.bnc.preprocess.ProcessNLP.java

License:Open Source License

/**
 *
 * @param inputStreamFile/*w w  w. j  av a  2 s. com*/
 * @param morphology
 * @param posTagger
 * @param parser
 * @return
 * @throws Exception
 */
public static StringBuilder parseBNCXML(InputStream inputStreamFile, Morphology morphology,
        MaxentTagger posTagger, ParserGrammar parser) throws Exception {
    StringBuilder results = new StringBuilder();
    int counterSent = 0;
    List<List<List<WordLemmaTag>>> parseBNCXMLTokenized = parseBNCXMLTokenized(inputStreamFile);
    for (List<List<WordLemmaTag>> xparseBNCXMLL : parseBNCXMLTokenized) {
        results.append("<p>\n");
        for (List<WordLemmaTag> para : xparseBNCXMLL) {
            if (counterSent++ % 20 == 0) {
                System.out.print(".");
            }
            results.append("<s>\n");
            List<TaggedWord> tagSentence = posTagger.tagSentence(para, true);

            Tree parseTree = parser.parse(tagSentence);

            GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree,
                    parser.treebankLanguagePack().punctuationWordRejectFilter(),
                    parser.getTLPParams().typedDependencyHeadFinder());

            Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
            SemanticGraph depTree = new SemanticGraph(deps);

            for (int i = 0; i < tagSentence.size(); ++i) {

                int head = -1;
                String deprel = null;
                //                    if (depTree != null) {
                Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index)
                        .collect(Collectors.toSet());
                IndexedWord node = depTree.getNodeByIndexSafe(i + 1);
                if (node != null) {
                    List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
                    if (!edgeList.isEmpty()) {
                        assert edgeList.size() == 1;
                        head = edgeList.get(0).getGovernor().index();
                        deprel = edgeList.get(0).getRelation().toString();
                    } else if (rootSet.contains(i + 1)) {
                        head = 0;
                        deprel = "ROOT";
                    }
                }
                //     }

                // Write the token
                TaggedWord lexHead = null;
                if (head > 0) {
                    lexHead = tagSentence.get(head - 1);
                }
                results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead)).append("\n");
            }
            results.append("</s>\n");
        }
        results.append("</p>\n");
    }
    System.out.println("");
    inputStreamFile.close();

    return results;
}

From source file:ie.pars.bnc.preprocess.ProcessNLP.java

License:Open Source License

/**
 * Derives the collapsed-tree typed dependencies of {@code tree} and writes each one, followed
 * by a newline, to {@code outStream} as UTF-8. The word, lemma and tag of every dependent are
 * also echoed tab-separated to standard output.
 *
 * <p>The stream is flushed but left open.
 *
 * @param tree        the parse tree to convert
 * @param parser      supplies the language-pack parameters, punctuation filter and head finder
 * @param arg         not used by this method
 * @param outStream   destination for the dependency lines
 * @param commandArgs not used by this method
 * @throws IOException if writing to {@code outStream} fails
 */
public static void handleDependencies(Tree tree, ParserGrammar parser, String arg, OutputStream outStream,
        String commandArgs) throws IOException {
    GrammaticalStructure structure = parser.getTLPParams().getGrammaticalStructure(tree,
            parser.treebankLanguagePack().punctuationWordRejectFilter(),
            parser.getTLPParams().typedDependencyHeadFinder());
    Collection<TypedDependency> dependencies = structure.typedDependenciesCollapsedTree();

    OutputStreamWriter writer = new OutputStreamWriter(outStream, "utf-8");
    for (TypedDependency dependency : dependencies) {
        // word<TAB>lemma<TAB>tag<TAB>; null parts are rendered as "null".
        String columns = String.join("\t", dependency.dep().word(), dependency.dep().lemma(),
                dependency.dep().tag()) + "\t";
        System.out.println(columns);

        writer.write(dependency.toString() + "\n");
    }
    writer.flush();
}

From source file:ie.pars.bnc.preprocess.ProcessNLP.java

License:Open Source License

/**
 * Tokenizes, tags and parses a single sentence and renders it as an {@code <s id='...'>} element
 * containing one row per token, each produced by {@code line(...)} with the token's 1-based
 * position, head index and dependency relation.
 *
 * @param sentence   the raw sentence text
 * @param morphology passed through to {@code line(...)} for every token
 * @param posTagger  tagger applied to the tokenized sentence
 * @param parser     parser used to build the tree and the grammatical structure
 * @param sid        value written verbatim into the {@code id} attribute
 * @return the rendered sentence block
 */
private static StringBuilder parseTheSentence(String sentence, Morphology morphology, MaxentTagger posTagger,
        ParserGrammar parser, String sid) {
    TokenizerFactory<Word> newTokenizerFactory = PTBTokenizerFactory.newTokenizerFactory();

    StringBuilder results = new StringBuilder();
    results.append("<s id='").append(sid).append("'>\n");

    StringReader sr = new StringReader(sentence);
    Tokenizer<Word> tokenizer = newTokenizerFactory.getTokenizer(sr);
    List<Word> tokenize = tokenizer.tokenize();

    List<TaggedWord> tagSentence = posTagger.tagSentence(tokenize);

    Tree parseTree = parser.parse(tagSentence);

    GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree,
            parser.treebankLanguagePack().punctuationWordRejectFilter(),
            parser.getTLPParams().typedDependencyHeadFinder());

    Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
    SemanticGraph depTree = new SemanticGraph(deps);

    // The root indices do not change per token, so compute them once for the sentence.
    Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet());

    for (int i = 0; i < tagSentence.size(); ++i) {
        // head stays -1 / deprel stays null for tokens absent from the graph.
        int head = -1;
        String deprel = null;
        IndexedWord node = depTree.getNodeByIndexSafe(i + 1);
        if (node != null) {
            List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
            if (!edgeList.isEmpty()) {
                assert edgeList.size() == 1;
                head = edgeList.get(0).getGovernor().index();
                deprel = edgeList.get(0).getRelation().toString();
            } else if (rootSet.contains(i + 1)) {
                head = 0;
                deprel = "ROOT";
            }
        }

        // Write the token
        TaggedWord lexHead = null;
        if (head > 0) {
            lexHead = tagSentence.get(head - 1);
        }
        results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead)).append("\n");
    }
    results.append("</s>\n");
    return results;
}