Example usage for edu.stanford.nlp.trees TreebankLanguagePack grammaticalStructureFactory

List of usage examples for edu.stanford.nlp.trees TreebankLanguagePack grammaticalStructureFactory

Introduction

On this page you can find example usages of edu.stanford.nlp.trees TreebankLanguagePack grammaticalStructureFactory.

Prototype

GrammaticalStructureFactory grammaticalStructureFactory();

Source Link

Document

Return a GrammaticalStructureFactory suitable for this language/treebank.

Usage

From source file:ConstituencyParse.java

License:Apache License

/**
 * Opens the output writers and prepares the parser, binarizer, unary-collapsing
 * transformer and grammatical structure factory.
 *
 * @param tokPath    file opened for {@code tokWriter}; when {@code null} that writer is not created
 * @param parentPath file opened for {@code parentWriter}
 * @param tokenize   value stored in the {@code tokenize} field
 * @throws IOException if an output file cannot be opened for writing
 */
public ConstituencyParse(String tokPath, String parentPath, boolean tokenize) throws IOException {
    this.tokenize = tokenize;

    // The first writer is optional; the second one is always opened.
    if (tokPath != null) {
        FileWriter tokFile = new FileWriter(tokPath);
        tokWriter = new BufferedWriter(tokFile);
    }
    FileWriter parentFile = new FileWriter(parentPath);
    parentWriter = new BufferedWriter(parentFile);

    parser = LexicalizedParser.loadModel(PCFG_PATH);
    binarizer = TreeBinarizer.simpleTreeBinarizer(
            parser.getTLPParams().headFinder(), parser.treebankLanguagePack());
    transformer = new CollapseUnaryTransformer();

    // Factory used to produce dependency representations from constituency trees.
    gsf = new PennTreebankLanguagePack().grammaticalStructureFactory();
}

From source file:artinex.TypDep.java

/**
 * Demo entry point: parses a fixed sentence, prints every leaf together with the label of
 * its parent node, and then prints the collapsed typed dependencies of the parse.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final String sentence = "What is index in array";
    Tree root = new TypDep().parse(sentence);

    // One "leafLabel-parentLabel" entry per leaf, all on a single line.
    for (Tree word : root.getLeaves()) {
        String wordLabel = word.label().value();
        String parentLabel = word.parent(root).label().value();
        System.out.print(wordLabel + "-" + parentLabel + " ");
    }
    System.out.println();

    // Derive the dependency view from the constituency tree.
    GrammaticalStructureFactory factory = new PennTreebankLanguagePack().grammaticalStructureFactory();
    Collection<TypedDependency> dependencies = factory.newGrammaticalStructure(root)
            .typedDependenciesCollapsed();
    System.out.println(dependencies);
}

From source file:com.parse.Dependency.java

/**
 * Demo entry point: loads the English PCFG model, parses a pre-tokenized sentence and a raw
 * sentence, and prints the CC-processed typed dependencies of each.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    String[] flags = { "-maxLength", "80", "-retainTmpSubcategories" };
    lp.setOptionFlags(flags);

    GrammaticalStructureFactory gsf;
    List<TypedDependency> dependencies;

    // First sentence: already split into tokens.
    String[] tokens = { "This", "is", "an", "easy", "sentence", "." };
    Tree tokenizedParse = lp.apply(Sentence.toCoreLabelList(tokens));
    tokenizedParse.pennPrint();
    System.out.println();

    gsf = new PennTreebankLanguagePack().grammaticalStructureFactory();
    dependencies = gsf.newGrammaticalStructure(tokenizedParse).typedDependenciesCCprocessed();
    System.out.println(dependencies);

    // Second sentence: raw text handed straight to the parser.
    Tree rawParse = lp.parse("which movies were directed by Christopher Nolan");
    System.out.println(rawParse.firstChild().toString());
    dependencies = gsf.newGrammaticalStructure(rawParse).typedDependenciesCCprocessed();
    System.out.println(dependencies);

    // Node string of the dependent in the first dependency.
    TypedDependency first = dependencies.get(0);
    System.out.println(first.dep().nodeString());
}

From source file:DependencyParser.ParseDependency.java

/**
 * Tokenizes and parses {@code str}, computes its collapsed typed dependencies, and collects
 * the non-empty pairs that {@code getPair} returns for a fixed sequence of relation
 * combinations. Every accepted pair is also echoed to standard output.
 *
 * <p>Side effects: reassigns the {@code sr} and {@code tkzr} fields and prints to
 * {@code System.out}.
 *
 * @param str the text to analyse
 * @return the collected pairs in rule order; never {@code null}, possibly empty
 */
public List<String> getAspect_OpinionPair(String str) {
    sr = new StringReader(str);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    Tree parse = (Tree) lp.apply(toks);
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection<TypedDependency> td = gs.typedDependenciesCollapsed();
    System.out.println(td);

    List<String> ls = new ArrayList<String>();
    Object[] list = td.toArray();

    addNonEmptyPairs("find pair1: ", getPair(list, "nsubj", "nn", 1), ls);

    // Rule 2: each non-empty nsubj/xcomp hit is combined with every non-empty dobj/nn hit.
    for (String pair2 : getPair(list, "nsubj", "xcomp", 2)) {
        if (pair2.isEmpty()) {
            continue;
        }
        for (String pair22 : getPair(list, "dobj", "nn", 22)) {
            if (!pair22.isEmpty()) {
                System.out.println("find pair2: (" + pair22 + "," + pair2 + ")");
                ls.add(pair22 + ", " + pair2);
            }
        }
    }

    addNonEmptyPairs("find pair3: ", getPair(list, "nsubj", "dobj", 3), ls);
    addNonEmptyPairs("find pair4: ", getPair(list, "nsubj", "acomp", 4), ls);

    // Rule 5: each non-empty nsubj/acomp hit is combined with every non-empty rcmod/nn hit.
    for (String pair5 : getPair(list, "nsubj", "acomp", 5)) {
        if (pair5.isEmpty()) {
            continue;
        }
        for (String pair55 : getPair(list, "rcmod", "nn", 55)) {
            if (!pair55.isEmpty()) {
                System.out.println("find pair5: " + pair55 + "," + pair5);
                ls.add(pair55 + "," + pair5);
            }
        }
    }

    addNonEmptyPairs("find pair6: ", getPair(list, "amod", "", 6), ls);
    addNonEmptyPairs("find pair7: ", getPair(list, "amod", "amod", 7), ls);
    // Fix: the original looped over the rule-7 result here, so rule-7 pairs were added twice
    // and the rule-7a result was computed but never used.
    addNonEmptyPairs("find pair7a: ", getPair(list, "amod", "conj_and", 7), ls);
    // NOTE(review): the label says "pair9" although this is rule 8; output text left unchanged.
    addNonEmptyPairs("find pair9: ", getPair(list, "amod", "conj_and", 8), ls);
    addNonEmptyPairs("find pair10: ", getPair(list, "nsubj", "nn", 10), ls);

    // Rule 11/12: the prep_with/nn pairs are collected once per non-empty nsubj/prep_with hit.
    // NOTE(review): "paart12" looks like a typo for "pair12"; output text left unchanged.
    for (String pair11 : getPair(list, "nsubj", "prep_with", 11)) {
        if (!pair11.isEmpty()) {
            addNonEmptyPairs("find paart12: ", getPair(list, "prep_with", "nn", 12), ls);
        }
    }
    return ls;
}

/** Prints {@code label + pair} for every non-empty entry of {@code pairs} and appends it to {@code sink}. */
private static void addNonEmptyPairs(String label, List<String> pairs, List<String> sink) {
    for (String pair : pairs) {
        if (!pair.isEmpty()) {
            System.out.println(label + pair);
            sink.add(pair);
        }
    }
}

From source file:DependencyParser.Parser.java

/**
 * Parses {@code text} with the English PCFG model, prints its CC-processed typed
 * dependencies to standard output, writes the Penn tree plus collapsed dependencies to a
 * fixed text file, and asks {@code Main.writeImage} to render the result to a fixed image
 * path.
 *
 * <p>Failures are logged rather than propagated.
 *
 * @param text the text to parse
 */
public void CallParser(String text) {
    try {
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        LexicalizedParser lp = LexicalizedParser
                .loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
        lp.setOptionFlags(new String[] { "-maxLength", "500", "-retainTmpSubcategories" });
        TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize();
        Tree tree = lp.apply(wordList);

        GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
        Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed(true);
        System.out.println(tdl);

        // try-with-resources guarantees the writer is closed even when printing fails.
        try (PrintWriter pw = new PrintWriter("H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\Text-Parsed.txt")) {
            TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
            tp.printTree(tree, pw);
        }

        Main.writeImage(tree, tdl, "H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\image.png", 3);
        // NOTE(review): this checks a relative "image.png", not the absolute path passed to
        // writeImage above -- confirm which location is intended.
        assert (new File("image.png").exists());
    } catch (FileNotFoundException f) {
        // Previously swallowed silently; record it so a missing output location is visible.
        Logger.getLogger(Parser.class.getName()).log(Level.WARNING, "Output file could not be opened", f);
    } catch (Exception ex) {
        Logger.getLogger(Parser.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:DependencyParser.RunStanfordParser.java

/**
 * Reads {@code filename} line by line and, for each line, tokenizes it, parses it with the
 * English PCFG model, and prints to standard output: the tokens, the words whose tag starts
 * with "N" or "J" together with those tags, the words after a second stemming pass, the
 * collapsed typed dependencies, the "words,penn" tree print, and whatever {@code dfs}
 * reports for the noun-phrase pattern.
 *
 * @param filename path of the text file to process, one input per line
 * @throws FileNotFoundException if {@code filename} cannot be opened
 * @throws IOException if reading from the file fails
 */
public RunStanfordParser(String filename) throws FileNotFoundException, IOException {
    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    //lp.setOptionFlags(new String[]{"-maxLength", "80", "-retainTmpSubcategories"}); // set max sentence length if you want

    // Loop-invariant helpers: built once instead of once per input line.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    // Example of the text this targets: (NP (DT a) (JJ temporary) (NN searcher))
    // NOTE(review): "||" introduces an empty alternative and the spaces around the top-level
    // "|" are literal (no COMMENTS flag), so this may match less or more than intended.
    // The pattern text is left unchanged; confirm against dfs() before editing it.
    String reg = "(\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((JJ||JJR||JJS) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\))";
    Pattern patt = Pattern.compile(reg);

    // try-with-resources closes the reader (and the underlying file stream) on every exit path;
    // the original never closed them.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)))) {
        WordStemmer stemmer = new WordStemmer(); // stemmer/lemmatizer object

        String strLine;
        while ((strLine = br.readLine()) != null) {
            System.out.println("Tokenizing and Parsing: " + strLine); // print current line to console

            // Each line gets its own reader because the tokenizer consumes a Reader.
            PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(new StringReader(strLine));
            List toks = tkzr.tokenize();
            System.out.println("tokens: " + toks);

            Tree parse = (Tree) lp.apply(toks);

            List<String> words = new ArrayList<>();
            List<String> stems = new ArrayList<>();
            List<String> tags = new ArrayList<>();

            // Words and tags: only tags starting with "N" or "J" are kept.
            stemmer.visitTree(parse); // apply the stemmer to the tree
            for (TaggedWord tw : parse.taggedYield()) {
                if (tw.tag().startsWith("N") || tw.tag().startsWith("J")) {
                    words.add(tw.word());
                    tags.add(tw.tag());
                }
            }
            System.out.println("Noun and Ajective words: " + words);
            System.out.println("POStags: " + tags);

            // Stems: the stemmer is applied a second time, as in the original, then every word is taken.
            stemmer.visitTree(parse);
            for (TaggedWord tw : parse.taggedYield()) {
                stems.add(tw.word());
            }

            // Dependency view of the parse.
            GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
            Collection<?> tdl = gs.typedDependenciesCollapsed();

            System.out.println("words: " + words);
            System.out.println("POStags: " + tags);
            System.out.println("stemmedWordsAndTags: " + stems);
            System.out.println("typedDependencies: " + tdl);

            TreePrint tp = new TreePrint("words,penn");
            tp.printTree(parse);

            System.out.println("Noun Phrases are: -------");
            System.out.println(" Noun Phrase List:..");
            dfs(parse, parse, patt);

            System.out.println(); // separate output lines
        }
    }
}

From source file:edu.iastate.airl.semtus.parser.StructureAnalyzer.java

License:Open Source License

/**
 * Creates the analyzer and initialises {@code theFactory} with the grammatical structure
 * factory of a new {@code PennTreebankLanguagePack}.
 */
public StructureAnalyzer() {
    theFactory = new PennTreebankLanguagePack().grammaticalStructureFactory();
}

From source file:Engines.Test.StanfordParser.TreeHandling.java

License:Open Source License

/**
 * Tokenizes and parses {@code text} with the English PCFG model and computes the
 * CC-processed typed dependencies of the parse. The result is discarded; nothing is
 * printed or returned.
 *
 * @param text the text to parse
 */
public static void test(String text) {
    GrammaticalStructureFactory factory = new PennTreebankLanguagePack().grammaticalStructureFactory();

    LexicalizedParser parser = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    String[] flags = { "-maxLength", "500", "-retainTmpSubcategories" };
    parser.setOptionFlags(flags);

    // Tokenize the raw text into CoreLabels before handing it to the parser.
    TokenizerFactory<CoreLabel> tokenizers = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    StringReader input = new StringReader(text);
    List<CoreLabel> tokens = tokenizers.getTokenizer(input).tokenize();

    Tree parsed = parser.apply(tokens);
    factory.newGrammaticalStructure(parsed).typedDependenciesCCprocessed(true);
}

From source file:englishparser.EnglishParser.java

/**
 * Parses every sentence of {@code filename} with {@code lp} and passes each parse tree to
 * {@code extractNP}, {@code extractNN} and {@code extractNNP}. The three result savers are
 * (re)created at fixed paths before processing and closed afterwards.
 *
 * @param lp       parser applied to each sentence
 * @param filename document to split into sentences and parse
 * @throws FileNotFoundException declared by the original signature
 * @throws IOException declared by the original signature
 */
public static void demoDP(LexicalizedParser lp, String filename) throws FileNotFoundException, IOException {
    printer_NP = new ResultSaver("/home/bigstone/Documents/medicine_NP.txt");
    printer_NN = new ResultSaver("/home/bigstone/Documents/medicine_NN.txt");
    printer_NNP = new ResultSaver("/home/bigstone/Documents/medicine_NNP.txt");

    try {
        for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
            Tree parse = lp.apply(sentence);
            extractNP(parse);
            extractNN(parse);
            extractNNP(parse);
        }
    } finally {
        // The original closed only printer_NP, and only on the success path. The nested
        // finally blocks make sure each saver is closed even if an earlier close() throws.
        try {
            printer_NP.close();
        } finally {
            try {
                printer_NN.close();
            } finally {
                printer_NNP.close();
            }
        }
    }
}

From source file:englishparser.EnglishParser.java

/**
 * Shows two further ways of calling the parser: with a list of already tokenized words,
 * and with raw text that is first run through an explicit tokenizer as a single sentence.
 * The typed dependencies of the second parse are printed, and a TreePrint object is then
 * used for output; the options given when creating the TreePrint determine what it prints.
 *
 * @param lp the parser to demonstrate
 */
public static void demoAPI(LexicalizedParser lp) {
    // Variant 1: the words are supplied already tokenized.
    String[] tokens = { "This", "is", "an", "easy", "sentence", "." };
    Tree tree = lp.apply(Sentence.toCoreLabelList(tokens));
    tree.pennPrint();
    System.out.println();

    // Variant 2: raw text goes through an explicitly created tokenizer first.
    String rawSentence = "This is another sentence.";
    TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    Tokenizer<CoreLabel> tokenizer = factory.getTokenizer(new StringReader(rawSentence));
    tree = lp.apply(tokenizer.tokenize());

    // Typed dependencies of the second parse.
    GrammaticalStructureFactory gsf = new PennTreebankLanguagePack().grammaticalStructureFactory();
    List<TypedDependency> dependencies = gsf.newGrammaticalStructure(tree).typedDependenciesCCprocessed();
    System.out.println(dependencies);
    System.out.println();

    // A TreePrint can print both the tree and its dependencies.
    new TreePrint("penn,typedDependenciesCollapsed").printTree(tree);
}