Example usage for edu.stanford.nlp.trees PennTreebankLanguagePack PennTreebankLanguagePack

List of usage examples for edu.stanford.nlp.trees PennTreebankLanguagePack PennTreebankLanguagePack

Introduction

In this page you can find the example usage for edu.stanford.nlp.trees PennTreebankLanguagePack PennTreebankLanguagePack.

Prototype

public PennTreebankLanguagePack() 

Source Link

Document

Gives a handle to the TreebankLanguagePack

Usage

From source file:ConstituencyParse.java

License:Apache License

/**
 * Opens the output writers and loads the lexicalized parser together with the
 * tree utilities (binarizer, unary-collapse transformer, dependency factory)
 * used by this instance.
 *
 * @param tokPath    path of the token output file, or {@code null} to skip token output
 * @param parentPath path of the parent-pointer output file (required)
 * @param tokenize   whether input should be tokenized before parsing
 * @throws IOException if either output file cannot be opened for writing
 */
public ConstituencyParse(String tokPath, String parentPath, boolean tokenize) throws IOException {
    this.tokenize = tokenize;
    // The token writer is optional; it is only created when a path is supplied.
    if (tokPath != null) {
        tokWriter = new BufferedWriter(new FileWriter(tokPath));
    }
    parentWriter = new BufferedWriter(new FileWriter(parentPath));
    parser = LexicalizedParser.loadModel(PCFG_PATH);
    // The binarizer reuses the head finder and language pack of the loaded parser.
    binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(),
            parser.treebankLanguagePack());
    transformer = new CollapseUnaryTransformer();

    // set up to produce dependency representations from constituency trees
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    gsf = tlp.grammaticalStructureFactory();
}

From source file:artinex.TypDep.java

/**
 * Demo entry point: parses a fixed sentence, prints each token together with
 * its POS tag (the label of the leaf's parent node), then prints the
 * collapsed typed dependencies of the parse.
 */
public static void main(String[] args) {
    String sentence = "What is index in array";
    TypDep typDep = new TypDep();
    Tree parseTree = typDep.parse(sentence);

    // Emit "word-POS " pairs; the preterminal parent of each leaf holds the tag.
    for (Tree leaf : parseTree.getLeaves()) {
        String word = leaf.label().value();
        String tag = leaf.parent(parseTree).label().value();
        System.out.print(word + "-" + tag + " ");
    }
    System.out.println();

    // Derive collapsed typed dependencies from the constituency tree.
    TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
    GrammaticalStructureFactory factory = languagePack.grammaticalStructureFactory();
    GrammaticalStructure structure = factory.newGrammaticalStructure(parseTree);
    Collection<TypedDependency> dependencies = structure.typedDependenciesCollapsed();
    System.out.println(dependencies);

}

From source file:com.parse.Dependency.java

/**
 * Demo: parses two sentences with the English PCFG model and prints the Penn
 * tree plus CC-processed typed dependencies for each.
 */
public static void main(String[] args) {
    LexicalizedParser parser = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    parser.setOptionFlags(new String[] { "-maxLength", "80", "-retainTmpSubcategories", });

    // Sentence 1: already tokenized word array.
    String[] words = { "This", "is", "an", "easy", "sentence", "." };
    List<CoreLabel> tokens = Sentence.toCoreLabelList(words);
    Tree tree = parser.apply(tokens);
    tree.pennPrint();
    System.out.println();

    // Convert the constituency tree into CC-processed typed dependencies.
    TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
    GrammaticalStructureFactory factory = languagePack.grammaticalStructureFactory();
    GrammaticalStructure structure = factory.newGrammaticalStructure(tree);
    List<TypedDependency> dependencies = structure.typedDependenciesCCprocessed();
    System.out.println(dependencies);

    // Sentence 2: raw string, tokenized internally by the parser.
    String question = "which movies were directed by Christopher Nolan";
    Tree questionTree = parser.parse(question);
    System.out.println(questionTree.firstChild().toString());
    structure = factory.newGrammaticalStructure(questionTree);
    dependencies = structure.typedDependenciesCCprocessed();
    System.out.println(dependencies);
    System.out.println(dependencies.get(0).dep().nodeString());

}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java

License:Open Source License

/**
 * Runs Stanford coreference resolution over the CAS: converts each UIMA
 * sentence (ROOT constituent) into a CoreNLP tree plus token list, extracts
 * candidate mentions, resolves coreference, and writes the resulting chains
 * back into the CAS as CoreferenceChain/CoreferenceLink annotations.
 *
 * @param aJCas the CAS to process; must contain Token and ROOT annotations
 * @throws AnalysisEngineProcessException if the underlying coref system fails
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();
    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
        // it with PRN to avoid NPEs.
        TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
            @Override
            public Tree newTreeNode(String aParent, List<Tree> aChildren) {
                String parent = aParent;
                if ("PRN0".equals(parent)) {
                    parent = "PRN";
                }
                Tree node = super.newTreeNode(parent, aChildren);
                return node;
            }
        };

        // deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence
        // NOTE(review): a CoreLabel instance serves as the CoreMap container here;
        // only its map-style set/get interface is used below.
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
        // We currently do not copy over dependencies from the CAS. This is supposed to fill
        // in the dependencies so we do not get NPEs.
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(),
                tlp.typedDependencyHeadFinder());
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy,
                GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // merge the new CoreLabels with the tree leaves
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // extract all possible mentions
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    // Convert each resolved chain back into UIMA annotations, linking the
    // mentions in textual order.
    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            // NOTE(review): the -1/-2 offsets suggest sentNum and the token
            // indices are 1-based with an exclusive endIndex — confirm against
            // the CorefMention API before changing.
            CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.startIndex - 1);
            CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.endIndex - 2);
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());

            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }

            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;

            link.addToIndexes();
        }
    }
}

From source file:dependencies.ParsingUtils.java

License:Open Source License

/** Creates a GrammaticalStructureFactory backed by a fresh Penn Treebank language pack. */
private static GrammaticalStructureFactory getGrammaticalStructureFactoryInstance() {
    PennTreebankLanguagePack languagePack = new PennTreebankLanguagePack();
    return languagePack.grammaticalStructureFactory();
}

From source file:DependencyParser.ParseDependency.java

/**
 * Extracts aspect/opinion word pairs from the collapsed typed dependencies of
 * the parse of {@code str}. Each dependency-relation pattern (nsubj+nn,
 * nsubj+xcomp combined with dobj+nn, amod variants, etc.) is matched via
 * {@code getPair}; every non-empty match is printed and collected.
 *
 * @param str the raw sentence text to analyze
 * @return the list of aspect/opinion pair strings found (possibly empty)
 */
public List<String> getAspect_OpinionPair(String str) {
    sr = new StringReader(str);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    Tree parse = (Tree) lp.apply(toks);

    // Build collapsed typed dependencies for the sentence.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection<TypedDependency> td = gs.typedDependenciesCollapsed();
    System.out.println(td);

    List<String> ls = new ArrayList<String>();
    Object[] list = td.toArray();

    // Single-relation patterns: report and collect each non-empty match.
    collectPairs(ls, getPair(list, "nsubj", "nn", 1), "pair1");

    // Pattern 2: an nsubj+xcomp match gates a dobj+nn lookup.
    for (String pair2 : getPair(list, "nsubj", "xcomp", 2)) {
        if (!pair2.isEmpty()) {
            for (String pair22 : getPair(list, "dobj", "nn", 22)) {
                if (!pair22.isEmpty()) {
                    System.out.println("find pair2: (" + pair22 + "," + pair2 + ")");
                    ls.add(pair22 + ", " + pair2);
                }
            }
        }
    }

    collectPairs(ls, getPair(list, "nsubj", "dobj", 3), "pair3");
    collectPairs(ls, getPair(list, "nsubj", "acomp", 4), "pair4");

    // Pattern 5: an nsubj+acomp match gates an rcmod+nn lookup.
    for (String pair5 : getPair(list, "nsubj", "acomp", 5)) {
        if (!pair5.isEmpty()) {
            for (String pair55 : getPair(list, "rcmod", "nn", 55)) {
                if (!pair55.isEmpty()) {
                    System.out.println("find pair5: " + pair55 + "," + pair5);
                    ls.add(pair55 + "," + pair5);
                }
            }
        }
    }

    collectPairs(ls, getPair(list, "amod", "", 6), "pair6");
    collectPairs(ls, getPair(list, "amod", "amod", 7), "pair7");
    // BUG FIX: this loop previously iterated pair_7 instead of its own result
    // list, so the amod+conj_and (id 7) matches were never inspected.
    collectPairs(ls, getPair(list, "amod", "conj_and", 7), "pair7a");
    // Label fixed from the misleading "find pair9" in the original.
    collectPairs(ls, getPair(list, "amod", "conj_and", 8), "pair8");
    collectPairs(ls, getPair(list, "nsubj", "nn", 10), "pair10");

    // Pattern 11/12: an nsubj+prep_with match gates a prep_with+nn lookup.
    for (String pair11 : getPair(list, "nsubj", "prep_with", 11)) {
        if (!pair11.isEmpty()) {
            for (String pair12 : getPair(list, "prep_with", "nn", 12)) {
                if (!pair12.isEmpty()) {
                    // Label fixed from the misspelled "find paart12".
                    System.out.println("find pair12: " + pair12);
                    ls.add(pair12);
                }
            }
        }
    }
    return ls;
}

/** Prints ("find LABEL: pair") and collects every non-empty pair in {@code pairs}. */
private void collectPairs(List<String> ls, List<String> pairs, String label) {
    for (String pair : pairs) {
        if (!pair.isEmpty()) {
            System.out.println("find " + label + ": " + pair);
            ls.add(pair);
        }
    }
}

From source file:DependencyParser.Parser.java

/**
 * Parses {@code text} with the English PCFG model, prints the CC-processed
 * typed dependencies, writes the Penn tree plus collapsed dependencies to a
 * file, and renders the dependency graph as an image.
 *
 * @param text the raw text to tokenize and parse
 */
public void CallParser(String text) // start of the main method

{
    try {
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        LexicalizedParser lp = LexicalizedParser
                .loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
        lp.setOptionFlags(new String[] { "-maxLength", "500", "-retainTmpSubcategories" });
        TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize();
        Tree tree = lp.apply(wordList);

        GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
        Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed(true);
        System.out.println(tdl);

        // Close the writer in a finally block so the file handle is not leaked
        // if printTree throws (the original leaked pw on that path).
        PrintWriter pw = new PrintWriter("H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\Text-Parsed.txt");
        try {
            TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
            tp.printTree(tree, pw);
        } finally {
            pw.close();
        }
        Main.writeImage(tree, tdl, "H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\image.png", 3);
        assert (new File("image.png").exists());
    } catch (FileNotFoundException f) {
        // Previously swallowed silently; log so a missing output path is visible.
        Logger.getLogger(Parser.class.getName()).log(Level.SEVERE, null, f);
    } catch (Exception ex) {
        Logger.getLogger(Parser.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:DependencyParser.RunStanfordParser.java

/**
 * Parses each line of the given file with the English PCFG model, printing
 * the tokens, noun/adjective words with POS tags, stems, collapsed typed
 * dependencies, the Penn tree, and candidate noun phrases per line.
 *
 * @param filename path of the text file to parse, one sentence per line
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException if reading the file fails
 */
public RunStanfordParser(String filename) throws FileNotFoundException, IOException {
    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    //lp.setOptionFlags(new String[]{"-maxLength", "80", "-retainTmpSubcategories"}); // set max sentence length if you want

    // Each line gets its own StringReader because the tokenizer consumes a Reader.
    StringReader sr;
    PTBTokenizer tkzr; // tokenizer object
    WordStemmer ls = new WordStemmer(); // stemmer/lemmatizer object

    // Close the reader in a finally block — the original leaked the stream.
    // (The redundant DataInputStream wrapper was also dropped.)
    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
    try {
        // Read File Line By Line
        String strLine;
        while ((strLine = br.readLine()) != null) {
            System.out.println("Tokenizing and Parsing: " + strLine); // print current line to console

            sr = new StringReader(strLine);
            tkzr = PTBTokenizer.newPTBTokenizer(sr);
            List toks = tkzr.tokenize();
            System.out.println("tokens: " + toks);

            Tree parse = (Tree) lp.apply(toks); // parse the tokenized line

            // Get words, stemmed words and POS tags
            ArrayList<String> words = new ArrayList();
            ArrayList<String> stems = new ArrayList();
            ArrayList<String> tags = new ArrayList();

            // Keep only nouns (N*) and adjectives (J*) with their tags.
            ls.visitTree(parse); // apply the stemmer to the tree
            for (TaggedWord tw : parse.taggedYield()) {
                if (tw.tag().startsWith("N") || tw.tag().startsWith("J")) {
                    words.add(tw.word());
                    tags.add(tw.tag());
                }
            }
            System.out.println("Noun and Ajective words: " + words);
            System.out.println("POStags: " + tags);

            // Second stemmer pass collects every (stemmed) word.
            ls.visitTree(parse); // apply the stemmer to the tree
            for (TaggedWord tw : parse.taggedYield()) {
                stems.add(tw.word());
            }

            // Get dependency tree
            TreebankLanguagePack tlp = new PennTreebankLanguagePack();
            GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
            GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
            Collection tdl = gs.typedDependenciesCollapsed();

            // And print!
            System.out.println("words: " + words);
            System.out.println("POStags: " + tags);
            System.out.println("stemmedWordsAndTags: " + stems);
            System.out.println("typedDependencies: " + tdl);

            TreePrint tp = new TreePrint("words,penn");
            tp.printTree(parse);

            System.out.println("Noun Phrases are: -------"); //(NP (DT a) (JJ temporary) (NN searcher))
            // NOTE(review): "||" inside each group also matches the empty string, and the
            // spaces around the outer "|" are literal — confirm this pattern matches
            // what dfs() expects before tightening it.
            String reg = "(\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((JJ||JJR||JJS) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\))";
            Pattern patt = Pattern.compile(reg);
            System.out.println(" Noun Phrase List:..");
            dfs(parse, parse, patt);

            System.out.println(); // separate output lines
        }
    } finally {
        br.close();
    }

}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

private AnalysisUtilities() {
    parser = null;/* ww w .j  ava  2 s .  c om*/

    conjugator = new VerbConjugator();
    conjugator.load(GlobalProperties.getProperties().getProperty("verbConjugationsFile",
            "config" + File.separator + "verbConjugations.txt"));
    headfinder = new CollinsHeadFinder();
    tree_factory = new LabeledScoredTreeFactory();
    tlp = new PennTreebankLanguagePack();
}

From source file:edu.cmu.ark.nlp.question.QuestionUtil.java

License:Open Source License

/**
 * Private constructor: only the Penn Treebank language pack is initialized
 * here; parsing resources are managed elsewhere.
 *
 * @param props configuration properties (currently unused)
 */
private QuestionUtil(Properties props) {
    tlp = new PennTreebankLanguagePack();
}