List of usage examples for edu.stanford.nlp.trees PennTreebankLanguagePack PennTreebankLanguagePack
public PennTreebankLanguagePack()
From source file:ConstituencyParse.java
License:Apache License
public ConstituencyParse(String tokPath, String parentPath, boolean tokenize) throws IOException { this.tokenize = tokenize; if (tokPath != null) { tokWriter = new BufferedWriter(new FileWriter(tokPath)); }/*from w ww . ja v a2 s .c om*/ parentWriter = new BufferedWriter(new FileWriter(parentPath)); parser = LexicalizedParser.loadModel(PCFG_PATH); binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack()); transformer = new CollapseUnaryTransformer(); // set up to produce dependency representations from constituency trees TreebankLanguagePack tlp = new PennTreebankLanguagePack(); gsf = tlp.grammaticalStructureFactory(); }
From source file:artinex.TypDep.java
public static void main(String[] args) { String str = "What is index in array"; TypDep parser = new TypDep(); Tree tree = parser.parse(str);// w w w . j ava 2s . c o m List<Tree> leaves = tree.getLeaves(); // Print words and Pos Tags for (Tree leaf : leaves) { Tree parent = leaf.parent(tree); System.out.print(leaf.label().value() + "-" + parent.label().value() + " "); } System.out.println(); //Type dependencies // Tree tree1 = str.get(TreeAnnotation.class); // Get dependency tree TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); GrammaticalStructure gs = gsf.newGrammaticalStructure(tree); Collection<TypedDependency> td = gs.typedDependenciesCollapsed(); System.out.println(td); }
From source file:com.parse.Dependency.java
public static void main(String[] args) { LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); lp.setOptionFlags(new String[] { "-maxLength", "80", "-retainTmpSubcategories", }); String[] sent = { "This", "is", "an", "easy", "sentence", "." }; List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent); Tree parse = lp.apply(rawWords);/*from w w w.j a va 2 s .c om*/ parse.pennPrint(); System.out.println(); TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); List<TypedDependency> tdl = gs.typedDependenciesCCprocessed(); System.out.println(tdl); //System.out.println(); //TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed"); // tp.printTree(parse); String sentence = "which movies were directed by Christopher Nolan"; Tree t2 = lp.parse(sentence); System.out.println(t2.firstChild().toString()); gs = gsf.newGrammaticalStructure(t2); tdl = gs.typedDependenciesCCprocessed(); System.out.println(tdl); System.out.println(tdl.get(0).dep().nodeString()); }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java
License:Open Source License
/**
 * Runs Stanford coreference resolution over the CAS.
 *
 * For every sentence (ROOT) it rebuilds Stanford CoreLabels and a deep-copied
 * parse tree from the UIMA annotations, fills in the dependency annotations the
 * coref system expects, invokes the coref system over the whole document, and
 * finally writes the resulting chains back as CoreferenceChain/CoreferenceLink
 * annotations on the CAS.
 *
 * @param aJCas the CAS to process
 * @throws AnalysisEngineProcessException if the underlying coref system fails
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    // Per-sentence structures handed to the Stanford coref pipeline.
    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();
    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
        // it with PRN to avoid NPEs.
        TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
            @Override
            public Tree newTreeNode(String aParent, List<Tree> aChildren) {
                String parent = aParent;
                if ("PRN0".equals(parent)) {
                    parent = "PRN";
                }
                Tree node = super.newTreeNode(parent, aChildren);
                return node;
            }
        };

        // Deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence as a CoreMap carrying tree, tokens and the source ROOT.
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
        // We currently do not copy over dependencies from the CAS. This is supposed to fill
        // in the dependencies so we do not get NPEs.
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(),
                tlp.typedDependencyHeadFinder());
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy,
                GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        // Ensure every dependency-graph vertex has its word set, not only its value.
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // Merge the new CoreLabels with the tree leaves.
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // Extract all possible mentions.
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // Add the relevant info to mentions and order them for coref.
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    // Convert each coref chain back into CAS annotations, linked in textual order.
    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            // sentNum/startIndex appear to be 1-based; endIndex looks exclusive, hence the
            // -2 to reach the last covered token. NOTE(review): confirm against CoreNLP docs.
            CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.startIndex - 1);
            CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.endIndex - 2);
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());
            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }
            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;
            link.addToIndexes();
        }
    }
}
From source file:dependencies.ParsingUtils.java
License:Open Source License
/**
 * Creates a grammatical-structure factory backed by a fresh Penn Treebank language pack.
 *
 * @return a new {@code GrammaticalStructureFactory} instance
 */
private static GrammaticalStructureFactory getGrammaticalStructureFactoryInstance() {
    PennTreebankLanguagePack languagePack = new PennTreebankLanguagePack();
    return languagePack.grammaticalStructureFactory();
}
From source file:DependencyParser.ParseDependency.java
public List<String> getAspect_OpinionPair(String str) { sr = new StringReader(str); tkzr = PTBTokenizer.newPTBTokenizer(sr); List toks = tkzr.tokenize();/*from ww w . j ava 2s. c om*/ Tree parse = (Tree) lp.apply(toks); TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); Collection<TypedDependency> td = gs.typedDependenciesCollapsed(); System.out.println(td); List<String> ls = new ArrayList<String>(); Object[] list = td.toArray(); List<String> pair_1 = getPair(list, "nsubj", "nn", 1); for (String pair1 : pair_1) { if (!pair1.isEmpty()) { System.out.println("find pair1: " + pair1); ls.add(pair1); } } List<String> pair_2 = getPair(list, "nsubj", "xcomp", 2); for (String pair2 : pair_2) { if (!pair2.isEmpty()) { List<String> pair_22 = getPair(list, "dobj", "nn", 22); for (String pair22 : pair_22) { if (!pair22.isEmpty()) { System.out.println("find pair2: (" + pair22 + "," + pair2 + ")"); ls.add(pair22 + ", " + pair2); } } } } List<String> pair_3 = getPair(list, "nsubj", "dobj", 3); for (String pair3 : pair_3) { if (!pair3.isEmpty()) { System.out.println("find pair3: " + pair3); ls.add(pair3); } } List<String> pair_4 = getPair(list, "nsubj", "acomp", 4); for (String pair4 : pair_4) { if (!pair4.isEmpty()) { System.out.println("find pair4: " + pair4); ls.add(pair4); } } List<String> pair_5 = getPair(list, "nsubj", "acomp", 5); for (String pair5 : pair_5) { if (!pair5.isEmpty()) { List<String> pair_55 = getPair(list, "rcmod", "nn", 55); for (String pair55 : pair_55) { if (!pair55.isEmpty()) { System.out.println("find pair5: " + pair55 + "," + pair5); ls.add(pair55 + "," + pair5); } } } } List<String> pair_6 = getPair(list, "amod", "", 6); for (String pair6 : pair_6) { if (!pair6.isEmpty()) { System.out.println("find pair6: " + pair6); ls.add(pair6); } } List<String> pair_7 = getPair(list, "amod", "amod", 7); for (String pair7 : pair_7) 
{ if (!pair7.isEmpty()) { System.out.println("find pair7: " + pair7); ls.add(pair7); } } List<String> pair_7a = getPair(list, "amod", "conj_and", 7); for (String pair7a : pair_7) { if (!pair7a.isEmpty()) { System.out.println("find pair7a: " + pair7a); ls.add(pair7a); } } List<String> pair_8 = getPair(list, "amod", "conj_and", 8); for (String pair8 : pair_8) { if (!pair8.isEmpty()) { System.out.println("find pair9: " + pair8); ls.add(pair8); } } List<String> pair_10 = getPair(list, "nsubj", "nn", 10); for (String pair10 : pair_10) { if (!pair10.isEmpty()) { System.out.println("find pair10: " + pair10); ls.add(pair10); } } List<String> pair_11 = getPair(list, "nsubj", "prep_with", 11); for (String pair11a : pair_11) { if (!pair11a.isEmpty()) { List<String> pair_12 = getPair(list, "prep_with", "nn", 12); for (String pair12 : pair_12) { if (!pair12.isEmpty()) { System.out.println("find paart12: " + pair12); ls.add(pair12); } } } } //System.out.println(list.length); //TypedDependency typedDependency,typedDependency2,typedDependency3; //for (Object object : list) { // getPair(list,TypedDependency typedDependency,TypedDependency typedDependency2,String relation); //typedDependency = (TypedDependency) object; //System.out.println("Depdency Name: "+typedDependency.dep().nodeString()+ " :: "+ "Node: "+typedDependency.reln()+":: Gov: "+typedDependency.gov().nodeString()); // if (typedDependency.reln().getShortName().equals("nsubj")) { //if(!pair1.isEmpty()) //{ // System.out.println("find dependency: "+pair1); //} //} //} return ls; }
From source file:DependencyParser.Parser.java
public void CallParser(String text) // start of the main method { try {//w w w . j a v a 2 s. c o m TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); LexicalizedParser lp = LexicalizedParser .loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); lp.setOptionFlags(new String[] { "-maxLength", "500", "-retainTmpSubcategories" }); TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), ""); List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize(); Tree tree = lp.apply(wordList); GrammaticalStructure gs = gsf.newGrammaticalStructure(tree); Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed(true); System.out.println(tdl); PrintWriter pw = new PrintWriter("H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\Text-Parsed.txt"); TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed"); tp.printTree(tree, pw); pw.close(); Main.writeImage(tree, tdl, "H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\image.png", 3); assert (new File("image.png").exists()); } catch (FileNotFoundException f) { } catch (Exception ex) { Logger.getLogger(Parser.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:DependencyParser.RunStanfordParser.java
public RunStanfordParser(String filename) throws FileNotFoundException, IOException { // input format: data directory, and output directory String fileToParse = filename; LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); //lp.setOptionFlags(new String[]{"-maxLength", "80", "-retainTmpSubcategories"}); // set max sentence length if you want // Call parser on files, and tokenize the contents FileInputStream fstream = new FileInputStream(fileToParse); DataInputStream in = new DataInputStream(fstream); // Get the object of DataInputStream BufferedReader br = new BufferedReader(new InputStreamReader(in)); StringReader sr; // we need to re-read each line into its own reader because the tokenizer is over-complicated garbage PTBTokenizer tkzr; // tokenizer object WordStemmer ls = new WordStemmer(); // stemmer/lemmatizer object // Read File Line By Line String strLine;//from ww w .j av a 2 s .c o m while ((strLine = br.readLine()) != null) { System.out.println("Tokenizing and Parsing: " + strLine); // print current line to console // do all the standard java over-complication to use the stanford parser tokenizer sr = new StringReader(strLine); tkzr = PTBTokenizer.newPTBTokenizer(sr); List toks = tkzr.tokenize(); System.out.println("tokens: " + toks); Tree parse = (Tree) lp.apply(toks); // finally, we actually get to parse something // Output Option 1: Printing out various data by accessing it programmatically // Get words, stemmed words and POS tags ArrayList<String> words = new ArrayList(); ArrayList<String> stems = new ArrayList(); ArrayList<String> tags = new ArrayList(); // Get words and Tags ls.visitTree(parse); // apply the stemmer to the tree for (TaggedWord tw : parse.taggedYield()) { if (tw.tag().startsWith("N") || tw.tag().startsWith("J")) { words.add(tw.word()); tags.add(tw.tag()); } } System.out.println("Noun and Ajective words: " + words); System.out.println("POStags: " + tags); // Get stems 
ls.visitTree(parse); // apply the stemmer to the tree for (TaggedWord tw : parse.taggedYield()) { stems.add(tw.word()); } // Get dependency tree TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); Collection tdl = gs.typedDependenciesCollapsed(); // And print! System.out.println("words: " + words); System.out.println("POStags: " + tags); System.out.println("stemmedWordsAndTags: " + stems); System.out.println("typedDependencies: " + tdl); // getAspect_OpinionWord(tdl.toString(),words,tags); TreePrint tp = new TreePrint("words,penn"); //TreePrint tn = new TreePrint("words,typedDependenciesCollapsed"); //TreePrint to = new TreePrint("rootLabelOnlyFormat,penn"); //System.out.println("Tree print"+tp.); tp.printTree(parse); //tn.printTree(parse); System.out.println("Noun Phrases are: -------"); //(NP (DT a) (JJ temporary) (NN searcher)) String reg = "(\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((JJ||JJR||JJS) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\))"; Pattern patt = Pattern.compile(reg); System.out.println(" Noun Phrase List:.."); dfs(parse, parse, patt); //for (Tree subtree: parse) //{ /* if(subtree.label().value().equals("NP")) { String a=subtree.toString(); //System.out.println(a); Matcher match = patt.matcher(a.trim()); while(match.find()) { System.out.println("NP: "+match.group()); } }*/ /*for(Tree np:subtree) { if(np.label().value().equals("NP")) { for(Tree n:np) { if(np.label().value().equals("\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\)")) { System.out.println("NP tag Tags: "+np); System.out.println(Sentence.listToString(np.yield())); } else if(np.label().value().equals("\\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)")) { System.out.println("NP tag Tags: "+np); 
System.out.println(Sentence.listToString(np.yield())); } else if(np.label().value().equals("\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)")) { System.out.println("NP tag Tags: "+np); System.out.println(Sentence.listToString(np.yield())); } else{ if(n.label().value().equals("NP")) { System.out.println("N tag Tags: "+n); System.out.println(Sentence.listToString(n.yield())); } } } } }*/ //} //} System.out.println(); // separate output lines*/ } }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
/**
 * Private constructor: sets up the NLP helpers used throughout this utility class.
 * The parser itself is not loaded here.
 */
private AnalysisUtilities() {
    // The parser is created lazily elsewhere; start with none.
    parser = null;

    // Verb conjugation data, path configurable via the "verbConjugationsFile" property.
    conjugator = new VerbConjugator();
    conjugator.load(GlobalProperties.getProperties().getProperty("verbConjugationsFile",
            "config" + File.separator + "verbConjugations.txt"));

    // Head finding, tree construction and treebank conventions.
    headfinder = new CollinsHeadFinder();
    tree_factory = new LabeledScoredTreeFactory();
    tlp = new PennTreebankLanguagePack();
}
From source file:edu.cmu.ark.nlp.question.QuestionUtil.java
License:Open Source License
/**
 * Private constructor: only the Penn Treebank language pack is initialized here.
 *
 * @param props configuration properties (currently unused)
 */
private QuestionUtil(Properties props) {
    tlp = new PennTreebankLanguagePack();
}