List of usage examples for edu.stanford.nlp.trees GrammaticalStructure typedDependenciesCollapsedTree
public Collection<TypedDependency> typedDependenciesCollapsedTree()
From source file: Dependency.java
public static void main(String[] args) { String modelPath = DependencyParser.DEFAULT_MODEL; String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; Scanner sc = new Scanner(System.in); String text = ""; text = sc.nextLine();/*from w ww . ja v a2s . co m*/ // while(text!="exit"){ MaxentTagger tagger = new MaxentTagger(taggerPath); DependencyParser parser = DependencyParser.loadFromModelFile(modelPath); DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text)); for (List<HasWord> sentence : tokenizer) { List<TaggedWord> tagged = tagger.tagSentence(sentence); Object[] x = tagged.toArray(); GrammaticalStructure gs = parser.predict(tagged); //System.out.println(); Collection<TypedDependency> s = gs.typedDependenciesCollapsedTree(); Object[] z = s.toArray(); System.out.println(tagged.toString()); String token[] = new String[z.length]; String pos[] = new String[z.length]; int k = 0; for (Object i : x) { String str = i.toString(); /*String temp0="(.*?)(?=\\/)"; String temp1="\\/(.*)"; System.out.println(str); Pattern t0 = Pattern.compile("(.*?)(?=\\/)"); Pattern t1 = Pattern.compile("\\/(.*)"); Matcher m0 = t0.matcher(str); Matcher m1 = t1.matcher(str);*/ int index = str.lastIndexOf('/'); token[k] = str.substring(0, index); pos[k] = str.substring(index + 1); //System.out.println(pos[k]); k++; } String rels[] = new String[z.length]; String word1[] = new String[z.length]; String word2[] = new String[z.length]; int j = 0; for (Object i : z) { System.out.println(i); String temp = i.toString(); String pattern0 = "(.*)(?=\\()"; String pattern1 = "(?<=\\()(.*?)(?=-)"; String pattern2 = "(?<=, )(.*)(?=-)"; Pattern r0 = Pattern.compile(pattern0); Pattern r1 = Pattern.compile(pattern1); Pattern r2 = Pattern.compile(pattern2); Matcher m0 = r0.matcher(temp); Matcher m1 = r1.matcher(temp); Matcher m2 = r2.matcher(temp); if (m0.find()) { rels[j] = m0.group(0); //System.out.println(rels[j]); } if (m1.find()) { 
word1[j] = m1.group(0); } if (m2.find()) { word2[j] = m2.group(0); } j++; } //System.out.println(s); //Rules for feature extraction. //rule1::::::::::::::::: //System.out.println("1"); int[] q = toIntArray(grRecog(rels, "nsubj")); //System.out.println("2"); if (q.length != 0) { //System.out.println("3"); if (posrecog(token, pos, word2[q[0]]).equals("NN")) { //System.out.println("4"); int[] w = toIntArray(grRecog(rels, "compound")); //System.out.println("5"); if (w.length != 0) { System.out.println("6"); System.out.println(word1[q[0]] + "," + word2[q[0]] + "," + word2[w[0]]); } else { int conj_and_index = compgrRecog(rels, word1, word2, "conj:and", word2[q[0]]); if (conj_and_index != -1) { System.out.println( word1[conj_and_index] + "," + word2[conj_and_index] + "," + word2[q[0]]); } else System.out.println(word1[q[0]] + "," + word2[q[0]]); } } //RULE 2::::::::::::: else if (posrecog(token, pos, word1[q[0]]).equals("JJ")) { //System.out.println("aaaaa_JJ"); int a = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]); if (a != -1) { int b = compgrRecog(rels, word1, word2, "dobj", word2[a]); if (b != -1) { int c = compgrRecog(rels, word1, word2, "compound", word2[b]); if (c != -1) { System.out.println(word1[q[0]] + "," + word1[c] + "," + word2[c]); } } } //RULE 3:::::::::: else { int b[] = toIntArray(grRecog(rels, "ccomp")); if (b.length != 0) { System.out.println(word1[q[1]] + "," + word2[q[1]] + "," + word1[b[0]]); } } } //RULE 4:::::::::: else if (posrecog(token, pos, word1[q[0]]).equals("VBZ")) { //System.out.println("aaaaa"); int vbp_dobj_index = compgrRecog(rels, word1, word2, "dobj", word2[q[0]]); if (vbp_dobj_index != -1) { System.out.println(word1[vbp_dobj_index] + "," + word2[vbp_dobj_index]); } else { int vbp_xcomp_index = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]); if (vbp_xcomp_index != -1) { System.out.println(word1[vbp_xcomp_index] + "," + word2[vbp_xcomp_index]); } else { int vbp_acomp_index = compgrRecog(rels, word1, word2, "acomp", 
word1[q[0]]); if (vbp_acomp_index != -1) { System.out.println( word1[q[0]] + "," + word1[vbp_acomp_index] + "," + word2[vbp_acomp_index]); } else System.out.println(word1[q[0]]); } } } int[] f = toIntArray(grRecog(rels, "amod")); if (f.length != 0) { for (int i : f) { System.out.println(word1[i] + "," + word2[i]); } int cj[] = toIntArray(grRecog(rels, "conj:and")); if (cj.length != 0) { for (int i : cj) { System.out.println(word1[i] + "," + word2[i]); } } } int[] neg = toIntArray(grRecog(rels, "neg")); if (neg.length != 0) { for (int i : neg) { System.out.println(word1[i] + "," + word2[i]); } } } else { int[] f = toIntArray(grRecog(rels, "amod")); if (f.length != 0) { for (int i : f) { System.out.print(word1[i] + "," + word2[i]); String qwe = word1[i] + "," + word2[i]; } int cj[] = toIntArray(grRecog(rels, "conj:and")); if (cj.length != 0) { for (int i : cj) { System.out.println(word2[i]); } } } int[] neg = toIntArray(grRecog(rels, "neg")); if (neg.length != 0) { for (int i : neg) { System.out.println(word1[i] + "," + word2[i]); } } } //RULE 2::::::::::::: } // text=sc.nextLine(); //} }
From source file: Dependency2.java
public static void main(String[] args) { String modelPath = DependencyParser.DEFAULT_MODEL; String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; Scanner sc = new Scanner(System.in); readCsv();// ww w . ja v a2s. c om String text = ""; text = sc.nextLine(); if (multifeatures(text)) { System.out.println("Multiple features present"); MaxentTagger tagger = new MaxentTagger(taggerPath); DependencyParser parser = DependencyParser.loadFromModelFile(modelPath); DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text)); for (List<HasWord> sentence : tokenizer) { List<TaggedWord> tagged = tagger.tagSentence(sentence); GrammaticalStructure gs = parser.predict(tagged); Collection<TypedDependency> s = gs.typedDependenciesCollapsedTree(); Map<Character, Pair<Character, Character>> map = new HashMap<Character, Pair<Character, Character>>(); Object[] z = s.toArray(); String rels[] = new String[z.length]; String word1[] = new String[z.length]; String word2[] = new String[z.length]; int j = 0; String f, f1, f2; for (Object i : z) { //System.out.println(i); String temp = i.toString(); System.out.println(temp); String pattern0 = "(.*)(?=\\()"; String pattern1 = "(?<=\\()(.*?)(?=-)"; String pattern2 = "(?<=,)(.*)(?=-)"; Pattern r0 = Pattern.compile(pattern0); Pattern r1 = Pattern.compile(pattern1); Pattern r2 = Pattern.compile(pattern2); Matcher m0 = r0.matcher(temp); Matcher m1 = r1.matcher(temp); Matcher m2 = r2.matcher(temp); if (m0.find()) rels[j] = m0.group(0); if (m1.find()) word1[j] = m1.group(0); if (m2.find()) word2[j] = m2.group(0); if (rels[j].equals("amod")) { f1 = getFeature(word1[j]); f2 = getFeature(word2[j]); f = f1 != null ? (f1) : (f2 != null ? f2 : null); if (f != null) { System.out.println("Feature: " + f); } } j++; } //System.out.println(Arrays.toString(rels)); } } else { //sentence score is feature score } }
From source file: de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordDependencyConverter.java
License:Open Source License
protected void doCreateDependencyTags(JCas aJCas, TreebankLanguagePack aLP, Tree parseTree, List<Token> tokens) { GrammaticalStructure gs; try {// ww w . j a v a2s . c o m gs = aLP.grammaticalStructureFactory(aLP.punctuationWordRejectFilter(), aLP.typedDependencyHeadFinder()) .newGrammaticalStructure(parseTree); } catch (UnsupportedOperationException e) { // We already warned in the model provider if dependencies are not supported, so here // we just do nothing and skip the dependencies. return; } Collection<TypedDependency> dependencies = null; switch (mode) { case BASIC: dependencies = gs.typedDependencies(); // gs.typedDependencies(false); break; case NON_COLLAPSED: dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true); break; case COLLAPSED_WITH_EXTRA: dependencies = gs.typedDependenciesCollapsed(true); break; case COLLAPSED: dependencies = gs.typedDependenciesCollapsed(false); break; case CC_PROPAGATED: dependencies = gs.typedDependenciesCCprocessed(true); break; case CC_PROPAGATED_NO_EXTRA: dependencies = gs.typedDependenciesCCprocessed(false); break; case TREE: dependencies = gs.typedDependenciesCollapsedTree(); break; } for (TypedDependency currTypedDep : dependencies) { int govIndex = currTypedDep.gov().index(); int depIndex = currTypedDep.dep().index(); if (govIndex != 0) { // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which // is not token at all! Token govToken = tokens.get(govIndex - 1); Token depToken = tokens.get(depIndex - 1); StanfordAnnotator.createDependencyAnnotation(aJCas, currTypedDep.reln(), govToken, depToken); } } }
From source file: de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java
License:Open Source License
protected void doCreateDependencyTags(ParserGrammar aParser, StanfordAnnotator sfAnnotator, Tree parseTree, List<Token> tokens) { GrammaticalStructure gs; try {// w w w .j a v a 2s . c o m TreebankLanguagePack tlp = aParser.getTLPParams().treebankLanguagePack(); gs = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder()) .newGrammaticalStructure(parseTree); } catch (UnsupportedOperationException e) { // We already warned in the model provider if dependencies are not supported, so here // we just do nothing and skip the dependencies. return; } Collection<TypedDependency> dependencies = null; switch (mode) { case BASIC: dependencies = gs.typedDependencies(); // gs.typedDependencies(false); break; case NON_COLLAPSED: dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true); break; case COLLAPSED_WITH_EXTRA: dependencies = gs.typedDependenciesCollapsed(true); break; case COLLAPSED: dependencies = gs.typedDependenciesCollapsed(false); break; case CC_PROPAGATED: dependencies = gs.typedDependenciesCCprocessed(true); break; case CC_PROPAGATED_NO_EXTRA: dependencies = gs.typedDependenciesCCprocessed(false); break; case TREE: dependencies = gs.typedDependenciesCollapsedTree(); break; } for (TypedDependency currTypedDep : dependencies) { int govIndex = currTypedDep.gov().index(); int depIndex = currTypedDep.dep().index(); if (govIndex != 0) { // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which // is not token at all! Token govToken = tokens.get(govIndex - 1); Token depToken = tokens.get(depIndex - 1); sfAnnotator.createDependencyAnnotation(currTypedDep.reln(), govToken, depToken); } } }
From source file: ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
/** * * @param inputStreamFile/*w w w. j av a 2 s. com*/ * @param morphology * @param posTagger * @param parser * @return * @throws Exception */ public static StringBuilder parseBNCXML(InputStream inputStreamFile, Morphology morphology, MaxentTagger posTagger, ParserGrammar parser) throws Exception { StringBuilder results = new StringBuilder(); int counterSent = 0; List<List<List<WordLemmaTag>>> parseBNCXMLTokenized = parseBNCXMLTokenized(inputStreamFile); for (List<List<WordLemmaTag>> xparseBNCXMLL : parseBNCXMLTokenized) { results.append("<p>\n"); for (List<WordLemmaTag> para : xparseBNCXMLL) { if (counterSent++ % 20 == 0) { System.out.print("."); } results.append("<s>\n"); List<TaggedWord> tagSentence = posTagger.tagSentence(para, true); Tree parseTree = parser.parse(tagSentence); GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree, parser.treebankLanguagePack().punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder()); Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree(); SemanticGraph depTree = new SemanticGraph(deps); for (int i = 0; i < tagSentence.size(); ++i) { int head = -1; String deprel = null; // if (depTree != null) { Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index) .collect(Collectors.toSet()); IndexedWord node = depTree.getNodeByIndexSafe(i + 1); if (node != null) { List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node); if (!edgeList.isEmpty()) { assert edgeList.size() == 1; head = edgeList.get(0).getGovernor().index(); deprel = edgeList.get(0).getRelation().toString(); } else if (rootSet.contains(i + 1)) { head = 0; deprel = "ROOT"; } } // } // Write the token TaggedWord lexHead = null; if (head > 0) { lexHead = tagSentence.get(head - 1); } results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead)).append("\n"); } results.append("</s>\n"); } results.append("</p>\n"); } System.out.println(""); 
inputStreamFile.close(); return results; }
From source file: ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
public static void handleDependencies(Tree tree, ParserGrammar parser, String arg, OutputStream outStream, String commandArgs) throws IOException { GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(tree, parser.treebankLanguagePack().punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder()); Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree(); // SemanticGraph sg = new SemanticGraph(deps); OutputStreamWriter osw = new OutputStreamWriter(outStream, "utf-8"); for (TypedDependency dep : deps) { String t = dep.dep().word() + "\t" + dep.dep().lemma() + "\t" + dep.dep().tag() + "\t"; System.out.println(t);//from w ww.ja va2 s . c o m osw.write(dep.toString()); osw.write("\n"); } osw.flush(); }
From source file: ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
private static StringBuilder parseTheSentence(String sentence, Morphology morphology, MaxentTagger posTagger, ParserGrammar parser, String sid) { TokenizerFactory<Word> newTokenizerFactory = PTBTokenizerFactory.newTokenizerFactory(); // TokenizerFactory<WordLemmaTag> tokenizerFactory; // TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory() , ""); // TokenizerFactory<Word> factory1 = PTBTokenizer.factory(); StringBuilder results = new StringBuilder(); results.append("<s id='" + sid + "'>\n"); StringReader sr = new StringReader(sentence); Tokenizer<Word> tokenizer = newTokenizerFactory.getTokenizer(sr); List<Word> tokenize = tokenizer.tokenize(); List<TaggedWord> tagSentence = posTagger.tagSentence(tokenize); Tree parseTree = parser.parse(tagSentence); GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree, parser.treebankLanguagePack().punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder()); Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree(); SemanticGraph depTree = new SemanticGraph(deps); for (int i = 0; i < tagSentence.size(); ++i) { int head = -1; String deprel = null;//from w ww .j a v a 2s. c om // if (depTree != null) { Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet()); IndexedWord node = depTree.getNodeByIndexSafe(i + 1); if (node != null) { List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node); if (!edgeList.isEmpty()) { assert edgeList.size() == 1; head = edgeList.get(0).getGovernor().index(); deprel = edgeList.get(0).getRelation().toString(); } else if (rootSet.contains(i + 1)) { head = 0; deprel = "ROOT"; } } // } // Write the token TaggedWord lexHead = null; if (head > 0) { lexHead = tagSentence.get(head - 1); } results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead)).append("\n"); } results.append("</s>\n"); return results; }