List of usage examples for edu.stanford.nlp.trees TypedDependency gov
IndexedWord gov
To view the source code for edu.stanford.nlp.trees TypedDependency gov, click the Source link.
From source file:DependencyParse.java
License:Apache License
public static void main(String[] args) throws Exception { Properties props = StringUtils.argsToProperties(args); if (!props.containsKey("tokpath") || !props.containsKey("parentpath") || !props.containsKey("relpath")) { System.err.println(/*from w ww.j a v a 2 s. c o m*/ "usage: java DependencyParse -tokenize - -tokpath <tokpath> -parentpath <parentpath> -relpath <relpath>"); System.exit(1); } boolean tokenize = false; if (props.containsKey("tokenize")) { tokenize = true; } String tokPath = props.getProperty("tokpath"); String parentPath = props.getProperty("parentpath"); String relPath = props.getProperty("relpath"); BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath)); BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath)); BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath)); MaxentTagger tagger = new MaxentTagger(TAGGER_MODEL); DependencyParser parser = DependencyParser.loadFromModelFile(PARSER_MODEL); Scanner stdin = new Scanner(System.in); int count = 0; long start = System.currentTimeMillis(); while (stdin.hasNextLine()) { String line = stdin.nextLine(); List<HasWord> tokens = new ArrayList<>(); if (tokenize) { PTBTokenizer<Word> tokenizer = new PTBTokenizer(new StringReader(line), new WordTokenFactory(), ""); for (Word label; tokenizer.hasNext();) { tokens.add(tokenizer.next()); } } else { for (String word : line.split(" ")) { tokens.add(new Word(word)); } } List<TaggedWord> tagged = tagger.tagSentence(tokens); int len = tagged.size(); Collection<TypedDependency> tdl = parser.predict(tagged).typedDependencies(); int[] parents = new int[len]; for (int i = 0; i < len; i++) { // if a node has a parent of -1 at the end of parsing, then the node // has no parent. 
parents[i] = -1; } String[] relns = new String[len]; for (TypedDependency td : tdl) { // let root have index 0 int child = td.dep().index(); int parent = td.gov().index(); relns[child - 1] = td.reln().toString(); parents[child - 1] = parent; } // print tokens StringBuilder sb = new StringBuilder(); for (int i = 0; i < len - 1; i++) { if (tokenize) { sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word())); } else { sb.append(tokens.get(i).word()); } sb.append(' '); } if (tokenize) { sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word())); } else { sb.append(tokens.get(len - 1).word()); } sb.append('\n'); tokWriter.write(sb.toString()); // print parent pointers sb = new StringBuilder(); for (int i = 0; i < len - 1; i++) { sb.append(parents[i]); sb.append(' '); } sb.append(parents[len - 1]); sb.append('\n'); parentWriter.write(sb.toString()); // print relations sb = new StringBuilder(); for (int i = 0; i < len - 1; i++) { sb.append(relns[i]); sb.append(' '); } sb.append(relns[len - 1]); sb.append('\n'); relWriter.write(sb.toString()); count++; if (count % 1000 == 0) { double elapsed = (System.currentTimeMillis() - start) / 1000.0; System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed); } } long totalTimeMillis = System.currentTimeMillis() - start; System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count); tokWriter.close(); parentWriter.close(); relWriter.close(); }
From source file:ConstituencyParse.java
License:Apache License
public int[] depTreeParents(Tree tree, List<HasWord> tokens) { GrammaticalStructure gs = gsf.newGrammaticalStructure(tree); Collection<TypedDependency> tdl = gs.typedDependencies(); int len = tokens.size(); int[] parents = new int[len]; for (int i = 0; i < len; i++) { // if a node has a parent of -1 at the end of parsing, then the node // has no parent. parents[i] = -1;/*w w w . ja v a 2 s .c o m*/ } for (TypedDependency td : tdl) { // let root have index 0 int child = td.dep().index(); int parent = td.gov().index(); parents[child - 1] = parent; } return parents; }
From source file:ca.ualberta.exemplar.core.ParserMalt.java
License:Open Source License
@Override public List<CoreMap> parseText(String text) { List<CoreMap> sentences = null; try {// w ww. j av a2 s . c o m Annotation document = new Annotation(text); pipeline.annotate(document); sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { List<CoreLabel> tokens = sentence.get(TokensAnnotation.class); String[] conllInput = sentenceToCoNLLInput(tokens); DependencyGraph graph = (DependencyGraph) maltParser.parse(conllInput); Result result = graphToCoNLL(graph); List<List<String>> conll = result.conll; int rootIndex = result.rootIndex; EnglishGrammaticalStructure gs = EnglishGrammaticalStructure.buildCoNNLXGrammaticStructure(conll); TreeGraphNode root = null; List<TypedDependency> deps = gs.typedDependenciesCCprocessed(); // Add root dependency and ner annotations int size = deps.size(); for (int i = 0; i < size; i++) { TypedDependency td = deps.get(i); if (td.gov().index() == rootIndex) { root = td.gov(); deps.add(new TypedDependency(GrammaticalRelation.ROOT, td.gov(), td.gov())); } { TreeGraphNode n = td.dep(); if (n.label().ner() == null) { n.label().setNER(tokens.get(n.index() - 1).ner()); n.label().setBeginPosition(tokens.get(n.index() - 1).beginPosition()); n.label().setEndPosition(tokens.get(n.index() - 1).endPosition()); n.label().setLemma(tokens.get(n.index() - 1).lemma()); } } { TreeGraphNode n = td.gov(); if (n.label().ner() == null) { n.label().setNER(tokens.get(n.index() - 1).ner()); n.label().setBeginPosition(tokens.get(n.index() - 1).beginPosition()); n.label().setEndPosition(tokens.get(n.index() - 1).endPosition()); n.label().setLemma(tokens.get(n.index() - 1).lemma()); } } } if (root == null) continue; List<TreeGraphNode> roots = new ArrayList<TreeGraphNode>(); roots.add(gs.root()); SemanticGraph sg = new SemanticGraph(deps, roots); sentence.set(CollapsedCCProcessedDependenciesAnnotation.class, sg); } } catch (Exception e) { e.printStackTrace(); } return sentences; }
From source file:cc.vidr.parseviz.ParseViz.java
License:Open Source License
public static void printDependenciesDot(Tree tree, StringBuilder sb) { sb.append("digraph{\n"); for (TypedDependency td : typedDependencies(tree)) { GrammaticalRelation reln = td.reln(); TreeGraphNode gov = td.gov(); TreeGraphNode dep = td.dep();/*from w ww .ja v a2 s . c om*/ sb.append("n").append(gov.index()).append("[label=\"").append(gov).append("\"];\n").append("n") .append(dep.index()).append("[label=\"").append(dep).append("\"];\n").append("n") .append(gov.index()).append("->n").append(dep.index()).append("[label=\"").append(reln) .append("\"];\n"); } sb.append("}\n"); }
From source file:com.search.MySearchHandler.java
License:Apache License
private static String compress(TermTokenStream queryStream) { List<TypedDependency> tdl = getTyped(queryStream.toNLPString()); ListIterator<TypedDependency> tdllist = tdl.listIterator(); int count = 1; while (tdllist.hasNext()) { TypedDependency typd = tdllist.next(); // Have to fix the multiple words in the sequence issue if ((typd.reln().toString().equals("nn") | typd.reln().toString().equals("amod") | (typd.reln().toString().equals("det") && typd.dep().index() != 1) | typd.reln().toString().equals("num") | typd.reln().toString().equals("number")) && ((typd.gov().index() - typd.dep().index()) == 1 || (typd.gov().index() - typd.dep().index()) == -1)) { if ((typd.gov().index() - typd.dep().index()) == -1) { queryStream.mergeWithNext(typd.gov().index() - count); count++;/*w w w . j a va2 s .co m*/ } else { queryStream.mergeWithNext(typd.dep().index() - count); count++; } } } if (found(getTyped(queryStream.toNLPString()))) { return compress(queryStream); } return queryStream.toNLPString(); }
From source file:com.search.MySearchHandler.java
License:Apache License
private static boolean found(List<TypedDependency> typed) { ListIterator<TypedDependency> tdllist = typed.listIterator(); boolean iftrue = false; while (tdllist.hasNext()) { TypedDependency typd = tdllist.next(); // Have to fix the multiple words in the sequence issue if ((typd.reln().toString().equals("nn") | typd.reln().toString().equals("amod") | (typd.reln().toString().equals("det") && typd.dep().index() != 1) | typd.reln().toString().equals("num") | typd.reln().toString().equals("number")) && ((typd.gov().index() - typd.dep().index()) == 1 || (typd.gov().index() - typd.dep().index()) == -1)) { iftrue = true;//from w w w . j a v a 2s. c om } } return iftrue; }
From source file:coreferenceresolver.element.Token.java
/** * @param newTokenSentiment the sentimentOrientation to set *///from w ww . j a va2 s . c o m public void setSentimentOrientation(int newTokenSentiment, Collection<TypedDependency> typedDeps) { if ((newTokenSentiment == Util.POSITIVE || newTokenSentiment == Util.NEGATIVE) && (!this.POS.equals("IN"))) { for (TypedDependency typedDependency : typedDeps) { if (typedDependency.reln().toString().equals("neg") && typedDependency.gov().value().equals(word)) { newTokenSentiment = Util.reverseSentiment(newTokenSentiment); } if (typedDependency.reln().toString().equals("mark") && (typedDependency.dep().value().toLowerCase().equals("although") || typedDependency.dep().value().toLowerCase().equals("though") || typedDependency.dep().value().toLowerCase().equals("while")) && typedDependency.gov().value().equals(word)) { newTokenSentiment = Util.NEUTRAL; break; } } } this.sentimentOrientation = newTokenSentiment; }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordDependencyConverter.java
License:Open Source License
protected void doCreateDependencyTags(JCas aJCas, TreebankLanguagePack aLP, Tree parseTree, List<Token> tokens) { GrammaticalStructure gs;// w w w. jav a 2 s . c om try { gs = aLP.grammaticalStructureFactory(aLP.punctuationWordRejectFilter(), aLP.typedDependencyHeadFinder()) .newGrammaticalStructure(parseTree); } catch (UnsupportedOperationException e) { // We already warned in the model provider if dependencies are not supported, so here // we just do nothing and skip the dependencies. return; } Collection<TypedDependency> dependencies = null; switch (mode) { case BASIC: dependencies = gs.typedDependencies(); // gs.typedDependencies(false); break; case NON_COLLAPSED: dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true); break; case COLLAPSED_WITH_EXTRA: dependencies = gs.typedDependenciesCollapsed(true); break; case COLLAPSED: dependencies = gs.typedDependenciesCollapsed(false); break; case CC_PROPAGATED: dependencies = gs.typedDependenciesCCprocessed(true); break; case CC_PROPAGATED_NO_EXTRA: dependencies = gs.typedDependenciesCCprocessed(false); break; case TREE: dependencies = gs.typedDependenciesCollapsedTree(); break; } for (TypedDependency currTypedDep : dependencies) { int govIndex = currTypedDep.gov().index(); int depIndex = currTypedDep.dep().index(); if (govIndex != 0) { // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which // is not token at all! Token govToken = tokens.get(govIndex - 1); Token depToken = tokens.get(depIndex - 1); StanfordAnnotator.createDependencyAnnotation(aJCas, currTypedDep.reln(), govToken, depToken); } } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java
License:Open Source License
protected void doCreateDependencyTags(ParserGrammar aParser, StanfordAnnotator sfAnnotator, Tree parseTree, List<Token> tokens) { GrammaticalStructure gs;/*from www . j a v a2s .c om*/ try { TreebankLanguagePack tlp = aParser.getTLPParams().treebankLanguagePack(); gs = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder()) .newGrammaticalStructure(parseTree); } catch (UnsupportedOperationException e) { // We already warned in the model provider if dependencies are not supported, so here // we just do nothing and skip the dependencies. return; } Collection<TypedDependency> dependencies = null; switch (mode) { case BASIC: dependencies = gs.typedDependencies(); // gs.typedDependencies(false); break; case NON_COLLAPSED: dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true); break; case COLLAPSED_WITH_EXTRA: dependencies = gs.typedDependenciesCollapsed(true); break; case COLLAPSED: dependencies = gs.typedDependenciesCollapsed(false); break; case CC_PROPAGATED: dependencies = gs.typedDependenciesCCprocessed(true); break; case CC_PROPAGATED_NO_EXTRA: dependencies = gs.typedDependenciesCCprocessed(false); break; case TREE: dependencies = gs.typedDependenciesCollapsedTree(); break; } for (TypedDependency currTypedDep : dependencies) { int govIndex = currTypedDep.gov().index(); int depIndex = currTypedDep.dep().index(); if (govIndex != 0) { // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which // is not token at all! Token govToken = tokens.get(govIndex - 1); Token depToken = tokens.get(depIndex - 1); sfAnnotator.createDependencyAnnotation(currTypedDep.reln(), govToken, depToken); } } }
From source file:dependencies.TypedDependencyWrapper.java
License:Open Source License
/** * Constructor for class TypedDependencyWrapper */// w w w .j ava2 s.c o m public TypedDependencyWrapper(TypedDependency d) { govIndex = d.gov().index(); govLabel = d.gov().value().toLowerCase(); depIndex = d.dep().index(); depLabel = d.dep().value().toLowerCase(); relation = d.reln(); }