List of usage examples for edu.stanford.nlp.semgraph SemanticGraph outgoingEdgeList
public List<SemanticGraphEdge> outgoingEdgeList(IndexedWord v)
From source file:ca.mcgill.cs.crown.procedure.ParseExtractor.java
License:Creative Commons License
/** * Gets the candidate hypernyms form the provided subdef * * @returns a mapping from the candidate to the heuristics that generated it *//*ww w . jav a 2 s.c om*/ MultiMap<String, String> getCandidates(SemanticGraph dependencies, String subdef, POS spos_) { MultiMap<String, String> candidates = new HashMultiMap<String, String>(); char sensePos = toChar(spos_); Collection<IndexedWord> roots = dependencies.getRoots(); next_root: for (IndexedWord root : roots) { String word = root.get(TextAnnotation.class); String lemma = root.get(LemmaAnnotation.class); String pos = root.get(PartOfSpeechAnnotation.class); char lemmaPos = pos.substring(0, 1).toLowerCase().charAt(0); String lemmaLc = lemma.toLowerCase(); //System.out.println("testing: " + lemma + "/" + pos); // If the lemma is a verb, check for phrasal verbal particle (e.g., // "lead on", "edge out") and if present, add them to the lemma if (lemmaPos == 'v') { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("prt")) { IndexedWord dep = e.getDependent(); lemma = lemma + " " + dep.get(LemmaAnnotation.class); break; } } } // Heuristic 1: root matches exact POS if (lemmaPos == sensePos) { // Edge case for Heuristics 7: If the lemma is a noun and is // saying that this is an instance (e.g., "An instance of ..."), // then we take the dependent noun from instance // // Terrible example: // The second of the two Books of Chronicles and the // fourteenth book of the Old Testament of the Bible. // boolean foundExistentialDependent = false; if (lemma.equals("instance") || lemma.equals("example") || lemma.equals("first") || lemma.equals("second") || lemma.equals("third") || lemma.equals("fourth") || lemma.equals("fifth") || lemma.equals("sixth") || lemma.equals("series")) { // Check that there's actually a prepositional phrase // attached List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("prep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase() .charAt(0); //System.out.println("HEURISTIC 7"); if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-7"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-7"); foundExistentialDependent = true; } } } } if (foundExistentialDependent) continue next_root; // Heuristic 10: In the case of noun phrases, take the last noun // in the phrase, e.g., "Molten material", "pringtime snow // runoff" List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); boolean foundDependent = false; for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("dep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); //System.out.println("HEURISTIC 10"); if (depPos == sensePos) { foundDependent = true; candidates.put(depLemma, "Heuristic-10"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-10"); } } } if (!foundDependent) { //System.out.println("HEURISTIC 1"); candidates.put(lemma, "Heuristic-1"); addSiblings(root, candidates, sensePos, dependencies, "Heuristic-1"); } } // Heuristic 2: subdef is either (1) one word or (2) two or more // word that *must be connected by a conjunction, and (3) the lemma // has the wrong part of speech, but could have the same POS (i.e., // the lemma was probably POS-tagged incorrectly). if (sensePos != lemmaPos) { // Only one word in the subdef, which can manifest itself as the // graph having no vertices! (size == 0) if (dependencies.size() < 1) { // System.out.println("HEURISTIC 2a"); IIndexWord iword = dict.getIndexWord(lemma, spos_); if (iword != null) candidates.put(lemma, "Heuristic-2a"); else { // Sometimes adjectves get lemmatized to a verb form // which is in correct. Check to see if the token // matches String token = root.get(TextAnnotation.class); iword = dict.getIndexWord(token, spos_); if (iword != null) candidates.put(token, "Heuristic-2a"); } } else { // System.out.println("HEURISTIC 2b"); Set<IndexedWord> tmp = new HashSet<IndexedWord>(); List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("conj")) { if (tmp.size() == 0) tmp.add(root); tmp.add(e.getDependent()); } } if (!tmp.isEmpty()) { for (IndexedWord iw : tmp) { String lem = iw.get(LemmaAnnotation.class); IIndexWord iword = dict.getIndexWord(lem, spos_); if (iword != null) candidates.put(lem, "Heuristic-2b"); else { // Sometimes adjectves get lemmatized to a verb // form which is in correct. Check to see if // the token matches String token = iw.get(TextAnnotation.class); iword = dict.getIndexWord(token, spos_); if (iword != null) candidates.put(token, "Heuristic-2b"); } } //System.out.println(tmp); } } } // Heuristics 3: the subdef is phrased as an overly-general description // of a person using "one", e.g., "one who does X". Replace this with // "person" if (sensePos == 'n' && (lemma.equals("one") || lemma.equals("someone"))) { // check the dependency graph for a "who" attachment // TODO // ... or be lazy and just check for the token Matcher m = WHO.matcher(subdef); if (m.find()) { candidates.put("person", "Heuristic-3: Person"); } } // Heuristic 4: if the root lemma is an adjective and the target // sense is a noun, look for a modifying a noun or set of nouns, // report those /// // Example: "a small, arched passageway" if (sensePos == 'n' && lemmaPos == 'j') { //System.out.println("HEURISTIC 4"); List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("appos") || e.getRelation().getShortName().equals("dep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); // System.out.println("!!! " + depLemma); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-4: Head Noun"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-4: Head Noun"); } //break; } } } // Heuristic 5: if the root lemma is a verb and the target sense is // a noun, look for a subject noun if (sensePos == 'n' && lemmaPos == 'v') { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("nsubj")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-5: Subject Noun"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-5: Subject Noun"); } break; } } } // Heuristic 6: if the root lemma is an existential quantifier or // something like it (e.g., "Any of ...") and // the target sense is a noun, look for a subject noun if (sensePos == 'n' && lemmaPos == 'd') { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("prep") || e.getRelation().getShortName().equals("dep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); // System.out.println(depLemma + "/" + depPos); // This should be the common case if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-6: Existential Example"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-6: Existential Example"); } // This is for some really (really) unusually parsed // edge cases else { List<SemanticGraphEdge> depEdges = dependencies.outgoingEdgeList(dep); for (SemanticGraphEdge e2 : depEdges) { if (e2.getRelation().getShortName().equals("rcmod")) { IndexedWord dep2 = e2.getDependent(); String depLemma2 = dep2.get(LemmaAnnotation.class); char depPos2 = dep2.get(PartOfSpeechAnnotation.class).substring(0, 1) .toLowerCase().charAt(0); if (depPos2 == sensePos) { candidates.put(depLemma2, "Heuristic-6: Existential Example"); addSiblings(dep2, candidates, sensePos, dependencies, "Heuristic-6: Existential Example"); } } } } } } } // Heuristic 8: if the root lemma is a verb and the sense is an // adjective, but the verb is modified by an adverb, this catches // that cases that Heuristics 2 does not if (sensePos == 'j' && lemmaPos == 'v') { Set<IndexedWord> tmp = new HashSet<IndexedWord>(); List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("advmod")) { IIndexWord iword = dict.getIndexWord(lemma, spos_); if (iword != null) candidates.put(lemma, "Heuristic-8: Adv-modified Verb"); else { // Sometimes adjectves get lemmatized to a verb // form which is in correct. Check to see if // the token matches String token = root.get(TextAnnotation.class); iword = dict.getIndexWord(token, spos_); if (iword != null) candidates.put(token, "Heuristic-8: Adv-modified Verb"); } } } } // Heuristic 9: if the sense is an adjective and the root lemma // begins with with a negative *and* the gloss contains something // like "not [x]", then pull out the "x" and use it as the hypernym if (sensePos == 'j' && lemma.equals("not")) { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("dep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-9: negated adj"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-9: negated adj"); } break; } } } // Heuristic 11: if the sense is a verb and the root lemma // is "to", this is probably a case of mistaken POS-tagging if (sensePos == 'v' && lemma.equals("to")) { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("pobj")) { IndexedWord dep = e.getDependent(); IIndexWord iword = dict.getIndexWord(lemma, spos_); if (iword != null) candidates.put(lemma, "Heuristic-11: verbal infinitive"); else { // Sometimes verbs get lemmatized to a noun form // that is incorrect. Check to see if the token // matches String token = dep.get(TextAnnotation.class); iword = dict.getIndexWord(token, spos_); if (iword != null) candidates.put(token, "Heuristic-9: verbal infinitive"); } } } } } return candidates; }
From source file:ca.mcgill.cs.crown.procedure.ParseExtractor.java
License:Creative Commons License
/** * If we know we want {@code toAdd}, get all of its siblings that are joined * by conjunctions as candidates too//from ww w. j a va 2 s.c o m */ void addSiblings(IndexedWord toAdd, MultiMap<String, String> candidates, char targetPos, SemanticGraph parse, String reason) { List<SemanticGraphEdge> edges = parse.outgoingEdgeList(toAdd); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("conj")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); if (targetPos == depPos) { if (targetPos != 'v') { candidates.put(depLemma, reason + " (In conjunction)"); } // Check for phrasal verb particles else { List<SemanticGraphEdge> depEdges = parse.outgoingEdgeList(dep); for (SemanticGraphEdge e2 : depEdges) { if (e2.getRelation().getShortName().equals("prt")) { IndexedWord dep2 = e.getDependent(); depLemma = depLemma + " " + dep2.get(LemmaAnnotation.class); break; } } } } } } }