List of usage examples for the method edu.stanford.nlp.semgraph.SemanticGraphEdge.getRelation()
public GrammaticalRelation getRelation()
From source file:ca.mcgill.cs.crown.procedure.ParseExtractor.java
License:Creative Commons License
/** * Gets the candidate hypernyms form the provided subdef * * @returns a mapping from the candidate to the heuristics that generated it *//* www .j a v a 2 s . c om*/ MultiMap<String, String> getCandidates(SemanticGraph dependencies, String subdef, POS spos_) { MultiMap<String, String> candidates = new HashMultiMap<String, String>(); char sensePos = toChar(spos_); Collection<IndexedWord> roots = dependencies.getRoots(); next_root: for (IndexedWord root : roots) { String word = root.get(TextAnnotation.class); String lemma = root.get(LemmaAnnotation.class); String pos = root.get(PartOfSpeechAnnotation.class); char lemmaPos = pos.substring(0, 1).toLowerCase().charAt(0); String lemmaLc = lemma.toLowerCase(); //System.out.println("testing: " + lemma + "/" + pos); // If the lemma is a verb, check for phrasal verbal particle (e.g., // "lead on", "edge out") and if present, add them to the lemma if (lemmaPos == 'v') { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("prt")) { IndexedWord dep = e.getDependent(); lemma = lemma + " " + dep.get(LemmaAnnotation.class); break; } } } // Heuristic 1: root matches exact POS if (lemmaPos == sensePos) { // Edge case for Heuristics 7: If the lemma is a noun and is // saying that this is an instance (e.g., "An instance of ..."), // then we take the dependent noun from instance // // Terrible example: // The second of the two Books of Chronicles and the // fourteenth book of the Old Testament of the Bible. 
// boolean foundExistentialDependent = false; if (lemma.equals("instance") || lemma.equals("example") || lemma.equals("first") || lemma.equals("second") || lemma.equals("third") || lemma.equals("fourth") || lemma.equals("fifth") || lemma.equals("sixth") || lemma.equals("series")) { // Check that there's actually a prepositional phrase // attached List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("prep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase() .charAt(0); //System.out.println("HEURISTIC 7"); if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-7"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-7"); foundExistentialDependent = true; } } } } if (foundExistentialDependent) continue next_root; // Heuristic 10: In the case of noun phrases, take the last noun // in the phrase, e.g., "Molten material", "pringtime snow // runoff" List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); boolean foundDependent = false; for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("dep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); //System.out.println("HEURISTIC 10"); if (depPos == sensePos) { foundDependent = true; candidates.put(depLemma, "Heuristic-10"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-10"); } } } if (!foundDependent) { //System.out.println("HEURISTIC 1"); candidates.put(lemma, "Heuristic-1"); addSiblings(root, candidates, sensePos, dependencies, "Heuristic-1"); } } // Heuristic 2: subdef is either (1) one word or (2) two or more // word that *must be connected by a conjunction, and (3) the lemma // has the wrong part of speech, 
but could have the same POS (i.e., // the lemma was probably POS-tagged incorrectly). if (sensePos != lemmaPos) { // Only one word in the subdef, which can manifest itself as the // graph having no vertices! (size == 0) if (dependencies.size() < 1) { // System.out.println("HEURISTIC 2a"); IIndexWord iword = dict.getIndexWord(lemma, spos_); if (iword != null) candidates.put(lemma, "Heuristic-2a"); else { // Sometimes adjectves get lemmatized to a verb form // which is in correct. Check to see if the token // matches String token = root.get(TextAnnotation.class); iword = dict.getIndexWord(token, spos_); if (iword != null) candidates.put(token, "Heuristic-2a"); } } else { // System.out.println("HEURISTIC 2b"); Set<IndexedWord> tmp = new HashSet<IndexedWord>(); List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("conj")) { if (tmp.size() == 0) tmp.add(root); tmp.add(e.getDependent()); } } if (!tmp.isEmpty()) { for (IndexedWord iw : tmp) { String lem = iw.get(LemmaAnnotation.class); IIndexWord iword = dict.getIndexWord(lem, spos_); if (iword != null) candidates.put(lem, "Heuristic-2b"); else { // Sometimes adjectves get lemmatized to a verb // form which is in correct. Check to see if // the token matches String token = iw.get(TextAnnotation.class); iword = dict.getIndexWord(token, spos_); if (iword != null) candidates.put(token, "Heuristic-2b"); } } //System.out.println(tmp); } } } // Heuristics 3: the subdef is phrased as an overly-general description // of a person using "one", e.g., "one who does X". Replace this with // "person" if (sensePos == 'n' && (lemma.equals("one") || lemma.equals("someone"))) { // check the dependency graph for a "who" attachment // TODO // ... 
or be lazy and just check for the token Matcher m = WHO.matcher(subdef); if (m.find()) { candidates.put("person", "Heuristic-3: Person"); } } // Heuristic 4: if the root lemma is an adjective and the target // sense is a noun, look for a modifying a noun or set of nouns, // report those /// // Example: "a small, arched passageway" if (sensePos == 'n' && lemmaPos == 'j') { //System.out.println("HEURISTIC 4"); List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("appos") || e.getRelation().getShortName().equals("dep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); // System.out.println("!!! " + depLemma); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-4: Head Noun"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-4: Head Noun"); } //break; } } } // Heuristic 5: if the root lemma is a verb and the target sense is // a noun, look for a subject noun if (sensePos == 'n' && lemmaPos == 'v') { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("nsubj")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-5: Subject Noun"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-5: Subject Noun"); } break; } } } // Heuristic 6: if the root lemma is an existential quantifier or // something like it (e.g., "Any of ...") and // the target sense is a noun, look for a subject noun if (sensePos == 'n' && 
lemmaPos == 'd') { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("prep") || e.getRelation().getShortName().equals("dep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); // System.out.println(depLemma + "/" + depPos); // This should be the common case if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-6: Existential Example"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-6: Existential Example"); } // This is for some really (really) unusually parsed // edge cases else { List<SemanticGraphEdge> depEdges = dependencies.outgoingEdgeList(dep); for (SemanticGraphEdge e2 : depEdges) { if (e2.getRelation().getShortName().equals("rcmod")) { IndexedWord dep2 = e2.getDependent(); String depLemma2 = dep2.get(LemmaAnnotation.class); char depPos2 = dep2.get(PartOfSpeechAnnotation.class).substring(0, 1) .toLowerCase().charAt(0); if (depPos2 == sensePos) { candidates.put(depLemma2, "Heuristic-6: Existential Example"); addSiblings(dep2, candidates, sensePos, dependencies, "Heuristic-6: Existential Example"); } } } } } } } // Heuristic 8: if the root lemma is a verb and the sense is an // adjective, but the verb is modified by an adverb, this catches // that cases that Heuristics 2 does not if (sensePos == 'j' && lemmaPos == 'v') { Set<IndexedWord> tmp = new HashSet<IndexedWord>(); List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("advmod")) { IIndexWord iword = 
dict.getIndexWord(lemma, spos_); if (iword != null) candidates.put(lemma, "Heuristic-8: Adv-modified Verb"); else { // Sometimes adjectves get lemmatized to a verb // form which is in correct. Check to see if // the token matches String token = root.get(TextAnnotation.class); iword = dict.getIndexWord(token, spos_); if (iword != null) candidates.put(token, "Heuristic-8: Adv-modified Verb"); } } } } // Heuristic 9: if the sense is an adjective and the root lemma // begins with with a negative *and* the gloss contains something // like "not [x]", then pull out the "x" and use it as the hypernym if (sensePos == 'j' && lemma.equals("not")) { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { // System.out.printf("edge from %s -> %s %s%n", lemma, // e.getRelation().getShortName(), // e.getRelation().getLongName()); if (e.getRelation().getShortName().equals("dep")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); if (depPos == sensePos) { candidates.put(depLemma, "Heuristic-9: negated adj"); addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-9: negated adj"); } break; } } } // Heuristic 11: if the sense is a verb and the root lemma // is "to", this is probably a case of mistaken POS-tagging if (sensePos == 'v' && lemma.equals("to")) { List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("pobj")) { IndexedWord dep = e.getDependent(); IIndexWord iword = dict.getIndexWord(lemma, spos_); if (iword != null) candidates.put(lemma, "Heuristic-11: verbal infinitive"); else { // Sometimes verbs get lemmatized to a noun form // that is incorrect. 
Check to see if the token // matches String token = dep.get(TextAnnotation.class); iword = dict.getIndexWord(token, spos_); if (iword != null) candidates.put(token, "Heuristic-9: verbal infinitive"); } } } } } return candidates; }
From source file:ca.mcgill.cs.crown.procedure.ParseExtractor.java
License:Creative Commons License
/** * If we know we want {@code toAdd}, get all of its siblings that are joined * by conjunctions as candidates too/*from w w w. ja v a2 s .c om*/ */ void addSiblings(IndexedWord toAdd, MultiMap<String, String> candidates, char targetPos, SemanticGraph parse, String reason) { List<SemanticGraphEdge> edges = parse.outgoingEdgeList(toAdd); for (SemanticGraphEdge e : edges) { if (e.getRelation().getShortName().equals("conj")) { IndexedWord dep = e.getDependent(); String depLemma = dep.get(LemmaAnnotation.class); char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0); if (targetPos == depPos) { if (targetPos != 'v') { candidates.put(depLemma, reason + " (In conjunction)"); } // Check for phrasal verb particles else { List<SemanticGraphEdge> depEdges = parse.outgoingEdgeList(dep); for (SemanticGraphEdge e2 : depEdges) { if (e2.getRelation().getShortName().equals("prt")) { IndexedWord dep2 = e.getDependent(); depLemma = depLemma + " " + dep2.get(LemmaAnnotation.class); break; } } } } } } }
From source file:context.core.tokenizer.SemanticAnnotation.java
License:Open Source License
/** * * @param text//w w w . j a v a 2s.c o m * @param docId * @return */ public static Map<String, CustomEdge> tokenize(String text, String docId) { Map<String, CustomEdge> customEdges = new LinkedHashMap<>(); Annotation document = new Annotation(text); pipeline.annotate(document); List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class); int sentIndex = 0; for (CoreMap sentence : sentences) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods int index = 0; SemanticGraph dependencies = sentence .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); // System.out.println(dependencies); for (SemanticGraphEdge edge : dependencies.edgeListSorted()) { CustomEdge cedge = new CustomEdge(); cedge.setDocId(docId); cedge.setSentenceIndex(sentIndex); cedge.setIndex(index); cedge.setWord1(removePOS(edge.getSource() + "")); cedge.setWord2(removePOS(edge.getTarget() + "")); cedge.setType(edge.getRelation() + ""); // System.out.println(edge + " >d: " + edge.getDependent() + " >g: " + edge.getGovernor() + " > " + edge.getRelation() + "> " + edge.getSource() + " > " + edge.getTarget() + " >w: " + edge.getWeight()); customEdges.put(cedge.getWord1() + "/" + cedge.getWord2() + "/" + cedge.getDocId() + "/" + cedge.getSentenceIndex(), cedge); index++; } // Collection<TypedDependency> deps = dependencies.typedDependencies(); // for (TypedDependency typedDep : deps) { // GrammaticalRelation reln = typedDep.reln(); // String type = reln.toString(); // System.out.println("type=" + type + " >> " + typedDep); // } // Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); // sentIndex++; } return customEdges; }
From source file:context.core.tokenizer.SPOExtractor.java
License:Open Source License
static List<SPOStructure> extractSPOs(CoreMap sentence, String docId, int sentIndex) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods int index = 0; Map<String, CustomEdge> customEdges = new LinkedHashMap<>(); SemanticGraph dependencies = sentence .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); for (SemanticGraphEdge edge : dependencies.edgeListSorted()) { CustomEdge cedge = new CustomEdge(); cedge.setDocId(docId);//from www . j av a2 s .co m cedge.setSentenceIndex(sentIndex); cedge.setIndex(index); cedge.setWord1(edge.getSource().originalText()); cedge.setWord2(edge.getTarget().originalText()); cedge.setType(edge.getRelation() + ""); customEdges.put(cedge.getWord1() + "/" + cedge.getWord2() + "/" + cedge.getDocId() + "/" + cedge.getSentenceIndex(), cedge); } Collection<String> verbs = extractVerbs(customEdges.values()); List<SPOStructure> spos_list = new ArrayList<>(); for (String v : verbs) { SPOStructure spo = new SPOStructure(); for (CustomEdge cedge : customEdges.values()) { if (cedge.getType().equals("nsubj") && cedge.getWord1().equals(v)) { CustomToken subject = new CustomToken(); String expandedSubject = expandNoun(cedge.getWord2(), customEdges.values()); subject.setWord(expandedSubject); spo.addSubject(subject); } else if (cedge.getType().equals("dobj") && cedge.getWord1().equals(v)) { CustomToken object = new CustomToken(); String expandedObject = expandNoun(cedge.getWord2(), customEdges.values()); object.setWord(expandedObject); spo.addObject(object); } } if (spo.getObjects().isEmpty()) { for (CustomEdge cedge : customEdges.values()) { if (cedge.getType().contains("prep") && cedge.getWord1().equals(v)) { CustomToken object = new CustomToken(); String expandedObject = expandNoun(cedge.getWord2(), customEdges.values()); object.setWord(expandedObject); spo.addObject(object); break; } } } if (spo.getObjects().size() > 0 && spo.getSubjects().size() > 0) { 
CustomToken predicate = new CustomToken(); predicate.setWord(v); spo.setPredicate(predicate); spos_list.add(spo); } } return spos_list; }
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License:Open Source License
public static void convertDependencies(JCas aJCas, Annotation document, MappingProvider mappingProvider, boolean internStrings) { for (CoreMap s : document.get(SentencesAnnotation.class)) { SemanticGraph graph = s.get(CollapsedDependenciesAnnotation.class); //SemanticGraph graph = s.get(EnhancedDependenciesAnnotation.class); // If there are no dependencies for this sentence, skip it. Might well mean we // skip all sentences because normally either there are dependencies for all or for // none./*w w w . j av a2 s.co m*/ if (graph == null) { continue; } for (IndexedWord root : graph.getRoots()) { Dependency dep = new ROOT(aJCas); dep.setDependencyType("root"); dep.setDependent(root.get(TokenKey.class)); dep.setGovernor(root.get(TokenKey.class)); dep.setBegin(dep.getDependent().getBegin()); dep.setEnd(dep.getDependent().getEnd()); dep.setFlavor(DependencyFlavor.BASIC); dep.addToIndexes(); } for (SemanticGraphEdge edge : graph.edgeListSorted()) { Token dependent = edge.getDependent().get(TokenKey.class); Token governor = edge.getGovernor().get(TokenKey.class); // For the type mapping, we use getShortName() instead, because the <specific> // actually doesn't change the relation type String labelUsedForMapping = edge.getRelation().getShortName(); // The nndepparser may produce labels in which the shortName contains a colon. // These represent language-specific labels of the UD, cf: // http://universaldependencies.github.io/docs/ext-dep-index.html labelUsedForMapping = StringUtils.substringBefore(labelUsedForMapping, ":"); // Need to use toString() here to get "<shortname>_<specific>" String actualLabel = edge.getRelation().toString(); Type depRel = mappingProvider.getTagType(labelUsedForMapping); Dependency dep = (Dependency) aJCas.getCas().createFS(depRel); dep.setDependencyType(internStrings ? 
actualLabel.intern() : actualLabel); dep.setDependent(dependent); dep.setGovernor(governor); dep.setBegin(dep.getDependent().getBegin()); dep.setEnd(dep.getDependent().getEnd()); dep.setFlavor(edge.isExtra() ? DependencyFlavor.ENHANCED : DependencyFlavor.BASIC); dep.addToIndexes(); } } }
From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java
License:Open Source License
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { Annotation document = this.processor.process(jCas.getDocumentText()); String lastNETag = "O"; int lastNEBegin = -1; int lastNEEnd = -1; for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) { // create the token annotation int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class); int end = tokenAnn.get(CharacterOffsetEndAnnotation.class); String pos = tokenAnn.get(PartOfSpeechAnnotation.class); String lemma = tokenAnn.get(LemmaAnnotation.class); Token token = new Token(jCas, begin, end); token.setPos(pos);/* w w w .j a v a2 s . c o m*/ token.setLemma(lemma); token.addToIndexes(); // hackery to convert token-level named entity tag into phrase-level tag String neTag = tokenAnn.get(NamedEntityTagAnnotation.class); if (neTag.equals("O") && !lastNETag.equals("O")) { NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd); ne.setMentionType(lastNETag); ne.addToIndexes(); } else { if (lastNETag.equals("O")) { lastNEBegin = begin; } else if (lastNETag.equals(neTag)) { // do nothing - begin was already set } else { NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd); ne.setMentionType(lastNETag); ne.addToIndexes(); lastNEBegin = begin; } lastNEEnd = end; } lastNETag = neTag; } if (!lastNETag.equals("O")) { NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd); ne.setMentionType(lastNETag); ne.addToIndexes(); } // add sentences and trees for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) { // add the sentence annotation int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class); int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class); Sentence sentence = new Sentence(jCas, sentBegin, sentEnd); sentence.addToIndexes(); // add the syntactic tree annotation List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class); Tree tree = sentenceAnn.get(TreeAnnotation.class); if 
(tree.children().length != 1) { throw new RuntimeException("Expected single root node, found " + tree); } tree = tree.firstChild(); tree.indexSpans(0); TopTreebankNode root = new TopTreebankNode(jCas); root.setTreebankParse(tree.toString()); // TODO: root.setTerminals(v) this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns); // get the dependencies SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class); // convert Stanford nodes to UIMA annotations List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence); Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>(); for (IndexedWord stanfordNode : dependencies.vertexSet()) { int indexBegin = stanfordNode.get(BeginIndexAnnotation.class); int indexEnd = stanfordNode.get(EndIndexAnnotation.class); int tokenBegin = tokens.get(indexBegin).getBegin(); int tokenEnd = tokens.get(indexEnd - 1).getEnd(); DependencyNode node; if (dependencies.getRoots().contains(stanfordNode)) { node = new TopDependencyNode(jCas, tokenBegin, tokenEnd); } else { node = new DependencyNode(jCas, tokenBegin, tokenEnd); } stanfordToUima.put(stanfordNode, node); } // create relation annotations for each Stanford dependency ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create(); ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create(); for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) { DependencyRelation relation = new DependencyRelation(jCas); DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor()); DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent()); String relationType = stanfordEdge.getRelation().toString(); if (head == null || child == null || relationType == null) { throw new RuntimeException(String.format( "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation, child, head)); } 
relation.setHead(head); relation.setChild(child); relation.setRelation(relationType); relation.addToIndexes(); headRelations.put(child, relation); childRelations.put(head, relation); } // set the relations for each node annotation for (DependencyNode node : stanfordToUima.values()) { List<DependencyRelation> heads = headRelations.get(node); node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size())); if (heads != null) { FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads); } List<DependencyRelation> children = childRelations.get(node); node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size())); if (children != null) { FSCollectionFactory.fillArrayFS(node.getChildRelations(), children); } node.addToIndexes(); } } // map from spans to named entity mentions Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>(); for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) { spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention); } // add mentions for all entities identified by the coreference system List<NamedEntity> entities = new ArrayList<NamedEntity>(); List<List<Token>> sentenceTokens = new ArrayList<List<Token>>(); for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) { sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence)); } Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class); for (CorefChain chain : corefChains.values()) { List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>(); for (CorefMention corefMention : chain.getMentionsInTextualOrder()) { // figure out the character span of the token List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1); int begin = tokens.get(corefMention.startIndex - 1).getBegin(); int end = tokens.get(corefMention.endIndex - 2).getEnd(); // use an existing named entity mention when possible; otherwise create a new one 
NamedEntityMention mention = spanMentionMap.get(new Span(begin, end)); if (mention == null) { mention = new NamedEntityMention(jCas, begin, end); mention.addToIndexes(); } mentions.add(mention); } // create an entity for the mentions Collections.sort(mentions, new Comparator<NamedEntityMention>() { @Override public int compare(NamedEntityMention m1, NamedEntityMention m2) { return m1.getBegin() - m2.getBegin(); } }); // create mentions and add them to entity NamedEntity entity = new NamedEntity(jCas); entity.setMentions(new FSArray(jCas, mentions.size())); int index = 0; for (NamedEntityMention mention : mentions) { mention.setMentionedEntity(entity); entity.setMentions(index, mention); index += 1; } entities.add(entity); } // add singleton entities for any named entities not picked up by coreference system for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) { if (mention.getMentionedEntity() == null) { NamedEntity entity = new NamedEntity(jCas); entity.setMentions(new FSArray(jCas, 1)); entity.setMentions(0, mention); mention.setMentionedEntity(entity); entity.getMentions(); entities.add(entity); } } // sort entities by document order Collections.sort(entities, new Comparator<NamedEntity>() { @Override public int compare(NamedEntity o1, NamedEntity o2) { return getFirstBegin(o1) - getFirstBegin(o2); } private int getFirstBegin(NamedEntity entity) { int min = Integer.MAX_VALUE; for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) { if (mention.getBegin() < min) { min = mention.getBegin(); } } return min; } }); // add entities to document for (NamedEntity entity : entities) { entity.addToIndexes(); } }
From source file:edu.cmu.deiis.annotators.StanfordAnnotator.java
License:Open Source License
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { Annotation document = this.processor.process(jCas.getDocumentText()); String lastNETag = "O"; int lastNEBegin = -1; int lastNEEnd = -1; for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) { // create the token annotation int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class); int end = tokenAnn.get(CharacterOffsetEndAnnotation.class); String pos = tokenAnn.get(PartOfSpeechAnnotation.class); String lemma = tokenAnn.get(LemmaAnnotation.class); Token token = new Token(jCas, begin, end); token.setPos(pos);// ww w . j av a 2 s.com token.setLemma(lemma); token.addToIndexes(); // hackery to convert token-level named entity tag into phrase-level tag String neTag = tokenAnn.get(NamedEntityTagAnnotation.class); if (neTag.equals("O") && !lastNETag.equals("O")) { NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd); ne.setMentionType(lastNETag); ne.addToIndexes(); } else { if (lastNETag.equals("O")) { lastNEBegin = begin; } else if (lastNETag.equals(neTag)) { // do nothing - begin was already set } else { NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd); ne.setMentionType(lastNETag); ne.addToIndexes(); lastNEBegin = begin; } lastNEEnd = end; } lastNETag = neTag; } if (!lastNETag.equals("O")) { NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd); ne.setMentionType(lastNETag); ne.addToIndexes(); } // add sentences and trees for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) { // add the sentence annotation int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class); int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class); Sentence sentence = new Sentence(jCas, sentBegin, sentEnd); sentence.addToIndexes(); // add the syntactic tree annotation List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class); Tree tree = sentenceAnn.get(TreeAnnotation.class); if 
(tree.children().length != 1) { throw new RuntimeException("Expected single root node, found " + tree); } tree = tree.firstChild(); tree.indexSpans(0); TopTreebankNode root = new TopTreebankNode(jCas); root.setTreebankParse(tree.toString()); // TODO: root.setTerminals(v) this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns); // get the dependencies SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class); // convert Stanford nodes to UIMA annotations List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence); Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>(); for (IndexedWord stanfordNode : dependencies.vertexSet()) { int indexBegin = stanfordNode.get(BeginIndexAnnotation.class); int indexEnd = stanfordNode.get(EndIndexAnnotation.class); int tokenBegin = tokens.get(indexBegin).getBegin(); int tokenEnd = tokens.get(indexEnd - 1).getEnd(); DependencyNode node; if (dependencies.getRoots().contains(stanfordNode)) { node = new TopDependencyNode(jCas, tokenBegin, tokenEnd); } else { node = new DependencyNode(jCas, tokenBegin, tokenEnd); } stanfordToUima.put(stanfordNode, node); } // create relation annotations for each Stanford dependency ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create(); ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create(); for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) { DependencyRelation relation = new DependencyRelation(jCas); DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor()); DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent()); String relationType = stanfordEdge.getRelation().toString(); if (head == null || child == null || relationType == null) { throw new RuntimeException(String.format( "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation, child, head)); } 
relation.setHead(head); relation.setChild(child); relation.setRelation(relationType); relation.addToIndexes(); headRelations.put(child, relation); childRelations.put(head, relation); } // set the relations for each node annotation for (DependencyNode node : stanfordToUima.values()) { List<DependencyRelation> heads = headRelations.get(node); node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size())); if (heads != null) { FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads); } List<DependencyRelation> children = childRelations.get(node); node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size())); if (children != null) { FSCollectionFactory.fillArrayFS(node.getChildRelations(), children); } node.addToIndexes(); } } // map from spans to named entity mentions Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>(); for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) { spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention); } // add mentions for all entities identified by the coreference system List<NamedEntity> entities = new ArrayList<NamedEntity>(); List<List<Token>> sentenceTokens = new ArrayList<List<Token>>(); for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) { sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence)); } Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class); for (CorefChain chain : corefChains.values()) { List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>(); for (CorefMention corefMention : chain.getMentionsInTextualOrder()) { // figure out the character span of the token List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1); int begin = tokens.get(corefMention.startIndex - 1).getBegin(); int end = tokens.get(corefMention.endIndex - 2).getEnd(); // use an existing named entity mention when possible; otherwise create a new one 
NamedEntityMention mention = spanMentionMap.get(new Span(begin, end)); if (mention == null) { mention = new NamedEntityMention(jCas, begin, end); //String line = mention.getCoveredText(); //System.out.println(line); mention.addToIndexes(); } mentions.add(mention); } // create an entity for the mentions Collections.sort(mentions, new Comparator<NamedEntityMention>() { @Override public int compare(NamedEntityMention m1, NamedEntityMention m2) { return m1.getBegin() - m2.getBegin(); } }); // create mentions and add them to entity NamedEntity entity = new NamedEntity(jCas); entity.setMentions(new FSArray(jCas, mentions.size())); int index = 0; for (NamedEntityMention mention : mentions) { mention.setMentionedEntity(entity); entity.setMentions(index, mention); index += 1; } entities.add(entity); } // add singleton entities for any named entities not picked up by coreference system for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) { if (mention.getMentionedEntity() == null) { NamedEntity entity = new NamedEntity(jCas); entity.setMentions(new FSArray(jCas, 1)); entity.setMentions(0, mention); mention.setMentionedEntity(entity); entity.getMentions(); entities.add(entity); } } // sort entities by document order Collections.sort(entities, new Comparator<NamedEntity>() { @Override public int compare(NamedEntity o1, NamedEntity o2) { return getFirstBegin(o1) - getFirstBegin(o2); } private int getFirstBegin(NamedEntity entity) { int min = Integer.MAX_VALUE; for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) { if (mention.getBegin() < min) { min = mention.getBegin(); } } return min; } }); // add entities to document for (NamedEntity entity : entities) { //NamedEntityMention mention=entity.getMentions(3); //System.out.println(mention.getBegin()); entity.addToIndexes(); } }
From source file:edu.csupomona.nlp.util.Sentence2Clause.java
public void process(String text) { // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document);/*from w w w . j a v a 2 s . c o m*/ List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { // this is the Stanford dependency graph of the current sentence SemanticGraph dependencies = sentence.get(BasicDependenciesAnnotation.class); System.out.println(dependencies.toString("plain")); for (SemanticGraphEdge edge : dependencies.getEdgeSet()) { System.out.println(edge.getRelation().getShortName() + ": " + edge.getGovernor().value() + "(" + edge.getGovernor().index() + ") => " + edge.getDependent().value() + "(" + edge.getDependent().index() + ")"); } } }
From source file:edu.jhu.hlt.concrete.stanford.PreNERCoreMapWrapper.java
License:Open Source License
private List<Dependency> makeDependencies(SemanticGraph graph) { List<Dependency> depList = new ArrayList<Dependency>(); for (IndexedWord root : graph.getRoots()) { // this mimics CoreNLP's handling String rel = GrammaticalRelation.ROOT.getLongName().replaceAll("\\s+", ""); int dep = root.index() - 1; Dependency depend = DependencyFactory.create(dep, rel); depList.add(depend);//from w w w.j a v a 2s . co m } for (SemanticGraphEdge edge : graph.edgeListSorted()) { String rel = edge.getRelation().toString().replaceAll("\\s+", ""); int gov = edge.getSource().index() - 1; int dep = edge.getTarget().index() - 1; Dependency depend = DependencyFactory.create(dep, rel, gov); depList.add(depend); } return depList; }
From source file:edu.nus.comp.nlp.stanford.UtilParser.java
License:Open Source License
public static DefaultMutableTreeNode toDMTree(IndexedWord root, SemanticGraph dependencies) { if (root == null) { root = dependencies.getFirstRoot(); }//ww w . ja v a 2 s . c o m DefaultMutableTreeNode node = new DefaultMutableTreeNode(); String nodeContent = root.value(); for (SemanticGraphEdge edge : dependencies.edgeIterable()) { if (edge.getDependent().equals(root)) { nodeContent = "<-" + edge.getRelation() + "- " + nodeContent; break; } } node.setUserObject(nodeContent); for (IndexedWord c : dependencies.getChildList(root)) { DefaultMutableTreeNode n = toDMTree(c, dependencies); node.add(n); } return node; }