List of usage examples for edu.stanford.nlp.ling IndexedWord word
@Override
public String word()
From source file:ca.ualberta.exemplar.core.RelationExtraction.java
License:Open Source License
/**
 * Extracts n-ary relation instances from a parsed sentence by matching three
 * Semgrex patterns against the sentence's collapsed, CC-processed dependency
 * graph:
 * <ul>
 *   <li>Template A — a verbal relation, or a nominalized one when the
 *       "relnom" node matched (looked up in {@code nominalizedVerbMap});</li>
 *   <li>Template B — a verb plus an optional direct object ("dobj");</li>
 *   <li>Template C — a copular relation, with or without an explicit copula
 *       node.</li>
 * </ul>
 * Each candidate must have an NER-tagged child ({@code hasNerChild}) and pass
 * {@code shouldAddInstance} to be kept.
 *
 * @param sentence annotated sentence; must carry a
 *        {@code CollapsedCCProcessedDependenciesAnnotation} to yield results
 * @return extracted relation instances (empty when no dependency graph is
 *         attached to the sentence)
 */
public List<RelationInstance> extractNAryRelations(CoreMap sentence) {
    List<RelationInstance> ret = new ArrayList<RelationInstance>();
    SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    if (dependencies == null) {
        return ret;
    }
    // Template A: plain verbal relation ("rel") or nominalization ("relnom").
    {
        SemgrexMatcher matcher = patternA.matcher(dependencies);
        while (matcher.find()) {
            IndexedWord rel = matcher.getNode("rel");
            IndexedWord relnom = matcher.getNode("relnom");
            boolean nom = relnom != null;
            if (nom)
                rel = relnom;
            // Only keep triggers that govern at least one named entity.
            if (!hasNerChild(rel, dependencies))
                continue;
            RelationInstance instance = new RelationInstance();
            List<IndexedWord> triggers = new ArrayList<IndexedWord>(2);
            if (nom) {
                // Nominalized trigger: normalized form comes from the lookup table.
                instance.setNormalizedRelation(nominalizedVerbMap.get(relnom.word()));
                instance.setOriginalRelation(relnom.word());
                triggers.add(relnom);
            } else {
                instance.setNormalizedRelation(produceRelationName(rel, null, dependencies, true));
                instance.setOriginalRelation(produceRelationName(rel, null, dependencies, false));
                triggers.add(rel);
            }
            // index() appears to be 1-based here; -1 presumably yields a
            // 0-based token offset — TODO confirm against consumers.
            instance.setTriggerIndex(triggers.get(0).index() - 1);
            argExtractor.extractArgumentsTemplateA(sentence, dependencies, triggers, instance);
            if (shouldAddInstance(instance)) {
                ret.add(instance);
            }
        }
    }
    // Template B: verb trigger with an optional direct object as a co-trigger.
    {
        SemgrexMatcher matcher = patternB.matcher(dependencies);
        while (matcher.find()) {
            IndexedWord rel = matcher.getNode("rel");
            if (!hasNerChild(rel, dependencies))
                continue;
            List<IndexedWord> triggers = new ArrayList<IndexedWord>();
            triggers.add(rel);
            IndexedWord dobj = matcher.getNode("dobj");
            if (dobj != null) {
                triggers.add(dobj);
            }
            RelationInstance instance = new RelationInstance();
            instance.setTriggerIndex(triggers.get(0).index() - 1);
            instance.setNormalizedRelation(produceRelationName(rel, dobj, dependencies, true));
            instance.setOriginalRelation(produceRelationName(rel, dobj, dependencies, false));
            argExtractor.extractArgumentsTemplateB(sentence, dependencies, triggers, instance);
            if (shouldAddInstance(instance)) {
                ret.add(instance);
            }
        }
    }
    // Template C: copular relation; when no overt copula matched, the
    // normalized name is prefixed with "be" while the original form is not.
    {
        SemgrexMatcher matcher = patternC.matcher(dependencies);
        while (matcher.find()) {
            IndexedWord copula = matcher.getNode("copula");
            IndexedWord rel = matcher.getNode("rel");
            List<IndexedWord> triggers = new ArrayList<IndexedWord>();
            triggers.add(rel);
            RelationInstance instance = new RelationInstance();
            if (copula != null) {
                triggers.add(copula);
                instance.setNormalizedRelation(produceRelationName(copula, rel, dependencies, true));
                instance.setOriginalRelation(produceRelationName(copula, rel, dependencies, false));
            } else {
                instance.setNormalizedRelation(produceRelationName("be", rel, dependencies, true));
                instance.setOriginalRelation(produceRelationName("", rel, dependencies, false));
            }
            instance.setTriggerIndex(triggers.get(0).index() - 1);
            argExtractor.extractArgumentsTemplateC(sentence, dependencies, triggers, instance);
            if (shouldAddInstance(instance)) {
                ret.add(instance);
            }
        }
    }
    return ret;
}
From source file:ca.ualberta.exemplar.core.RelationExtraction.java
License:Open Source License
private static String produceRelationName(IndexedWord verb, IndexedWord noun, SemanticGraph dependencies, boolean shouldNormalize) { StringBuilder rel = new StringBuilder(); List<IndexedWord> verbPhrase = new ArrayList<IndexedWord>(); List<IndexedWord> nounPhrase = new ArrayList<IndexedWord>(); if (verb != null) verbPhrase.add(verb);//from w w w . j a va 2 s .c o m if (noun != null) nounPhrase.add(noun); if (!shouldNormalize) { if (noun != null) addModifiers(nounPhrase, noun, dependencies); if (verb != null) addModifiers(verbPhrase, verb, dependencies); } sortWordsByIndex(verbPhrase); sortWordsByIndex(nounPhrase); for (IndexedWord word : verbPhrase) { if (shouldNormalize) rel.append(word.lemma()); else rel.append(word.word()); rel.append(' '); } for (IndexedWord word : nounPhrase) { if (shouldNormalize) rel.append(word.lemma()); else rel.append(word.word()); rel.append(' '); } return rel.toString().trim(); }
From source file:ca.ualberta.exemplar.core.RelationExtraction.java
License:Open Source License
private static String produceRelationName(String verb, IndexedWord noun, SemanticGraph dependencies, boolean shouldNormalize) { StringBuilder rel = new StringBuilder(); List<IndexedWord> nounPhrase = new ArrayList<IndexedWord>(); rel.append(verb + ' '); if (noun != null) nounPhrase.add(noun);//from www. ja va2 s . c o m if (!shouldNormalize) if (noun != null) addModifiers(nounPhrase, noun, dependencies); sortWordsByIndex(nounPhrase); for (IndexedWord word : nounPhrase) { if (shouldNormalize) rel.append(word.lemma()); else rel.append(word.word()); rel.append(' '); } return rel.toString().trim(); }
From source file:org.sam_agent.csparser.ContinuousParser.java
License:Open Source License
/**
 * Serializes a dependency graph as a JSON fragment of the form
 * {@code "dependencies":[{"rel":"...","arg0":"word-idx","arg1":"word-idx"},...]}
 * with one object per edge (governor as arg0, dependent as arg1).
 *
 * <p>Fixes over the previous version: removed the redundant
 * {@code .toString()} on {@code word()} (already a String), and escaped
 * backslashes and double quotes so tokens containing them no longer produce
 * malformed JSON.</p>
 *
 * @param dependencies the graph to serialize
 * @return a JSON object-member string (not a complete JSON document)
 */
public String stringify(SemanticGraph dependencies) {
    List<String> depsList = new ArrayList<String>();
    for (SemanticGraphEdge edge : dependencies.edgeIterable()) {
        String rel = edge.getRelation().toString();
        IndexedWord gov = edge.getGovernor();
        IndexedWord dep = edge.getDependent();
        String arg0 = gov.word() + "-" + gov.index();
        String arg1 = dep.word() + "-" + dep.index();
        depsList.add(String.format("{\"rel\":\"%s\",\"arg0\":\"%s\",\"arg1\":\"%s\"}",
                escapeJson(rel), escapeJson(arg0), escapeJson(arg1)));
    }
    return String.format("\"dependencies\":[%s]", String.join(",", depsList));
}

/** Escapes backslashes and double quotes so a token cannot break the JSON framing. */
private static String escapeJson(String s) {
    return s.replace("\\", "\\\\").replace("\"", "\\\"");
}
From source file:semRewrite.datesandnumber.DateAndNumbersGeneration.java
License:Open Source License
/** ***************************************************************
 * Generates SUMO terms describing a measurement: identifies the unit of
 * measurement (the number token's dependency parent), the measured entity
 * (found by walking the graph up from the unit, or via nsubj/dobj edges),
 * and the measured value (the token's word), then appends
 * measure/unit/value/valueToken facts to {@code utilities.sumoTerms}.
 *
 * @param token the numeric token being measured (commas are stripped in place)
 * @param count running measure counter used to name "measureN" terms
 * @param utilities holds the dependency graph and the output term list
 */
private void measureFn(Tokens token, int count, Utilities utilities) {
    IndexedWord tokenNode = utilities.StanfordDependencies.getNodeByIndex(token.getId());
    IndexedWord unitOfMeasurementNode = utilities.StanfordDependencies.getParent(tokenNode);
    IndexedWord measuredEntity = null;
    String posTagRemover = null;
    String unitOfMeasurementStr = "";
    String sumoUnitOfMeasure = "";
    List<String> visitedNodes = new ArrayList<String>();
    Matcher posTagRemoverMatcher = null;
    String measuredEntityStr = null;
    boolean flag = false;
    // Normalize numbers like "1,000" to "1000" before emitting them as terms.
    if (token.getWord().contains(",")) {
        token.setWord(token.getWord().replaceAll(",", ""));
    }
    if (unitOfMeasurementNode != null) {
        unitOfMeasurementStr = unitOfMeasurementNode.word();
        // First candidate for the measured entity: the unit's own parent.
        measuredEntity = utilities.StanfordDependencies.getParent(unitOfMeasurementNode);
        visitedNodes.add(unitOfMeasurementNode.toString() + "-" + unitOfMeasurementNode.index());
    }
    if ((measuredEntity == null) && (unitOfMeasurementNode != null)) {
        // Unit is a root-like node: fall back to its nsubj/dobj dependent.
        for (SemanticGraphEdge e : utilities.StanfordDependencies.getOutEdgesSorted(unitOfMeasurementNode)) {
            if ((e.getRelation().toString().equals("nsubj")) || (e.getRelation().toString().equals("dobj"))) {
                measuredEntity = e.getDependent();
                flag = true;
                break;
            }
        }
    } else if ((measuredEntity == null) && (unitOfMeasurementNode == null)) {
        // Number token has no parent at all: nothing to measure.
        return;
    }
    // Walk upward until a noun-tagged ancestor is found; "flag" marks that the
    // search is settled. NOTE(review): if POS_TAG_REMOVER never matches
    // measuredEntity.toString(), this loop does not advance — verify the
    // pattern always matches node renderings.
    while ((measuredEntity != null) && (!flag)) {
        measuredEntityStr = measuredEntity.value() + "-" + measuredEntity.index();
        if (!visitedNodes.contains(measuredEntityStr)) {
            visitedNodes.add(measuredEntityStr);
        }
        posTagRemoverMatcher = POS_TAG_REMOVER.matcher(measuredEntity.toString());
        if (posTagRemoverMatcher.find()) {
            posTagRemover = posTagRemoverMatcher.group(1);
            if (Utilities.nounTags.contains(posTagRemover)) {
                break;
            }
            if (utilities.StanfordDependencies.getParent(measuredEntity) == null) {
                Set<IndexedWord> childrenSet = utilities.StanfordDependencies.getChildren(measuredEntity);
                // A single child means we circled back to the unit node itself;
                // emit a memberCount measure and stop to avoid looping forever.
                if ((childrenSet.size() == 1)) {
                    measuredEntity = unitOfMeasurementNode;
                    utilities.sumoTerms.add("measure(" + measuredEntity.word() + "-" + measuredEntity.index()
                            + ", measure" + count + ")");
                    utilities.sumoTerms.add("unit(measure" + count + ", " + "memberCount" + ")");
                    utilities.sumoTerms.add("value(measure" + count + ", " + token.getWord() + ")");
                    utilities.sumoTerms.add("valueToken(" + token.getWord() + "," + token.getWord() + "-"
                            + token.getId() + ")");
                    flag = true;
                    return;
                }
                IndexedWord measuredEntity_temp = null;
                for (IndexedWord child : childrenSet) {
                    String childPosTagRemover = null;
                    posTagRemoverMatcher = POS_TAG_REMOVER.matcher(child.toString());
                    if (posTagRemoverMatcher.find()) {
                        childPosTagRemover = posTagRemoverMatcher.group(1);
                    }
                    // NOTE(review): if find() failed above, childPosTagRemover
                    // stays null and replaceFirst below throws NPE — confirm
                    // the pattern always matches child renderings.
                    if (!(visitedNodes.contains(child.toString() + "-" + child.index()))
                            && (Utilities.nounTags.contains(childPosTagRemover.replaceFirst("\\/", "")))) {
                        // Prefer an nsubj child as the measured entity.
                        if ((utilities.StanfordDependencies.reln(measuredEntity, child) != null)
                                && (utilities.StanfordDependencies.reln(measuredEntity, child).getShortName()
                                        .equals("nsubj"))) {
                            measuredEntity = child;
                            visitedNodes.add(child.toString() + "-" + child.index());
                            flag = true;
                            break;
                        }
                        measuredEntity_temp = child;
                        visitedNodes.add(child.toString() + "-" + child.index());
                    }
                }
                if (!flag) {
                    // No nsubj child found: settle for the last noun-tagged child.
                    measuredEntity = measuredEntity_temp;
                    flag = true;
                }
            } else {
                measuredEntity = utilities.StanfordDependencies.getParent(measuredEntity);
            }
        }
    }
    if (measuredEntity != null) {
        String lemmatizedWord = lemmatizeWord(measuredEntity);
        utilities.sumoTerms
                .add("measure(" + lemmatizedWord + "-" + measuredEntity.index() + ", measure" + count + ")");
    }
    // Map the unit word to a SUMO sense; fall back to the raw word (or
    // "memberCount" when the unit and the entity are the same word).
    sumoUnitOfMeasure = lemmatizeWord(unitOfMeasurementNode);
    sumoUnitOfMeasure = WSD.getBestDefaultSUMOsense(sumoUnitOfMeasure, 1);
    if ((sumoUnitOfMeasure != null) && (!sumoUnitOfMeasure.isEmpty())) {
        sumoUnitOfMeasure = sumoUnitOfMeasure.replaceAll("[^\\p{Alpha}\\p{Digit}]+", "");
    } else {
        if ((measuredEntity != null) && (unitOfMeasurementStr.equals(measuredEntity.value()))) {
            unitOfMeasurementStr = "memberCount";
        }
        sumoUnitOfMeasure = unitOfMeasurementStr;
    }
    utilities.sumoTerms.add("unit(measure" + count + ", " + sumoUnitOfMeasure + ")");
    utilities.sumoTerms.add("value(measure" + count + ", " + token.getWord() + ")");
    utilities.sumoTerms
            .add("valueToken(" + token.getWord() + "," + token.getWord() + "-" + token.getId() + ")");
    WordNet.wn.initOnce();
}
From source file:semRewrite.datesandnumber.Utilities.java
License:Open Source License
/** ***************************************************************
 * Walks upward from the word at {@code wordIndex} toward the dependency
 * root, returning the first ancestor whose POS tag matches
 * {@code containsIndexWord}, formatted as "word-index".
 *
 * <p>Fix over the previous version: guards against
 * {@code getNodeByIndex}/{@code getParent} returning null (a detached node),
 * which previously caused a NullPointerException instead of the documented
 * null result.</p>
 *
 * @param wordIndex token index to start from
 * @return "word-index" of the first matching ancestor, or null when the root
 *         is reached (or the parent chain ends) without a match
 */
public String populateRootWord(int wordIndex) {
    IndexedWord tempParent = StanfordDependencies.getNodeByIndex(wordIndex);
    while (tempParent != null && !tempParent.equals(StanfordDependencies.getFirstRoot())) {
        tempParent = StanfordDependencies.getParent(tempParent);
        if (tempParent == null) {
            break; // detached from the root: no ancestor to report
        }
        if (containsIndexWord(tempParent.tag())) {
            return tempParent.word() + "-" + tempParent.index();
        }
    }
    return null;
}
From source file:sleventextraction.SLEntity.java
/**
 * Builds an SLEntity from an ACE mention and its enclosing sentence:
 * copies type/role metadata from the mention, repairs the mention text,
 * re-parses it, aligns its tokens against the sentence tokens to collect
 * {@code ContentIws}, picks the aligned token closest to the dependency
 * root as {@code head}, and records the head's parent and relation label
 * ({@code predicate}, {@code dep}).
 *
 * @param m     the ACE mention this entity is built from
 * @param senCM the annotated sentence containing the mention
 * @param senSG sentence dependency graph (unused here; the graph is taken
 *              from senCM's CollapsedDependenciesAnnotation instead)
 */
public SLEntity(AceMention m, CoreMap senCM, SemanticGraph senSG) {
    this();
    isArg = m.isArg;
    argProb = m.argProb;
    role = m.role;
    // Entity type/subtype depend on the concrete parent kind; timex and
    // unknown parents get empty types.
    if (m.getParent() instanceof AceJet.AceEntity) {
        this.entitytype = ((AceEntity) m.getParent()).type;
        this.entitysubtype = ((AceEntity) m.getParent()).subtype;
    } else if (m.getParent() instanceof AceJet.AceTimex) {
        this.entitytype = "";
        this.entitysubtype = "";
    } else if (m.getParent() instanceof AceJet.AceValue) {
        this.entitytype = ((AceValue) m.getParent()).type;
        this.entitysubtype = ((AceValue) m.getParent()).subtype;
    } else {
        this.entitytype = "";
        this.entitysubtype = "";
    }
    this.mentiontype = m.getType();
    System.arraycopy(m.roleProb, 0, roleProb, 0, m.roleProb.length);
    ground = m.ground;
    span = senCM;
    SemanticGraph totaldep = span.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
    this.content = m.text.trim();
    // Drop a leading double quote from the mention text.
    if (m.text.charAt(0) == '\"') {
        this.content = m.text.substring(1).trim();
    }
    // Hardcoded repairs for truncated/garbled mention strings — these appear
    // to patch extraction artifacts in a specific corpus (ACE); TODO confirm.
    if ("s\nb".equals(this.content)) {
        this.content = "his brother";
    } else if (" f".equals(this.content)) {
        this.content = "foreign";
    } else if ("-l".equals(this.content)) {
        this.content = "US-led";
    } else if ("s a".equals(this.content)) {
        if (span.toString().contains("Arafat's administration")) {
            this.content = "Arafat's administration";
        } else if (span.toString().contains("bus attack")) {
            this.content = "bus attack";
        }
    } else if ("33-month".equals(this.content)) {
        this.content = "33-month-old";
    } else if ("U.S".equals(this.content)) {
        this.content = "U.S.";
    } else if ("four-day".equals(this.content)) {
        this.content = "four-day-old";
    } else if ("U.N".equals(this.content)) {
        this.content = "U.N.";
    } else if ("33-year".equals(this.content)) {
        this.content = "33-year-old";
    }
    // Re-parse just the mention text and align its tokens against the
    // sentence tokens with a sliding window starting at i.
    Annotation document = ParseSentence(this.content);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    CoreMap cm = sentences.get(0);
    int pathlength = -1, imin = 1000;
    for (int i = 0; i < senCM.get(TokensAnnotation.class).size(); i++) {
        // NOTE(review): 'debug' is never read; also 'i + j' below can exceed
        // the sentence token list near its end — looks like a latent
        // IndexOutOfBoundsException; confirm with real inputs.
        IndexedWord debug = new IndexedWord(senCM.get(TokensAnnotation.class).get(i));
        boolean canmatch = true;
        for (int j = 0; j < cm.get(TokensAnnotation.class).size(); j++) {
            IndexedWord iw = new IndexedWord(senCM.get(TokensAnnotation.class).get(i + j));
            IndexedWord shortiw = new IndexedWord(cm.get(TokensAnnotation.class).get(j));
            // Tokens either match exactly or must overlap per the fuzzy
            // overlap score; otherwise the window fails.
            if (!iw.word().equals(shortiw.word())) {
                if (SLEventExtraction.overlap(iw.word(), shortiw.word()) <= 0
                        || Double.isNaN(SLEventExtraction.overlap(iw.word(), shortiw.word()))) {
                    canmatch = false;
                    break;
                }
            }
        }
        if (canmatch) {
            // Window matched: collect the aligned sentence tokens and pick
            // the one with the shortest path to the dependency root as head.
            for (int j = 0; j < cm.get(TokensAnnotation.class).size(); j++) {
                IndexedWord iw = new IndexedWord(senCM.get(TokensAnnotation.class).get(i + j));
                this.ContentIws.add(iw);
                try {
                    pathlength = totaldep.getPathToRoot(iw).size();
                } catch (java.lang.IllegalArgumentException err) {
                    // Token not in the graph: treat as very far from the root.
                    pathlength = 100;
                }
                if (imin > pathlength) {
                    imin = pathlength;
                    this.head = iw;
                }
            }
            break;
        }
    }
    if (this.head == null) {
        return;
    }
    // Record the head's governor and the (possibly specific) relation label.
    this.predicate = totaldep.getParent(this.head);
    if (this.predicate == null) {
        this.predicate = this.head;
    } else {
        IndexedWord curr = head;
        dep = totaldep.getEdge(predicate, curr).getRelation().getShortName();
        if (totaldep.getEdge(predicate, curr).getRelation().getSpecific() != null) {
            dep += "_" + totaldep.getEdge(predicate, curr).getRelation().getSpecific();
        }
    }
}
From source file:sleventextraction.SLEventTypeClassifier.java
public static IndexedWord GetCorrespondingIndexedWord(String anchor, CoreMap cm) { anchor = anchor.replaceAll("[.|,|\n|\"]", " "); String[] split = anchor.split("[ |']"); IndexedWord iw = null; for (int i = split.length - 1; i >= 0; i--) { for (CoreLabel token : cm.get(TokensAnnotation.class)) { iw = new IndexedWord(token); if (split[i].contains(iw.word()) || iw.word().contains(split[i])) { if (Math.abs(split[i].length() - iw.word().length()) <= 3) { return iw; } else if (iw.word().contains("-")) { String[] split1 = iw.word().split("-"); boolean match = false; for (int j = 0; j < split1.length; j++) { if (split1[j].equals(split[i])) { match = true; }//from w w w.j a v a2 s. c o m } if (match) { return iw; } } } if (split[i].contains(iw.lemma())) { if (Math.abs(split[i].length() - iw.lemma().length()) <= 2) { return iw; } } } } return null; }
From source file:sleventextraction.SLEventTypeClassifier.java
/**
 * Builds a feature vector for an event-trigger candidate: the word2vec
 * embedding of the trigger (or of the raw anchor when no trigger is found),
 * concatenated with the average head-word embedding of the sentence's
 * entities.
 *
 * @param anchor   candidate trigger text
 * @param document parsed sentence containing the anchor
 * @return the concatenated feature vector, or null when no usable embedding
 *         exists for the trigger/anchor or no entity has a head-word vector
 */
public static LinkedList<Double> GetFeatures(String anchor, Annotation document) {
    LinkedList<Double> res = new LinkedList<>();
    LinkedList<SLEntity> entities = new LinkedList<>();
    IndexedWord trigger = ParseSentenceAndGetEntities(anchor, entities, document, pipeline);
    // Embedding lookup fallback chain: raw anchor (when no trigger token was
    // found) -> trigger surface form -> trigger lemma -> give up.
    if (trigger == null) {
        if (word2vec.containsKey(anchor.toLowerCase())) {
            res.addAll(word2vec.get(anchor.toLowerCase()));
        } else {
            return null;
        }
    } else if (word2vec.containsKey(trigger.word().toLowerCase())) {
        res.addAll(word2vec.get(trigger.word().toLowerCase()));
    } else if (word2vec.containsKey(trigger.lemma())) {
        res.addAll(word2vec.get(trigger.lemma()));
    } else {
        return null;
    }
    // Average the head-word vectors of all entities that have one.
    Lookup_All_wordvec(entities);
    LinkedList<Double> EntityHeadVector = SLMath.Vector_0(SLEventExtraction.dim);
    int valid_entity_num = 0;
    for (SLEntity ent : entities) {
        if (!ent.headwordvec.isEmpty()) {
            valid_entity_num++;
            EntityHeadVector = SLMath.Vector_add(EntityHeadVector, ent.headwordvec);
        }
    }
    if (valid_entity_num == 0) {
        return null;
    }
    EntityHeadVector = SLMath.Vector_divide_num(EntityHeadVector, valid_entity_num);
    res.addAll(EntityHeadVector);
    return res;
}
From source file:sleventextraction.SLEventTypeClassifier.java
private LinkedList<String> GetCandidateTriggers(Annotation parsedsen) { LinkedList<String> res = new LinkedList<>(); List<CoreMap> sentences = parsedsen.get(CoreAnnotations.SentencesAnnotation.class); assert sentences.size() == 1; CoreMap cm = sentences.get(0);// w ww. j a va2 s. c o m for (CoreLabel token : cm.get(TokensAnnotation.class)) { IndexedWord iw = new IndexedWord(token); if (iw.tag().contains("NN") || iw.tag().contains("VB")) { res.add(iw.word()); } } return res; }