List of usage examples for edu.stanford.nlp.dcoref CorefChain getRepresentativeMention
public CorefMention getRepresentativeMention()
From source file:be.fivebyfive.lingua.stanfordcorenlp.Pipeline.java
License:Open Source License
public PipelineSentenceList process(String text) { if (pipeline == null) { initPipeline();// w w w. j ava2s. com } PipelineSentenceList outList = new PipelineSentenceList(); Annotation document = new Annotation(text); if (document == null) { return null; } pipeline.annotate(document); for (CoreMap sentence : document.get(SentencesAnnotation.class)) { String str = sentence.get(TextAnnotation.class); PipelineTokenList ptl = new PipelineTokenList(); PipelineDependencyList pel = new PipelineDependencyList(); for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class); String pos = token.get(PartOfSpeechAnnotation.class); String ner = token.get(NamedEntityTagAnnotation.class); String lemma = token.get(LemmaAnnotation.class); ptl.add(new PipelineToken(word, pos, ner, lemma)); } SemanticGraph dependencies = sentence.get(depMode.equals(DEP_BASIC) ? BasicDependenciesAnnotation.class : depMode.equals(DEP_COLLAPSED) ? CollapsedDependenciesAnnotation.class : CollapsedCCProcessedDependenciesAnnotation.class); if (dependencies != null) { for (SemanticGraphEdge edge : dependencies.edgeListSorted()) { GrammaticalRelation rel = edge.getRelation(); int govTokenIndex = edge.getGovernor().index() - 1; int depTokenIndex = edge.getDependent().index() - 1; if (govTokenIndex >= 0 && depTokenIndex >= 0 && govTokenIndex < ptl.size() && depTokenIndex < ptl.size()) { pel.add(new PipelineDependency(ptl.get(govTokenIndex), ptl.get(depTokenIndex), govTokenIndex, depTokenIndex, rel)); } else { System.err.println("Index of " + edge.toString() + " out of range!"); } } } outList.add(new PipelineSentence(str, ptl, pel)); } //for -- SentenceAnnotation Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class); if (graph != null) { for (CorefChain crc : graph.values()) { List<CorefMention> crms = crc.getMentionsInTextualOrder(); CorefMention rm = crc.getRepresentativeMention(); if (rm != null) { PipelineCorefChain crChain = new PipelineCorefChain(); PipelineCorefMention repRef = PipelineCorefMention.fromMention(rm); repRef.setTokens(outList.get(repRef.getSentNum()).getTokens().slice(repRef.getStartIndex(), repRef.getEndIndex())); repRef.setHeadToken(outList.get(repRef.getSentNum()).getTokens().get(repRef.getHeadIndex())); crChain.setRepresentativeMention(repRef); if (crms.size() > 0) { for (CorefMention cm : crms) { PipelineCorefMention cr = PipelineCorefMention.fromMention(cm); cr.setTokens(outList.get(cr.getSentNum()).getTokens().slice(cr.getStartIndex(), cr.getEndIndex())); crChain.addMention(cr); } } outList.get(repRef.getSentNum()).addCorefChain(crChain); } //if(rm } //for } //if(graph return outList; }
From source file:com.project.NLP.Requirement.AnaphoraAnalyzer.java
public String doPronounResolving() { for (int i = 1; i <= graph.size(); i++) { CorefChain cc = graph.get(i); if (cc != null) { //System.out.println("-----"+cc.toString()); //System.out.println("---TextualOrder--"+cc.getMentionsInTextualOrder()); Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = cc.getMentionMap(); //System.out.println("--MentionMap-----"+mentionMap); int mentionSize = mentionMap.size(); Set intPairSet = mentionMap.keySet(); // System.out.println("-----"+cc.getMentionsWithSameHead(1,i)); //System.out.println("---RepresentativeMention-----"+cc.getRepresentativeMention()); String mentionSpan = cc.getRepresentativeMention().mentionSpan; //System.out.println("----get the mentionspan---"+mentionSpan); String animacy = cc.getRepresentativeMention().animacy.toString(); //System.out.println("----get the animacy---"+animacy); if (animacy.equalsIgnoreCase("ANIMATE") && mentionSize > 1) { Iterator it = intPairSet.iterator(); while (it.hasNext()) { IntPair ip = (IntPair) it.next(); Set coref = mentionMap.get(ip); Iterator itC = coref.iterator(); while (itC.hasNext()) { CorefChain.CorefMention cm = (CorefMention) itC.next(); String mentionPronoun = cm.mentionSpan; //mentionPronoun.replace(mentionPronoun,mentionSpan) //System.out.println("---Sentences ------- :"+sentencesFromDoc); //System.out.println("---Words ------- :"+wordsFromDoc); //for(String[] str:wordsFromDoc){ // System.out.println("---Words from array ------- :"+str[0] + " "+str[1]); //} //System.out.println("--- cm.mentionSpan--- "+mentionPronoun+ " int pair : "+ip); int sentenceIndex = ip.getSource() - 1; int wordIndex = ip.getTarget() - 1; try { String docWord = wordsFromDoc.get(sentenceIndex)[wordIndex]; //System.out.println("From arraylist : "+docWord); if (mentionPronoun.equalsIgnoreCase(docWord)) { wordsFromDoc.get(sentenceIndex)[wordIndex] = mentionSpan; }/* w w w . java 2s . c o m*/ } catch (ArrayIndexOutOfBoundsException e) { //System.err.println("----- AnaphoraAnalyzer ------- : "+e.getMessage()); } } } } } } return getPronounResolvedDocument(); }
From source file:edu.jhu.hlt.concrete.stanford.CorefManager.java
License:Open Source License
private Entity makeEntity(CorefChain chain, EntityMentionSet ems, List<Tokenization> tokenizations) throws AnalyticException { Entity concEntity = new Entity().setUuid(this.gen.next()); CorefChain.CorefMention coreHeadMention = chain.getRepresentativeMention(); // CoreNLP uses 1-based indexing for the sentences // just subtract 1. Tokenization tkz = tokenizations.get(coreHeadMention.sentNum - 1); UUID tkzUuid = tkz.getUuid(); LOGGER.debug("Creating EntityMention based on tokenization: {}", tkzUuid.getUuidString()); EntityMention concHeadMention = makeEntityMention(coreHeadMention, tkzUuid, true); TokenRefSequence trs = concHeadMention.getTokens(); // TODO: below throws if they're invalid. maybe this can be removed in the future. this.validateTokenRefSeqValidity(trs, tkz); concEntity.setCanonicalName(coreHeadMention.mentionSpan); concEntity.addToMentionIdList(concHeadMention.getUuid()); ems.addToMentionList(concHeadMention); for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) { if (mention == coreHeadMention) continue; // CoreNLP uses 1-based indexing for the sentences // we'll just subtract one. Tokenization localTkz = tokenizations.get(mention.sentNum - 1); EntityMention concMention = this.makeEntityMention(mention, localTkz.getUuid(), false); TokenRefSequence localTrs = concMention.getTokens(); this.validateTokenRefSeqValidity(localTrs, localTkz); ems.addToMentionList(concMention); concEntity.addToMentionIdList(concMention.getUuid()); }//w w w. j ava 2 s . c o m return concEntity; }
From source file:it.uniud.ailab.dcore.wrappers.external.StanfordBootstrapperAnnotator.java
License:Open Source License
/** * Annotate the document by splitting the document, tokenizing it, * performing PoS tagging and Named Entity Recognition using the Stanford * Core NLP tools./* www . j a v a 2 s . c o m*/ * * @param component the component to annotate. */ @Override public void annotate(Blackboard blackboard, DocumentComponent component) { if (pipeline == null) { // creates a StanfordCoreNLP object, with POS tagging, lemmatization, //NER, parsing, and coreference resolution Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, parse, lemma, ner, dcoref"); pipeline = new StanfordCoreNLP(props); } // read some text in the text variable String text = component.getText(); // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); //get the graph for coreference resolution Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class); //prepare the map for coreference graph of document Map<String, Collection<Set<CorefChain.CorefMention>>> coreferenceGraph = new HashMap<>(); for (CorefChain corefChain : graph.values()) { //get the representative mention, that is the word recall in other sentences CorefChain.CorefMention cm = corefChain.getRepresentativeMention(); //eliminate auto-references if (corefChain.getMentionMap().size() <= 1) { continue; } //get the stemmed form of the references, so the comparison with //grams will be easier List<CoreLabel> tks = document.get(SentencesAnnotation.class).get(cm.sentNum - 1) .get(TokensAnnotation.class); //list of tokens which compose the anaphor List<Token> anaphorsTokens = new ArrayList<>(); for (int i = cm.startIndex - 1; i < cm.endIndex - 1; i++) { CoreLabel current = tks.get(i); Token t = new Token(current.word()); t.setPoS(current.tag()); t.setLemma(current.lemma()); anaphorsTokens.add(t); } //the mention n-gram which is formed by the anaphor and a //list of references Mention mention = new Mention(cm.mentionSpan, anaphorsTokens, cm.mentionSpan); //get map of the references to the corefchain obj Collection<Set<CorefChain.CorefMention>> mentionMap = corefChain.getMentionMap().values(); for (Set<CorefChain.CorefMention> mentions : mentionMap) { for (CorefChain.CorefMention reference : mentions) { //eliminate self-references if (reference.mentionSpan.equalsIgnoreCase(cm.mentionSpan)) { continue; } List<CoreLabel> tokens = document.get(SentencesAnnotation.class).get(reference.sentNum - 1) .get(TokensAnnotation.class); //list of tokens which compose the mention List<Token> mentionTokens = new ArrayList<>(); for (int i = reference.startIndex - 1; i < reference.endIndex - 1; i++) { CoreLabel current = tokens.get(i); //set token features Token t = new Token(current.word()); t.setPoS(current.tag()); t.setLemma(current.lemma()); mentionTokens.add(t); } //add to mention a new reference mention.addReference(reference.mentionSpan, mentionTokens, reference.mentionType.toString()); } } //assign to the document a new corenference obj //containing the anaphor and its mentions blackboard.addGram(mention); } // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and //has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); //A counter that keeps track of the number of phrases in a sentences int phraseCounter = 0; for (CoreMap stanfordSentence : sentences) { Sentence distilledSentence = new Sentence(stanfordSentence.toString(), "" + sentenceCounter++); distilledSentence.setLanguage(Locale.ENGLISH); //getting the dependency graph of the document so to count the number of phrases //ROOT sentences are the first level children in the parse tree; every ROOT sentence //is constitute by a group of clauses which can be the principal (main clauses) or not //(coordinate and subordinate). We use ROOT sentences as a starting point to find out all //the phrases present in the sentences themselves, checking out for the tag "S". Tree sentenceTree = stanfordSentence.get(TreeCoreAnnotations.TreeAnnotation.class); for (Tree sub : sentenceTree.subTreeList()) { if (sub.label().value().equals("S")) { phraseCounter++; } } //annotate the sentence with a new feature counting all the phrases //cointained in the sentence distilledSentence.addAnnotation(new FeatureAnnotation(DefaultAnnotations.PHRASES_COUNT, phraseCounter)); // traversing the words in the current sentence // for each token in the text, we create a new token annotate it // with the word representing it, its pos tag and its lemma for (CoreLabel token : stanfordSentence.get(TokensAnnotation.class)) { // this is the text of the token Token t = new Token(token.originalText()); // this is the POS tag of the token t.setPoS(token.tag()); // this is the lemma of the ttoken t.setLemma(token.lemma()); String ner = token.get(NamedEntityTagAnnotation.class); if (!ner.equalsIgnoreCase("O")) { t.addAnnotation(new NERAnnotation(DefaultAnnotations.IS_NER, ner)); } //add the token to the sentence distilledSentence.addToken(t); } //add the sentence to document ((DocumentComposite) component).addComponent(distilledSentence); } }
From source file:org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java
License:Open Source License
@SuppressWarnings("unused") private void newStyleCoreferenceGraphOutput(Annotation annotation) { // display the new-style coreference graph //List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class); if (corefChains != null) { for (CorefChain chain : corefChains.values()) { CorefChain.CorefMention representative = chain.getRepresentativeMention(); for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) { System.out.println(mention); if (mention == representative) continue; // all offsets start at 1! System.out.println("\t" + mention.mentionID + ": (Mention from sentence " + mention.sentNum + ", " + "Head word = " + mention.headIndex + ", (" + mention.startIndex + "," + mention.endIndex + ")" + ")" + " -> " + "(Representative from sentence " + representative.sentNum + ", " + "Head word = " + representative.headIndex + ", (" + representative.startIndex + "," + representative.endIndex + ")" + "), that is: \"" + mention.mentionSpan + "\" -> \"" + representative.mentionSpan + "\""); }/*from w w w . j a v a 2 s .c o m*/ } } }
From source file:org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java
License:Open Source License
private List<String> MUCStyleOutput(Annotation annotation) { Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class); Map<Integer, Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>> mentionMap = new HashMap<Integer, Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>>(); List<String> mucOutput = new ArrayList<String>(); for (CorefChain chain : corefChains.values()) { CorefChain.CorefMention ref = chain.getRepresentativeMention(); for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) { if (mention != ref) { // first add the mention itself Pair<CorefChain.CorefMention, CorefChain.CorefMention> mentions = new Pair<CorefChain.CorefMention, CorefChain.CorefMention>( mention, ref);/*from www .j a v a2 s .c o m*/ if (mentionMap.containsKey(mention.sentNum)) { Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>> value = mentionMap .get(mention.sentNum); value.put(mention.startIndex, mentions); mentionMap.put(mention.sentNum, value); } else { Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>> startIndexToMentionMap = new HashMap<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>(); startIndexToMentionMap.put(mention.startIndex, mentions); mentionMap.put(mention.sentNum, startIndexToMentionMap); } // now make sure the representative is there (TODO make this code less redundant) Pair<CorefChain.CorefMention, CorefChain.CorefMention> refMention = new Pair<CorefChain.CorefMention, CorefChain.CorefMention>( ref, ref); if (mentionMap.containsKey(ref.sentNum)) { Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>> value = mentionMap .get(ref.sentNum); value.put(ref.startIndex, refMention); mentionMap.put(ref.sentNum, value); } else { Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>> startIndexToMentionMap = new HashMap<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>(); startIndexToMentionMap.put(ref.startIndex, refMention); mentionMap.put(ref.sentNum, startIndexToMentionMap); } } } } List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); for (Integer sentenceNum : mentionMap.keySet()) { CoreMap currentSentence = sentences.get(sentenceNum - 1); Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>> currentSetOfMentions = mentionMap .get(sentenceNum); CorefChain.CorefMention lastMention = null; String outputString = ""; for (CoreLabel token : currentSentence.get(CoreAnnotations.TokensAnnotation.class)) { if (currentSetOfMentions.containsKey(token.index())) { lastMention = currentSetOfMentions.get(token.index()).first(); CorefChain.CorefMention ref = currentSetOfMentions.get(token.index()).second(); outputString += "<COREF ID=\"" + lastMention.mentionID + "\""; if (lastMention.mentionID != ref.mentionID) { outputString += " REF=\"" + ref.mentionID + "\""; } outputString += ">"; } if (lastMention != null && token.index() == lastMention.endIndex) { outputString += "</COREF> "; } outputString += token.word() + " "; } mucOutput.add(CoreNLPThriftUtil.closeHTMLTags(outputString.replaceAll(" </", "</"))); } return mucOutput; }
From source file:parsers.CoreNLPSuite.java
public static String resolveCoreferences(String input) { String ans = ""; Annotation document = new Annotation(input); Parsers.pipeline.annotate(document); Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class); //http://stackoverflow.com/questions/6572207/stanford-core-nlp-understanding-coreference-resolution for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) { CorefChain c = entry.getValue(); //this is because it prints out a lot of self references which aren't that useful if (c.getMentionsInTextualOrder().size() <= 1) continue; CorefMention cm = c.getRepresentativeMention(); String clust = ""; List<CoreLabel> tks = document.get(SentencesAnnotation.class).get(cm.sentNum - 1) .get(TokensAnnotation.class); for (int i = cm.startIndex - 1; i < cm.endIndex - 1; i++) clust += tks.get(i).get(TextAnnotation.class) + " "; clust = clust.trim();//from w ww. j av a 2 s . c om ans = ans + "Representative mention: \"" + clust + "\" is mentioned by:"; for (CorefMention m : c.getMentionsInTextualOrder()) { String clust2 = ""; tks = document.get(SentencesAnnotation.class).get(m.sentNum - 1).get(TokensAnnotation.class); for (int i = m.startIndex - 1; i < m.endIndex - 1; i++) clust2 += tks.get(i).get(TextAnnotation.class) + " "; clust2 = clust2.trim(); //don't need the self mention if (clust.equals(clust2)) continue; ans = ans + "\t" + clust2 + "\n"; } } return ans; }