List of usage examples for edu.stanford.nlp.dcoref CorefChain getMentionMap
public Map<IntPair, Set<CorefMention>> getMentionMap()
From source file: com.project.NLP.Requirement.AnaphoraAnalyzer.java
public String doPronounResolving() { for (int i = 1; i <= graph.size(); i++) { CorefChain cc = graph.get(i); if (cc != null) { //System.out.println("-----"+cc.toString()); //System.out.println("---TextualOrder--"+cc.getMentionsInTextualOrder()); Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = cc.getMentionMap(); //System.out.println("--MentionMap-----"+mentionMap); int mentionSize = mentionMap.size(); Set intPairSet = mentionMap.keySet(); // System.out.println("-----"+cc.getMentionsWithSameHead(1,i)); //System.out.println("---RepresentativeMention-----"+cc.getRepresentativeMention()); String mentionSpan = cc.getRepresentativeMention().mentionSpan; //System.out.println("----get the mentionspan---"+mentionSpan); String animacy = cc.getRepresentativeMention().animacy.toString(); //System.out.println("----get the animacy---"+animacy); if (animacy.equalsIgnoreCase("ANIMATE") && mentionSize > 1) { Iterator it = intPairSet.iterator(); while (it.hasNext()) { IntPair ip = (IntPair) it.next(); Set coref = mentionMap.get(ip); Iterator itC = coref.iterator(); while (itC.hasNext()) { CorefChain.CorefMention cm = (CorefMention) itC.next(); String mentionPronoun = cm.mentionSpan; //mentionPronoun.replace(mentionPronoun,mentionSpan) //System.out.println("---Sentences ------- :"+sentencesFromDoc); //System.out.println("---Words ------- :"+wordsFromDoc); //for(String[] str:wordsFromDoc){ // System.out.println("---Words from array ------- :"+str[0] + " "+str[1]); //} //System.out.println("--- cm.mentionSpan--- "+mentionPronoun+ " int pair : "+ip); int sentenceIndex = ip.getSource() - 1; int wordIndex = ip.getTarget() - 1; try { String docWord = wordsFromDoc.get(sentenceIndex)[wordIndex]; //System.out.println("From arraylist : "+docWord); if (mentionPronoun.equalsIgnoreCase(docWord)) { wordsFromDoc.get(sentenceIndex)[wordIndex] = mentionSpan; }//w w w.j av a 2 s . 
c o m } catch (ArrayIndexOutOfBoundsException e) { //System.err.println("----- AnaphoraAnalyzer ------- : "+e.getMessage()); } } } } } } return getPronounResolvedDocument(); }
From source file: it.uniud.ailab.dcore.wrappers.external.StanfordBootstrapperAnnotator.java
License: Open Source License
/** * Annotate the document by splitting the document, tokenizing it, * performing PoS tagging and Named Entity Recognition using the Stanford * Core NLP tools.//from ww w .j av a 2s . c o m * * @param component the component to annotate. */ @Override public void annotate(Blackboard blackboard, DocumentComponent component) { if (pipeline == null) { // creates a StanfordCoreNLP object, with POS tagging, lemmatization, //NER, parsing, and coreference resolution Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, parse, lemma, ner, dcoref"); pipeline = new StanfordCoreNLP(props); } // read some text in the text variable String text = component.getText(); // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); //get the graph for coreference resolution Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class); //prepare the map for coreference graph of document Map<String, Collection<Set<CorefChain.CorefMention>>> coreferenceGraph = new HashMap<>(); for (CorefChain corefChain : graph.values()) { //get the representative mention, that is the word recall in other sentences CorefChain.CorefMention cm = corefChain.getRepresentativeMention(); //eliminate auto-references if (corefChain.getMentionMap().size() <= 1) { continue; } //get the stemmed form of the references, so the comparison with //grams will be easier List<CoreLabel> tks = document.get(SentencesAnnotation.class).get(cm.sentNum - 1) .get(TokensAnnotation.class); //list of tokens which compose the anaphor List<Token> anaphorsTokens = new ArrayList<>(); for (int i = cm.startIndex - 1; i < cm.endIndex - 1; i++) { CoreLabel current = tks.get(i); Token t = new Token(current.word()); t.setPoS(current.tag()); t.setLemma(current.lemma()); anaphorsTokens.add(t); } //the mention n-gram which is formed by the anaphor and a //list of references 
Mention mention = new Mention(cm.mentionSpan, anaphorsTokens, cm.mentionSpan); //get map of the references to the corefchain obj Collection<Set<CorefChain.CorefMention>> mentionMap = corefChain.getMentionMap().values(); for (Set<CorefChain.CorefMention> mentions : mentionMap) { for (CorefChain.CorefMention reference : mentions) { //eliminate self-references if (reference.mentionSpan.equalsIgnoreCase(cm.mentionSpan)) { continue; } List<CoreLabel> tokens = document.get(SentencesAnnotation.class).get(reference.sentNum - 1) .get(TokensAnnotation.class); //list of tokens which compose the mention List<Token> mentionTokens = new ArrayList<>(); for (int i = reference.startIndex - 1; i < reference.endIndex - 1; i++) { CoreLabel current = tokens.get(i); //set token features Token t = new Token(current.word()); t.setPoS(current.tag()); t.setLemma(current.lemma()); mentionTokens.add(t); } //add to mention a new reference mention.addReference(reference.mentionSpan, mentionTokens, reference.mentionType.toString()); } } //assign to the document a new corenference obj //containing the anaphor and its mentions blackboard.addGram(mention); } // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and //has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); //A counter that keeps track of the number of phrases in a sentences int phraseCounter = 0; for (CoreMap stanfordSentence : sentences) { Sentence distilledSentence = new Sentence(stanfordSentence.toString(), "" + sentenceCounter++); distilledSentence.setLanguage(Locale.ENGLISH); //getting the dependency graph of the document so to count the number of phrases //ROOT sentences are the first level children in the parse tree; every ROOT sentence //is constitute by a group of clauses which can be the principal (main clauses) or not //(coordinate and subordinate). 
We use ROOT sentences as a starting point to find out all //the phrases present in the sentences themselves, checking out for the tag "S". Tree sentenceTree = stanfordSentence.get(TreeCoreAnnotations.TreeAnnotation.class); for (Tree sub : sentenceTree.subTreeList()) { if (sub.label().value().equals("S")) { phraseCounter++; } } //annotate the sentence with a new feature counting all the phrases //cointained in the sentence distilledSentence.addAnnotation(new FeatureAnnotation(DefaultAnnotations.PHRASES_COUNT, phraseCounter)); // traversing the words in the current sentence // for each token in the text, we create a new token annotate it // with the word representing it, its pos tag and its lemma for (CoreLabel token : stanfordSentence.get(TokensAnnotation.class)) { // this is the text of the token Token t = new Token(token.originalText()); // this is the POS tag of the token t.setPoS(token.tag()); // this is the lemma of the ttoken t.setLemma(token.lemma()); String ner = token.get(NamedEntityTagAnnotation.class); if (!ner.equalsIgnoreCase("O")) { t.addAnnotation(new NERAnnotation(DefaultAnnotations.IS_NER, ner)); } //add the token to the sentence distilledSentence.addToken(t); } //add the sentence to document ((DocumentComposite) component).addComponent(distilledSentence); } }