List of usage examples for edu.stanford.nlp.dcoref CorefChain getMentionsInTextualOrder
public List<CorefMention> getMentionsInTextualOrder()
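Before the per-project examples below, here is a minimal, self-contained sketch of the typical call pattern: run a pipeline that includes the dcoref annotator, read the CorefChainAnnotation map off the annotated document, and iterate each chain's mentions in textual order. The class name, pipeline properties, example sentence, and printed fields are illustrative assumptions, not taken from any example below.

import java.util.Map;
import java.util.Properties;
import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefChain.CorefMention;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class GetMentionsExample {
    public static void main(String[] args) {
        // dcoref needs the full preprocessing chain up to parsing
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation("Alice went home because she was tired.");
        pipeline.annotate(document);

        Map<Integer, CorefChain> chains = document.get(CorefChainAnnotation.class);
        for (CorefChain chain : chains.values()) {
            // Mentions come back in document order
            for (CorefMention mention : chain.getMentionsInTextualOrder()) {
                System.out.println(mention.sentNum + ": " + mention.mentionSpan);
            }
        }
    }
}

As several of the examples below note, CorefMention sentence and token offsets are 1-based.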
From source file:Anaphora_Resolution.AnaphoraDetection.java
public void anophora() {
    String text = "Tom is a smart boy. He know a lot of thing.";
    Annotation document = new Annotation(text);
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit,parse, lemma, ner, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);
    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    for (Integer i : graph.keySet()) {
        System.out.println("GROUP " + i);
        CorefChain x = graph.get(i);
        for (CorefMention m : x.getMentionsInTextualOrder()) {
            System.out.println(m.mentionSpan);
        }
    }
}
From source file:be.fivebyfive.lingua.stanfordcorenlp.Pipeline.java
License:Open Source License
public PipelineSentenceList process(String text) {
    if (pipeline == null) {
        initPipeline();
    }
    PipelineSentenceList outList = new PipelineSentenceList();
    Annotation document = new Annotation(text);
    if (document == null) {
        return null;
    }
    pipeline.annotate(document);

    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        String str = sentence.get(TextAnnotation.class);
        PipelineTokenList ptl = new PipelineTokenList();
        PipelineDependencyList pel = new PipelineDependencyList();

        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            String ner = token.get(NamedEntityTagAnnotation.class);
            String lemma = token.get(LemmaAnnotation.class);
            ptl.add(new PipelineToken(word, pos, ner, lemma));
        }

        SemanticGraph dependencies = sentence.get(depMode.equals(DEP_BASIC) ? BasicDependenciesAnnotation.class
                : depMode.equals(DEP_COLLAPSED) ? CollapsedDependenciesAnnotation.class
                        : CollapsedCCProcessedDependenciesAnnotation.class);

        if (dependencies != null) {
            for (SemanticGraphEdge edge : dependencies.edgeListSorted()) {
                GrammaticalRelation rel = edge.getRelation();
                int govTokenIndex = edge.getGovernor().index() - 1;
                int depTokenIndex = edge.getDependent().index() - 1;

                if (govTokenIndex >= 0 && depTokenIndex >= 0 && govTokenIndex < ptl.size()
                        && depTokenIndex < ptl.size()) {
                    pel.add(new PipelineDependency(ptl.get(govTokenIndex), ptl.get(depTokenIndex),
                            govTokenIndex, depTokenIndex, rel));
                } else {
                    System.err.println("Index of " + edge.toString() + " out of range!");
                }
            }
        }
        outList.add(new PipelineSentence(str, ptl, pel));
    } // for -- SentenceAnnotation

    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    if (graph != null) {
        for (CorefChain crc : graph.values()) {
            List<CorefMention> crms = crc.getMentionsInTextualOrder();
            CorefMention rm = crc.getRepresentativeMention();
            if (rm != null) {
                PipelineCorefChain crChain = new PipelineCorefChain();
                PipelineCorefMention repRef = PipelineCorefMention.fromMention(rm);
                repRef.setTokens(outList.get(repRef.getSentNum()).getTokens().slice(repRef.getStartIndex(),
                        repRef.getEndIndex()));
                repRef.setHeadToken(outList.get(repRef.getSentNum()).getTokens().get(repRef.getHeadIndex()));
                crChain.setRepresentativeMention(repRef);
                if (crms.size() > 0) {
                    for (CorefMention cm : crms) {
                        PipelineCorefMention cr = PipelineCorefMention.fromMention(cm);
                        cr.setTokens(outList.get(cr.getSentNum()).getTokens().slice(cr.getStartIndex(),
                                cr.getEndIndex()));
                        crChain.addMention(cr);
                    }
                }
                outList.get(repRef.getSentNum()).addCorefChain(crChain);
            } // if (rm
        } // for
    } // if (graph
    return outList;
}
From source file:candidateGeneration.remove_missingContext.java
public static void main(String[] args) throws FileNotFoundException, IOException {
    InputStream is = new FileInputStream(sentence_detect_model);
    SentenceModel model = new SentenceModel(is);
    SentenceDetectorME sdetector = new SentenceDetectorME(model);

    Properties props = new Properties();
    props.put("annotators", "tokenize,ssplit,pos,lemma,ner,parse,dcoref");
    StanfordCoreNLP pi = new StanfordCoreNLP(props);

    File writeFile = new File(
            "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\good_sentences_new.txt");
    writeFile.createNewFile();
    FileWriter writer = new FileWriter(writeFile);

    File writeFile2 = new File(
            "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\bad_sentences_new.txt");
    writeFile2.createNewFile();
    FileWriter writer2 = new FileWriter(writeFile2);

    String folderPath = "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\movieTest\\indivFiles\\";
    File[] files = new File(folderPath).listFiles();
    for (File file : files) {
        if (file.isFile()) {
            String name = file.getName();
            name = name.replace("_", " ");
            name = name.replace("%28", "(");
            name = name.replace("%29", ")");
            name = name.replace(".txt", "");
            System.out.println("File: " + name);

            FileReader inputFile = new FileReader(folderPath + file.getName());
            BufferedReader bufferReader = new BufferedReader(inputFile);
            String input;
            while ((input = bufferReader.readLine()) != null) {
                //System.out.println("Line: " + input);
                String sentences[] = sdetector.sentDetect(input);
                HashMap<Integer, Integer> toRemove = new HashMap<>();

                Annotation doc = new Annotation(input);
                pi.annotate(doc);
                Map<Integer, CorefChain> graph = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);
                for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
                    CorefChain c = entry.getValue();
                    if (c.getMentionsInTextualOrder().size() <= 1) {
                        continue;
                    }
                    //System.out.println("Mentions: " + c.toString());
                    String[] sentenceOccurence = c.toString().split(" ");
                    int firstOccurence = -1;
                    for (int i = 0; i < sentenceOccurence.length; i++) {
                        if (firstOccurence == -1 && sentenceOccurence[i].equals("sentence")) {
                            //System.out.println("first occurence : " + sentenceOccurence[i+1]);
                            firstOccurence = Integer
                                    .parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]", ""));
                            continue;
                        }
                        if (sentenceOccurence[i].equals("sentence")) {
                            //System.out.println("further occurence : " + sentenceOccurence[i+1]);
                            if (Integer.parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]", "")) != firstOccurence) {
                                //System.out.println("Added " + sentenceOccurence[i+1].replace(",", "").replace("]", "") + " for removal");
                                toRemove.put(Integer.parseInt(
                                        sentenceOccurence[i + 1].replace(",", "").replace("]", "")), 1);
                            }
                        }
                    }
                    //System.out.println(c.toString());
                }

                int cand_i = 1;
                for (String candidate_sentence : sentences) {
                    if (toRemove.containsKey(cand_i)) {
                        //System.out.println("REMOVING: " + candidate_sentence + "\n");
                        writer2.write(name + "\t" + candidate_sentence + "\n");
                        continue;
                    }
                    //System.out.println("TAKING: " + candidate_sentence + "\n");
                    writer.write(name + "\t" + candidate_sentence + "\n");
                    cand_i++;
                }
                //System.in.read();
            }
            //System.out.println("Line done");
            bufferReader.close();
            //System.in.read();
        }
        writer.flush();
        writer2.flush();
    }
    writer.close();
    writer2.close();
}
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java
License:Open Source License
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();
    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
        // it with PRN to avoid NPEs.
        TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
            @Override
            public Tree newTreeNode(String aParent, List<Tree> aChildren) {
                String parent = aParent;
                if ("PRN0".equals(parent)) {
                    parent = "PRN";
                }
                Tree node = super.newTreeNode(parent, aChildren);
                return node;
            }
        };

        // deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
        // We currently do not copy over dependencies from the CAS. This is supposed to fill
        // in the dependencies so we do not get NPEs.
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(),
                tlp.typedDependencyHeadFinder());
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy,
                GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // merge the new CoreLabels with the tree leaves
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // extract all possible mentions
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.startIndex - 1);
            CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.endIndex - 2);
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());

            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }

            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;

            link.addToIndexes();
        }
    }
}
From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java
License:Open Source License
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(),
                    NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }
}
From source file:edu.cmu.deiis.annotators.StanfordAnnotator.java
License:Open Source License
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                //String line = mention.getCoveredText();
                //System.out.println(line);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(),
                    NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        //NamedEntityMention mention=entity.getMentions(3);
        //System.out.println(mention.getBegin());
        entity.addToIndexes();
    }
}
From source file:edu.jhu.hlt.concrete.stanford.CorefManager.java
License:Open Source License
private Entity makeEntity(CorefChain chain, EntityMentionSet ems, List<Tokenization> tokenizations)
        throws AnalyticException {
    Entity concEntity = new Entity().setUuid(this.gen.next());
    CorefChain.CorefMention coreHeadMention = chain.getRepresentativeMention();
    // CoreNLP uses 1-based indexing for the sentences; just subtract 1.
    Tokenization tkz = tokenizations.get(coreHeadMention.sentNum - 1);
    UUID tkzUuid = tkz.getUuid();
    LOGGER.debug("Creating EntityMention based on tokenization: {}", tkzUuid.getUuidString());
    EntityMention concHeadMention = makeEntityMention(coreHeadMention, tkzUuid, true);
    TokenRefSequence trs = concHeadMention.getTokens();

    // TODO: below throws if they're invalid. maybe this can be removed in the future.
    this.validateTokenRefSeqValidity(trs, tkz);

    concEntity.setCanonicalName(coreHeadMention.mentionSpan);
    concEntity.addToMentionIdList(concHeadMention.getUuid());
    ems.addToMentionList(concHeadMention);
    for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
        if (mention == coreHeadMention)
            continue;
        // CoreNLP uses 1-based indexing for the sentences; we'll just subtract one.
        Tokenization localTkz = tokenizations.get(mention.sentNum - 1);
        EntityMention concMention = this.makeEntityMention(mention, localTkz.getUuid(), false);
        TokenRefSequence localTrs = concMention.getTokens();
        this.validateTokenRefSeqValidity(localTrs, localTkz);
        ems.addToMentionList(concMention);
        concEntity.addToMentionIdList(concMention.getUuid());
    }
    return concEntity;
}
From source file:edu.tuberlin.dima.textmining.jedi.core.features.detector.StanfordUIMAAnnotator.java
License:Open Source License
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    CAS cas = jCas.getCas();

    posMappingProvider.configure(cas);
    nerMappingProvider.configure(cas);
    modelProvider.configure(cas);

    Annotation document = modelProvider.getResource().process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);

        Type posTag = posMappingProvider.getTagType(pos);
        POS posAnno = (POS) cas.createAnnotation(posTag, begin, end);
        posAnno.setStringValue(posTag.getFeatureByBaseName("PosValue"), pos.intern());
        posAnno.addToIndexes();
        token.setPos(posAnno);

        Lemma dkproLemma = new Lemma(jCas, begin, end);
        dkproLemma.setValue(lemma);
        dkproLemma.addToIndexes();
        token.setLemma(dkproLemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag == null)
            continue;
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            Type type = nerMappingProvider.getTagType(lastNETag);
            NamedEntity neAnno = (NamedEntity) cas.createAnnotation(type, lastNEBegin, lastNEEnd);
            neAnno.setValue(lastNETag);
            neAnno.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                Type type = nerMappingProvider.getTagType(lastNETag);
                NamedEntity neAnno = (NamedEntity) cas.createAnnotation(type, lastNEBegin, lastNEEnd);
                neAnno.setValue(lastNETag);
                neAnno.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        Type type = nerMappingProvider.getTagType(lastNETag);
        NamedEntity neAnno = (NamedEntity) cas.createAnnotation(type, lastNEBegin, lastNEEnd);
        neAnno.setValue(lastNETag);
        neAnno.addToIndexes();
    }

    // add sentences and trees
    List<CoreMap> sentenceAnnotations = document.get(SentencesAnnotation.class);
    for (CoreMap sentenceAnn : sentenceAnnotations) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();
    }

    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    if (corefChains != null) {
        for (CorefChain chain : corefChains.values()) {
            CoreferenceLink last = null;
            for (CorefMention mention : chain.getMentionsInTextualOrder()) {
                CoreLabel beginLabel = sentenceAnnotations.get(mention.sentNum - 1).get(TokensAnnotation.class)
                        .get(mention.startIndex - 1);
                CoreLabel endLabel = sentenceAnnotations.get(mention.sentNum - 1).get(TokensAnnotation.class)
                        .get(mention.endIndex - 2);
                CoreferenceLink link = new CoreferenceLink(jCas,
                        beginLabel.get(CharacterOffsetBeginAnnotation.class),
                        endLabel.get(CharacterOffsetEndAnnotation.class));

                if (mention.mentionType != null) {
                    link.setReferenceType(mention.mentionType.toString());
                }

                if (last == null) {
                    // This is the first mention. Here we'll initialize the chain
                    CoreferenceChain corefChain = new CoreferenceChain(jCas);
                    corefChain.setFirst(link);
                    corefChain.addToIndexes();
                } else {
                    // For the other mentions, we'll add them to the chain.
                    last.setNext(link);
                }
                last = link;

                link.addToIndexes();
            }
        }
    }
}
From source file:nlp.pipeline.SentenceUtil.java
License:Open Source License
/** ***************************************************************
 * Print the coreference link graph.
 * Each chain stores a set of mentions that link to each other,
 * along with a method for getting the most representative mention.
 * Both sentence and token offsets start at 1!
 */
public static void printCorefChain(Annotation document) {
    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    if (graph == null)
        return;
    for (CorefChain cc : graph.values()) {
        List<CorefChain.CorefMention> mentions = cc.getMentionsInTextualOrder();
        if (mentions.size() > 1) {
            for (CorefChain.CorefMention ment : mentions) {
                System.out.println(ment.sentNum + " : " + ment.headIndex + " : " + ment.mentionSpan);
            }
            System.out.println();
        }
    }
}
From source file:org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java
License:Open Source License
@SuppressWarnings("unused")
private void newStyleCoreferenceGraphOutput(Annotation annotation) {
    // display the new-style coreference graph
    //List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    if (corefChains != null) {
        for (CorefChain chain : corefChains.values()) {
            CorefChain.CorefMention representative = chain.getRepresentativeMention();
            for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
                System.out.println(mention);
                if (mention == representative)
                    continue;
                // all offsets start at 1!
                System.out.println("\t" + mention.mentionID + ": (Mention from sentence " + mention.sentNum
                        + ", " + "Head word = " + mention.headIndex + ", (" + mention.startIndex + ","
                        + mention.endIndex + ")" + ")" + " -> " + "(Representative from sentence "
                        + representative.sentNum + ", " + "Head word = " + representative.headIndex + ", ("
                        + representative.startIndex + "," + representative.endIndex + ")" + "), that is: \""
                        + mention.mentionSpan + "\" -> \"" + representative.mentionSpan + "\"");
            }
        }
    }
}