List of usage examples for the containsKey method of edu.stanford.nlp.ling.CoreLabel
@Override public <VALUE> boolean containsKey(Class<? extends Key<VALUE>> key)
From source file:knu.univ.lingvo.coref.ACEMentionExtractor.java
License:Open Source License
public Document nextDoc() throws Exception { List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>(); List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>(); List<List<Mention>> allPredictedMentions; List<Tree> allTrees = new ArrayList<Tree>(); Annotation anno;//from ww w . ja va 2 s.co m try { String filename = ""; while (files.length > fileIndex) { if (files[fileIndex].contains("apf.xml")) { filename = files[fileIndex]; fileIndex++; break; } else { fileIndex++; filename = ""; } } if (files.length <= fileIndex && filename.equals("")) return null; anno = aceReader.parse(corpusPath + filename); stanfordProcessor.annotate(anno); List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class); for (CoreMap s : sentences) { int i = 1; for (CoreLabel w : s.get(CoreAnnotations.TokensAnnotation.class)) { w.set(CoreAnnotations.IndexAnnotation.class, i++); if (!w.containsKey(CoreAnnotations.UtteranceAnnotation.class)) { w.set(CoreAnnotations.UtteranceAnnotation.class, 0); } } allTrees.add(s.get(TreeCoreAnnotations.TreeAnnotation.class)); allWords.add(s.get(CoreAnnotations.TokensAnnotation.class)); EntityComparator comparator = new EntityComparator(); extractGoldMentions(s, allGoldMentions, comparator); } if (Constants.USE_GOLD_MENTIONS) allPredictedMentions = allGoldMentions; else allPredictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries); printRawDoc(sentences, allGoldMentions, filename, true); printRawDoc(sentences, allPredictedMentions, filename, false); } catch (IOException e) { throw new RuntimeIOException(e); } return arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true); }
From source file:knu.univ.lingvo.coref.CoNLLMentionExtractor.java
License:Open Source License
@Override public Document nextDoc() throws Exception { List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>(); List<Tree> allTrees = new ArrayList<Tree>(); CoNLL2011DocumentReader.Document conllDoc = reader.getNextDocument(); if (conllDoc == null) { return null; }//from w w w . j av a2 s . co m Annotation anno = conllDoc.getAnnotation(); List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class); for (CoreMap sentence : sentences) { if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) { // Remove tree from annotation and replace with parse using stanford parser sentence.remove(TreeCoreAnnotations.TreeAnnotation.class); } else { Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); if (LEMMATIZE) { treeLemmatizer.transformTree(tree); } // generate the dependency graph try { SemanticGraph deps = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.COLLAPSED, includeExtras, threadSafe); SemanticGraph basicDeps = SemanticGraphFactory.makeFromTree(tree, SemanticGraphFactory.Mode.BASIC, includeExtras, threadSafe); sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basicDeps); sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps); } catch (Exception e) { logger.log(Level.WARNING, "Exception caught during extraction of Stanford dependencies. 
Will ignore and continue...", e); } } } String preSpeaker = null; int utterance = -1; for (CoreLabel token : anno.get(CoreAnnotations.TokensAnnotation.class)) { if (!token.containsKey(CoreAnnotations.SpeakerAnnotation.class)) { token.set(CoreAnnotations.SpeakerAnnotation.class, ""); } String curSpeaker = token.get(CoreAnnotations.SpeakerAnnotation.class); if (!curSpeaker.equals(preSpeaker)) { utterance++; preSpeaker = curSpeaker; } token.set(CoreAnnotations.UtteranceAnnotation.class, utterance); } // Run pipeline stanfordProcessor.annotate(anno); for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) { allWords.add(sentence.get(CoreAnnotations.TokensAnnotation.class)); allTrees.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class)); } // Initialize gold mentions List<List<Mention>> allGoldMentions = extractGoldMentions(conllDoc); List<List<Mention>> allPredictedMentions; if (Constants.USE_GOLD_MENTIONS) { //allPredictedMentions = allGoldMentions; // Make copy of gold mentions since mentions may be later merged, mentionID's changed and stuff allPredictedMentions = makeCopy(allGoldMentions); } else if (Constants.USE_GOLD_MENTION_BOUNDARIES) { allPredictedMentions = ((RuleBasedCorefMentionFinder) mentionFinder) .filterPredictedMentions(allGoldMentions, anno, dictionaries); } else { allPredictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries); } try { recallErrors(allGoldMentions, allPredictedMentions, anno); } catch (IOException e) { throw new RuntimeException(e); } Document doc = arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true); doc.conllDoc = conllDoc; return doc; }
From source file:knu.univ.lingvo.coref.Document.java
License:Open Source License
/** Set paragraph index */ private void setParagraphAnnotation() { int paragraphIndex = 0; int previousOffset = -10; for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { for (CoreLabel w : sent.get(CoreAnnotations.TokensAnnotation.class)) { if (w.containsKey(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) { if (w.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class) > previousOffset + 2) paragraphIndex++;//w ww .j a v a 2s .c o m w.set(CoreAnnotations.ParagraphAnnotation.class, paragraphIndex); previousOffset = w.get(CoreAnnotations.CharacterOffsetEndAnnotation.class); } else { w.set(CoreAnnotations.ParagraphAnnotation.class, -1); } } } for (List<Mention> l : predictedOrderedMentionsBySentence) { for (Mention m : l) { m.paragraph = m.headWord.get(CoreAnnotations.ParagraphAnnotation.class); } } numParagraph = paragraphIndex; }
From source file:knu.univ.lingvo.coref.Document.java
License:Open Source License
/**
 * Marks quoted spans with their own utterance number and fills in placeholder
 * speakers.
 *
 * <p>An opening quote ({@code ``}, or a plain {@code "} when
 * {@code normalQuotationType} is set and no quotation is open) increments
 * {@code maxUtter} and is itself skipped. Tokens inside a quotation receive
 * {@code maxUtter} as their utterance. Any processed token whose speaker is
 * missing, empty, or starts with {@code "PER"} gets the speaker
 * {@code "PER" + utterance}. If no quotation was found with the directional
 * quote style, the pass is repeated once treating plain {@code "} as a quote.
 *
 * @param results sentences whose tokens are updated in place
 * @param normalQuotationType whether a plain {@code "} opens and closes quotations
 */
private void markQuotations(List<CoreMap> results, boolean normalQuotationType) {
    boolean inQuote = false;

    for (CoreMap sentence : results) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String text = token.get(CoreAnnotations.TextAnnotation.class);

            // Evaluated before the quote checks, exactly as the speaker state stood on entry.
            boolean hasRealSpeaker = token.containsKey(CoreAnnotations.SpeakerAnnotation.class)
                    && !token.get(CoreAnnotations.SpeakerAnnotation.class).equals("")
                    && !token.get(CoreAnnotations.SpeakerAnnotation.class).startsWith("PER");

            boolean opensQuote = text.equals("``")
                    || (!inQuote && normalQuotationType && text.equals("\""));
            if (opensQuote) {
                inQuote = true;
                maxUtter++;
                continue;
            }

            boolean closesQuote = text.equals("''")
                    || (inQuote && normalQuotationType && text.equals("\""));
            if (closesQuote) {
                inQuote = false;
            }

            if (inQuote) {
                token.set(CoreAnnotations.UtteranceAnnotation.class, maxUtter);
            }
            if (!hasRealSpeaker) {
                token.set(CoreAnnotations.SpeakerAnnotation.class,
                        "PER" + token.get(CoreAnnotations.UtteranceAnnotation.class));
            }
        }
    }

    // Nothing found with directional quotes: retry once with plain double quotes.
    if (maxUtter == 0 && !normalQuotationType) {
        markQuotations(results, true);
    }
}
From source file:nlp.service.implementation.DefaultLanguageProcessor.java
/**
 * Builds a {@code WordToken} from a {@code CoreLabel}.
 *
 * <p>The label must carry part-of-speech, original-text, lemma, named-entity-tag
 * and index annotations. Tokens that {@code isPunctuation} reports as punctuation
 * get {@code PartOfSpeech.PUNCT}; all others get the part of speech converted
 * from the label's tag. A non-null normalized named-entity tag, when present, is
 * stored as the token's entity value.
 *
 * @param grammarService passed through to {@code isPunctuation}
 * @param parentToken parent handed to the {@code WordToken} constructor
 * @param coreLabel source of the annotations
 * @return the new token, or {@code null} if any required annotation is missing
 */
public WordToken createWordToken(GrammarService grammarService, Token parentToken, CoreLabel coreLabel) {
    boolean hasRequiredAnnotations = coreLabel.containsKey(PartOfSpeechAnnotation.class)
            && coreLabel.containsKey(OriginalTextAnnotation.class)
            && coreLabel.containsKey(LemmaAnnotation.class)
            && coreLabel.containsKey(NamedEntityTagAnnotation.class)
            && coreLabel.containsKey(IndexAnnotation.class);
    if (!hasRequiredAnnotations) {
        return null;
    }

    String text = coreLabel.get(OriginalTextAnnotation.class);
    int index = coreLabel.get(IndexAnnotation.class);

    // Only the part-of-speech info differs between punctuation and ordinary words.
    PartOfSpeechInfo posInfo;
    if (isPunctuation(grammarService, index)) {
        posInfo = new PartOfSpeechInfo(PartOfSpeech.PUNCT);
    } else {
        posInfo = EnumHelper.toPartOfSpeech(coreLabel.get(PartOfSpeechAnnotation.class));
    }

    WordToken wordToken = new WordToken(index, parentToken, text,
            coreLabel.get(LemmaAnnotation.class),
            EnumHelper.toNamedEntityTag(coreLabel.get(NamedEntityTagAnnotation.class)),
            posInfo,
            coreLabel.get(CorefClusterIdAnnotation.class));

    if (coreLabel.containsKey(NormalizedNamedEntityTagAnnotation.class)) {
        String normalizedNer = coreLabel.get(NormalizedNamedEntityTagAnnotation.class);
        if (normalizedNer != null) {
            wordToken.setEntityValue(normalizedNer);
        }
    }

    return wordToken;
}
From source file:nlp.service.implementation.DefaultLanguageProcessor.java
/**
 * Builds a {@code PhraseToken} from a {@code CoreLabel} that carries category,
 * begin-index and end-index annotations.
 *
 * @param parentToken parent handed to the {@code PhraseToken} constructor
 * @param coreLabel source of the annotations
 * @return the new phrase token, or {@code null} if any of the three annotations
 *         is missing
 */
public PhraseToken createPhraseToken(Token parentToken, CoreLabel coreLabel) {
    boolean hasRequiredAnnotations = coreLabel.containsKey(CategoryAnnotation.class)
            && coreLabel.containsKey(BeginIndexAnnotation.class)
            && coreLabel.containsKey(EndIndexAnnotation.class);
    if (!hasRequiredAnnotations) {
        return null;
    }

    PartOfSpeechInfo posInfo = EnumHelper.toPartOfSpeech(coreLabel.get(CategoryAnnotation.class));
    return new PhraseToken(
            coreLabel.get(BeginIndexAnnotation.class),
            coreLabel.get(EndIndexAnnotation.class),
            parentToken,
            posInfo);
}