Example usage for edu.stanford.nlp.ling CoreLabel containsKey

List of usage examples for edu.stanford.nlp.ling CoreLabel containsKey

Introduction

On this page you can find example usages of edu.stanford.nlp.ling.CoreLabel.containsKey.

Prototype

@Override
public <VALUE> boolean containsKey(Class<? extends Key<VALUE>> key) 

Source Link

Usage

From source file:knu.univ.lingvo.coref.ACEMentionExtractor.java

License:Open Source License

/**
 * Loads the next ACE annotation file and turns it into a {@code Document}.
 *
 * <p>Starting at {@code fileIndex}, the method looks for the next entry of {@code files} whose
 * name contains {@code "apf.xml"}, parses it with {@code aceReader} and runs
 * {@code stanfordProcessor} over the result. Tokens of each sentence are numbered from 1, and
 * any token without an utterance annotation is given utterance 0.
 *
 * @return the arranged document, or {@code null} when no further matching file is left
 * @throws Exception on processing failure; an {@code IOException} is rethrown wrapped in a
 *         {@code RuntimeIOException}
 */
public Document nextDoc() throws Exception {
    List<List<CoreLabel>> wordsBySentence = new ArrayList<List<CoreLabel>>();
    List<List<Mention>> goldMentions = new ArrayList<List<Mention>>();
    List<List<Mention>> predictedMentions;
    List<Tree> parseTrees = new ArrayList<Tree>();

    Annotation document;

    try {
        // Advance fileIndex past the next "apf.xml" file, skipping every other entry.
        String apfFile = "";
        for (; fileIndex < files.length && apfFile.equals(""); fileIndex++) {
            if (files[fileIndex].contains("apf.xml")) {
                apfFile = files[fileIndex];
            }
        }
        if (apfFile.equals("") && fileIndex >= files.length) {
            return null;
        }

        document = aceReader.parse(corpusPath + apfFile);
        stanfordProcessor.annotate(document);

        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

        for (CoreMap sentence : sentences) {
            int position = 0;
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                position++;
                token.set(CoreAnnotations.IndexAnnotation.class, position);
                boolean hasUtterance = token.containsKey(CoreAnnotations.UtteranceAnnotation.class);
                if (!hasUtterance) {
                    token.set(CoreAnnotations.UtteranceAnnotation.class, 0);
                }
            }
            parseTrees.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
            wordsBySentence.add(sentence.get(CoreAnnotations.TokensAnnotation.class));
            // A fresh comparator is handed to every sentence's gold-mention extraction.
            extractGoldMentions(sentence, goldMentions, new EntityComparator());
        }

        // With gold mentions enabled, the predicted list is the very same list object.
        predictedMentions = Constants.USE_GOLD_MENTIONS
                ? goldMentions
                : mentionFinder.extractPredictedMentions(document, maxID, dictionaries);

        printRawDoc(sentences, goldMentions, apfFile, true);
        printRawDoc(sentences, predictedMentions, apfFile, false);
    } catch (IOException ioFailure) {
        throw new RuntimeIOException(ioFailure);
    }

    return arrange(document, wordsBySentence, parseTrees, predictedMentions, goldMentions, true);
}

From source file:knu.univ.lingvo.coref.CoNLLMentionExtractor.java

License:Open Source License

/**
 * Reads the next CoNLL document from {@code reader} and turns it into a {@code Document}.
 *
 * <p>Per sentence, the gold tree is either removed (when neither
 * {@code Constants.USE_GOLD_PARSES} nor {@code replicateCoNLL} is set) or kept, optionally
 * lemmatized, and used to build basic and collapsed dependency graphs. Tokens then receive
 * utterance indices that increase whenever the speaker changes, after which
 * {@code stanfordProcessor} is run and gold/predicted mentions are collected.
 *
 * @return the arranged document with its {@code conllDoc} set, or {@code null} when the reader
 *         has no more documents
 * @throws Exception on processing failure; an {@code IOException} from {@code recallErrors} is
 *         rethrown wrapped in a {@code RuntimeException}
 */
@Override
public Document nextDoc() throws Exception {
    CoNLL2011DocumentReader.Document conllDoc = reader.getNextDocument();
    if (conllDoc == null) {
        return null;
    }

    List<List<CoreLabel>> wordsBySentence = new ArrayList<List<CoreLabel>>();
    List<Tree> parseTrees = new ArrayList<Tree>();

    Annotation anno = conllDoc.getAnnotation();
    List<CoreMap> goldSentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap goldSentence : goldSentences) {
        boolean keepGoldTree = Constants.USE_GOLD_PARSES || replicateCoNLL;
        if (!keepGoldTree) {
            // Drop the gold tree so that the pipeline run below supplies the parse instead.
            goldSentence.remove(TreeCoreAnnotations.TreeAnnotation.class);
            continue;
        }

        Tree goldTree = goldSentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        if (LEMMATIZE) {
            treeLemmatizer.transformTree(goldTree);
        }
        // Derive both dependency graphs from the tree; a failure is logged and skipped.
        try {
            SemanticGraph collapsed = SemanticGraphFactory.makeFromTree(goldTree,
                    SemanticGraphFactory.Mode.COLLAPSED, includeExtras, threadSafe);
            SemanticGraph basic = SemanticGraphFactory.makeFromTree(goldTree,
                    SemanticGraphFactory.Mode.BASIC, includeExtras, threadSafe);
            goldSentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basic);
            goldSentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, collapsed);
        } catch (Exception failure) {
            logger.log(Level.WARNING,
                    "Exception caught during extraction of Stanford dependencies. Will ignore and continue...",
                    failure);
        }
    }

    // Number utterances: the counter advances each time the speaker differs from the previous token's.
    String previousSpeaker = null;
    int utteranceIndex = -1;
    for (CoreLabel token : anno.get(CoreAnnotations.TokensAnnotation.class)) {
        boolean hasSpeaker = token.containsKey(CoreAnnotations.SpeakerAnnotation.class);
        if (!hasSpeaker) {
            token.set(CoreAnnotations.SpeakerAnnotation.class, "");
        }
        String speaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
        boolean speakerChanged = !speaker.equals(previousSpeaker);
        if (speakerChanged) {
            utteranceIndex++;
            previousSpeaker = speaker;
        }
        token.set(CoreAnnotations.UtteranceAnnotation.class, utteranceIndex);
    }

    // Run pipeline
    stanfordProcessor.annotate(anno);

    for (CoreMap processed : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
        wordsBySentence.add(processed.get(CoreAnnotations.TokensAnnotation.class));
        parseTrees.add(processed.get(TreeCoreAnnotations.TreeAnnotation.class));
    }

    // Initialize gold mentions
    List<List<Mention>> goldMentions = extractGoldMentions(conllDoc);

    List<List<Mention>> predictedMentions;
    if (Constants.USE_GOLD_MENTIONS) {
        // Work on a copy: mentions may later be merged or have their IDs changed.
        predictedMentions = makeCopy(goldMentions);
    } else if (Constants.USE_GOLD_MENTION_BOUNDARIES) {
        RuleBasedCorefMentionFinder ruleBasedFinder = (RuleBasedCorefMentionFinder) mentionFinder;
        predictedMentions = ruleBasedFinder.filterPredictedMentions(goldMentions, anno, dictionaries);
    } else {
        predictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
    }

    try {
        recallErrors(goldMentions, predictedMentions, anno);
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }

    Document result = arrange(anno, wordsBySentence, parseTrees, predictedMentions, goldMentions, true);
    result.conllDoc = conllDoc;
    return result;
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Sets the paragraph index on every token and on every predicted mention.
 *
 * <p>A token starts a new paragraph when its begin offset lies more than two characters past
 * the end offset of the previous offset-bearing token. Tokens without a begin offset get
 * paragraph {@code -1}. Only the begin offset is checked for presence; the end offset is read
 * without a check. Each mention takes the paragraph of its head word, and
 * {@code numParagraph} is set to the final paragraph count.
 */
private void setParagraphAnnotation() {
    int paragraphCount = 0;
    int lastEndOffset = -10;
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            if (!token.containsKey(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
                token.set(CoreAnnotations.ParagraphAnnotation.class, -1);
                continue;
            }
            int beginOffset = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            boolean startsNewParagraph = beginOffset > lastEndOffset + 2;
            if (startsNewParagraph) {
                paragraphCount++;
            }
            token.set(CoreAnnotations.ParagraphAnnotation.class, paragraphCount);
            lastEndOffset = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        }
    }
    for (List<Mention> sentenceMentions : predictedOrderedMentionsBySentence) {
        for (Mention mention : sentenceMentions) {
            mention.paragraph = mention.headWord.get(CoreAnnotations.ParagraphAnnotation.class);
        }
    }
    numParagraph = paragraphCount;
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Sets the utterance annotation for tokens inside quotations; tokens are expected to carry
 * a default utterance annotation of 0 already.
 *
 * <p>An opening quote ({@code ``}, or a plain {@code "} when {@code normalQuotationType} is
 * set and no quotation is open) increments {@code maxUtter} and is itself skipped. Tokens
 * inside a quotation get utterance {@code maxUtter}. Tokens whose speaker is missing, empty,
 * or starts with {@code "PER"} get the speaker {@code "PER" + utterance}. If the pass found no
 * quotation and plain quotes were not considered, it is repeated with plain quotes enabled.
 *
 * @param results sentences whose tokens are to be marked
 * @param normalQuotationType whether a plain {@code "} also opens and closes quotations
 */
private void markQuotations(List<CoreMap> results, boolean normalQuotationType) {
    boolean quoting = false;
    for (CoreMap sentence : results) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String text = token.get(CoreAnnotations.TextAnnotation.class);

            // Decide up front whether this token's speaker has to be synthesized.
            boolean lacksSpeaker = true;
            if (token.containsKey(CoreAnnotations.SpeakerAnnotation.class)) {
                String speaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
                lacksSpeaker = speaker.equals("") || speaker.startsWith("PER");
            }

            boolean opensQuote = text.equals("``")
                    || (!quoting && normalQuotationType && text.equals("\""));
            if (opensQuote) {
                quoting = true;
                maxUtter++;
                continue;
            }
            boolean closesQuote = text.equals("''")
                    || (quoting && normalQuotationType && text.equals("\""));
            if (closesQuote) {
                quoting = false;
            }

            if (quoting) {
                token.set(CoreAnnotations.UtteranceAnnotation.class, maxUtter);
            }
            if (lacksSpeaker) {
                String syntheticSpeaker = "PER" + token.get(CoreAnnotations.UtteranceAnnotation.class);
                token.set(CoreAnnotations.SpeakerAnnotation.class, syntheticSpeaker);
            }
        }
    }
    // No quotation seen with the strict quote marks: retry and accept plain double quotes.
    boolean retryWithPlainQuotes = maxUtter == 0 && !normalQuotationType;
    if (retryWithPlainQuotes) {
        markQuotations(results, true);
    }
}

From source file:nlp.service.implementation.DefaultLanguageProcessor.java

/**
 * Builds a {@code WordToken} from a CoreNLP label.
 *
 * <p>The label must carry part-of-speech, original text, lemma, named-entity tag and index
 * annotations; otherwise no token is created. Tokens that {@code isPunctuation} reports as
 * punctuation get {@code PartOfSpeech.PUNCT}; all others get the part of speech mapped from
 * the label. A non-null normalized named-entity tag, when present, becomes the entity value.
 *
 * @param grammarService passed through to {@code isPunctuation}
 * @param parentToken parent of the new token
 * @param coreLabel label to read the annotations from
 * @return the new token, or {@code null} when a required annotation is missing
 */
public WordToken createWordToken(GrammarService grammarService, Token parentToken, CoreLabel coreLabel) {
    boolean hasRequiredAnnotations = coreLabel.containsKey(PartOfSpeechAnnotation.class)
            && coreLabel.containsKey(OriginalTextAnnotation.class)
            && coreLabel.containsKey(LemmaAnnotation.class)
            && coreLabel.containsKey(NamedEntityTagAnnotation.class)
            && coreLabel.containsKey(IndexAnnotation.class);
    if (!hasRequiredAnnotations) {
        return null;
    }

    String originalText = coreLabel.get(OriginalTextAnnotation.class);
    int tokenIndex = coreLabel.get(IndexAnnotation.class);

    // Punctuation overrides whatever part of speech the label itself carries.
    PartOfSpeechInfo partOfSpeech = isPunctuation(grammarService, tokenIndex)
            ? new PartOfSpeechInfo(PartOfSpeech.PUNCT)
            : EnumHelper.toPartOfSpeech(coreLabel.get(PartOfSpeechAnnotation.class));

    WordToken result = new WordToken(tokenIndex, parentToken, originalText,
            coreLabel.get(LemmaAnnotation.class),
            EnumHelper.toNamedEntityTag(coreLabel.get(NamedEntityTagAnnotation.class)),
            partOfSpeech,
            coreLabel.get(CorefClusterIdAnnotation.class));

    String normalizedNer = coreLabel.containsKey(NormalizedNamedEntityTagAnnotation.class)
            ? coreLabel.get(NormalizedNamedEntityTagAnnotation.class)
            : null;
    if (normalizedNer != null) {
        result.setEntityValue(normalizedNer);
    }

    return result;
}

From source file:nlp.service.implementation.DefaultLanguageProcessor.java

/**
 * Builds a {@code PhraseToken} from a CoreNLP label.
 *
 * @param parentToken parent of the new token
 * @param coreLabel label expected to carry category, begin-index and end-index annotations
 * @return the new phrase token, or {@code null} when any of the three annotations is missing
 */
public PhraseToken createPhraseToken(Token parentToken, CoreLabel coreLabel) {
    boolean hasRequiredAnnotations = coreLabel.containsKey(CategoryAnnotation.class)
            && coreLabel.containsKey(BeginIndexAnnotation.class)
            && coreLabel.containsKey(EndIndexAnnotation.class);
    if (!hasRequiredAnnotations) {
        return null;
    }

    PartOfSpeechInfo category = EnumHelper.toPartOfSpeech(coreLabel.get(CategoryAnnotation.class));
    return new PhraseToken(coreLabel.get(BeginIndexAnnotation.class),
            coreLabel.get(EndIndexAnnotation.class), parentToken, category);
}