Example usage for edu.stanford.nlp.ling CoreLabel index

List of usage examples for edu.stanford.nlp.ling CoreLabel index

Introduction

On this page you can find example usages of the index() method of edu.stanford.nlp.ling.CoreLabel.

Prototype

@Override
public int index() 

Source Link

Usage

From source file:edu.illinois.cs.cogcomp.pipeline.handlers.StanfordParseHandler.java

License:Open Source License

/**
 * Wraps an already tokenized sentence in a Stanford {@link CoreMap}.
 *
 * The token range is taken from the first and last entries of {@code stanfordTokens}: the begin
 * is the first token's {@code index()} and the end is the last token's {@code index() + 1}.
 *
 * NOTE(review): {@code sentence.getStartSpan()} / {@code getEndSpan()} are stored under the
 * character-offset keys -- confirm that these really are character offsets.
 *
 * @param sentence       constituent supplying the begin/end offsets of the sentence
 * @param rawText        text stored as the sentence's {@code TextAnnotation}
 * @param sentIndex      value stored as the sentence's {@code SentenceIndexAnnotation}
 * @param stanfordTokens tokens of the sentence; must contain at least one element
 * @return a new {@code ArrayCoreMap} carrying the annotations listed above
 */
private static CoreMap buildStanfordSentence(Constituent sentence, String rawText, int sentIndex,
        List<CoreLabel> stanfordTokens) {
    // Resolve the token boundaries up front; an empty token list fails here.
    final int tokenBegin = stanfordTokens.get(0).index();
    final int tokenEnd = stanfordTokens.get(stanfordTokens.size() - 1).index() + 1; // one past the last token

    final CoreMap result = new ArrayCoreMap();

    // Character span of the sentence.
    result.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, sentence.getStartSpan());
    result.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, sentence.getEndSpan());

    // Token span of the sentence.
    result.set(CoreAnnotations.TokenBeginAnnotation.class, tokenBegin);
    result.set(CoreAnnotations.TokenEndAnnotation.class, tokenEnd);

    // Text, position in the document and the tokens themselves.
    result.set(CoreAnnotations.TextAnnotation.class, rawText);
    result.set(CoreAnnotations.SentenceIndexAnnotation.class, sentIndex);
    result.set(CoreAnnotations.TokensAnnotation.class, stanfordTokens);

    return result;
}

From source file:knu.univ.lingvo.coref.Mention.java

License:Open Source License

/**
 * Builds a pattern string for the given tokens around this mention's head word.
 *
 * Per token, in order:
 * <ul>
 *   <li>the head word (same {@code index()} as {@code headWord}) contributes its lemma;</li>
 *   <li>"and" or punctuation whose two neighbours carry the same NER label is dropped;</li>
 *   <li>the token right before the head whose NER label equals {@code nerString} contributes
 *       its lemma;</li>
 *   <li>a token with NER label "O" contributes its lemma;</li>
 *   <li>any other token contributes {@code <LABEL>}, once per run of the same label.</li>
 * </ul>
 *
 * @param pTokens tokens to turn into a pattern
 * @return the collected pieces joined by {@code StringUtils.join}
 */
public String getPattern(List<CoreLabel> pTokens) {

    ArrayList<String> pieces = new ArrayList<String>();
    String currentNe = "";

    for (CoreLabel token : pTokens) {
        // Head word: always kept as its lemma.
        if (token.index() == headWord.index()) {
            pieces.add(token.lemma());
            currentNe = "";
            continue;
        }

        // Connector ("and" / punctuation) between two tokens with the same NER label: skipped,
        // and the current entity run is left open.
        if (token.lemma().equals("and") || StringUtils.isPunct(token.lemma())) {
            int position = pTokens.indexOf(token);
            boolean hasBothNeighbours = pTokens.size() > position + 1 && position > 0;
            if (hasBothNeighbours
                    && pTokens.get(position + 1).ner().equals(pTokens.get(position - 1).ner())) {
                continue;
            }
        }

        // Token immediately preceding the head that shares the mention's NER label.
        if (token.index() == headWord.index() - 1 && token.ner().equals(nerString)) {
            pieces.add(token.lemma());
            currentNe = "";
            continue;
        }

        // Plain (non-entity) token.
        if (token.ner().equals("O")) {
            pieces.add(token.lemma());
            currentNe = "";
            continue;
        }

        // Entity token: emit the label only when a new run starts.
        if (!token.ner().equals(currentNe)) {
            currentNe = token.ner();
            pieces.add("<" + currentNe + ">");
        }
    }
    return StringUtils.join(pieces);
}

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Logs a bracketed, human-readable rendering of a document for analysis.
 *
 * Every sentence becomes one line of space-separated tokens. Each mention is rendered as
 * {@code [tokens]_ID}: an opening bracket is written before the token at the mention's
 * {@code startIndex} and {@code ]_ID} is written before the token at its {@code endIndex}
 * (or at the end of the line when {@code endIndex} equals the token count). ID is the gold
 * or the predicted cluster id, depending on {@code gold}. An extra line break is inserted when
 * the first token of a sentence starts more than two characters after the end of the previous
 * sentence. The text is emitted through {@code logger.fine}, preceded by the document id and
 * a banner line.
 *
 * @param document document whose sentences and per-sentence mentions are rendered
 * @param gold     {@code true} to render gold mentions, {@code false} for predicted mentions
 * @throws FileNotFoundException kept in the signature for existing callers; no statement
 *                               visible in this method opens a file
 */
public static void printRawDoc(Document document, boolean gold) throws FileNotFoundException {
    List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> allMentions;
    if (gold) {
        allMentions = document.goldOrderedMentionsBySentence;
    } else {
        allMentions = document.predictedOrderedMentionsBySentence;
    }

    StringBuilder doc = new StringBuilder();
    int previousOffset = 0;

    for (int i = 0; i < sentences.size(); i++) {
        CoreMap sentence = sentences.get(i);
        List<Mention> mentions = allMentions.get(i);

        // Token indices are 1-based, so shift them into a 0-based array.
        List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class);
        String[] tokens = new String[t.size()];
        for (CoreLabel c : t) {
            tokens[c.index() - 1] = c.word();
        }

        // A gap of more than two characters since the previous sentence gets an extra line break.
        if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
            doc.append("\n");
        }
        previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);

        // Number of mentions opening at each token, and the mentions closing at each position.
        Counter<Integer> startCounts = new ClassicCounter<Integer>();
        Map<Integer, Set<Mention>> endMentions = Generics.newHashMap();
        for (Mention m : mentions) {
            startCounts.incrementCount(m.startIndex);
            Set<Mention> endingHere = endMentions.get(m.endIndex);
            if (endingHere == null) {
                endingHere = Generics.<Mention>newHashSet();
                endMentions.put(m.endIndex, endingHere);
            }
            endingHere.add(m);
        }

        // Position tokens.length is visited too, so that mentions ending with the sentence get
        // their closing bracket; no token is printed for that final position.
        for (int j = 0; j <= tokens.length; j++) {
            Set<Mention> closing = endMentions.get(j);
            if (closing != null) {
                for (Mention m : closing) {
                    int corefChainId = gold ? m.goldCorefClusterID : m.corefClusterID;
                    doc.append("]_").append(corefChainId);
                }
            }
            if (j == tokens.length) {
                break;
            }
            for (int k = 0; k < startCounts.getCount(j); k++) {
                if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') {
                    doc.append(" ");
                }
                doc.append("[");
            }
            if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') {
                doc.append(" ");
            }
            doc.append(tokens[j]);
        }

        doc.append("\n");
    }

    logger.fine(document.annotation.get(CoreAnnotations.DocIDAnnotation.class));
    if (gold) {
        logger.fine("New DOC: (GOLD MENTIONS) ==================================================");
    } else {
        logger.fine("New DOC: (Predicted Mentions) ==================================================");
    }
    logger.fine(doc.toString());
}

From source file:main.java.parsers.StanfordParser.java

/**
 * Parses a given input text document using the Stanford CoreNLP parser.
 *
 * For every sentence found by the pipeline this method records the start offset of each token
 * (by token index and as an {@code IndexedWord}), writes the space-separated tokenized sentence
 * to the SENNA log file, stores the roles returned by {@code SENNASrl.getSRLRoles}, and
 * attaches the converted parse tree and the collapsed dependency graph to the document.
 * Nothing is changed when the pipeline yields no sentences.
 *
 * @param document the document to annotate; its text is read and its per-sentence maps are
 *                 filled in place
 * @throws java.io.UnsupportedEncodingException if the UTF-8 charset is not available
 * @throws java.io.IOException if the tokenized sentence cannot be written to the SENNA log file
 * @throws java.lang.InterruptedException presumably propagated from the SENNA invocation --
 *                 confirm against {@code SENNASrl.getSRLRoles}
 */
public static void parse(Doc document) throws UnsupportedEncodingException, IOException, InterruptedException {

    // Initialize an Annotation with some text to be annotated. The text is the argument to the constructor.
    Annotation annotation = new Annotation(new String(document.text.getBytes("UTF-8"), "UTF-8"));
    // run all the selected Annotators on this text
    pipeline.annotate(annotation);

    // An Annotation is a Map and you can get and use the various analyses individually.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);

    // nothing to do if the annotation is empty
    if (sentences == null || sentences.isEmpty()) {
        return;
    }

    for (CoreMap sentence : sentences) {
        StringBuilder sentenceStr = new StringBuilder();
        int sentenceNum = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);

        // map linking token numbers with their start offsets, created on first use
        Map<Integer, Integer> tokenNumStartOffset = document.sentenceTokenNumStartOffset.get(sentenceNum);
        if (tokenNumStartOffset == null) {
            tokenNumStartOffset = new HashMap<>();
            document.sentenceTokenNumStartOffset.put(sentenceNum, tokenNumStartOffset);
        }

        Map<Integer, List<String>> startOffsetSRLRoles = new TreeMap<>();
        // extracting tokenized information from the stanford parser output
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            sentenceStr.append(token.value()).append(" ");
            document.startOffsetIndexedWord.put(token.beginPosition(), new IndexedWord(token));
            tokenNumStartOffset.put(token.index(), token.beginPosition());
            startOffsetSRLRoles.put(token.beginPosition(), null);
        }

        // Write the tokenized sentence to an output file. The stream is closed before SENNA is
        // invoked so that the file handle is released and the content is complete on disk.
        // NOTE(review): getBytes() uses the platform default charset -- confirm what SENNA expects.
        try (FileOutputStream output = new FileOutputStream(Main.RESOURCES_DIR + "\\senna\\log.txt")) {
            output.write(sentenceStr.toString().getBytes());
        }

        // the semantic role labels for the sentence are obtained by applying SENNA
        startOffsetSRLRoles = SENNASrl.getSRLRoles(startOffsetSRLRoles);
        // set the srl tags
        document.startOffsetSRLRoles.putAll(startOffsetSRLRoles);

        // parse tree of the sentence
        String stanfordParseTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class).toString();
        ParseTree parseTree = new ParseTree(stanfordParseTree);
        parseTree.convertParseTree();
        document.setSentenceParseTree(sentenceNum, parseTree);

        // dependency graph of the sentence
        SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        document.setSentenceDependencyGraph(sentenceNum, graph);
    }
}

From source file:org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java

License:Open Source License

/**
 * Renders the coreference chains of an annotated document as MUC-style tagged sentences.
 *
 * Only sentences containing at least one mention of a chain with more than one mention are
 * rendered. Each is written as space-separated tokens, with {@code <COREF ID="..">} opened
 * before the first token of a mention (plus {@code REF=".."} pointing at the chain's
 * representative mention when the mention is not the representative itself) and
 * {@code </COREF>} written before the token at the mention's {@code endIndex}. Every line is
 * finally passed through {@code CoreNLPThriftUtil.closeHTMLTags}.
 *
 * @param annotation annotated document; sentences and coreference chains are read from it
 * @return one tagged string per rendered sentence, in ascending sentence order; empty when
 *         the annotation carries no coreference chains
 */
private List<String> MUCStyleOutput(Annotation annotation) {
    List<String> mucOutput = new ArrayList<String>();

    Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    if (corefChains == null) {
        // No coreference annotation present: nothing to render.
        return mucOutput;
    }

    // sentence number -> (start token index -> (mention, representative mention)).
    // A sorted map is used so that sentences are emitted in document order; hash iteration
    // order does not guarantee that.
    Map<Integer, Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>> mentionMap = new java.util.TreeMap<Integer, Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>>();

    for (CorefChain chain : corefChains.values()) {
        CorefChain.CorefMention ref = chain.getRepresentativeMention();

        for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
            if (mention != ref) {
                // first add the mention itself
                registerMention(mentionMap, mention, ref);
                // now make sure the representative is there
                registerMention(mentionMap, ref, ref);
            }
        }
    }

    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (Map.Entry<Integer, Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>> entry : mentionMap
            .entrySet()) {
        // Sentence numbers of mentions are 1-based.
        CoreMap currentSentence = sentences.get(entry.getKey() - 1);
        Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>> currentSetOfMentions = entry
                .getValue();
        CorefChain.CorefMention lastMention = null;
        StringBuilder outputString = new StringBuilder();
        for (CoreLabel token : currentSentence.get(CoreAnnotations.TokensAnnotation.class)) {
            Pair<CorefChain.CorefMention, CorefChain.CorefMention> startingHere = currentSetOfMentions
                    .get(token.index());
            if (startingHere != null) {
                lastMention = startingHere.first();
                CorefChain.CorefMention ref = startingHere.second();
                outputString.append("<COREF ID=\"").append(lastMention.mentionID).append("\"");
                if (lastMention.mentionID != ref.mentionID) {
                    outputString.append(" REF=\"").append(ref.mentionID).append("\"");
                }
                outputString.append(">");
            }
            if (lastMention != null && token.index() == lastMention.endIndex) {
                outputString.append("</COREF> ");
            }
            outputString.append(token.word()).append(" ");
        }
        mucOutput.add(CoreNLPThriftUtil.closeHTMLTags(outputString.toString().replaceAll(" </", "</")));
    }

    return mucOutput;
}

/**
 * Records {@code mention}, paired with its representative {@code ref}, under the mention's
 * sentence number and start token index, replacing any entry already stored there.
 */
private static void registerMention(
        Map<Integer, Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>> mentionMap,
        CorefChain.CorefMention mention, CorefChain.CorefMention ref) {
    Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>> byStartIndex = mentionMap
            .get(mention.sentNum);
    if (byStartIndex == null) {
        byStartIndex = new HashMap<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>();
        mentionMap.put(mention.sentNum, byStartIndex);
    }
    byStartIndex.put(mention.startIndex,
            new Pair<CorefChain.CorefMention, CorefChain.CorefMention>(mention, ref));
}

From source file:org.sam_agent.csparser.ContinuousParser.java

License:Open Source License

/**
 * Serializes the token-level annotations of a sentence into a JSON fragment.
 *
 * The fragment (without enclosing braces) has the members {@code "pos"} (a {@code "map"} from
 * {@code word-index} to POS tag and a {@code "list"} of token/POS objects), {@code "lemma"}
 * (map from {@code word-index} to lemma), {@code "timex"} (map from timex id to the attributes
 * of its XML element) and {@code "timexGroups"} (map from timex id to the {@code word-index}
 * keys of the tokens carrying that timex).
 *
 * @param words sentence whose {@code TokensAnnotation} is serialized
 * @return the JSON fragment described above
 */
public String stringify(CoreMap words) {

    List<String> posEntries = new ArrayList<String>();
    List<String> posByToken = new ArrayList<String>();
    List<String> lemmaByToken = new ArrayList<String>();
    List<String> timexEntries = new ArrayList<String>();
    Map<String, List<String>> tokensByTimexId = new HashMap<String, List<String>>();

    for (CoreLabel token : words.get(CoreAnnotations.TokensAnnotation.class)) {

        String word = token.get(CoreAnnotations.TextAnnotation.class);
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        // Key identifying this token in the maps: escaped word plus its index.
        String tokenKey = esc(word) + "-" + token.index();

        posEntries.add(String.format("{\"token\":\"%s\",\"pos\":\"%s\"}", esc(word), esc(pos)));
        posByToken.add(String.format("\"%s\":\"%s\"", tokenKey, esc(pos)));
        lemmaByToken.add(String.format("\"%s\":\"%s\"", tokenKey, esc(lemma)));

        Timex timex = token.get(TimeAnnotations.TimexAnnotation.class);
        if (timex == null) {
            continue;
        }

        String tid = timex.tid();
        String quotedKey = "\"" + tokenKey + "\"";

        // Timex already seen on an earlier token: only extend its token group.
        if (tokensByTimexId.containsKey(tid)) {
            tokensByTimexId.get(tid).add(quotedKey);
            continue;
        }

        // First token of this timex: open its group and serialize its attributes once.
        List<String> group = new ArrayList<String>();
        group.add(quotedKey);
        tokensByTimexId.put(tid, group);

        List<String> attributePairs = new ArrayList<String>();
        NamedNodeMap attributes = timex.toXmlElement().getAttributes();
        for (int i = 0; i < attributes.getLength(); i++) {
            Node attribute = attributes.item(i);
            attributePairs.add(
                    String.format("\"%s\":\"%s\"", attribute.getNodeName(), attribute.getNodeValue()));
        }
        timexEntries.add(String.format("\"%s\":{%s}", tid, String.join(",", attributePairs)));
    }

    List<String> groupEntries = new ArrayList<String>();
    for (Map.Entry<String, List<String>> group : tokensByTimexId.entrySet()) {
        groupEntries.add(String.format("\"%s\":[%s]", group.getKey(), String.join(",", group.getValue())));
    }

    String posMapJSON = "{" + String.join(", ", posByToken) + "}";
    String posListJSON = "[" + String.join(", ", posEntries) + "]";
    String lemmaMapJSON = "{" + String.join(", ", lemmaByToken) + "}";
    String timexMapJSON = "{" + String.join(", ", timexEntries) + "}";
    String timexIdMapJSON = "{" + String.join(",", groupEntries) + "}";

    return String.format("\"pos\":{\"map\":%s,\"list\":%s},\"lemma\":%s,\"timex\":%s,\"timexGroups\":%s",
            posMapJSON, posListJSON, lemmaMapJSON, timexMapJSON, timexIdMapJSON);
}

From source file:semRewrite.Interpreter.java

License:Open Source License

/** *************************************************************
 * Maps every token, or every member of the group the substitutor holds for it, to its
 * part-of-speech tag.
 *
 * @param tokens - List of CoreLabel tokens representing a sentence/input
 * @param substitutor - supplies the grouped label sequence for tokens it contains
 * @return Map of "originalText-index" -> POS
 * ex.  Mary-1 -> NNP
 *      drives-2 -> VBZ
 *      the-3 -> DT
 *      car-4 -> NN
 */
private static Map<String, String> getPartOfSpeechList(List<CoreLabel> tokens, ClauseSubstitutor substitutor) {

    Map<String, String> posByToken = Maps.newHashMap();
    for (CoreLabel token : tokens) {
        // Tokens known to the substitutor are expanded to their whole group.
        CoreLabelSequence group;
        if (substitutor.containsKey(token)) {
            group = substitutor.getGrouped(token);
        } else {
            group = CoreLabelSequence.from(token);
        }

        for (CoreLabel member : group.getLabels()) {
            String key = member.originalText() + "-" + member.index();
            posByToken.put(key, member.tag());
        }
    }
    return posByToken;
}

From source file:semRewrite.substitutor.CoreLabelSequence.java

License:Open Source License

/** *************************************************************
 * Builds an upper-cased copy of this sequence. Each label is replaced by a
 * fresh CoreLabel carrying only the upper-cased value() and the index() of
 * the original; the labels of this sequence are left untouched.
 *
 * @return a new CoreLabelSequence made of the upper-cased copies
 */
public semRewrite.substitutor.CoreLabelSequence toUpperCase() {

    List<CoreLabel> upperCased = new ArrayList<>();
    for (CoreLabel original : labels) {
        CoreLabel copy = new CoreLabel();
        copy.setValue(original.value().toUpperCase());
        copy.setIndex(original.index());
        upperCased.add(copy);
    }
    return new semRewrite.substitutor.CoreLabelSequence(upperCased);
}

From source file:semRewrite.substitutor.StanfordCorefSubstitutor.java

License:Open Source License

/** **************************************************************
 * Collects, for every token that takes part in a coreference chain, the
 * label sequence it should be substituted with, and registers all of them
 * through addGroups().
 *
 * A token is considered when its chain has more than one mention, it is
 * not one of the ignorable pronouns, and it lies outside the root mention
 * of the chain (different sentence, or outside the root's token span).
 *
 * @param document annotated document supplying the tokens and coref chains
 */
private void initialize(Annotation document) {

    List<CoreLabel> labels = document.get(TokensAnnotation.class);
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);

    Map<semRewrite.substitutor.CoreLabelSequence, semRewrite.substitutor.CoreLabelSequence> replacements = Maps
            .newHashMap();

    for (CoreLabel label : labels) {
        List<CorefMention> mentions = getMentions(label, corefChains);
        if (mentions.size() <= 1) {
            continue;
        }
        if (ignorablePronouns.contains(label.originalText())) {
            continue;
        }

        int tokenIdx = label.index();
        int sentenceNum = 1 + label.sentIndex(); // mention sentence numbers are 1-based

        CorefMention root = findRootMention(mentions);
        boolean insideRoot = sentenceNum == root.sentNum && tokenIdx >= root.startIndex
                && tokenIdx < root.endIndex;
        if (insideRoot) {
            // The token is part of the root mention itself: nothing to substitute.
            continue;
        }

        // Substitutable pronouns do not constrain the tag of the extracted text.
        String masterTag = label.tag();
        if (isSubstitutablePronoun(label)) {
            masterTag = "";
        }

        List<CoreLabel> rootSentence = getSentenceTokens(document, root.sentNum - 1);
        semRewrite.substitutor.CoreLabelSequence substitute = extractTextWithSameTag(rootSentence, root,
                masterTag);
        if (!substitute.isEmpty()) {
            replacements.put(new semRewrite.substitutor.CoreLabelSequence(label), substitute);
        }
    }
    addGroups(replacements);
}

From source file:semRewrite.substitutor.StanfordCorefSubstitutor.java

License:Open Source License

/** *************************************************************
 * Finds the mentions of the coreference chain the given label belongs to.
 *
 * Starting at the label's cluster id and counting down to 1, the first
 * chain that has a mention starting at this label (same sentence, same
 * start index) and holds more than one mention is returned. A matching
 * chain with a single mention is remembered but the search continues.
 *
 * @param label  token whose chain is looked up
 * @param corefs coreference chains keyed by cluster id
 * @return the mentions of the chain found in textual order, or an empty
 *         list when the label has no cluster id or no chain matches
 */
private List<CorefMention> getMentions(final CoreLabel label, Map<Integer, CorefChain> corefs) {

    List<CorefMention> result = ImmutableList.of();
    Integer startId = label.get(CorefClusterIdAnnotation.class);
    if (startId == null) {
        return result;
    }

    for (int clusterId = startId; clusterId > 0 && result.size() <= 1; clusterId--) {
        if (!corefs.containsKey(clusterId)) {
            continue;
        }
        List<CorefMention> candidates = corefs.get(clusterId).getMentionsInTextualOrder();
        for (CorefMention candidate : candidates) {
            // Mention sentence numbers are 1-based, label sentence indices 0-based.
            if (candidate.sentNum == label.sentIndex() + 1 && candidate.startIndex == label.index()) {
                result = candidates;
                break;
            }
        }
    }

    return result;
}