Example usage for edu.stanford.nlp.ling CoreLabel lemma

List of usage examples for edu.stanford.nlp.ling CoreLabel lemma

Introduction

In this page you can find the example usage for edu.stanford.nlp.ling CoreLabel lemma.

Prototype

@Override
public String lemma() 

Source Link

Usage

From source file:SentencePair.java

License:Open Source License

/**
 * Re-runs the CoreNLP pipeline over an already POS-tagged sentence and
 * copies the resulting lemma onto each {@code POSTaggedToken} in place.
 * <p>
 * NOTE(review): assumes CoreNLP re-tokenizes the joined text into exactly
 * the same tokens as {@code sentence}; the zip below silently stops at the
 * shorter of the two streams if it does not — confirm with callers.
 *
 * @param sentence tokens to enrich with lemmas (mutated in place)
 */
private void lemmatize(List<POSTaggedToken> sentence) {
    /* Convert the sentence back to a single space-joined string.
     * StringBuilder avoids O(n^2) string concatenation in the loop. */
    StringBuilder text = new StringBuilder();
    for (POSTaggedToken tt : sentence) {
        text.append(tt.token).append(' ');
    }

    Annotation d = new Annotation(text.toString());
    nlp.annotate(d);

    for (CoreMap ss : d.get(CoreAnnotations.SentencesAnnotation.class)) {
        Iterator<CoreLabel> itToken = ss.get(CoreAnnotations.TokensAnnotation.class).iterator();
        ListIterator<POSTaggedToken> itSentence = sentence.listIterator();

        // Walk both token streams in lock-step and transfer the lemma.
        while (itToken.hasNext() && itSentence.hasNext()) {
            CoreLabel token = itToken.next();
            POSTaggedToken tt = itSentence.next();
            tt.lemma = token.lemma(); /* add a lemma to the POSTaggedToken */

            itSentence.set(tt);
        }
    }
}

From source file:ca.ualberta.exemplar.core.ParserMalt.java

License:Open Source License

/**
 * Renders a tokenized sentence as CoNLL-format input lines:
 * one tab-separated row per token of the form
 * {@code id  word  lemma  pos  pos  _}, with {@code _} standing in for a
 * missing or empty lemma/POS tag. Token ids start at 1.
 *
 * @param tokens CoreNLP tokens for one sentence
 * @return one CoNLL row per token, in order
 */
private String[] sentenceToCoNLLInput(List<CoreLabel> tokens) {
    List<String> rows = new ArrayList<String>(100);

    int id = 1;
    for (CoreLabel token : tokens) {
        String lemma = token.lemma();
        if (lemma == null || lemma.isEmpty()) {
            lemma = "_";
        }
        String pos = token.get(PartOfSpeechAnnotation.class);
        if (pos == null || pos.isEmpty()) {
            pos = "_";
        }

        // POS appears twice (coarse and fine tag columns); head/deprel are "_".
        rows.add(String.join("\t", String.valueOf(id), token.word(), lemma, pos, pos, "_"));
        id++;
    }

    return rows.toArray(new String[0]);
}

From source file:com.asimihsan.handytrowel.nlp.StopwordAnnotator.java

License:Open Source License

/**
 * Tags every token of the annotation with a pair of booleans:
 * (word is a stopword, lemma is a stopword). The lemma check is only
 * performed when {@code checkLemma} is set; otherwise it is always false.
 *
 * @param annotation document whose tokens are annotated in place
 */
@Override
public void annotate(Annotation annotation) {
    if (stopwords != null && !stopwords.isEmpty() && annotation.containsKey(TokensAnnotation.class)) {
        List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            boolean isWordStopword = stopwords.contains(token.word().toLowerCase());
            // Short-circuit && replaces the "cond ? x : false" anti-idiom.
            boolean isLemmaStopword = checkLemma && stopwords.contains(token.lemma().toLowerCase());
            Pair<Boolean, Boolean> pair = Pair.makePair(isWordStopword, isLemmaStopword);
            token.set(StopwordAnnotator.class, pair);
        }
    }
}

From source file:com.graphbrain.eco.StanfordLemmatizer.java

License:Open Source License

/**
 * Annotates {@code documentText} with the configured pipeline and collects
 * both the surface words and their lemmas, token by token.
 *
 * @param documentText raw text to process
 * @param returnType   0 to get the lemma list, any other value for the word list
 * @return lemmas when {@code returnType == 0}, otherwise words
 */
public List<String> lemmatize(String documentText, int returnType) {
    List<String> words = new LinkedList<>();
    List<String> lemmas = new LinkedList<>();

    // Build and annotate the document once.
    Annotation document = new Annotation(documentText);
    this.pipeline.annotate(document);

    // Collect word and lemma for every token of every sentence, in order.
    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            words.add(token.word());
            lemmas.add(token.lemma());
        }
    }

    return returnType == 0 ? lemmas : words;
}

From source file:edu.cuhk.hccl.util.NLPUtil.java

License:Apache License

/**
 * Extracts (adjective-lemma, noun-lemma) pairs from the text: for each noun
 * token, the nearest adjective within {@code searchRange} tokens (checking
 * left then right at each distance) is paired with it. If the sentence
 * contains any negation word, the adjective lemma is prefixed with
 * {@code NOT_PREFIX}.
 * <p>
 * NOTE(review): this writes the static {@code MAX_STEPS} field, so the
 * method is not thread-safe — confirm callers never run it concurrently.
 *
 * @param pipeline    annotation pipeline (must include pos and lemma)
 * @param text        raw text to analyze
 * @param searchRange maximum token distance to search for an adjective
 * @return list of [adjective-lemma, noun-lemma] pairs
 */
public static ArrayList<String[]> extractNounPhrases(StanfordCoreNLP pipeline, String text, int searchRange) {
    ArrayList<String[]> wordPairs = new ArrayList<String[]>();
    Annotation document = new Annotation(text);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    MAX_STEPS = searchRange;

    for (CoreMap sentence : sentences) {
        List<CoreLabel> labels = sentence.get(TokensAnnotation.class);

        // Sentence-level negation check; one hit is enough, so stop early.
        boolean hasNegation = false;
        for (CoreLabel label : labels) {
            if (NEGATIONS.contains(label.lemma().toLowerCase())) {
                hasNegation = true;
                break;
            }
        }

        for (int idx = 0; idx < labels.size(); idx++) {
            CoreLabel label = labels.get(idx);
            if (NN_TAGS.contains(label.get(PartOfSpeechAnnotation.class))) {
                // Widen the search window one step at a time, left side first.
                for (int step = 1; step <= MAX_STEPS; step++) {
                    CoreLabel leftLabel = labels.get(Math.max(0, idx - step));
                    if (JJ_TAGS.contains(leftLabel.tag())) {
                        addAdjNounPair(wordPairs, leftLabel, label, hasNegation);
                        break;
                    }
                    CoreLabel rightLabel = labels.get(Math.min(idx + step, labels.size() - 1));
                    if (JJ_TAGS.contains(rightLabel.tag())) {
                        addAdjNounPair(wordPairs, rightLabel, label, hasNegation);
                        break;
                    }
                }
            }
        }
    }
    return wordPairs;
}

/**
 * Adds one (adjective-lemma, noun-lemma) pair, applying the negation prefix
 * to the adjective lemma when the sentence was negated.
 */
private static void addAdjNounPair(ArrayList<String[]> wordPairs, CoreLabel adjective, CoreLabel noun,
        boolean hasNegation) {
    String adjLemma = adjective.get(LemmaAnnotation.class);
    if (hasNegation) {
        addPair(wordPairs, NOT_PREFIX + adjLemma, noun.get(LemmaAnnotation.class));
    } else {
        addPair(wordPairs, adjLemma, noun.get(LemmaAnnotation.class));
    }
}

From source file:edu.ucla.cs.scai.aztec.ir.tokenization.WordTokenizer.java

License:Apache License

/**
 * Tokenizes {@code text} into sentences of {@link WordToken}s, optionally
 * lemmatizing, lowercasing, and dropping tokens whose lemma is a stopword.
 * <p>
 * NOTE(review): a fresh StanfordCoreNLP pipeline is constructed on every
 * call, which is expensive — consider caching it in a field.
 *
 * @param text            input text
 * @param lemmatize       if true, tokens render as their lemma
 * @param removeStopWords if true, tokens with stopword lemmas are skipped
 * @param toLowerCase     if true, tokens render lowercased
 * @return the tokenized document
 */
public WordTokenizedDocument tokenize(String text, boolean lemmatize, boolean removeStopWords,
        boolean toLowerCase) {
    WordTokenizedDocument document = new WordTokenizedDocument();

    Properties annotatorProps = new Properties();
    annotatorProps.put("annotators", "tokenize, ssplit, pos, lemma, ner, regexner");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(annotatorProps);

    Annotation annotated = new Annotation(text);
    pipeline.annotate(annotated);

    for (CoreMap sentenceMap : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
        WordTokenizedSentence sentence = new WordTokenizedSentence();
        for (CoreLabel label : (ArrayList<CoreLabel>) sentenceMap.get(CoreAnnotations.TokensAnnotation.class)) {
            // Stopword filtering is done on the lemma, not the surface form.
            if (removeStopWords && stopwords.contains(label.lemma())) {
                continue;
            }
            WordToken wordToken = new WordToken(label.word(), label.lemma(), label.tag());
            if (lemmatize) {
                wordToken.useLemma();
            }
            if (toLowerCase) {
                wordToken.useLowerCase();
            }
            sentence.appendToken(wordToken);
        }
        document.appendSentence(sentence);
    }
    return document;
}

From source file:edu.ucla.cs.scai.qa.questionclassifier.SyntacticTreeNode.java

/**
 * Builds a syntactic-tree node from a CoreNLP parse tree, consuming tokens
 * from {@code tokens} in order as leaves are reached. Leaves record lemma,
 * NER tag, and character span; interior nodes recurse over children,
 * aggregate their span, and classify NP/WHNP nodes as simple/compound/QP.
 *
 * @param t      parse (sub)tree to convert
 * @param tokens remaining CoreLabels, consumed left-to-right (mutated)
 * @param parent parent node, or null for the root
 * @throws Exception if no token is available for a leaf, or the leaf's
 *                   word does not match the consumed token's word
 */
public SyntacticTreeNode(Tree t, ArrayList<CoreLabel> tokens, SyntacticTreeNode parent) throws Exception {
    this.parent = parent;
    value = t.value();
    if (t.isLeaf()) {
        CoreLabel c = tokens.remove(0);
        /* BUG FIX: the null check used to come AFTER c.beginPosition(),
         * so a null token raised NPE before the intended exception could
         * be thrown. Check first, then dereference. */
        if (c == null) {
            throw new Exception("Mapping between TreeNode and CoreLabel not found");
        }
        begin = c.beginPosition();
        end = c.endPosition();
        lemma = c.lemma();
        ner = c.ner();
        //System.out.println(value + " -> " + c.value());
        if (!value.equals(c.value())) {
            throw new Exception("Different words have been matched!");
        }
    } else {
        boolean hasNPchildren = false;
        boolean hasWHNPchildren = false;
        boolean hasQPchildren = false;
        // Span starts inverted so any child's span tightens it.
        begin = Integer.MAX_VALUE;
        end = Integer.MIN_VALUE;
        for (Tree c : t.children()) {
            SyntacticTreeNode child = new SyntacticTreeNode(c, tokens, this);
            children.add(child);
            if (child.value.equals("NP")) {
                hasNPchildren = true;
            } else if (child.value.equals("QP")) {
                hasQPchildren = true;
            } else if (child.value.equals("WHNP")) {
                hasWHNPchildren = true;
            }
            begin = Math.min(begin, child.begin);
            end = Math.max(end, child.end);
        }
        if (value.equals("NP")) {
            if (hasNPchildren) {
                npCompound = true;
            } else if (hasQPchildren) {
                npQp = true;
            } else {
                npSimple = true;
            }
        } else if (value.equals("WHNP")) { //can a WHNP node have QP children?
            if (hasNPchildren || hasWHNPchildren) {
                whnpCompound = true;
            } else if (!hasQPchildren) {
                whnpSimple = true;
            }
        }
    }
}

From source file:eu.modelwriter.semantic.stanford_corenlp.MorphologySimilarityProvider.java

License:Open Source License

/**
 * {@inheritDoc}//from   w w w  .  j a  va2s  . co m
 *
 * @see eu.modelwriter.semantic.ISemanticSimilarityProvider#getSemanticSimilarities(java.util.Map)
 */
/**
 * {@inheritDoc}
 * <p>
 * Joins all label strings into one document, lemmatizes it with the shared
 * pipeline, and groups each input label's concepts under the lemma of its
 * token. Labels that CoreNLP splits into multiple tokens may not round-trip
 * through {@code originalText()} — the concept lookup then misses.
 *
 * @see eu.modelwriter.semantic.ISemanticSimilarityProvider#getSemanticSimilarities(java.util.Map)
 */
public Map<String, Set<Object>> getSemanticSimilarities(Map<String, Set<Object>> labels) {
    final Map<String, Set<Object>> res = new LinkedHashMap<String, Set<Object>>();

    final Set<String> words = labels.keySet();
    if (!words.isEmpty()) {
        // Space-join the labels; String.join replaces the manual
        // builder + substring trailing-space trim.
        Annotation document = new Annotation(String.join(" ", words));
        PIPELINE.annotate(document);

        final List<CoreMap> sentences = document.get(SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                final String label = token.originalText();
                final String lemma = token.lemma();
                // computeIfAbsent replaces the get/null-check/put pattern;
                // an entry is created for the lemma even when no concepts map.
                final Set<Object> lemmaSet = res.computeIfAbsent(lemma,
                        k -> new LinkedHashSet<Object>());
                final Set<Object> concepts = labels.get(label);
                if (concepts != null) {
                    lemmaSet.addAll(concepts);
                }
            }
        }
    }

    return res;
}

From source file:ims.cs.corenlp.TokenAligner.java

License:Open Source License

/**
 * Splits a CoreNLP token based on a position. We split only the word form as we don't have sufficient information
 * to split the lemma./*w w  w. j av  a 2 s .  co m*/
 * @param token
 * @param absPosition
 * @return
 */
/**
 * Splits a CoreNLP token into two tokens at an absolute character position.
 * Word and original text are divided at the split point; the full lemma is
 * copied onto BOTH halves, since there is no information to split it.
 *
 * NOTE(review): assumes absPosition lies inside the token's span, i.e.
 * relPosition is in [0, length]; a negative relPosition would pass the
 * length guards below and make substring throw — confirm caller invariant.
 *
 * @param token       token to split
 * @param absPosition absolute character offset of the split point
 * @return two-element array: [left half, right half]
 */
private CoreLabel[] splitToken(CoreLabel token, int absPosition) {
    String word = token.word();
    String origText = token.originalText();

    // initialize parts — each half starts as a full copy of the token
    CoreLabel[] splitting = new CoreLabel[2];
    splitting[0] = new CoreLabel(token);
    splitting[1] = new CoreLabel(token);

    // calculate split position relative to the token's own start offset
    int relPosition = absPosition - token.beginPosition();

    // cut up original text (skipped if the split point is past the end)
    if (origText.length() >= relPosition) {
        String origText1 = origText.substring(0, relPosition);
        String origText2 = origText.substring(relPosition);

        splitting[0].setOriginalText(origText1);
        splitting[1].setOriginalText(origText2);
    }

    // cut up predicted text — word and originalText may differ in length,
    // hence the separate guard
    if (word.length() >= relPosition) {
        String word1 = word.substring(0, relPosition);
        String word2 = word.substring(relPosition);

        splitting[0].setWord(word1);
        splitting[1].setWord(word2);
    }

    // we could do the same with POS and lemma, but that would be complicated ...
    splitting[0].setEndPosition(absPosition); /* set a new end as we just shortened this token */
    splitting[1].setBeginPosition(absPosition); /* set a new position as we just shortened this token */

    // copy lemmas — both halves deliberately keep the undivided lemma
    splitting[0].setLemma(token.lemma());
    splitting[1].setLemma(token.lemma());

    return splitting;
}

From source file:ims.cs.corenlp.TokenAligner.java

License:Open Source License

/**
 * Combines my token and a CoreNlp token using predicted information
 * @param tok/*  w ww  .  j  a va2 s.  c om*/
 * @param cl
 * @param currentCoreNlpSentenceIndex
 * @return
 */
/**
 * Merges a project Token with a CoreNLP token, copying CoreNLP's predicted
 * analyses (text, lemma, POS, NER, byte span, sentence index) onto a new
 * Token built from {@code tok}.
 *
 * @param tok                         base token to copy
 * @param cl                          CoreNLP token supplying predictions
 * @param currentCoreNlpSentenceIndex sentence index of {@code cl}
 * @return the combined token
 */
public static Token combineTokensPred(Token tok, CoreLabel cl, int currentCoreNlpSentenceIndex) {
    Token merged = new Token(tok);
    merged.predText = cl.word();
    merged.predLemma = cl.lemma();
    merged.predPosTag = cl.tag();
    merged.predNer = Helper.translateNer(cl.ner());
    merged.predByteCount = new ByteCount(cl.beginPosition(), cl.endPosition());
    merged.predSentencePosition = currentCoreNlpSentenceIndex;
    merged.predPosition = -1; /* will be determined by document aligner */
    return merged;
}