Example usage for the edu.stanford.nlp.ling.WordTag constructor

List of usage examples for the edu.stanford.nlp.ling.WordTag constructor

Introduction

On this page you can find example usages of the edu.stanford.nlp.ling.WordTag constructor.

Prototype

public WordTag(Label word, Label tag) 

Source Link

Document

Creates a new WordTag from a word Label and a tag Label.

Usage

From source file:context.core.task.stemming.LemmaTagger.java

License:Open Source License

/**
 * Tags a sentence and then replaces every token's tag with its lemma.
 *
 * <p>The sentence is tagged with the tagger obtained from
 * {@code getTagger(language)}. For each resulting token, the (word, tag) pair
 * is handed to {@code morphology.lemmatize} and the lemma it yields is written
 * back into the token's tag slot. The returned list is the very list produced
 * by the tagger, so after this call {@code tag()} on each element returns the
 * lemma rather than the tag assigned by the tagger.
 *
 * @param sent the sentence to tag, as a list of {@code CoreLabel}s
 * @param language value passed to {@code getTagger} to select the tagger
 *                 (presumably a language identifier; confirm against getTagger)
 * @return the tagged sentence with each token's tag overwritten by its lemma
 */
public static List<TaggedWord> lemmatize(List<CoreLabel> sent, String language) {
    final List<TaggedWord> taggedTokens = getTagger(language).tagSentence(sent);
    for (TaggedWord current : taggedTokens) {
        // The tag assigned by the tagger is needed as input for lemmatization before it is overwritten.
        WordTag wordWithTag = new WordTag(current.word(), current.tag());
        current.setTag(morphology.lemmatize(wordWithTag).lemma());
    }
    return taggedTokens;
}

From source file:de.tudarmstadt.lt.sentiment.StanfordLemmatizer.java

License:Open Source License

/**
 * Attaches a {@code Lemma} annotation to every {@code Token} in the CAS.
 *
 * <p>Tokens that carry a POS annotation are lemmatized from their covered text
 * and POS value; tokens without one are stemmed from their covered text alone.
 * If neither yields a result, the covered text itself is used as the lemma.
 *
 * @param aJCas the CAS whose tokens are processed and to which lemmas are added
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    for (Token tok : select(aJCas, Token.class)) {
        // This corresponds roughly to what is happening in MorphaAnnotator.
        String surface = tok.getCoveredText();
        String lemmaText = (tok.getPos() != null)
                ? morphology.lemmatize(new WordTag(surface, tok.getPos().getPosValue())).lemma()
                : morphology.stem(surface);
        if (lemmaText == null) {
            // Fall back to the surface form when no lemma/stem was produced.
            lemmaText = surface;
        }

        Lemma lemmaAnnotation = new Lemma(aJCas, tok.getBegin(), tok.getEnd());
        lemmaAnnotation.setValue(lemmaText);
        lemmaAnnotation.addToIndexes();
        tok.setLemma(lemmaAnnotation);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordLemmatizer.java

License:Open Source License

/**
 * Attaches a {@code Lemma} annotation to every {@code Token} in the CAS.
 *
 * <p>Only documents whose language is {@code "en"} are accepted. Tokens whose
 * POS annotation is a {@code V} are lemmatized from their covered text and POS
 * value; all other tokens are stemmed. If neither yields a result, the covered
 * text itself is used as the lemma.
 *
 * @param aJCas the CAS whose tokens are processed and to which lemmas are added
 * @throws AnalysisEngineProcessException if the document language is not {@code "en"}
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    final String documentLanguage = aJCas.getDocumentLanguage();
    if (!"en".equals(documentLanguage)) {
        throw new AnalysisEngineProcessException(Messages.BUNDLE, Messages.ERR_UNSUPPORTED_LANGUAGE,
                new String[] { documentLanguage });
    }

    for (Token tok : select(aJCas, Token.class)) {
        // Only verbs are lemmatized, the other words are simply stemmed. This corresponds
        // roughly to what is happening in MorphaAnnotator.
        String surface = tok.getCoveredText();
        String lemmaText;
        if (!(tok.getPos() instanceof V)) {
            lemmaText = morphology.stem(surface);
        } else {
            lemmaText = morphology.lemmatize(new WordTag(surface, tok.getPos().getPosValue())).lemma();
        }
        if (lemmaText == null) {
            // Fall back to the surface form when no lemma/stem was produced.
            lemmaText = surface;
        }

        Lemma lemmaAnnotation = new Lemma(aJCas, tok.getBegin(), tok.getEnd());
        lemmaAnnotation.setValue(lemmaText);
        lemmaAnnotation.addToIndexes();
        tok.setLemma(lemmaAnnotation);
    }
}