List of usage examples for the WordTag constructor of class edu.stanford.nlp.ling.WordTag
public WordTag(Label word, Label tag)
From source file:context.core.task.stemming.LemmaTagger.java
License:Open Source License
/** * * @param sent//from w w w. j a v a 2 s . c o m * @param language * @return */ public static List<TaggedWord> lemmatize(List<CoreLabel> sent, String language) { MaxentTagger tagger = getTagger(language); // List<HasWord> sent = Sentence.toWordList("This is a sample text"); List<TaggedWord> taggedSent = tagger.tagSentence(sent); for (TaggedWord token : taggedSent) { String word = token.word(); String pos = token.tag(); String lemma = morphology.lemmatize(new WordTag(word, pos)).lemma(); token.setTag(lemma); } // final List<WordLemmaTag> tagged = (List<WordLemmaTag>) tagger.tagCoreLabelsOrHasWords(sent, morphology, true); // for (TaggedWord tw : taggedSent) { // System.out.println(tw.word() + "\t" + tw.tag()); // } return taggedSent; }
From source file:de.tudarmstadt.lt.sentiment.StanfordLemmatizer.java
License:Open Source License
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { for (Token t : select(aJCas, Token.class)) { //This corresponds roughly to what is happening in MorphaAnnotator. String token = t.getCoveredText(); String lemma;//from w ww .ja va2 s . c o m if (t.getPos() != null) { lemma = morphology.lemmatize(new WordTag(token, t.getPos().getPosValue())).lemma(); } else { lemma = morphology.stem(token); } if (lemma == null) { lemma = token; } Lemma l = new Lemma(aJCas, t.getBegin(), t.getEnd()); l.setValue(lemma); l.addToIndexes(); t.setLemma(l); } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordLemmatizer.java
License:Open Source License
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { if (!"en".equals(aJCas.getDocumentLanguage())) { throw new AnalysisEngineProcessException(Messages.BUNDLE, Messages.ERR_UNSUPPORTED_LANGUAGE, new String[] { aJCas.getDocumentLanguage() }); }//from w w w . j a v a2 s .c om for (Token t : select(aJCas, Token.class)) { // Only verbs are lemmatized, the other words are simply stemmed. This corresponds // roughly to what is happening in MorphaAnnotator. String token = t.getCoveredText(); String lemma; if (t.getPos() instanceof V) { lemma = morphology.lemmatize(new WordTag(token, t.getPos().getPosValue())).lemma(); } else { lemma = morphology.stem(token); } if (lemma == null) { lemma = token; } Lemma l = new Lemma(aJCas, t.getBegin(), t.getEnd()); l.setValue(lemma); l.addToIndexes(); t.setLemma(l); } }