List of usage examples for edu.stanford.nlp.process Morphology stemStatic
public static synchronized WordTag stemStatic(String word, String tag)
From source file:elkfed.expletives.EF_LRVerb.java
License:Apache License
public void extractFeatures(ExpletiveInstance inst) { String[] pos = inst.getPOS(); String[] words = inst.getWords(); int idx = inst.getIdx(); for (int i = idx - 1; i >= 0; i--) { if (pos[i].startsWith("VB")) { inst.setFeature(FD_LEFT_VERB, Morphology.stemStatic(words[i], pos[i]).toString()); break; }/*from ww w . ja va 2 s . co m*/ } //TODO: look for the full verb - i.e. skip auxiliaries for (int i = idx + 1; i < pos.length; i++) { if (pos[i].startsWith("VB")) { inst.setFeature(FD_RIGHT_VERB, Morphology.stemStatic(words[i], pos[i]).toString()); break; } } }
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
/** constructs a marked subtree for parts which are * outside the path to the pronoun/*ww w. ja v a2 s . co m*/ * @param node the starting point * @return a marked subtree for the tree starting with node */ public static Tree tree_outside(Tree node) { LabelFactory lf = new StringLabelFactory(); // verbs and modals are copied verbatim if (node.value().matches("VB[DZPNG]?")) { return tagged_word(Morphology.stemStatic(node.children()[0].value(), node.value()).value(), "VBX"); //return node; } else if (node.value().matches("TO|MD|IN|RB")) { return node; } Tree result = new LabeledScoredTreeNode(); result.setLabel(lf.newLabel(node.value())); if (node.value().matches("VP")) { List<Tree> dtrs = new ArrayList<Tree>(); dtrs_inside(node, dtrs); result.setChildren(dtrs); } else { List<Tree> dtrs = null; result.setChildren(dtrs); } return result; }
From source file:elkfed.mmax.pipeline.taggers.MorphoAnalyser.java
License:Apache License
/**
 * Returns the lemma of {@code word} for the given part-of-speech tag.
 *
 * <p>The tag is upper-cased before it is handed to {@code Morphology.stemStatic}. The
 * conversion uses {@link java.util.Locale#ROOT} so that the result does not depend on the
 * JVM default locale; with a Turkish default locale a plain {@code toUpperCase()} would turn
 * the tag {@code "in"} into a string with a dotted capital I instead of {@code "IN"}.
 *
 * @param word the inflected word form
 * @param tag the part-of-speech tag in any letter case; must not be {@code null}
 * @return the {@code word()} component of the value returned by {@code Morphology.stemStatic}
 * @throws NullPointerException if {@code tag} is {@code null}
 */
public static String lemmatize(String word, String tag) {
    // Fully qualified so that no additional import is required in the enclosing file.
    final String normalizedTag = tag.toUpperCase(java.util.Locale.ROOT);
    return Morphology.stemStatic(word, normalizedTag).word();
}
From source file:nl.rug.eco.lucene.EnglishLemmaTokenizer.java
License:Open Source License
/** * Consumers use this method to advance the stream to the next token. * The token stream emits inflected forms and lemmas interleaved (form1, * lemma1, form2, lemma2, etc.), giving lemmas and their inflected forms * the same PositionAttribute.// ww w . ja va 2 s . c o m */ @Override public final boolean incrementToken() throws IOException { clearAttributes(); if (lemmaNext) { // Emit a lemma posIncr.setPositionIncrement(1); String tag = currentWord.tag(); String form = currentWord.word(); termAtt.setEmpty(); termAtt.append(Morphology.stemStatic(form, tag).word()); } else { // Emit inflected form, if not filtered out. // 0 because the lemma will come in the same position int increment = 0; for (;;) { if (!tagged.hasNext()) return false; currentWord = tagged.next(); if (!unwantedPOS(currentWord.tag())) break; increment++; } posIncr.setPositionIncrement(increment); termAtt.setEmpty(); termAtt.append(currentWord.word()); posAtt.setPartOfSpeech(currentWord.tag()); } lemmaNext = !lemmaNext; return true; }
From source file:org.karsha.base.EnglishLemmaTokenizer.java
License:Open Source License
/** * Consumers use this method to advance the stream to the next token. * The token stream emits inflected forms and lemmas interleaved (form1, * lemma1, form2, lemma2, etc.), giving lemmas and their inflected forms * the same PositionAttribute./*from ww w.j av a 2 s .c o m*/ */ @Override public final boolean incrementToken() throws IOException { if (lemmaNext) { // Emit a lemma posIncr.setPositionIncrement(1); String tag = currentWord.tag(); String form = currentWord.word(); termAtt.setTermBuffer(Morphology.stemStatic(form, tag).word()); } else { // Emit inflected form, if not filtered out. // 0 because the lemma will come in the same position int increment = 0; for (;;) { if (!tagged.hasNext()) return false; currentWord = tagged.next(); if (!unwantedPOS(currentWord.tag())) break; increment++; } posIncr.setPositionIncrement(increment); termAtt.setTermBuffer(currentWord.word()); } lemmaNext = !lemmaNext; return true; }