Example usage for edu.stanford.nlp.process.Morphology.stemStatic

List of usage examples for edu.stanford.nlp.process.Morphology.stemStatic

Introduction

This page collects usage examples for the stemStatic method of edu.stanford.nlp.process.Morphology.

Prototype

public static synchronized WordTag stemStatic(String word, String tag) 

Source Link

Document

Return a new WordTag which has the lemma as the value of word().

Usage

From source file:elkfed.expletives.EF_LRVerb.java

License:Apache License

/**
 * Sets the left-verb and right-verb features of an expletive instance.
 *
 * Starting from the instance's index, the POS tags are scanned outwards in
 * each direction. For the first token on each side whose tag starts with
 * "VB", the string form of {@code Morphology.stemStatic(word, tag)} is stored
 * under {@code FD_LEFT_VERB} (scan to the left) or {@code FD_RIGHT_VERB}
 * (scan to the right). A side without such a token leaves its feature unset.
 *
 * @param inst the instance supplying the POS tags, the words and the index,
 *             and receiving the features
 */
public void extractFeatures(ExpletiveInstance inst) {
    final String[] tags = inst.getPOS();
    final String[] tokens = inst.getWords();
    final int pivot = inst.getIdx();

    // Walk leftwards until a verb tag is found or the start is passed.
    int left = pivot - 1;
    while (left >= 0 && !tags[left].startsWith("VB")) {
        left--;
    }
    if (left >= 0) {
        String leftLemma = Morphology.stemStatic(tokens[left], tags[left]).toString();
        inst.setFeature(FD_LEFT_VERB, leftLemma);
    }

    //TODO: look for the full verb - i.e. skip auxiliaries
    // Walk rightwards until a verb tag is found or the end is passed.
    int right = pivot + 1;
    while (right < tags.length && !tags[right].startsWith("VB")) {
        right++;
    }
    if (right < tags.length) {
        String rightLemma = Morphology.stemStatic(tokens[right], tags[right]).toString();
        inst.setFeature(FD_RIGHT_VERB, rightLemma);
    }
}

From source file:elkfed.expletives.EF_Tree.java

License:Apache License

/** constructs a marked subtree for parts which are
 * outside the path to the pronoun/*ww w. ja  v  a2 s .  co m*/
 * @param node the starting point
 * @return a marked subtree for the tree starting with node
 */
public static Tree tree_outside(Tree node) {
    LabelFactory lf = new StringLabelFactory();
    // verbs and modals are copied verbatim
    if (node.value().matches("VB[DZPNG]?")) {
        return tagged_word(Morphology.stemStatic(node.children()[0].value(), node.value()).value(), "VBX");
        //return node;
    } else if (node.value().matches("TO|MD|IN|RB")) {
        return node;
    }
    Tree result = new LabeledScoredTreeNode();
    result.setLabel(lf.newLabel(node.value()));
    if (node.value().matches("VP")) {
        List<Tree> dtrs = new ArrayList<Tree>();
        dtrs_inside(node, dtrs);
        result.setChildren(dtrs);
    } else {
        List<Tree> dtrs = null;
        result.setChildren(dtrs);
    }
    return result;
}

From source file:elkfed.mmax.pipeline.taggers.MorphoAnalyser.java

License:Apache License

/**
 * Returns the lemma of a word given its part-of-speech tag.
 *
 * The tag is upper-cased before being handed to
 * {@code Morphology.stemStatic}. The conversion uses {@code Locale.ROOT} so
 * that the result does not depend on the JVM's default locale (under a
 * Turkish default locale, for example, a lowercase "i" would otherwise be
 * upper-cased to a dotted capital I and the tag would no longer be plain ASCII).
 *
 * @param word the inflected word form
 * @param tag  the part-of-speech tag, in any letter case; must not be null
 * @return the {@code word()} of the WordTag produced by {@code Morphology.stemStatic}
 * @throws NullPointerException if {@code tag} is null
 */
public static String lemmatize(String word, String tag) {
    // Fully qualified so that no additional import is required by this snippet.
    final String normalizedTag = tag.toUpperCase(java.util.Locale.ROOT);
    return Morphology.stemStatic(word, normalizedTag).word();
}

From source file:nl.rug.eco.lucene.EnglishLemmaTokenizer.java

License:Open Source License

/**
 * Advances the stream to the next token; consumers call this repeatedly.
 *
 * The stream alternates between two kinds of tokens (form1, lemma1, form2,
 * lemma2, etc.):
 * <ul>
 *   <li>an inflected form, taken from the next tagged word that is not
 *       rejected by {@code unwantedPOS}; its position increment is the
 *       number of rejected words skipped to reach it (0 when none were
 *       skipped) and its part-of-speech attribute is set to the word's tag;</li>
 *   <li>the lemma of that same word, obtained from
 *       {@code Morphology.stemStatic}, emitted with position increment 1.</li>
 * </ul>
 *
 * @return {@code true} if a token was produced, {@code false} once the
 *         tagged input is exhausted while looking for the next inflected form
 */
@Override
public final boolean incrementToken() throws IOException {
    clearAttributes();

    if (!lemmaNext) {
        // Inflected-form turn: advance to the next word that is not filtered out.
        // Starts at -1 so the count equals the number of rejected words.
        int skipped = -1;
        do {
            if (!tagged.hasNext()) {
                return false;
            }
            currentWord = tagged.next();
            skipped++;
        } while (unwantedPOS(currentWord.tag()));

        posIncr.setPositionIncrement(skipped);
        termAtt.setEmpty();
        termAtt.append(currentWord.word());
        posAtt.setPartOfSpeech(currentWord.tag());
    } else {
        // Lemma turn: emit the lemma of the word returned on the previous call.
        posIncr.setPositionIncrement(1);
        final String pos = currentWord.tag();
        final String surface = currentWord.word();
        termAtt.setEmpty();
        termAtt.append(Morphology.stemStatic(surface, pos).word());
    }

    // Alternate between form and lemma on successive calls.
    lemmaNext = !lemmaNext;
    return true;
}

From source file:org.karsha.base.EnglishLemmaTokenizer.java

License:Open Source License

/**
 * Consumers use this method to advance the stream to the next token.
 * The token stream emits inflected forms and lemmas interleaved (form1,
 * lemma1, form2, lemma2, etc.), giving lemmas and their inflected forms
 * the same PositionAttribute./*from   ww w.j av a  2  s .c o  m*/
 */
@Override
public final boolean incrementToken() throws IOException {
    if (lemmaNext) {
        // Emit a lemma
        posIncr.setPositionIncrement(1);
        String tag = currentWord.tag();
        String form = currentWord.word();
        termAtt.setTermBuffer(Morphology.stemStatic(form, tag).word());
    } else {
        // Emit inflected form, if not filtered out.

        // 0 because the lemma will come in the same position
        int increment = 0;
        for (;;) {
            if (!tagged.hasNext())
                return false;
            currentWord = tagged.next();
            if (!unwantedPOS(currentWord.tag()))
                break;
            increment++;
        }

        posIncr.setPositionIncrement(increment);
        termAtt.setTermBuffer(currentWord.word());
    }

    lemmaNext = !lemmaNext;
    return true;
}