Example usage for edu.stanford.nlp.process Morphology Morphology

List of usage examples for edu.stanford.nlp.process Morphology Morphology

Introduction

On this page you can find example usages of the no-argument constructor Morphology() of the class edu.stanford.nlp.process.Morphology.

Prototype

public Morphology() 

Source Link

Usage

From source file:com.github.frapontillo.pulse.crowd.lemmatize.corenlp.CoreNLPLemmatizer.java

License:Apache License

/**
 * Returns the CoreNLP {@link Morphology} held in the {@code morphology} field,
 * creating and storing a new one first when the field is still {@code null}.
 *
 * @return The CoreNLP {@link Morphology} instance.
 */
private Morphology getMorphology() {
    if (morphology != null) {
        return morphology;
    }
    // First request: build the instance and remember it for later calls.
    morphology = new Morphology();
    return morphology;
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * Creates an identifier bound to a single parse tree, from which the classes
 * and attributes are to be extracted.
 * <p>
 * Besides storing the tree, this sets up the design-element holder and its
 * list, the class dictionary and its list, and a fresh {@link Morphology}.
 *
 * @param tree the parse tree to work on; stored as given
 */
PhrasesIdentification(Tree tree) {
    this.sTree = tree;

    // Design elements and the list obtained from them.
    this.designElement = new DesignElementClass();
    this.designEleList = this.designElement.getDesignElementsList();

    // Class dictionary and the list obtained from it.
    this.dictionaryForClass = new DictionaryForClass();
    this.dictionaryForClassList = this.dictionaryForClass.getDictionaryForClass();

    this.morphology = new Morphology();
}

From source file:de.tudarmstadt.lt.sentiment.StanfordLemmatizer.java

License:Open Source License

/**
 * Delegates to the superclass initialization and then creates the
 * {@link Morphology} instance stored in the {@code morphology} field.
 *
 * @param aContext the UIMA context, passed through to the superclass
 * @throws ResourceInitializationException propagated from the superclass initialization
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);
    this.morphology = new Morphology();
}

From source file:edu.iastate.airl.semtus.parser.Parser.java

License:Open Source License

/**
 * Get morphology base/*from   w  w  w.j a  v a2  s.  c  o m*/
 *
 * @param thisWord
 *            word
 * @return morphology base
 */
static public String morphology(Word thisWord) {
    try {
        Morphology thisMorphology = new Morphology();
        Word thisBase = thisMorphology.stem(thisWord);
        return thisBase.word();
    } catch (Throwable e) {
        return null;
    }
}

From source file:edu.illinois.cs.cogcomp.nlp.lemmatizer.MASCevaluation.StanfordLemmatizerInterface.java

License:Open Source License

/**
 * Implements the initLemmatizer method: creates the {@link Morphology}
 * instance and fills the {@code MASChack} lookup table.
 * <p>
 * The table maps clitic tokens, with and without a leading apostrophe, to a
 * replacement word, and maps a fixed set of words to themselves.
 */
public void initLemmatizer() {
    morph = new Morphology();
    MASChack = new HashMap<String, String>();

    // token -> replacement
    final String[][] replacements = {
        { "d", "have" }, { "ll", "will" }, { "s", "be" },
        { "re", "be" }, { "m", "be" }, { "ve", "have" },
        { "'d", "have" }, { "'ll", "will" }, { "'s", "be" },
        { "'re", "be" }, { "'m", "be" }, { "'ve", "have" },
    };
    for (String[] pair : replacements) {
        MASChack.put(pair[0], pair[1]);
    }

    // tokens that map to themselves
    final String[] keptAsIs = {
        "her", "him", "his", "their", "them", "your", "us", "me", "an", "n't", "our",
    };
    for (String token : keptAsIs) {
        MASChack.put(token, token);
    }
}

From source file:final_dissertation.Feature_Parsing.java

/**
 * Walks the elements returned by {@code elem.getElementsByTagName(tg)} and sorts their
 * (trimmed, lower-cased) text into the noun, adjective, conjunction, verb and negation
 * arrays of {@code sent}, based on each element's {@code "pos"} attribute.
 * <p>
 * Positions come from each element's {@code "wid"} attribute minus {@code global_offset}
 * (which stays 0 in the active code). Plural nouns, verbs and all words that fall into the
 * final branch are stemmed with {@link Morphology}; adjectives and conjunctions are stored
 * unstemmed.
 * <p>
 * NOTE(review): no bounds checks are made on the {@code sent} arrays, and an element
 * without a first child would cause a NullPointerException — confirm the input guarantees.
 *
 * @param tg       tag name of the word elements to look up under {@code elem}
 * @param elem     element whose matching descendants are processed
 * @param sent     receives the extracted words and positions; {@code reviewID} is set to
 *                 the external counter {@code num} whenever a noun is stored
 * @param noun     occurrence table keyed by noun text; created or incremented for every
 *                 stored noun
 * @param negation only its keys are consulted, to decide whether a stemmed word counts as
 *                 a negation
 * @param ttt      not used in this method body
 */
public static void getTag(String tg, Element elem, Sentences sent, HashMap<String, Noun_Word> noun,
        HashMap<String, String> negation, int ttt) {
    // Only incremented inside the disabled block below, so it is always 0 here.
    int global_offset = 0;
    NodeList nl = elem.getElementsByTagName(tg);
    // nn starts at -1 and is pre-incremented, so the first noun lands at index 0.
    // n is never used; flag and temp are only written by the active code (they are
    // read solely inside the disabled block).
    int nn = -1, n, aj = 0, ng = 0, cc = 0, vb = 0, tp, flag = 1, temp = -1;
    int len = nl.getLength();
    Node wrd;
    String temp_noun = "", prev = "";
    Morphology mr = new Morphology();

    for (int i = 0; i < len; i++) {
        wrd = nl.item(i);
        Element el = (Element) wrd;
        String vl = el.getAttribute("pos");
        String s = el.getFirstChild().getNodeValue();
        s = s.trim().toLowerCase();
        // s1: stemmed, lower-cased text of the following element ("" for the last one).
        String s1 = "";
        if ((i + 1) < len) {
            Node wrd1 = nl.item(i + 1);
            Element el1 = (Element) wrd1;
            s1 = el1.getFirstChild().getNodeValue().toLowerCase();
            s1 = mr.stem(s1).trim().toLowerCase();
        }

        // Nouns.
        if (vl.equals("NN") || vl.equals("NNS") || vl.equals("NNP") || vl.equals("NNPS")) {
            // s cannot be null here: trim().toLowerCase() above would already have thrown.
            if (s != null) {
                if (s.equals("t")) {
                    // A noun-tagged "t" only advances the external counter num.
                    num++;
                } else if (!(s.equals("##") || s.equals("p"))) {
                    int ln = s.length();
                    if (ln == 1 || ln == 2) {
                        // One- and two-character nouns are dropped; note that this
                        // continue also skips the "prev = s" update at the loop end.
                        continue;
                    }
                    nn++;
                    tp = Integer.parseInt(el.getAttribute("wid"));
                    // Only plural tags are stemmed.
                    if (vl.equals("NNS") || vl.equals("NNPS")) {
                        s = mr.stem(s);
                    }
                    if (s.equals("pic")) {
                        s = "picture";
                    }
                    temp_noun = s;
                    // Disabled logic that merged adjacent nouns into one phrase:
                    /*
                       if (temp != -1) 
                       {
                    if (temp + 1 == tp) 
                    {
                        nn--;
                        if (flag == 1) 
                        {
                            flag = 0;
                            Noun_Word w1 = noun.get(sent.nouns[nn]);
                            if (w1.cnt == 1) 
                            {
                                noun.remove(sent.nouns[nn]);
                            } 
                            else 
                            {
                                w1.cnt--;
                                noun.put(sent.nouns[nn], w1);
                            }
                        }
                        global_offset++;        //It is for Noun Phrases and to correct their position.
                        sent.nouns[nn] = sent.nouns[nn] + " " + temp_noun;
                        temp = tp;
                        continue;
                    }
                       }
                    */
                    sent.nouns[nn] = temp_noun;
                    sent.posNoun[nn] = tp - global_offset;
                    sent.reviewID = num;
                    temp = tp;
                    // Create or bump the occurrence entry for this noun.
                    Noun_Word word = noun.get(sent.nouns[nn]);
                    if (word == null) {
                        word = new Noun_Word(sent.nouns[nn], 1);
                    } else {
                        word.increment();
                    }
                    noun.put(sent.nouns[nn], word);
                }
            }
        // Adjectives: stored unstemmed, position kept as a string.
        } else if (vl.equals("JJ") || vl.equals("JJS") || vl.equals("JJR")) {
            flag = 1;
            if (s != null) {
                if (!(s.equals("##") || s.equals("<") || s.equals("="))) {
                    //s = mr.stem(s);
                    int tp1 = Integer.parseInt(el.getAttribute("wid"));
                    sent.adjective[aj] = s;
                    sent.posAdjective[aj] = Integer.toString(tp1 - global_offset);
                    //System.out.println("Adjective: "+sent.adjective[aj]+" Pos"+sent.posAdjective[nn]);
                    aj++;
                }
            }
        // Conjunctions: symbols are skipped, as is "but" when followed by "also".
        } else if (vl.equals("CC")) {
            flag = 1;
            if (s != null) {
                if (!(s.equals("x") || s.equals("+") || s.equals("&")
                        || (s.equals("but") && s1.equals("also")))) {
                    int tp1 = Integer.parseInt(el.getAttribute("wid"));
                    sent.conjunction[cc] = s;
                    sent.posConjunction[cc] = Integer.toString(tp1 - global_offset);
                    //System.out.println("Conjunction: "+sent.conjunction[cc]+" Pos "+sent.posConjunction[cc]);
                    cc++;
                }
            }
        // Verbs: always stemmed.
        } else if (vl.equals("VB") || vl.equals("VBD") || vl.equals("VBG") || vl.equals("VBN")
                || vl.equals("VBP") || vl.equals("VBZ")) {
            flag = 1;
            if (s != null) {
                s = mr.stem(s);
                int tp1 = Integer.parseInt(el.getAttribute("wid"));
                sent.verbs[vb] = s;
                sent.posVerb[vb] = Integer.toString(tp1 - global_offset);
                //System.out.println("verbs: "+sent.verbs[vb]+" Pos "+sent.posVerb[vb]);
                vb++;
            }
        // Any other tag: the stemmed word is checked against the negation keys.
        } else if (s != null) {
            flag = 1;
            s = mr.stem(s);
            if (negation.containsKey(s)) {

                // "n't" directly after "do"/"does". Both branches below record the
                // negation in exactly the same way.
                if ((prev.equals("do") || prev.equals("does")) && s.equals("n't")) {
                    int tp1 = Integer.parseInt(el.getAttribute("wid"));
                    sent.negation[ng] = s;
                    sent.posNegation[ng] = tp1 - global_offset;
                    //System.out.println("Negation Words :"+prev+s);
                    ng++;

                } else if (!((s.equals("not") && (s1.equals("only") || s1.equals("just")))
                        || (s.equals("no") && (s1.equals("wonder") || s1.equals("problem"))))) // Excluding "not only", "not just", "no wonder" and "no problem".
                {
                    int tp1 = Integer.parseInt(el.getAttribute("wid"));
                    sent.negation[ng] = s;
                    sent.posNegation[ng] = tp1 - global_offset;
                    //System.out.println("Negation Words ignore : "+prev+s+s1);
                    ng++;
                }
            }
        }
        // Remember this word (possibly stemmed by the branch above) for the next iteration.
        prev = s;
    } //End for loop

}

From source file:ie.pars.bnc.preprocess.MainBNCProcess.java

License:Open Source License

/**
 * Reads the tar archive at {@code pathInput} and, for every non-directory entry whose id
 * has not been processed yet and starts with the upper-cased {@code letter}, runs
 * {@code ProcessNLP.parseBNCXML} on the entry and writes the result to
 * {@code pathOutput/<id>.vert} (UTF-8), wrapped in a {@code <text id="...">} element.
 * <p>
 * The id is the last path segment of the entry name up to the first {@code ".xml"}.
 * The tagger, morphology and parser are (re)created before the first processed file and
 * again after every ten processed files.
 * <p>
 * All streams are closed even when processing fails. An entry is read completely
 * before it is parsed; a single {@code read} call is not guaranteed to fill the buffer.
 *
 * @throws IOException      if the archive or an output file cannot be read or written, or
 *                          an entry ends before its declared size
 * @throws ArchiveException if the tar stream cannot be created
 * @throws Exception        anything propagated from model loading or parsing
 */
private static void getZippedFile() throws IOException, ArchiveException, Exception {
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    String parseModel = LexicalizedParser.DEFAULT_PARSER_LOC;

    int countfiles = 0;
    try (InputStream is = new FileInputStream(pathInput);
            TarArchiveInputStream tarStream = (TarArchiveInputStream) new ArchiveStreamFactory()
                    .createArchiveInputStream("tar", is)) {
        TarArchiveEntry entry;
        while ((entry = (TarArchiveEntry) tarStream.getNextEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }

            String entryName = entry.getName();
            String[] pathParts = entryName.split("/");
            // The dot is escaped so that only a literal ".xml" terminates the id.
            String id = pathParts[pathParts.length - 1].split("\\.xml")[0];

            if (filesProcesed.contains(id) || !id.startsWith(letter.toUpperCase())) {
                // Unread entry data is skipped by the next getNextEntry() call.
                System.out.println(">> Bypass Entry " + entryName);
                continue;
            }

            // Read the whole entry; read() may deliver fewer bytes than requested.
            byte[] content = new byte[(int) entry.getSize()];
            int offset = 0;
            while (offset < content.length) {
                int read = tarStream.read(content, offset, content.length - offset);
                if (read < 0) {
                    throw new IOException("Unexpected end of tar entry " + entryName + " after " + offset
                            + " of " + content.length + " bytes");
                }
                offset += read;
            }

            // Recreate the NLP components before the first and after every tenth processed file.
            if (countfiles++ % 10 == 0) {
                tagger = new MaxentTagger(taggerPath);
                m = new Morphology();
                parser = ParserGrammar.loadModel(parseModel);
                parser.loadTagger();
            }
            System.out.print("Entry " + entryName);

            StringBuilder parseBNCXML;
            try (InputStream bis = new ByteArrayInputStream(content)) {
                parseBNCXML = ProcessNLP.parseBNCXML(bis, m, tagger, parser);
            }

            try (OutputStream out = new FileOutputStream(pathOutput + File.separatorChar + id + ".vert");
                    Writer writer = new OutputStreamWriter(out, "UTF-8")) {
                writer.write("<text id=\"" + id + "\">\n");
                writer.write(parseBNCXML.toString());
                writer.write("</text>\n");
            }
        }
    }
    System.out.println("There are " + countfiles);
}

From source file:info.atmykitchen.basic_annotation_convert.ConvertToBIO.java

License:Open Source License

/**
 * Creates the {@code MaxentTagger} from the English left3words distsim model path and a
 * fresh {@link Morphology}, storing them in {@code tagger} and {@code m}.
 */
private static void init() {
    final String modelLocation = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

    tagger = new MaxentTagger(modelLocation);
    m = new Morphology();
}

From source file:org.knime.ext.textprocessing.nodes.preprocessing.stanfordlemmatizer.StanfordLemmatizer.java

License:Open Source License

/**
 * {@inheritDoc}/* w w  w . j  a  v  a 2  s  . co  m*/
 */
@Override
public Term preprocessTerm(final Term term) {

    Morphology morph = new Morphology();
    final List<Tag> tags = term.getTags();
    String tag = "";
    // if term doesn't have any tags
    if (tags.isEmpty()) {
        // either skip or throw an exception
        if (!m_skipTerms) {
            m_warnMessage.set("Warning: Some terms have no POS tags.");
            return term;
        } else {
            throw new RuntimeException("Some terms have no POS tags.");
        }

    }
    // take the first POS tag found that is not UNKNOWN
    for (Tag elem : tags) {
        if (elem.getTagType().equals("POS") && !elem.getTagValue().equals("UNKNOWN")) {
            tag = elem.getTagValue();
            break;
        }
    }
    // also skip if no POS tag is found
    if (tag.isEmpty()) {
        return term;
    }

    final List<Word> words = term.getWords();
    final List<Word> newWords = new ArrayList<Word>();
    for (final Word w : words) {
        newWords.add(new Word(morph.lemma(w.getWord(), tag), w.getWhitespaceSuffix()));
    }
    return new Term(newWords, term.getTags(), term.isUnmodifiable());
}