List of usage examples for the edu.stanford.nlp.process.Morphology constructor Morphology()
public Morphology()
From source file:com.github.frapontillo.pulse.crowd.lemmatize.corenlp.CoreNLPLemmatizer.java
License:Apache License
/**
 * Returns the cached CoreNLP {@link Morphology}, building it on the first call.
 *
 * @return the CoreNLP {@link Morphology} instance stored in the {@code morphology} field
 */
private Morphology getMorphology() {
    if (morphology != null) {
        return morphology;
    }
    morphology = new Morphology();
    return morphology;
}
From source file:com.project.NLP.Requirement.PhrasesIdentification.java
/**
 * Prepares the identification of classes and attributes from a single parse tree.
 *
 * @param tree the tree to parse
 */
PhrasesIdentification(Tree tree) {
    this.sTree = tree;

    // Design elements and the list obtained from them.
    this.designElement = new DesignElementClass();
    this.designEleList = this.designElement.getDesignElementsList();

    // Class dictionary and the list obtained from it.
    this.dictionaryForClass = new DictionaryForClass();
    this.dictionaryForClassList = this.dictionaryForClass.getDictionaryForClass();

    this.morphology = new Morphology();
}
From source file:de.tudarmstadt.lt.sentiment.StanfordLemmatizer.java
License:Open Source License
/**
 * Runs the superclass initialization and then stores a fresh {@link Morphology}
 * in the {@code morphology} field.
 *
 * @param aContext the UIMA context, forwarded unchanged to the superclass
 * @throws ResourceInitializationException if thrown by the superclass initialization
 */
@Override
public void initialize(UimaContext aContext)
        throws ResourceInitializationException {
    super.initialize(aContext);
    morphology = new Morphology();
}
From source file:edu.iastate.airl.semtus.parser.Parser.java
License:Open Source License
/** * Get morphology base/*from w w w.j a v a2 s. c o m*/ * * @param thisWord * word * @return morphology base */ static public String morphology(Word thisWord) { try { Morphology thisMorphology = new Morphology(); Word thisBase = thisMorphology.stem(thisWord); return thisBase.word(); } catch (Throwable e) { return null; } }
From source file:edu.illinois.cs.cogcomp.nlp.lemmatizer.MASCevaluation.StanfordLemmatizerInterface.java
License:Open Source License
/**
 * Implements the initLemmatizer method: creates the {@link Morphology} instance
 * and fills the {@code MASChack} lookup table with its fixed token-to-replacement pairs.
 */
public void initLemmatizer() {
    morph = new Morphology();

    // Each row is {token, replacement}; all rows are loaded into MASChack below.
    final String[][] overrides = {
        { "d", "have" },
        { "ll", "will" },
        { "s", "be" },
        { "re", "be" },
        { "m", "be" },
        { "ve", "have" },
        { "'d", "have" },
        { "'ll", "will" },
        { "'s", "be" },
        { "'re", "be" },
        { "'m", "be" },
        { "'ve", "have" },
        { "her", "her" },
        { "him", "him" },
        { "his", "his" },
        { "their", "their" },
        { "them", "them" },
        { "your", "your" },
        { "us", "us" },
        { "me", "me" },
        { "an", "an" },
        { "n't", "n't" },
        { "our", "our" },
    };

    MASChack = new HashMap<String, String>();
    for (String[] pair : overrides) {
        MASChack.put(pair[0], pair[1]);
    }
}
From source file:final_dissertation.Feature_Parsing.java
public static void getTag(String tg, Element elem, Sentences sent, HashMap<String, Noun_Word> noun, HashMap<String, String> negation, int ttt) { int global_offset = 0; NodeList nl = elem.getElementsByTagName(tg); int nn = -1, n, aj = 0, ng = 0, cc = 0, vb = 0, tp, flag = 1, temp = -1; int len = nl.getLength(); Node wrd;/*from w ww.jav a 2 s .c o m*/ String temp_noun = "", prev = ""; Morphology mr = new Morphology(); for (int i = 0; i < len; i++) { wrd = nl.item(i); Element el = (Element) wrd; String vl = el.getAttribute("pos"); String s = el.getFirstChild().getNodeValue(); s = s.trim().toLowerCase(); String s1 = ""; if ((i + 1) < len) { Node wrd1 = nl.item(i + 1); Element el1 = (Element) wrd1; s1 = el1.getFirstChild().getNodeValue().toLowerCase(); s1 = mr.stem(s1).trim().toLowerCase(); } if (vl.equals("NN") || vl.equals("NNS") || vl.equals("NNP") || vl.equals("NNPS")) { if (s != null) { if (s.equals("t")) { num++; } else if (!(s.equals("##") || s.equals("p"))) { int ln = s.length(); if (ln == 1 || ln == 2) { continue; } nn++; tp = Integer.parseInt(el.getAttribute("wid")); if (vl.equals("NNS") || vl.equals("NNPS")) { s = mr.stem(s); } if (s.equals("pic")) { s = "picture"; } temp_noun = s; /* if (temp != -1) { if (temp + 1 == tp) { nn--; if (flag == 1) { flag = 0; Noun_Word w1 = noun.get(sent.nouns[nn]); if (w1.cnt == 1) { noun.remove(sent.nouns[nn]); } else { w1.cnt--; noun.put(sent.nouns[nn], w1); } } global_offset++; //It is for Noun Phrases and to correct their position. 
sent.nouns[nn] = sent.nouns[nn] + " " + temp_noun; temp = tp; continue; } } */ sent.nouns[nn] = temp_noun; sent.posNoun[nn] = tp - global_offset; sent.reviewID = num; temp = tp; Noun_Word word = noun.get(sent.nouns[nn]); if (word == null) { word = new Noun_Word(sent.nouns[nn], 1); } else { word.increment(); } noun.put(sent.nouns[nn], word); } } } else if (vl.equals("JJ") || vl.equals("JJS") || vl.equals("JJR")) { flag = 1; if (s != null) { if (!(s.equals("##") || s.equals("<") || s.equals("="))) { //s = mr.stem(s); int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.adjective[aj] = s; sent.posAdjective[aj] = Integer.toString(tp1 - global_offset); //System.out.println("Adjective: "+sent.adjective[aj]+" Pos"+sent.posAdjective[nn]); aj++; } } } else if (vl.equals("CC")) { flag = 1; if (s != null) { if (!(s.equals("x") || s.equals("+") || s.equals("&") || (s.equals("but") && s1.equals("also")))) { int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.conjunction[cc] = s; sent.posConjunction[cc] = Integer.toString(tp1 - global_offset); //System.out.println("Conjunction: "+sent.conjunction[cc]+" Pos "+sent.posConjunction[cc]); cc++; } } } else if (vl.equals("VB") || vl.equals("VBD") || vl.equals("VBG") || vl.equals("VBN") || vl.equals("VBP") || vl.equals("VBZ")) { flag = 1; if (s != null) { s = mr.stem(s); int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.verbs[vb] = s; sent.posVerb[vb] = Integer.toString(tp1 - global_offset); //System.out.println("verbs: "+sent.verbs[vb]+" Pos "+sent.posVerb[vb]); vb++; } } else if (s != null) { flag = 1; s = mr.stem(s); if (negation.containsKey(s)) { if ((prev.equals("do") || prev.equals("does")) && s.equals("n't")) { int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.negation[ng] = s; sent.posNegation[ng] = tp1 - global_offset; //System.out.println("Negation Words :"+prev+s); ng++; } else if (!((s.equals("not") && (s1.equals("only") || s1.equals("just"))) || (s.equals("no") && (s1.equals("wonder") || 
s1.equals("problem"))))) //Excludind not only?, "no problem" ,not just? and no wonder?. { int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.negation[ng] = s; sent.posNegation[ng] = tp1 - global_offset; //System.out.println("Negation Words ignore : "+prev+s+s1); ng++; } } } prev = s; } //End for loop }
From source file:ie.pars.bnc.preprocess.MainBNCProcess.java
License:Open Source License
private static void getZippedFile() throws IOException, ArchiveException, Exception { String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; String parseModel = LexicalizedParser.DEFAULT_PARSER_LOC; InputStream is = new FileInputStream(pathInput); TarArchiveInputStream tarStream = (TarArchiveInputStream) new ArchiveStreamFactory() .createArchiveInputStream("tar", is); TarArchiveEntry entry = null;//from ww w . j a v a 2s . c o m int countfiles = 0; while ((entry = (TarArchiveEntry) tarStream.getNextEntry()) != null) { // for(File lf: listFiles){ if (!entry.isDirectory()) { byte[] content = new byte[(int) entry.getSize()]; int offset = 0; tarStream.read(content, offset, content.length - offset); String id = entry.getName().split("/")[entry.getName().split("/").length - 1].split(".xml")[0]; if (!filesProcesed.contains(id) && id.startsWith(letter.toUpperCase())) { if (countfiles++ % 10 == 0) { tagger = new MaxentTagger(taggerPath); m = new Morphology(); parser = ParserGrammar.loadModel(parseModel); parser.loadTagger(); } System.out.print("Entry " + entry.getName()); InputStream bis = new ByteArrayInputStream(content); StringBuilder parseBNCXML = ProcessNLP.parseBNCXML(bis, m, tagger, parser); bis.close(); OutputStream out = new FileOutputStream(pathOutput + File.separatorChar + id + ".vert"); Writer writer = new OutputStreamWriter(out, "UTF-8"); writer.write("<text id=\"" + id + "\">\n"); writer.write(parseBNCXML.toString()); writer.write("</text>\n"); writer.close(); out.close(); } else { System.out.println(">> Bypass Entry " + entry.getName()); } //break; } } is.close(); System.out.println("There are " + countfiles); // tarStream.close(); }
From source file:info.atmykitchen.basic_annotation_convert.ConvertToBIO.java
License:Open Source License
/**
 * Creates the part-of-speech tagger from the bundled English left3words model and a fresh
 * {@link Morphology}, storing them in the {@code tagger} and {@code m} fields.
 */
private static void init() {
    tagger = new MaxentTagger(
            "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
    m = new Morphology();
}
From source file:org.knime.ext.textprocessing.nodes.preprocessing.stanfordlemmatizer.StanfordLemmatizer.java
License:Open Source License
/** * {@inheritDoc}/* w w w . j a v a 2 s . co m*/ */ @Override public Term preprocessTerm(final Term term) { Morphology morph = new Morphology(); final List<Tag> tags = term.getTags(); String tag = ""; // if term doesn't have any tags if (tags.isEmpty()) { // either skip or throw an exception if (!m_skipTerms) { m_warnMessage.set("Warning: Some terms have no POS tags."); return term; } else { throw new RuntimeException("Some terms have no POS tags."); } } // take the first POS tag found that is not UNKNOWN for (Tag elem : tags) { if (elem.getTagType().equals("POS") && !elem.getTagValue().equals("UNKNOWN")) { tag = elem.getTagValue(); break; } } // also skip if no POS tag is found if (tag.isEmpty()) { return term; } final List<Word> words = term.getWords(); final List<Word> newWords = new ArrayList<Word>(); for (final Word w : words) { newWords.add(new Word(morph.lemma(w.getWord(), tag), w.getWhitespaceSuffix())); } return new Term(newWords, term.getTags(), term.isUnmodifiable()); }