List of usage examples for edu.stanford.nlp.trees Tree preTerminalYield
public List<Label> preTerminalYield()
From source file: com.mycompany.stanlp.ChildSpeech.java
/** * @param args the command line arguments *///www .ja va 2s . com public static void main(String[] args) throws IOException { PrintWriter pw = new PrintWriter(new File("out.csv")); StringBuilder sb = new StringBuilder(); // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); TreeMap<String, String[]> tm = new TreeMap<String, String[]>(); String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv"; BufferedReader br = null; String line = ""; String cvsSplitBy = ","; try { br = new BufferedReader(new FileReader(csvFile)); while ((line = br.readLine()) != null) { System.out.println("reached"); String[] country = line.split(cvsSplitBy); String[] input = new String[2]; input[0] = country[0]; input[1] = country[5]; tm.put(country[4], input); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (br != null) { try { br.close(); } catch (IOException e) { e.printStackTrace(); } } } for (Map.Entry<String, String[]> entry : tm.entrySet()) { String[] value = entry.getValue(); Annotation document = new Annotation(value[1]); pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = token.get(TextAnnotation.class); //ArrayList<CoreLabel> al = new ArrayList(); if (word.equals(value[0])) { Tree tree = sentence.get(TreeAnnotation.class); //TregexPattern patternMW = TregexPattern.compile("VP([ >># VB | >># VBP | >># VBD] <<" + value[0] + // ")"); TregexPattern patternMW = TregexPattern .compile(" VP [ <# VB | <# VBP | <# VBD] & <<" + value[0]); TregexMatcher matcher = patternMW.matcher(tree); while 
(matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String tempString = tree.toString(); sb.append(entry.getKey()); sb.append(","); sb.append(value[0]); sb.append(","); sb.append(tempString); sb.append(","); if (match.preTerminalYield().size() == 1) { for (Label l : tree.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } else { for (Label l : match.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } sb.append(","); sb.append(match.toString()); //sb.append(","); //sb.append(token.get(PartOfSpeechAnnotation.class)); sb.append('\n'); } } // this is the POS tag of the token // this is the NER label of the token //String ne = token.get(NamedEntityTagAnnotation.class); } //SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); } //Map<Integer, edu.stanford.nlp.dcoref.CorefChain> graph = //document.get(CorefChainAnnotation.class); } pw.write(sb.toString()); pw.close(); }
From source file: elkfed.expletives.ExpletiveInstance.java
License:Apache License
public ExpletiveInstance(Tree root, Tree pronoun, String id) { _root = root;//from w ww . ja va 2 s.c o m _pronoun = pronoun; _id = id; List<Tree> wordsT = root.getLeaves(); List<Label> posT = root.preTerminalYield(); // get words and POS into an array so that // we get an idea of the pronoun's surrounding String[] words = new String[wordsT.size()]; String[] pos = new String[wordsT.size()]; if (!root.dominates(pronoun)) { System.err.format("%s does not dominate %s. WTF?", root, pronoun); } for (int here = 0; here < wordsT.size(); here++) { Tree w1 = wordsT.get(here); Label p1 = posT.get(here); words[here] = w1.toString(); pos[here] = p1.value(); if (w1 == pronoun) { _idx = here; } else if (pronoun.dominates(w1)) { _idx = here; pronoun = w1; } } assert _idx >= 0 : String.format("wanted %s in %s", pronoun, root); assert pos[_idx].equals("PRP") : String.format("wanted PRP got '%s'", pos[_idx]); _words = words; _pos = pos; }
From source file: elkfed.mmax.importer.ImportOntonotes.java
License:Apache License
/**
 * Adds POS and chunk information derived from {@code tree} to this
 * importer's {@code tags} list. {@code start} is the discourse position of
 * the sentence's first word; all emitted tag positions are offsets from it.
 */
private void addParseInfo(int start, Tree tree) {
    /** Retrieve chunk tags from the parse tree and add chunk markables */
    // Scans the bracketed tree string token by token: "(NP" opens a chunk,
    // depth tracks unmatched brackets inside it, ")" runs close it, and
    // tokens like "word)" advance the word counter.
    boolean inNP = false;
    int startNP = -1;
    int wordLoc = 0;
    int depth = 0;
    // ")" gets a trailing space so closing brackets split into their own tokens.
    for (String tok : tree.toString().replaceAll("\\)", ") ").split("\\s+")) {
        if (tok.matches("\\(NP")) {
            // NOTE(review): a nested "(NP" resets startNP/depth, so only the
            // innermost of nested NPs survives — confirm this is intended.
            inNP = true;
            startNP = wordLoc;
            depth = 0;
        }
        if ((inNP) && (tok.matches(".*\\)"))) {
            depth--;
        }
        // The "(NP" token itself matches this too, so depth counts the NP's
        // own bracket and returns to 0 exactly when the NP closes.
        if ((inNP) && (tok.matches("\\(.*"))) {
            depth++;
        }
        if (tok.matches(".+\\)")) {
            wordLoc++; // a leaf token like "word)" consumes one word position
        }
        if ((depth == 0) && (inNP)) {
            // NP just closed: emit a chunk markable spanning its words.
            inNP = false;
            Tag t = new Tag();
            t.tag = DEFAULT_CHUNK_LEVEL;
            t.attrs.put("tag", "np");
            t.start = start + startNP;
            t.end = start + wordLoc - 1;
            tags.add(t);
        }
    }
    /** Retrieve POS tags from the parse tree */
    // One single-word POS tag per pre-terminal, lowercased.
    List<Label> taggedSent = new ArrayList<Label>(tree.preTerminalYield());
    for (int i = 0; i < taggedSent.size(); i++) {
        Tag t = new Tag();
        t.tag = DEFAULT_POS_LEVEL;
        t.start = t.end = start + i;
        String tag = taggedSent.get(i).value();
        t.attrs.put("tag", tag.toLowerCase());
        tags.add(t);
    }
}
From source file: elkfed.mmax.pipeline.Parser.java
License:Apache License
/**
 * Add parser, part of speech, and chunk markables for every sentence of the
 * current document. Reads the parse strings from {@code forest} (one per
 * sentence, parallel to the sentence markables), writes parse markables to
 * {@code currentLevel}, chunk markables to {@code chunkLevel}, and collects
 * POS tags into {@code posTags} before emitting them on {@code posLevel}.
 */
protected void addMarkables() {
    final StringBuffer markableBuffer = new StringBuffer();
    List<Markable> sentences = null;
    try {
        sentences = DiscourseUtils.getSentences(currentDocument);
    } catch (Exception mmax2e) {
        // NOTE(review): if this fires, sentences stays null and the loop
        // below throws NPE — confirm failures here are considered fatal.
        mmax2e.printStackTrace();
    }
    for (int sentence = 0; sentence < sentences.size(); sentence++) {
        /** Add the parse tree markables */
        final Map<String, String> attributes = new HashMap<String, String>(levelAttributes);
        // NOTE(review): replaceAll("&", "&") is a no-op as written — this
        // looks like an entity-escaping call garbled in transit; verify
        // against the original source.
        attributes.put(TAG_ATTRIBUTE, forest.get(sentence).replaceAll("&", "&"));
        markableBuffer.setLength(0);
        Markable sent_m = sentences.get(sentence);
        int start = sent_m.getLeftmostDiscoursePosition();
        int end = sent_m.getRightmostDiscoursePosition();
        currentLevel.addMarkable(start, end, attributes);
        /** Retrieve chunk tags from the parse tree and add chunk markables */
        // Token scan over the bracketed parse string: "(NP" opens a chunk,
        // depth tracks its unmatched brackets, "word)" tokens advance the
        // word counter, and the chunk is emitted when depth returns to 0.
        boolean inNP = false;
        int startNP = -1;
        int wordLoc = 0;
        int depth = 0;
        // ")" gets a trailing space so closing brackets become separate tokens.
        for (String tok : forest.get(sentence).replaceAll("\\)", ") ").split("\\s+")) {
            if (tok.matches("\\(NP")) {
                inNP = true;
                startNP = wordLoc;
                depth = 0;
            }
            if ((inNP) && (tok.matches(".*\\)"))) {
                depth--;
            }
            // "(NP" itself matches here too, so its own bracket is counted.
            if ((inNP) && (tok.matches("\\(.*"))) {
                depth++;
            }
            if (tok.matches(".+\\)")) {
                wordLoc++;
            }
            if ((depth == 0) && (inNP)) {
                inNP = false;
                final Map<String, String> cAttributes = new HashMap<String, String>(chunkAttributes);
                markableBuffer.setLength(0);
                cAttributes.put(TAG_ATTRIBUTE, "np");
                //TODO: check if it's not start+wordLoc-1 ?
                chunkLevel.addMarkable(start + startNP, start + wordLoc - 1, cAttributes);
            }
        }
        /** Create a tree object from the current sentence */
        // NOTE(review): this fresh node is immediately overwritten by
        // Tree.valueOf below — the initial allocation is dead.
        Tree currentTree = new LabeledScoredTreeNode();
        currentTree = (LabeledScoredTreeNode) Tree.valueOf(forest.get(sentence));
        /** Retrieve POS tags from the parse tree */
        List<Label> taggedSent = new ArrayList<Label>(currentTree.preTerminalYield());
        for (int i = 0; i < taggedSent.size(); i++) {
            posTags.add(taggedSent.get(i).value());
        }
    }
    /** Add POS tag markables */
    // posTags accumulated across all sentences, so positions here are
    // document-global word indices.
    for (int pos = 0; pos < posTags.size(); pos++) {
        final HashMap<String, String> attributes = new HashMap<String, String>(posAttributes);
        attributes.put(TAG_ATTRIBUTE, posTags.get(pos).toLowerCase());
        posLevel.addMarkable(pos, pos, attributes);
    }
}