List of usage examples for edu.stanford.nlp.trees Tree addChild
public void addChild(Tree t)
From source file:Engines.Test.StanfordParser.TreeHandling.java
License:Open Source License
private static Tree putOnBranch(TypedDependency dep, Tree tree) { /*// www .ja v a2s . c o m * Each node is a tree with a single child */ Tree mySubtree = lstf.newTreeNode(dep.gov().backingLabel(), new LinkedList<Tree>()); mySubtree.setValue("[<-" + dep.reln() + "-] " + dep.dep().value());//nudge in the dependency relation information if (tree.children().length == 0) { if (tree.label().value().toString().equals("DUMMYROOT")) { tree.addChild(mySubtree); return tree; } else { //Shouldn't happen System.err.println("Forgot to add a child earlier."); return null; } } else { // System.err.println(dep.dep().label() +"\t[on]\t" + tree.label()); for (Tree child : tree.children()) { //if dep is child's parent, insert dep between child and its parent if (((CoreLabel) child.label()).index() == ((CoreLabel) ((Labeled) dep.dep()).label()).index()) { tree.removeChild(tree.objectIndexOf(child)); mySubtree.addChild(child); } } if (mySubtree.children().length > 1) { tree.addChild(mySubtree); return tree; } for (Tree child : tree.children()) { //if dep is Child's sibling, or child if (((CoreLabel) child.label()).index() == ((CoreLabel) ((Labeled) dep.gov()).label()).index()) { tree.addChild(mySubtree); return tree; } if (child.children().length > 0) { if (putOnBranch(dep, child) != null) { return tree; } } } } // tree.getLeaves() == null //check its childrens, recurisively. return null; }
From source file:LVCoref.Document.java
License:Open Source License
/** * Initialize node parent rerferences, sentence borders and roots *///from w w w . j a v a 2 s. c o m public void initializeNodeTree() { Sentence prevSentence = null; for (Sentence s : sentences) { s.setRootNode(new Node("_ROOT_", "_ROOT_", "_", -1, -1, this)); s.getRootNode().sentence = s; if (prevSentence != null) { s.getRootNode().setParentNode(prevSentence.getRootNode()); } else { s.getRootNode().setParentNode(null); } if (s.getNodes().size() > 0) { List<Node> nodes = s.getNodes(); Node start = nodes.get(0); Node end = nodes.get(nodes.size() - 1); s.setStart(start.id); s.setEnd(end.id); start.sentStart = true; end.sentEnd = true; } else { log.warning("Empty sentence " + s); } boolean rootSet = false; // seen node with parent = 0 Node prevNode = null; for (Node n : s.getNodes()) { if (Constants.USE_SINTAX) { if (n.parentIndex == 0) { n.sentRoot = true; if (rootSet) { log.fine("Multiple roots " + s); } rootSet = true; n.setParentNode(s.getRootNode()); s.getRootNode().addChild(n); } else { Node parent = getNode(n.parentIndex + s.getStart() - 1); n.setParentNode(parent); if (parent != null) parent.addChild(n); } } else { if (prevNode == null) { n.setParentNode(s.getRootNode()); s.getRootNode().addChild(n); n.sentRoot = true; } else { Node parent = prevNode; n.setParentNode(parent); if (parent != null) parent.addChild(n); } } prevNode = n; } prevSentence = s; } // Parse tree for (Sentence s : sentences) { for (Node n : s) { Tree t = new TreeGraphNode(new Word(n.word + "--t--" + n.tag + "--d--" + n.dependency)); n.tree = t; } } for (Sentence s : sentences) { Tree root = new TreeGraphNode(new Word("ROOT")); s.setRootTree(root); for (Node n : s) { Tree t = n.tree; Node parent = n.parent; if (parent != null) { if (parent.isSentenceRoot()) { root.addChild(t); } else { parent.tree.addChild(t); } } } //root.pennPrint(); } }
From source file:qmul.util.parse.CreateTreeFromClarkCurranCcgGrs.java
License:Open Source License
/** * @param reader//from w ww. jav a 2 s. c om * a {@link BufferedReader} * @return the Stanford {@link Tree} */ public static Tree makeTree(BufferedReader reader) { if (options == null) { setDefaultOptions(); } String line = null; treeStack.clear(); tagStack.clear(); try { while ((line = reader.readLine()) != null) { line = line.trim(); // empty line is the gap between sentences - stop and return what we've built so far if (line.isEmpty() && !treeStack.isEmpty()) { break; } // wait for <c> line if (!line.startsWith("<c>")) { continue; } String[] chunks = line.split("\\s+"); for (String chunk : chunks) { if (chunk.equals("<c>")) { continue; } String[] fields = chunk.split("\\|"); if (fields.length != 6) { throw new IllegalArgumentException("strange chunk " + chunk + " " + fields.length); } String word = fields[0]; String stem = fields[1]; String postag = fields[2]; String tag1 = fields[3]; String tag2 = fields[4]; String supertag = fields[5]; if (supertag.matches("^[,.:;!?]+$")) { continue; } // each word gets a lexical leaf node plus a non-terminal supertag node Tree synNode = tf.newTreeNode(supertag, new ArrayList<Tree>()); synNode.addChild(tf.newLeaf(word)); shift(synNode, supertag); reduce(); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(0); } reduce(); if (treeStack.isEmpty()) { System.out.println("No nodes, return null"); return null; } else if (treeStack.size() == 1) { System.out.println("1 nodes, return " + treeStack.get(0)); return treeStack.get(0); } else { Tree tree = tf.newTreeNode("ROOT", treeStack); System.out.println(treeStack.size() + " nodes, return " + tree); return tree; } }
From source file:qmul.util.parse.CreateTreeFromClarkCurranCcgGrs.java
License:Open Source License
private static void reduce() { int n = (tagStack.size() - 1); if (n < 1) { return;//from w ww. ja v a 2 s . co m } System.out.println("reduce " + tagStack); String tag = null; if (tag == null) { tag = matchRight(tagStack.get(n - 1), tagStack.get(n)); } if (tag == null) { tag = matchLeft(tagStack.get(n - 1), tagStack.get(n)); } if (tag == null) { return; } tagStack.remove(n); tagStack.remove(n - 1); tagStack.add(tag); Tree node = tf.newTreeNode(tag, new ArrayList<Tree>()); node.addChild(treeStack.get(n - 1)); node.addChild(treeStack.get(n)); treeStack.remove(n); treeStack.remove(n - 1); treeStack.add(node); reduce(); return; }
From source file:qmul.util.parse.CreateTreeFromClarkCurranCCGProlog.java
License:Open Source License
/** * @param reader// ww w . j av a2 s .c o m * a {@link BufferedReader} * @return the Stanford {@link Tree} */ public static Tree makeTree(BufferedReader reader) { if (options == null) { setDefaultOptions(); } NodeFilter nodeFilter = new NodeFilter(); String line = null; boolean doingTree = false; boolean doingWords = false; HashMap<Integer, Tree> leaves = new HashMap<Integer, Tree>(); Tree currentNode = null; Tree rootNode = null; int treeLevel = 0; try { while ((line = reader.readLine()) != null) { line = line.trim(); // first we need to get the ccg/2 tree structure if (line.startsWith("ccg(")) { doingTree = true; doingWords = false; treeLevel = 1; // nothing useful on the actual ccg functor line continue; } // next the w/8 word definitions if (line.startsWith("w(")) { if (!doingTree && !doingWords) { // if we've hit the word definitions without seeing a tree, stop return null; } doingTree = false; doingWords = true; } if (doingTree) { Matcher m = LEAF_PAT.matcher(line); if (m.find()) { // System.out.println("matched leaf " + line); Tree nonTerminal = tf.newTreeNode(getSynLabel(m.group(3)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = nonTerminal; } else { currentNode.addChild(nonTerminal); } Tree leaf = tf.newLeaf("DUMMY"); nonTerminal.addChild(leaf); leaves.put(Integer.parseInt(m.group(2)), leaf); // adjust currentNode int numOpening = line.replaceAll("[^(]", "").length(); int numClosing = line.replaceAll("\\)\\.$", "").replaceAll("[^)]", "").length(); int levelChange = numOpening - numClosing; if (levelChange > 0) { throw new RuntimeException("deepening with leaf node!"); } else if (levelChange < 0) { do { // System.out.println("cu node " + currentNode.label()); currentNode = currentNode.parent(rootNode); // System.out.println("up node " + (currentNode == null ? null : currentNode.label())); treeLevel--; levelChange++; } while (levelChange < 0); } continue; } m = RULE_PAT.matcher(line); if (m.find()) { // System.out.println("matched rule " + line); treeLevel++; Tree node = tf.newTreeNode(getSynLabel(m.group(2)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = node; } if (currentNode != null) { currentNode.addChild(node); } currentNode = node; // System.out.println("current node " + node.label()); continue; } m = LEXR_PAT.matcher(line); if (m.find()) { // System.out.println("matched lexr " + line); treeLevel++; Tree node = tf.newTreeNode(getSynLabel(m.group(3)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = node; } if (currentNode != null) { currentNode.addChild(node); } currentNode = node; // System.out.println("current node " + node.label()); continue; } m = CONJ_PAT.matcher(line); if (m.find()) { // System.out.println("matched conj " + line); treeLevel++; Tree node = tf.newTreeNode(getSynLabel(m.group(4)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = node; } if (currentNode != null) { currentNode.addChild(node); } currentNode = node; // System.out.println("current node " + node.label()); continue; } throw new RuntimeException("no match for line " + line); } if (doingWords) { Matcher m = WORD_PAT.matcher(line); if (m.find()) { Tree leaf = leaves.get(Integer.parseInt(m.group(2))); if (leaf == null) { throw new RuntimeException("Missing leaf " + m.group(2)); } leaf.setLabel(new StringLabel(m.group(3))); leaves.remove(Integer.parseInt(m.group(2))); } else { if (line.isEmpty()) { doingWords = false; if (!leaves.isEmpty()) { throw new RuntimeException("unmatched leaves " + leaves); } continue; } else { throw new RuntimeException("strange word line " + line); } } continue; } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(0); } // prune to (optionally) remove punctuation nodes etc, then flatten to remove their dedicated parents if (rootNode != null) { // System.out.println(); // System.out.println("raw tree " + rootNode.pennString()); // System.out.println("pru tree " + rootNode.prune(nodeFilter).pennString()); // System.out.println("fla tree " + rootNode.prune(nodeFilter).flatten().pennString()); // rootNode = rootNode.prune(nodeFilter).flatten(); } return rootNode; }
From source file:qmul.util.parse.CreateTreeFromDCPSE.java
License:Open Source License
/** * @param reader/*from ww w .ja va 2 s. c o m*/ * a {@link Reader} * @return the Stanford {@link Tree} */ public static Tree makeTree(Reader reader) { if (options == null) { setDefaultOptions(); } List<Tree> children = new ArrayList<Tree>(); Tree t0 = null; Tree tPrev = null; Tree tAll = null; Tree tTemp = null; int n = 0; int countspace = 0; int countspaceprevious = 0; int countspacepreviousprevious = 0; char c1 = 'x'; int childWhere = Integer.MAX_VALUE; String gads = ""; String otherStuff = ""; String[] gadsWord = null; boolean isAword = false;// do not change boolean processLine = true;// do not change try { while ((n = reader.read()) != -1) { char c = (char) n; if (c == '[' && gads.matches("")) { processLine = false; // System.out.println(otherStuff); otherStuff = ""; } if (processLine) { if (c == '\n') { if (gads.matches("^\\s+$")) { // we've hit a line containing only whitespace: end of the tree break; } if (options.get(INCLUDE_NO_PAUSE)) { if (gads.contains("PAUSE")) { gads = IGNORE_MARKER; } } // remove "ignored" nodes; unless we need to keep them to work out features, in which case we'll // remove them later in DCPSECorpus if (options.get(INCLUDE_NO_IGNORE) && !options.get(PP_LEXICAL_FEATURES)) { if (gads.contains("ignore)")) { gads = IGNORE_MARKER; } } if (options.get(INCLUDE_NO_UMM)) { if (gads.contains("DISMK,INTERJEC")) { gads = IGNORE_MARKER; } } if (options.get(INCLUDE_NO_REACT)) { if (gads.contains("DISMK,REACT")) { gads = IGNORE_MARKER; } } if (options.get(INCLUDE_NO_UNCLEAR)) { if (gads.contains("INDET,?")) { gads = IGNORE_MARKER; } } if (gads.contains("{")) { // remove all annoying browser markup gadsWord = gads.replaceAll("\\[.*?\\]", "").split("\\s+"); gads = gadsWord[0]; isAword = true; } if (options.get(INCLUDE_NO_BRACKETS)) { if (gads.contains("(")) { gads = gads.replaceAll("\\(.+\\)", ""); } } if (options.get(CATEGORIES_NOT_FUNCTIONS) && !gads.matches(IGNORE_MARKER)) { gads = gads.replaceFirst(".*?,", ""); } if (!gads.matches(IGNORE_MARKER)) { tPrev = t0; t0 = tf.newTreeNode(gads.trim(), children); if (childWhere == Integer.MAX_VALUE) { tAll = t0; } else if (childWhere >= 0) { // up x tTemp = tPrev.ancestor(childWhere + 1, tAll); if (tTemp == null) { System.out.println("c1 = " + c1); System.out.println("gads = " + gads); System.out.println("t0 = "); t0.indentedListPrint(); System.out.println("tPrev = "); tPrev.indentedListPrint(); System.out.println("tAll = "); tAll.indentedListPrint(); System.err.println("ERROR: null ancestor at " + (childWhere + 1) + " " + tAll); } tTemp.addChild(t0); } else if (childWhere < 0) { // down one level tPrev.addChild(t0); } if (isAword) { tPrev = t0; String wordLabel = gadsWord[1]; for (int iWord = 2; iWord < gadsWord.length; iWord++) { wordLabel += " " + gadsWord[iWord]; } tTemp = tf.newLeaf(wordLabel.trim()); tPrev.addChild(tTemp); isAword = false; tTemp = null; } } if (gads.matches(IGNORE_MARKER)) {// reset previous counter if is a line to ignore countspaceprevious = countspaceprevious + childWhere; } gads = ""; c1 = 'y'; } else if (c1 == 'y' && c == ' ') {// was just a return character and is space countspace++; } else {// not a leading space or a return character gads += c; c1 = 'x'; if (countspace != 0) { childWhere = countspaceprevious - countspace; countspaceprevious = countspace; countspacepreviousprevious = countspaceprevious; countspace = 0; } } } else if (c == '\n') { // (if not processLine = True) processLine = true; } else { // processLine = false and not a return character otherStuff += c; } } } catch (IOException ioe) { System.err.println("IOException: " + ioe.getMessage()); } if (tAll == null && !otherStuff.isEmpty()) { Tree tSpec = tf.newTreeNode("EMPTY", children); return tSpec; } else { return tAll; } }
From source file:qmul.util.parse.CreateTreeFromSWBD.java
License:Open Source License
/** * @param reader/*from w w w .ja v a 2s . c om*/ * a {@link Reader} * @return the Stanford {@link Tree} */ public static Tree makeTree(Reader reader) { if (options == null) { setDefaultOptions(); } List<Tree> children = new ArrayList<Tree>(); Tree t0 = null; Tree tPrev = null; Tree tAll = null; Tree tTemp = null; int n = 0; String funcStr[] = { "", "" }; int openBrackets = 0; int closeBrackets = 0; int totalBrackets = 0; int childWhere = Integer.MAX_VALUE; String gads = ""; String otherStuff = ""; String[] gadsWord = null; boolean isAword = false;// do not change boolean wasAword = false;// do not change boolean processLine = false;// do not change try { while ((n = reader.read()) != -1) { char c = (char) n; char charsToIgnore[] = { '.', ',', '?', '\n', '\t', '\r' }; if (gads == IGNORE_MARKER) { if (c == '\n') { gads = ""; } } else { for (int i = 0; i < charsToIgnore.length; i++) { if (c == charsToIgnore[i]) { c = '~'; } } if (c == '(' || c == ')' || c == ' ' || c == '~') { if (c == '(') { totalBrackets++; } else if (c == ')') { totalBrackets--; } if (gads.matches("") && totalBrackets != 0) { // there is nothing yet to process. Collect brackets funcStr[0] += c; processLine = false; } else if (totalBrackets == 0) { processLine = true; } else { processLine = true; // Something needs to be put on a tree... I think funcStr[1] += c; // start collecting next set of function stuff if (funcStr[0].matches("^\\s$")) { // need to put something here to prevent it having a fit when multiple words and // also to ignore those which are part of the function if (c != ' ' || (c == '~' && openBrackets <= 0)) { isAword = true; if (!wasAword) { openBrackets++; } else { openBrackets--; } } else { processLine = false; gads += c; } } else if (openBrackets < 0 && gads.matches("^[a-zA-Z][a-z]+$")) { isAword = true; if (c == ' ') { processLine = false; gads += c; } else { // hold previous brackets and reset own... for (int i = 0; i < closeBrackets; i++) { funcStr[1] += ')'; } for (int j = 0; j < openBrackets; j++) { funcStr[1] += '('; } closeBrackets = 0; for (int k = 0; k < funcStr[0].length(); k++) { if (funcStr[0].charAt(k) == '(') { openBrackets++; } else if (funcStr[0].charAt(k) == ')') { closeBrackets++; } } } } else { for (int j = 0; j < funcStr[0].length(); j++) { if (funcStr[0].charAt(j) == '(') { openBrackets++; } else if (funcStr[0].charAt(j) == ')') { closeBrackets++; } } } } } else if (c != '~') { gads += c; } if ((gads.matches("^\\s$") || gads.matches(""))) { if (totalBrackets != 0 || tAll == null) { processLine = false; } } if (gads.matches("^\\*x\\*")) { gads = IGNORE_MARKER; } // // this is actually done later in SwitchboardCorpus using a NodeFilter // if (options.get(INCLUDE_NO_INTJ)) { // if (gads.contains("INTJ")) { // gads = IGNORE_MARKER; // } // } } if (processLine) { if (gads.matches("E\\_S") || totalBrackets == 0) { // we've hit an end of segment; end the tree System.out.println("end of segment"); break; } if (!gads.matches(IGNORE_MARKER)) { // System.out.println("gads is: " + gads); tPrev = t0; if (isAword) { t0 = tf.newLeaf(gads); } else { t0 = tf.newTreeNode(gads, children); } if (childWhere == Integer.MAX_VALUE) { // System.out.println("It is the first in the tree"); tAll = t0;// set initially childWhere = 0; } else if (openBrackets <= closeBrackets) { // System.out.println("It should be going up " + (closeBrackets-openBrackets)); // up x if (openBrackets < 0) { openBrackets++; } tTemp = tPrev.ancestor((closeBrackets - openBrackets) + 1, tAll); if (tTemp == null) { System.out.println("open = " + openBrackets); System.out.println("close = " + closeBrackets); System.out.println("gads = " + gads); System.out.println("t0 = "); t0.indentedListPrint(); System.out.println("tPrev = "); tPrev.indentedListPrint(); System.out.println("tAll = "); tAll.indentedListPrint(); System.err.println("ERROR: null ancestor at " + (childWhere + 1) + " " + tAll); } tTemp.addChild(t0); if (isAword) { // System.out.println("It is a word"); openBrackets = 0; closeBrackets = 0; } // tPrev.addChild(t0); } else if (openBrackets > closeBrackets) { // down one level if (isAword) { // System.out.println("It is a word"); openBrackets--; } // System.out.println("It should be going down one"); tPrev.addChild(t0); } } if (!isAword) { openBrackets = 0; closeBrackets = 0; wasAword = false; } else { wasAword = true; isAword = false; openBrackets--; // System.out.println("closeBrackets is: "+ closeBrackets); } gads = ""; processLine = false; funcStr[0] = funcStr[1]; funcStr[1] = ""; } } } catch (IOException ioe) { System.err.println("IOException: " + ioe.getMessage()); } if (tAll == null) { Tree tSpec = tf.newTreeNode("EMPTY", children); return tSpec; } else { // tAll.indentedListPrint(); return tAll; } }
From source file:qmul.util.parse.StanfordParser.java
License:Open Source License
/** * Convenience method: splits utt into sentences, uses {@link LexicalizedParser}'s parse() to tokenize and parse * each sentence/* ww w . ja v a2 s .c o m*/ * * @param utt * @return a {@link Tree} with ROOT node, with the getBestParse() trees for each sentence as children */ public Tree parse(String utt) { String[] sentences = utt.split("[.!?]"); // System.out.println("there are sentences:" + sentences.length); // LinkedList<Tree> list=new LinkedList<Tree>(); Label rootLabel = new StringLabel("ROOT"); Tree concat = new LabeledScoredTreeNode(rootLabel, new LinkedList<Tree>()); try { for (int i = 0; i < sentences.length; i++) { boolean parsed = false; if (sentences[i].length() > 0) parsed = lp.parse(sentences[i]); else continue; Tree t = lp.getBestParse(); Tree rootChild; if (t.children().length == 1) rootChild = t.removeChild(0); else rootChild = t; concat.addChild(rootChild); } if (concat.children().length > 1) return concat; else return concat.removeChild(0); } catch (Throwable t) { System.out.println(t.getMessage()); System.out.println("Reinitializing parser because of trying to parse error " + utt); this.lp = null; Runtime r = Runtime.getRuntime(); r.gc(); lp = new LexicalizedParser(System.getProperty("user.dir") + File.separator + "utils" + File.separator + "englishPCFG.ser.gz"); this.lp.setOptionFlags(new String[] { "-maxLength", "100", "-retainTmpSubcategories" }); return null; } }