List of usage examples for edu.stanford.nlp.trees Tree setLabel
@Override public void setLabel(Label label)
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
public static Tree tagged_word(String word, String tag) { LabelFactory lf = new StringLabelFactory(); Tree result = new LabeledScoredTreeNode(); result.setLabel(lf.newLabel(tag)); Tree[] dtrs = new Tree[1]; dtrs[0] = new LabeledScoredTreeNode(lf.newLabel(word)); result.setChildren(dtrs);//from w w w. jav a 2 s . c o m return result; }
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
/**
 * Builds a "-PRD" marked node for a predicate constituent.
 *
 * <p>The result is labelled with {@code node.value() + "-PRD"}. For a
 * two-child PP whose first child is an IN, the children are the IN node
 * itself and the head preterminal of the second child. For every other
 * node the single child is the head preterminal of {@code node}.
 *
 * <p>Head preterminals are found with a {@link ModCollinsHeadFinder}.
 *
 * @param node the constituent to summarise
 * @return a new node carrying the "-PRD" label and the selected children
 */
public static Tree tree_pred(Tree node) {
    LabelFactory lf = new StringLabelFactory();
    Tree result = new LabeledScoredTreeNode();
    result.setLabel(lf.newLabel(node.value() + "-PRD"));

    ModCollinsHeadFinder headFinder = new ModCollinsHeadFinder();
    Tree[] kids = node.children();
    Tree[] dtrs;
    if (node.value().equals("PP") && kids.length == 2
            && kids[0].value().equals("IN")) {
        // Keep the preposition and the head of its complement.
        dtrs = new Tree[] { kids[0], kids[1].headPreTerminal(headFinder) };
    } else {
        // The original fell through to this assignment even for the PP
        // case, silently discarding the two children built above.
        dtrs = new Tree[] { node.headPreTerminal(headFinder) };
    }
    result.setChildren(dtrs);
    return result;
}
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
/** constructs a marked subtree for a subclause * outside the path to the pronoun/*from w w w . j a v a 2 s .c om*/ * @param node the starting point * @return a marked subtree for the tree starting with node */ public static Tree tree_pruned(Tree node) { LabelFactory lf = new StringLabelFactory(); Tree result = new LabeledScoredTreeNode(); result.setLabel(lf.newLabel(node.value() + "-X")); List<Tree> dtrs = new ArrayList<Tree>(); boolean cpl_seen = false; if (node.value().matches("S|SBAR|VP")) { for (Tree t : node.children()) { // modals are copied verbatim String cat = t.value(); if (cat.matches("TO|MD|IN")) { dtrs.add(t); cpl_seen = true; } else if (cat.startsWith("WH")) { Tree dtr = tagged_word(cat, "WH"); cpl_seen = true; } else if (t.value().startsWith("VB")) { break; } else if (t.value().matches("S|SBAR|VP")) { if (cpl_seen) { //ignore } else { dtrs.add(tree_pruned(t)); } } } } result.setChildren(dtrs); return result; }
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
/** constructs a marked subtree for parts which are * outside the path to the pronoun//from ww w .ja v a 2 s .c o m * @param node the starting point * @return a marked subtree for the tree starting with node */ public static Tree tree_outside(Tree node) { LabelFactory lf = new StringLabelFactory(); // verbs and modals are copied verbatim if (node.value().matches("VB[DZPNG]?")) { return tagged_word(Morphology.stemStatic(node.children()[0].value(), node.value()).value(), "VBX"); //return node; } else if (node.value().matches("TO|MD|IN|RB")) { return node; } Tree result = new LabeledScoredTreeNode(); result.setLabel(lf.newLabel(node.value())); if (node.value().matches("VP")) { List<Tree> dtrs = new ArrayList<Tree>(); dtrs_inside(node, dtrs); result.setChildren(dtrs); } else { List<Tree> dtrs = null; result.setChildren(dtrs); } return result; }
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
/** * constructs a marked subtree for the part where the * pronoun is <i>inside</i> the subtree * @param node the starting point/*from ww w . j a v a 2 s. c om*/ * @param pron our pronoun * @return a marked subtree for the tree starting with node */ public static Tree tree_inside(Tree node, Tree pron) { LabelFactory lf = new StringLabelFactory(); int pron_left = pron.leftCharEdge(node); int pron_right = pron.rightCharEdge(node); List<Tree> dtrs = new ArrayList<Tree>(node.children().length); boolean node_seen = false; for (Tree t : node.children()) { if (t == pron) { dtrs.add(t); node_seen = true; } else if (t.dominates(pron)) { dtrs.add(tree_inside(t, pron)); node_seen = true; } else { String cat = t.value(); if (cat.matches("S|SBAR")) { dtrs.add(tree_pruned(t)); } else { dtrs.add(tree_outside(t)); } } } Tree result = new LabeledScoredTreeNode(); result.setLabel(lf.newLabel(node.value() + "-I")); result.setChildren(dtrs); return result; }
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
public static Tree tree_markonly(Tree node, Tree pron) { LabelFactory lf = new StringLabelFactory(); List<Tree> dtrs = new ArrayList<Tree>(node.children().length); for (Tree t : node.children()) { if (t == pron) { dtrs.add(t);//from w ww . ja v a 2 s . c om } else if (t.dominates(pron)) { dtrs.add(tree_markonly(t, pron)); } else { dtrs.add(t); } } Tree result = new LabeledScoredTreeNode(); result.setLabel(lf.newLabel(node.value() + "-I")); result.setChildren(dtrs); return result; }
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
/**
 * Sets the label of the leaf nodes to be the CoreLabels in the given
 * sentence. The original value() of each leaf is preserved by copying it
 * into the CoreLabel's {@code ValueAnnotation} before the label is swapped.
 * Leaves are re-indexed afterwards.
 *
 * @param tree the tree whose leaves are relabelled
 * @param sentence the labels, consumed in leaf order (i-th label for the
 *     i-th leaf)
 */
public static void mergeLabels(Tree tree, List<CoreLabel> sentence) {
    List<Tree> leaves = tree.getLeaves();
    for (int i = 0; i < leaves.size(); i++) {
        Tree leaf = leaves.get(i);
        CoreLabel token = sentence.get(i);
        token.set(CoreAnnotations.ValueAnnotation.class, leaf.value());
        leaf.setLabel(token);
    }
    tree.indexLeaves();
}
From source file:qmul.util.parse.CreateTreeFromClarkCurranCCGProlog.java
License:Open Source License
/** * @param reader/*from www. j a v a 2 s . c o m*/ * a {@link BufferedReader} * @return the Stanford {@link Tree} */ public static Tree makeTree(BufferedReader reader) { if (options == null) { setDefaultOptions(); } NodeFilter nodeFilter = new NodeFilter(); String line = null; boolean doingTree = false; boolean doingWords = false; HashMap<Integer, Tree> leaves = new HashMap<Integer, Tree>(); Tree currentNode = null; Tree rootNode = null; int treeLevel = 0; try { while ((line = reader.readLine()) != null) { line = line.trim(); // first we need to get the ccg/2 tree structure if (line.startsWith("ccg(")) { doingTree = true; doingWords = false; treeLevel = 1; // nothing useful on the actual ccg functor line continue; } // next the w/8 word definitions if (line.startsWith("w(")) { if (!doingTree && !doingWords) { // if we've hit the word definitions without seeing a tree, stop return null; } doingTree = false; doingWords = true; } if (doingTree) { Matcher m = LEAF_PAT.matcher(line); if (m.find()) { // System.out.println("matched leaf " + line); Tree nonTerminal = tf.newTreeNode(getSynLabel(m.group(3)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = nonTerminal; } else { currentNode.addChild(nonTerminal); } Tree leaf = tf.newLeaf("DUMMY"); nonTerminal.addChild(leaf); leaves.put(Integer.parseInt(m.group(2)), leaf); // adjust currentNode int numOpening = line.replaceAll("[^(]", "").length(); int numClosing = line.replaceAll("\\)\\.$", "").replaceAll("[^)]", "").length(); int levelChange = numOpening - numClosing; if (levelChange > 0) { throw new RuntimeException("deepening with leaf node!"); } else if (levelChange < 0) { do { // System.out.println("cu node " + currentNode.label()); currentNode = currentNode.parent(rootNode); // System.out.println("up node " + (currentNode == null ? 
null : currentNode.label())); treeLevel--; levelChange++; } while (levelChange < 0); } continue; } m = RULE_PAT.matcher(line); if (m.find()) { // System.out.println("matched rule " + line); treeLevel++; Tree node = tf.newTreeNode(getSynLabel(m.group(2)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = node; } if (currentNode != null) { currentNode.addChild(node); } currentNode = node; // System.out.println("current node " + node.label()); continue; } m = LEXR_PAT.matcher(line); if (m.find()) { // System.out.println("matched lexr " + line); treeLevel++; Tree node = tf.newTreeNode(getSynLabel(m.group(3)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = node; } if (currentNode != null) { currentNode.addChild(node); } currentNode = node; // System.out.println("current node " + node.label()); continue; } m = CONJ_PAT.matcher(line); if (m.find()) { // System.out.println("matched conj " + line); treeLevel++; Tree node = tf.newTreeNode(getSynLabel(m.group(4)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = node; } if (currentNode != null) { currentNode.addChild(node); } currentNode = node; // System.out.println("current node " + node.label()); continue; } throw new RuntimeException("no match for line " + line); } if (doingWords) { Matcher m = WORD_PAT.matcher(line); if (m.find()) { Tree leaf = leaves.get(Integer.parseInt(m.group(2))); if (leaf == null) { throw new RuntimeException("Missing leaf " + m.group(2)); } leaf.setLabel(new StringLabel(m.group(3))); leaves.remove(Integer.parseInt(m.group(2))); } else { if (line.isEmpty()) { doingWords = false; if (!leaves.isEmpty()) { throw new RuntimeException("unmatched leaves " + leaves); } continue; } else { throw new RuntimeException("strange word line " + line); } } continue; } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(0); } // prune to (optionally) remove punctuation nodes etc, then flatten to remove their dedicated parents if (rootNode != 
null) { // System.out.println(); // System.out.println("raw tree " + rootNode.pennString()); // System.out.println("pru tree " + rootNode.prune(nodeFilter).pennString()); // System.out.println("fla tree " + rootNode.prune(nodeFilter).flatten().pennString()); // rootNode = rootNode.prune(nodeFilter).flatten(); } return rootNode; }
From source file:reck.parser.lexparser.RECKLexicalizedParser.java
License:Open Source License
public RECKCTTreeNodeImpl convertToRECKTree(Tree root, int startSentence, String content) { RECKCTTreeNodeImpl newRoot = null;//from w w w . ja v a2s . c o m Charseq pos = null; List nodeList = root.getLeaves(); HashSet parentSet = new HashSet(); int docIndex = startSentence; String st = null; // compute leaves' positions for (int i = 0; i < nodeList.size(); i++) { Tree oldNode = (Tree) nodeList.get(i); st = oldNode.toString(); int start = content.indexOf(st, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) { if (st.indexOf("&") != -1) { String tmp = st.replaceAll("&", "&"); start = content.indexOf(tmp, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) { tmp = st.replaceAll("&", "&"); start = content.indexOf(tmp, docIndex); } } if (start != -1 && start - docIndex <= maxDistanceBetweenLeaves) { docIndex = start + st.length() + 4; } else { st = reConvert(st); start = content.indexOf(st, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) { if (st.equals("-LRB-") || st.equals("-LCB-")) { int i1 = content.indexOf("(", docIndex); int i2 = content.indexOf("[", docIndex); int i3 = content.indexOf("{", docIndex); if (i1 == -1) i1 = content.length(); if (i2 == -1) i2 = content.length(); if (i3 == -1) i3 = content.length(); if ((i1 == i2) && (i1 == i3)) System.out.println("Come here !"); else if (i1 < i2) { if (i3 < i1) { // st = "{"; start = i3; } else { // st = "("; start = i1; } } else { if (i3 < i2) { // st = "{"; start = i3; } else { // st = "["; start = i2; } } docIndex = start + 1; } else if (st.equals("-RRB-") || st.equals("-RCB-")) { int i1 = content.indexOf(")", docIndex); int i2 = content.indexOf("]", docIndex); int i3 = content.indexOf("}", docIndex); if (i1 == -1) i1 = content.length(); if (i2 == -1) i2 = content.length(); if (i3 == -1) i3 = content.length(); if ((i1 == i2) && (i1 == i3)) System.out.println("Come here !"); else if (i1 < i2) { if (i3 < i1) { // st = "}"; start = i3; } else 
{ // st = ")"; start = i1; } } else { if (i3 < i2) { // st = "}"; start = i3; } else { // st = "]"; start = i2; } } docIndex = start + 1; } else { for (int k = 0; k < newStrings.length; k++) { st = st.replace(newStrings[k], oldStrings[k]); } String oldSubSt1 = new String(new char[] { (char) 39, (char) 39 }); String oldSubSt2 = new String(new char[] { (char) 96, (char) 96 }); String newSubSt = new String(new char[] { (char) 34 }); if (st.indexOf(oldSubSt1) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1) st = st.replace(oldSubSt1, newSubSt); else if (st.indexOf(oldSubSt2) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1) st = st.replace(oldSubSt2, newSubSt); int i39 = content.indexOf(39, docIndex); int i96 = content.indexOf(96, docIndex); if ((st.indexOf(39) != -1) && (i96 != -1 && i96 - docIndex <= maxDistanceBetweenLeaves)) st = st.replace((char) 39, (char) 96); else if ((st.indexOf(96) != -1) && (i39 != -1 && i39 - docIndex <= maxDistanceBetweenLeaves)) st = st.replace((char) 96, (char) 39); start = content.indexOf(st, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) System.out.println("Come here !"); else docIndex = start + st.length(); } } else docIndex = start + st.length(); } } else docIndex = start + st.length(); // Test if next node is a sentence splitter, means "." 
if (st.endsWith(".") && i < nodeList.size() - 1) { Tree nextNode = (Tree) nodeList.get(i + 1); String nextLabel = nextNode.label().value(); int nextStart = content.indexOf(nextLabel, docIndex); if (nextLabel.equals(".") && (nextStart == -1 || nextStart - docIndex > maxDistanceBetweenLeaves)) { docIndex--; oldNode.setLabel(new StringLabel(st.substring(0, st.length() - 1))); } } pos = new Charseq(start, docIndex); RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(new StringLabel(st), (List) oldNode.getChildrenAsList(), pos); Tree parent = oldNode.parent(root); parent.setChild(parent.indexOf(oldNode), newNode); parentSet.add(parent); } nodeList.clear(); nodeList.addAll(parentSet); // compute upper nodes' positions while (!nodeList.isEmpty()) { parentSet = new HashSet(); for (int i = 0; i < nodeList.size(); i++) { Tree oldNode = (Tree) nodeList.get(i); Iterator nodeIter = oldNode.getChildrenAsList().iterator(); Tree node = (Tree) nodeIter.next(); while (node instanceof RECKCTTreeNodeImpl && nodeIter.hasNext()) { node = (Tree) nodeIter.next(); } if (node instanceof RECKCTTreeNodeImpl) { Long start = ((RECKCTTreeNodeImpl) oldNode.firstChild()).getPosition().getStart(); Long end = ((RECKCTTreeNodeImpl) oldNode.lastChild()).getPosition().getEnd(); pos = new Charseq(start, end); RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(oldNode.label(), (List) oldNode.getChildrenAsList(), pos); Tree parent = oldNode.parent(root); parent.setChild(parent.indexOf(oldNode), newNode); parentSet.add(parent); // if oldNode is in parentSet, remove it if (parentSet.contains(oldNode)) { parentSet.remove(oldNode); } } else { parentSet.add(oldNode); } } nodeList.clear(); if (parentSet.size() == 1 && parentSet.contains(root)) { Long start = ((RECKCTTreeNodeImpl) root.firstChild()).getPosition().getStart(); Long end = ((RECKCTTreeNodeImpl) root.lastChild()).getPosition().getEnd(); pos = new Charseq(start, end); newRoot = new RECKCTTreeNodeImpl(root.label(), (List) root.getChildrenAsList(), 
pos); } else { nodeList.addAll(parentSet); } } return newRoot; }