List of usage examples for the equals method of edu.stanford.nlp.trees.Tree
@Override public boolean equals(Object o)
From source file:sg.edu.nus.comp.pdtb.parser.ArgExtComp.java
License:Open Source License
/**
 * Builds argument-extractor feature rows for the explicit relations of one article.
 *
 * <p>Span maps, parse trees and explicit relation lines are loaded through {@code Corpus}
 * (PDTB loaders when {@code corpus} equals {@code Type.PDTB}, the "Bio" loaders otherwise);
 * for {@code ErrorPropagation} and {@code Auto} feature types the relation list is additionally
 * passed through the matching error-propagation filter.
 *
 * <p>Each relation line is split on {@code |}. Relations whose argument-position label is
 * {@code "FS"} are skipped; for every feature type other than {@code Training} the label is
 * then replaced by {@code labels.get(majorIndex)}, and {@code majorIndex} is advanced once per
 * non-skipped relation. Only relations labelled {@code "SS"} produce output: the connective's
 * leaves are located by walking the span list from {@code contIndex}, and one row
 * {@code {feature, rel, line}} is emitted for every tree returned by
 * {@code getInternalNodes(root, root)}. In training mode the row label is {@code arg1_node},
 * {@code arg2_node} or {@code none}; otherwise it is {@code treeNum + ":" + nodeNumber}.
 *
 * <p>NOTE(review): {@code trees}, {@code sentMap}, {@code labels}, {@code majorIndex} and
 * {@code doneSoFar} are not declared inside this method - presumably instance fields of the
 * enclosing class; confirm against the full source. {@code trees} and {@code sentMap} are
 * written here and {@code majorIndex} is incremented, so the method is stateful across calls.
 *
 * @param corpus      corpus flavour selecting the loaders and the column layout of a relation line
 * @param article     article file handed to the {@code Corpus} loaders; its name prefixes each row's line tag
 * @param featureType feature mode; controls filtering and how labels are produced
 * @return the generated rows, possibly empty
 * @throws IOException declared by the method; presumably raised by the {@code Corpus} loaders
 */
public List<String[]> generateFeatures(Type corpus, File article, FeatureType featureType) throws IOException { List<String[]> features = new ArrayList<>(); ArrayList<String> spanArray = null; Map<String, String> spanHashMap = null; List<String> explicitSpans = null; if (corpus.equals(Type.PDTB)) { spanArray = Corpus.getSpanMapAsList(article, featureType); spanHashMap = Corpus.getSpanMap(article, featureType); trees = Corpus.getTrees(article, featureType); explicitSpans = Corpus.getExplicitSpans(article, featureType); if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) { explicitSpans = Corpus.filterErrorProp(explicitSpans, article, featureType); }//w ww .j a v a 2 s . c om if (featureType == FeatureType.Auto) { sentMap = Corpus.getSentMap(article); } } else { spanArray = Corpus.getBioSpanMapAsList(article, featureType); spanHashMap = Corpus.getBioSpanMap(article, featureType); trees = Corpus.getBioTrees(article, featureType); explicitSpans = Corpus.getBioExplicitSpans(article, featureType); if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) { explicitSpans = Corpus.filterBioErrorProp(explicitSpans, article, featureType); } } int index = 0; int contIndex = 0; for (String rel : explicitSpans) { String[] cols = rel.split("\\|", -1); String argPos = null; if (corpus == Type.PDTB) { if (featureType == FeatureType.Training) { argPos = Corpus.getLabel(cols[23], cols[33]); } else { argPos = labels.get(majorIndex); } } else { argPos = Corpus.getBioLabel(cols[14], cols[20], spanArray); } if (argPos.equals("FS")) { continue; } if (featureType != FeatureType.Training) { argPos = labels.get(majorIndex); } ++majorIndex; if (argPos.equals("SS")) { Set<Integer> done = new HashSet<>(); index = contIndex; List<Node> nodes = new ArrayList<>(); Tree root = null; String[] spans = corpus.equals(Type.PDTB) ? 
cols[3].split(";") : cols[1].split(";"); for (String spanTmp : spans) { String[] span = spanTmp.split("\\.\\."); for (; index < spanArray.size(); ++index) { // wsj_1371,0,6,9..21,Shareholders String line = spanArray.get(index); String[] spanCols = line.split(","); String[] canSpan = spanCols[3].split("\\.\\."); // Start matches // if (span[0].equals(canSpan[0]) || nodes.size() > 0) { boolean flag = span[0].equals(canSpan[0]) || (nodes.size() > 0 && spans.length == 1 && Integer.parseInt(canSpan[1]) <= Integer.parseInt(span[1])); if (corpus.equals(Type.BIO_DRB)) { int start = Integer.parseInt(canSpan[0]); int end = Integer.parseInt(canSpan[1]); int outStart = Integer.parseInt(span[0]); int outEnd = Integer.parseInt(span[1]); flag = outStart <= start && end <= outEnd; } if (flag) { if (nodes.size() == 0) { contIndex = index; } root = trees.get(Integer.parseInt(spanCols[1])); List<Tree> leaves = root.getLeaves(); int start = Integer.parseInt(spanCols[2]); Tree node = root.getNodeNumber(start); int nodeNum = 0; for (; nodeNum < leaves.size(); ++nodeNum) { Tree potNode = leaves.get(nodeNum); if (node.equals(potNode)) { int tmp = potNode.nodeNumber(root); String tmpSpan = spanHashMap.get(spanCols[1] + ":" + tmp); if (tmpSpan.equals(spanCols[3])) { break; } } } if (!done.contains(nodeNum)) { done.add(nodeNum); nodes.add(new Node(node, nodeNum)); } if (span[1].equals(canSpan[1])) { ++index; break; } } } } if (!nodes.isEmpty()) { String connStr = null; String connCat = null; if (corpus.equals(Type.PDTB)) { connStr = cols[8].trim().replace(' ', '_'); String c = cols[5].substring(0, 1); if (c.toLowerCase().equals(connStr.substring(0, 1))) { connStr = c + connStr.substring(1); } connCat = ConnComp.findCategory(cols[8]); } else { StringBuilder tmp = new StringBuilder(); for (Node node : nodes) { tmp.append(node.tree + " "); } connStr = tmp.toString(); connStr = connStr.trim().toLowerCase().replace(' ', '_'); connCat = ConnComp.findCategory(connStr.replace('_', ' '), corpus); } 
Tree connNode = nodes.get(nodes.size() - 1).tree.parent(root); Tree[] argNodes = corpus.equals(Type.PDTB) ? getArgNodes(root, cols, spanArray, connCat, connNode) : getBioArgNodes(root, cols, spanArray, connCat, connNode); List<Tree> internal = getInternalNodes(root, root); String treeNum; String line; int total = (doneSoFar + features.size()); if (corpus.equals(Type.PDTB)) { treeNum = featureType == FeatureType.AnyText ? cols[7] : getNodeNum(cols[23], featureType); line = article.getName() + ":" + total + "-" + (total + internal.size()) + ":Arg1(" + cols[22] + "):Arg2(" + cols[32] + "):" + cols[3]; } else { treeNum = Corpus.spanToSenIds(cols[14], spanArray).getFirst().toString(); line = article.getName() + ":" + total + "-" + (total + internal.size()) + ":Arg1(" + cols[14] + "):Arg2(" + cols[20] + "):" + cols[1]; } for (Tree node : internal) { String label = ""; if (node.equals(argNodes[0])) { label = "arg1_node"; } else if (node.equals(argNodes[1])) { label = "arg2_node"; } else { label = "none"; } if (featureType != FeatureType.Training) { label = treeNum + ":" + node.nodeNumber(root); } String feature = printFeature(root, node, connStr, connCat, connNode, label); features.add(new String[] { feature, rel, line }); } } } } return features; }
From source file:sg.edu.nus.comp.pdtb.parser.ArgExtComp.java
License:Open Source License
/** * <pre>/*from w ww. j a v a2 s . co m*/ * # 0: node1 and node2 in the same path to root * # 1: node2 is at the rhs of node1's path to root * # -1: node2 is at the lhs of node1's path to root * </pre> * * @param root * @param connNode * @param node * @return */ private static int relativePosition(Tree root, Tree connNode, Tree node) { Tree curr = connNode; while (curr != null && !curr.equals(root)) { if (curr.equals(node)) { return 0; } Tree parent = curr.parent(root); Tree[] children = parent.children(); for (int i = 0; i < children.length; ++i) { if (children[i].contains(node)) { int nodeNum = node.nodeNumber(root); int connNum = connNode.nodeNumber(root); if (nodeNum < connNum) { return -1; } else { return 1; } } } curr = parent; } return 0; }
From source file:sg.edu.nus.comp.pdtb.parser.ArgExtComp.java
License:Open Source License
private static List<String> findUpwardPath(Tree root, Tree connNode, Tree lca) { List<String> path = new ArrayList<>(); if (connNode == null || lca == null) {// || connNode.equals(lca)) { return path; }// www. ja va 2 s . c o m Tree curr = connNode; while (curr != null && !curr.equals(lca)) { String val = curr.value(); if (val != null) { int t = val.indexOf("-"); if (t > 0) { val = val.substring(0, t); } path.add(val); } curr = curr.parent(root); } if (curr != null && curr.equals(lca)) { String val = curr.value(); if (val != null) { int t = val.indexOf("-"); if (t > 0) { val = val.substring(0, t); } path.add(val); } } if (curr == null && path.isEmpty()) { return new ArrayList<>(); } else { return path; } }
From source file:sg.edu.nus.comp.pdtb.parser.ArgPosComp.java
License:Open Source License
public List<String[]> generateFeatures(Corpus.Type corpus, File article, FeatureType featureType) throws IOException { List<String[]> features = new ArrayList<>(); ArrayList<String> spanMap = null; Map<String, String> spanHashMap = null; List<Tree> trees = null; List<String> explicitSpans = null; if (corpus.equals(Type.PDTB)) { trees = Corpus.getTrees(article, featureType); spanHashMap = Corpus.getSpanMap(article, featureType); spanMap = Corpus.getSpanMapAsList(article, featureType); explicitSpans = Corpus.getExplicitSpans(article, featureType); if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) { explicitSpans = Corpus.filterErrorProp(explicitSpans, article, featureType); }//from ww w .j a va2s . co m } else if (corpus.equals(Type.BIO_DRB)) { trees = Corpus.getBioTrees(article, featureType); spanHashMap = Corpus.getBioSpanMap(article, featureType); spanMap = Corpus.getBioSpanMapAsList(article, featureType); explicitSpans = Corpus.getBioExplicitSpans(article, featureType); if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) { explicitSpans = Corpus.filterBioErrorProp(explicitSpans, article, featureType); } } else { log.error("Unimplemented corpus type: " + corpus); } int index = 0; int contIndex = 0; for (String rel : explicitSpans) { String[] cols = rel.split("\\|", -1); Set<Integer> done = new HashSet<>(); String label = null; if (featureType == FeatureType.AnyText) { label = "NA"; } else { if (corpus == Type.PDTB) { label = Corpus.getLabel(cols[23], cols[33]); } else { label = Corpus.getBioLabel(cols[14], cols[20], spanMap); } } label = label.endsWith("PS") ? "PS" : label; if (label.equals("FS")) { continue; } index = contIndex; List<Node> nodes = new ArrayList<>(); Tree root = null; String[] spans = corpus.equals(Type.PDTB) ? 
cols[3].split(";") : cols[1].split(";"); for (String spanTmp : spans) { String[] span = spanTmp.split("\\.\\."); for (; index < spanMap.size(); ++index) { // wsj_1371,0,6,9..21,Shareholders String line = spanMap.get(index); String[] spanCols = line.split(","); String[] canSpan = spanCols[3].split("\\.\\."); // Start matches if (span[0].equals(canSpan[0]) || (nodes.size() > 0 && spans.length == 1 && Integer.parseInt(canSpan[1]) <= Integer.parseInt(span[1]))) { if (nodes.size() == 0) { contIndex = index; } root = trees.get(Integer.parseInt(spanCols[1])); List<Tree> leaves = root.getLeaves(); int start = Integer.parseInt(spanCols[2]); Tree node = root.getNodeNumber(start); int nodeNum = 0; for (; nodeNum < leaves.size(); ++nodeNum) { Tree potNode = leaves.get(nodeNum); if (node.equals(potNode)) { int tmp = potNode.nodeNumber(root); String tmpSpan = spanHashMap.get(spanCols[1] + ":" + tmp); if (tmpSpan.equals(spanCols[3])) { break; } } } if (corpus == Type.PDTB) { if (!done.contains(nodeNum) && cols[5].contains(node.value().trim())) { done.add(nodeNum); nodes.add(new Node(node, nodeNum)); } } else { if (!done.contains(nodeNum)) { done.add(nodeNum); nodes.add(new Node(node, nodeNum)); } } if (span[1].equals(canSpan[1])) { ++index; break; } } } } if (!nodes.isEmpty()) { String feature = printFeature(root, nodes, label); features.add(new String[] { feature, rel }); } } return features; }
From source file:sg.edu.nus.comp.pdtb.parser.ConnComp.java
License:Open Source License
/**
 * Builds two label paths from {@code node} up to, but not including, {@code root}.
 *
 * <p>Labels are the tree values cut at the first {@code '-'} and then at the first
 * {@code '='}, joined with {@code "_>_"}. The second path omits a label when it is the same
 * as the label of the tree directly below it. Both sides of that comparison are now stripped
 * the same way; previously the lower label kept its {@code '='} suffix, so for example
 * {@code NP=2} under {@code NP} was not recognised as a repeat.
 *
 * <p>The climb also stops when no parent can be resolved under {@code root}, instead of
 * failing on a null parent.
 *
 * @param node starting tree; its own label is always the first path element
 * @param root tree used to resolve parents; its label is not part of either path
 * @return a two-element array: the full path, then the path without consecutive repeats
 */
private static String[] getSyntacticfeatures(Tree node, Tree root) {
    String label = syntacticBaseLabel(node);
    StringBuilder selfToRoot = new StringBuilder(label);
    StringBuilder selfToRootNoRepeat = new StringBuilder(label);

    String prevLabel = label;
    Tree curr = node.parent(root);
    while (curr != null && !curr.equals(root)) {
        String currLabel = syntacticBaseLabel(curr);
        selfToRoot.append("_>_").append(currLabel);
        if (!prevLabel.equals(currLabel)) {
            selfToRootNoRepeat.append("_>_").append(currLabel);
        }
        prevLabel = currLabel;
        curr = curr.parent(root);
    }
    return new String[] { selfToRoot.toString(), selfToRootNoRepeat.toString() };
}

/**
 * Returns the value of {@code tree} cut at its first '-' and then at its first '='.
 * Gives the same result as {@code split("-")[0].split("=")[0]} but yields an empty string,
 * rather than throwing, for values made only of separators (for example "--").
 */
private static String syntacticBaseLabel(Tree tree) {
    String value = tree.value();
    int dash = value.indexOf('-');
    if (dash >= 0) {
        value = value.substring(0, dash);
    }
    int eq = value.indexOf('=');
    if (eq >= 0) {
        value = value.substring(0, eq);
    }
    return value;
}
From source file:sg.edu.nus.comp.pdtb.parser.ConnComp.java
License:Open Source License
public static Tree getMutualParent(List<Node> nodes, Tree root) { int maxNodeNum = 0; for (Node node : nodes) { maxNodeNum = Math.max(maxNodeNum, node.tree.nodeNumber(root)); }//from w w w.ja va 2 s . c o m int nodeNum = 0; Tree parent = nodes.get(0).tree; while (nodeNum < maxNodeNum && !parent.equals(root)) { parent = parent.parent(root); List<Tree> children = parent.getLeaves(); Tree rightMostChild = children.get(children.size() - 1); nodeNum = rightMostChild.nodeNumber(root); } return parent; }
From source file:sg.edu.nus.comp.pdtb.parser.ExplicitComp.java
License:Open Source License
public List<String[]> generateFeatures(Type corpus, File article, FeatureType featureType) throws IOException { List<String[]> features = new ArrayList<>(); ArrayList<String> spanMap = null; Map<String, String> spanHashMap = null; List<String> explicitSpans = null; List<Tree> trees;//from w w w . j a v a2 s .co m if (corpus.equals(Type.PDTB)) { spanMap = Corpus.getSpanMapAsList(article, featureType); spanHashMap = Corpus.getSpanMap(article, featureType); trees = Corpus.getTrees(article, featureType); explicitSpans = Corpus.getExplicitSpans(article, featureType); if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) { explicitSpans = Corpus.filterErrorProp(explicitSpans, article, featureType); } } else { spanMap = Corpus.getBioSpanMapAsList(article, featureType); spanHashMap = Corpus.getBioSpanMap(article, featureType); trees = Corpus.getBioTrees(article, featureType); explicitSpans = Corpus.getBioExplicitSpans(article, featureType); if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) { explicitSpans = Corpus.filterBioErrorProp(explicitSpans, article, featureType); } } int index = 0; int contIndex = 0; for (String rel : explicitSpans) { String[] cols = rel.split("\\|", -1); index = contIndex; List<Node> nodes = new ArrayList<>(); Tree root = null; String[] spans = cols[corpus.equals(Type.PDTB) ? 
3 : 1].split(";"); for (String spanTmp : spans) { String[] span = spanTmp.split("\\.\\."); for (; index < spanMap.size(); ++index) { // wsj_1371,0,6,9..21,Shareholders String line = spanMap.get(index); String[] spanCols = line.split(","); String[] canSpan = spanCols[3].split("\\.\\."); // Start matches if (span[0].equals(canSpan[0]) || nodes.size() > 0) { if (nodes.size() == 0) { contIndex = index; } root = trees.get(Integer.parseInt(spanCols[1])); List<Tree> leaves = root.getLeaves(); int start = Integer.parseInt(spanCols[2]); Tree node = root.getNodeNumber(start); int nodeNum = 0; for (; nodeNum < leaves.size(); ++nodeNum) { Tree potNode = leaves.get(nodeNum); if (node.equals(potNode)) { int tmp = potNode.nodeNumber(root); String tmpSpan = spanHashMap.get(spanCols[1] + ":" + tmp); if (tmpSpan.equals(spanCols[3])) { break; } } } nodes.add(new Node(node, nodeNum)); if (span[1].equals(canSpan[1])) { break; } } } } if (!nodes.isEmpty()) { Set<String> semantics = Util.getUniqueSense(new String[] { cols[corpus.equals(Type.PDTB) ? 11 : 8], cols[corpus.equals(Type.PDTB) ? 12 : 9] }); String sem = ""; if (featureType == FeatureType.Training) { for (String sm : semantics) { sm = sm.replace(' ', '_'); String feature = printFeature(root, nodes, sm); features.add(new String[] { feature }); } } else { if (featureType == FeatureType.AnyText) { semantics.add("xxx"); } for (String e : semantics) { sem += e.replace(' ', '_'); sem += ""; } String feature = printFeature(root, nodes, sem); features.add(new String[] { feature, rel }); } } } return features; }