Example usage for edu.stanford.nlp.trees Tree parent

List of usage examples for edu.stanford.nlp.trees Tree parent

Introduction

On this page you can find example usages of the parent method of edu.stanford.nlp.trees.Tree.

Prototype

public Tree parent(Tree root) 

Source Link

Document

Return the parent of the tree node.

Usage

From source file:sg.edu.nus.comp.pdtb.parser.ArgPosComp.java

License:Open Source License

/**
 * Builds one space-separated feature line for a connective.
 *
 * <p>The line contains the connective string ({@code conn:}), the values of the
 * parent nodes of its tokens ({@code conn_POS:}), and, unless the connective is
 * one of the paired forms rewritten with {@code ".."}, a sentence-position
 * feature plus features for up to two preceding leaves. Leaves whose parent
 * value is {@code "-NONE-"} are skipped while walking backwards. The label is
 * appended last and every {@code '/'} in the result is escaped as {@code "\/"}.
 *
 * @param root  tree that contains the connective tokens
 * @param nodes connective tokens; {@code index} is used as a position in
 *              {@code root.getLeaves()}
 * @param label label appended at the end of the line (spaces become underscores)
 * @return the feature line
 */
private String printFeature(Tree root, List<Node> nodes, String label) {
    StringBuilder tagBuf = new StringBuilder();
    StringBuilder wordBuf = new StringBuilder();
    for (Node node : nodes) {
        Tree tagNode = node.tree.parent(root);
        if (tagNode == null) {
            // Token has no parent under root: leave it out of both strings.
            continue;
        }
        tagBuf.append(tagNode.value()).append(' ');
        wordBuf.append(node.tree.value()).append(' ');
    }
    String POS = tagBuf.toString().trim().replace(' ', '_');
    String connStr = wordBuf.toString().trim().replace(' ', '_');

    boolean pairedConnective = connStr.equalsIgnoreCase("if_then") || connStr.equalsIgnoreCase("either_or")
            || connStr.equalsIgnoreCase("neither_nor");
    if (pairedConnective) {
        connStr = connStr.replaceAll("_", "..");
    }

    List<Tree> leaves = root.getLeaves();
    int firstIdx = nodes.get(0).index;

    // cursor walks leftwards from the first connective token.
    int cursor = firstIdx;
    Tree prevLeaf = null;
    if (cursor > 0) {
        prevLeaf = leaves.get(--cursor);
    }

    StringBuilder feature = new StringBuilder();
    feature.append("conn:").append(connStr).append(' ');
    feature.append("conn_POS:").append(POS).append(' ');

    if (!connStr.contains("..")) {
        if (firstIdx <= 2) {
            feature.append("sent_pos:").append(firstIdx).append(' ');
        } else {
            int lastIdx = nodes.get(nodes.size() - 1).index;
            if (lastIdx >= leaves.size() - 3) {
                // Near the end of the sentence the position is emitted as a negative offset.
                feature.append("sent_pos:").append(lastIdx - leaves.size()).append(' ');
            }
        }

        if (prevLeaf != null) {
            while (prevLeaf.parent(root).value().equals("-NONE-") && cursor > 0) {
                prevLeaf = leaves.get(--cursor);
            }

            String prevPOS = prevLeaf.parent(root).value().replace(' ', '_');
            String prev = prevLeaf.value().replace(' ', '_');

            feature.append("prev1:").append(prev).append(' ');
            feature.append("prev1_POS:").append(prevPOS).append(' ');
            feature.append("with_prev1_full:").append(prev).append('_').append(connStr).append(' ');
            feature.append("with_prev1_POS_full:").append(prevPOS).append('_').append(POS).append(' ');

            if (cursor > 0) {
                Tree prev2Leaf = leaves.get(--cursor);
                if (prev2Leaf != null) {
                    while (prev2Leaf.parent(root).value().equals("-NONE-") && cursor > 0) {
                        prev2Leaf = leaves.get(--cursor);
                    }

                    String prev2POS = prev2Leaf.parent(root).value().replace(' ', '_');
                    String prev2 = prev2Leaf.value().replace(' ', '_');

                    feature.append("prev2:").append(prev2).append(' ');
                    feature.append("prev2_POS:").append(prev2POS).append(' ');
                    feature.append("with_prev2_full:").append(prev2).append('_').append(connStr).append(' ');
                    feature.append("with_prev2_POS_full:").append(prev2POS).append('_').append(POS).append(' ');
                }
            }
        }
    }

    feature.append(label.replace(' ', '_'));

    return feature.toString().replaceAll("/", "\\\\/");
}

From source file:sg.edu.nus.comp.pdtb.parser.ConnComp.java

License:Open Source License

/**
 * Builds one space-separated feature line for a candidate connective.
 *
 * <p>The line contains lexical features (connective string and the values of
 * the parent nodes of its tokens), context features for the previous and next
 * leaf (leaves whose parent value is {@code "-NONE-"} are skipped), syntactic
 * category features of the connective's mutual parent, its parent and its
 * immediate siblings, pairwise combinations of those, and two path-to-root
 * features. The line ends with the label {@code '1'} or {@code '0'} and every
 * {@code '/'} is escaped as {@code "\/"}.
 *
 * @param root       tree that contains the connective tokens
 * @param nodes      connective tokens; {@code index} is used as a position in
 *                   {@code root.getLeaves()}
 * @param isExplicit selects the label: {@code '1'} when true, {@code '0'} otherwise
 * @param isIntra    when true the underscores joining the tokens are replaced by {@code ".."}
 * @return the feature line
 */
public static String printFeature(Tree root, List<Node> nodes, boolean isExplicit, boolean isIntra) {
    char label = isExplicit ? '1' : '0';
    StringBuilder feature = new StringBuilder();

    StringBuilder tmp = new StringBuilder();
    StringBuilder tmp2 = new StringBuilder();
    for (Node node : nodes) {
        tmp.append(node.tree.parent(root).value() + " ");
        tmp2.append(node.tree.value() + " ");
    }
    String POS = tmp.toString().trim().replace(' ', '_');
    String connStr = tmp2.toString().trim().replace(' ', '_');
    if (isIntra) {
        connStr = connStr.replaceAll("_", "..");
    }
    List<Tree> leaves = root.getLeaves();

    int firstNodeNum = nodes.get(0).index;
    int lastNodeNum = nodes.get(nodes.size() - 1).index;

    Tree prevNode = firstNodeNum > 0 ? leaves.get(--firstNodeNum) : null;

    Tree nextNode = (leaves.size() > lastNodeNum + 1) ? leaves.get(++lastNodeNum) : null;

    feature.append("conn_lc:");
    feature.append(connStr.toLowerCase());
    feature.append(' ');

    feature.append("conn:");
    feature.append(connStr);
    feature.append(' ');

    feature.append("lexsyn:conn_POS:");
    feature.append(POS);
    feature.append(' ');

    if (prevNode != null) {
        // Walk left past "-NONE-" leaves, stopping at the first leaf at the latest.
        while (prevNode.parent(root).value().equals("-NONE-") && firstNodeNum > 0) {
            prevNode = leaves.get(--firstNodeNum);
        }

        String prevPOS = prevNode.parent(root).value().replace(' ', '_');

        feature.append("lexsyn:with_prev_full:");
        feature.append(prevNode.value().replace(' ', '_'));
        feature.append('_');
        feature.append(connStr);
        feature.append(' ');

        feature.append("lexsyn:prev_POS:");
        feature.append(prevPOS);
        feature.append(' ');

        feature.append("lexsyn:with_prev_POS:");
        feature.append(prevPOS);
        feature.append('_');
        feature.append(POS.split("_")[0]);
        feature.append(' ');

        feature.append("lexsyn:with_prev_POS_full:");
        feature.append(prevPOS);
        feature.append('_');
        feature.append(POS);
        feature.append(' ');
    }

    if (nextNode != null) {
        // Walk right past "-NONE-" leaves. The bound mirrors the leftward walk above
        // and prevents an IndexOutOfBoundsException when the sentence ends in such leaves.
        while (nextNode.parent(root).value().equals("-NONE-") && lastNodeNum < leaves.size() - 1) {
            nextNode = leaves.get(++lastNodeNum);
        }

        String nextPOS = nextNode.parent(root).value().replace(' ', '_');

        feature.append("lexsyn:with_next_full:");
        feature.append(connStr);
        feature.append('_');
        feature.append(nextNode.value().replace(' ', '_'));
        feature.append(' ');

        feature.append("lexsyn:next_POS:");
        feature.append(nextPOS);
        feature.append(' ');

        feature.append("lexsyn:with_next_POS:");
        feature.append(POS.split("_")[nodes.size() - 1]);
        feature.append('_');
        feature.append(nextPOS);
        feature.append(' ');

        feature.append("lexsyn:with_next_POS_full:");
        feature.append(POS);
        feature.append('_');
        feature.append(nextPOS);
        feature.append(' ');
    }

    // Pitler & Nenkova (ACL 09) features:
    Tree parent = getMutualParent(nodes, root);
    Tree grandparent = parent.parent(root);
    List<Tree> siblings = grandparent.getChildrenAsList();

    String selfCat = parent.value().split("-")[0].split("=")[0];
    String parentCat = grandparent.value() == null ? "NONE" : grandparent.value().split("-")[0].split("=")[0];

    Tree leftSib = null;
    Tree rightSib = null;
    String leftCat = "NONE";
    String rightCat = "NONE";

    // Locate the node by identity. List.indexOf uses equals(), and if Tree.equals compares
    // structure (NOTE(review): confirm against the Tree implementation in use) it would
    // return an earlier sibling that merely looks the same, e.g. a repeated "(CC and)".
    int index = -1;
    for (int i = 0; i < siblings.size(); ++i) {
        if (siblings.get(i) == parent) {
            index = i;
            break;
        }
    }

    if (index > 0) {
        leftSib = siblings.get(index - 1);
        leftCat = leftSib.value().startsWith("-") ? leftSib.value()
                : leftSib.value().split("-")[0].split("=")[0];
        leftCat = leftCat.isEmpty() ? "-NONE-" : leftCat;
    }

    if (index < siblings.size() - 1) {
        rightSib = siblings.get(index + 1);
        rightCat = rightSib.value().startsWith("-") ? rightSib.value()
                : rightSib.value().split("-")[0].split("=")[0];
        rightCat = rightCat.isEmpty() ? "-NONE-" : rightCat;
    }

    boolean rightVP = containsNode(rightSib, "VP");
    boolean rightTrace = containsTrace(rightSib);

    List<String> syn = new ArrayList<>();
    syn.add("selfCat:" + selfCat);
    syn.add("parentCat:" + parentCat);
    syn.add("leftCat:" + leftCat);
    syn.add("rightCat:" + rightCat);

    if (rightVP) {
        syn.add("rightVP");
    }
    if (rightTrace) {
        syn.add("rightTrace");
    }

    for (String cat : syn) {
        feature.append("syn:");
        feature.append(cat);
        feature.append(' ');
    }

    for (String cat : syn) {
        feature.append("conn-syn:conn:");
        feature.append(connStr);
        feature.append('-');
        feature.append(cat);
        feature.append(' ');
    }

    // Every unordered pair of syntactic features.
    for (int i = 0; i < syn.size(); ++i) {
        for (int j = i + 1; j < syn.size(); ++j) {
            feature.append("syn-syn:");
            feature.append(syn.get(i));
            feature.append('-');
            feature.append(syn.get(j));
            feature.append(' ');
        }
    }

    String[] synFeatures = getSyntacticfeatures(parent, root);

    feature.append("path-self>root:");
    feature.append(synFeatures[0]);
    feature.append(' ');

    feature.append("path-self>root2:");
    feature.append(synFeatures[1]);
    feature.append(' ');

    feature.append(label);

    return feature.toString().replaceAll("/", "\\\\/");
}

From source file:sg.edu.nus.comp.pdtb.parser.ConnComp.java

License:Open Source License

/**
 * Builds two path strings from {@code node} up towards {@code root}.
 *
 * <p>Each step contributes the node's value truncated at the first {@code '-'}
 * and then at the first {@code '='}; steps are joined with {@code "_>_"}. The
 * walk stops before {@code root}, so the root's own value is never included.
 *
 * @param node starting node
 * @param root root of the tree that contains {@code node}
 * @return a two-element array: the full path, and the path with consecutive
 *         identical categories collapsed
 */
private static String[] getSyntacticfeatures(Tree node, Tree root) {
    StringBuilder selfToRoot = new StringBuilder();
    StringBuilder selfToRootNoRepeat = new StringBuilder();

    String prevCat = node.value().split("-")[0].split("=")[0];
    selfToRoot.append(prevCat);
    selfToRootNoRepeat.append(prevCat);

    // A null parent (node is the root or is not under root) ends the walk
    // instead of raising a NullPointerException.
    Tree current = node.parent(root);
    while (current != null && !current.equals(root)) {
        String cat = current.value().split("-")[0].split("=")[0];

        selfToRoot.append("_>_");
        selfToRoot.append(cat);

        // Compare categories normalised the same way on both sides. Previously the
        // child kept its "=..." suffix, so e.g. "NP=2" under "NP" counted as a change.
        if (!prevCat.equals(cat)) {
            selfToRootNoRepeat.append("_>_");
            selfToRootNoRepeat.append(cat);
        }
        prevCat = cat;
        current = current.parent(root);
    }
    return new String[] { selfToRoot.toString(), selfToRootNoRepeat.toString() };
}

From source file:sg.edu.nus.comp.pdtb.parser.ConnComp.java

License:Open Source License

/**
 * Climbs from the first token until the current ancestor's right-most leaf has
 * a node number (as reported by {@code nodeNumber(root)}) that is at least the
 * largest node number among all tokens, or until the ancestor equals
 * {@code root}.
 *
 * @param nodes tokens to cover; must not be empty
 * @param root  root of the tree that contains the tokens
 * @return the ancestor at which the climb stopped; the first token itself when
 *         no climbing was needed
 */
public static Tree getMutualParent(List<Node> nodes, Tree root) {
    int targetNumber = 0;
    for (Node token : nodes) {
        int number = token.tree.nodeNumber(root);
        if (number > targetNumber) {
            targetNumber = number;
        }
    }

    Tree ancestor = nodes.get(0).tree;
    int coveredUpTo = 0;
    while (coveredUpTo < targetNumber) {
        if (ancestor.equals(root)) {
            break;
        }
        ancestor = ancestor.parent(root);
        List<Tree> span = ancestor.getLeaves();
        coveredUpTo = span.get(span.size() - 1).nodeNumber(root);
    }

    return ancestor;
}

From source file:sg.edu.nus.comp.pdtb.parser.ExplicitComp.java

License:Open Source License

/**
 * Builds one space-separated feature line for an explicit connective.
 *
 * <p>The line contains the connective string in lower case and in its original
 * case, the values of the parent nodes of its tokens, and a feature combining
 * the preceding leaf with the connective. Leaves whose parent value is
 * {@code "-NONE-"} are skipped while walking backwards. The label is appended
 * last and every {@code '/'} in the result is escaped as {@code "\/"}.
 *
 * @param root  tree that contains the connective tokens
 * @param nodes connective tokens; {@code index} is used as a position in
 *              {@code root.getLeaves()}
 * @param label label appended at the end of the line (spaces become underscores)
 * @return the feature line
 */
private String printFeature(Tree root, List<Node> nodes, String label) {

    StringBuilder feature = new StringBuilder();

    StringBuilder tmp = new StringBuilder();
    StringBuilder tmp2 = new StringBuilder();
    for (Node node : nodes) {
        Tree tagNode = node.tree.parent(root);
        if (tagNode != null) {
            tmp.append(tagNode.value() + " ");
            tmp2.append(node.tree.value() + " ");
        }
    }
    String POS = tmp.toString().trim().replace(' ', '_');
    String connStr = tmp2.toString().trim().replace(' ', '_');

    // Tokens are joined with '_' above, so the paired forms must be matched in that
    // spelling. The third alternative used to be "neither..nor", which never matched.
    // NOTE(review): this changes the emitted string for "neither ... nor"; models
    // trained on the old output may need retraining.
    if (connStr.equalsIgnoreCase("if_then") || connStr.equalsIgnoreCase("either_or")
            || connStr.equalsIgnoreCase("neither_nor")) {
        connStr = connStr.replaceAll("_", "..");
    }
    List<Tree> leaves = root.getLeaves();

    int firstNodeNum = nodes.get(0).index;

    Tree prevNode = firstNodeNum > 0 ? leaves.get(--firstNodeNum) : null;

    feature.append("conn_lc:");
    feature.append(connStr.toLowerCase());
    feature.append(' ');

    feature.append("conn:");
    feature.append(connStr);
    feature.append(' ');

    feature.append("conn_POS:");
    feature.append(POS);
    feature.append(' ');

    if (prevNode != null) {
        // Walk left past "-NONE-" leaves, stopping at the first leaf at the latest.
        while (prevNode.parent(root).value().equals("-NONE-") && firstNodeNum > 0) {
            prevNode = leaves.get(--firstNodeNum);
        }

        feature.append("with_prev_full:");
        feature.append(prevNode.value().replace(' ', '_').toLowerCase());
        feature.append('_');
        feature.append(connStr.toLowerCase());
        feature.append(' ');
    }

    feature.append(label.replace(' ', '_'));

    return feature.toString().replaceAll("/", "\\\\/");
}

From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java

License:Open Source License

/**
 * Writes, for every leaf of every tree in {@code treeFile}, a CSV row mapping
 * the leaf to its character span in the text of {@code inputFile}.
 *
 * <p>Rows have the form {@code treeFileName,treeNumber,nodeNumber,start..end,word}
 * and are written to {@code treeFile + ".csv"}. Leaves whose parent value is
 * {@code "-NONE-"} and leaves whose text cannot be found at or after the
 * current search position are skipped.
 *
 * @param treeFile  file with the parse trees, read with {@code Util.ENCODING}
 * @param inputFile file with the original text
 * @throws IOException if a file cannot be read or written
 */
public static void anyTextToSpanGen(File treeFile, File inputFile) throws IOException {
    log.info("Generating the spans of each node in the parse trees.");

    String orgText = Util.readFile(inputFile);
    // NOTE(review): the characters originally matched by the 2nd and 3rd replacement were
    // lost (the patterns had become empty strings, which makes replaceAll insert the
    // replacement between every character). Typographic double and single quotes are
    // assumed here; confirm against the upstream source.
    orgText = orgText.replaceAll("`", "'").replaceAll("[\u201C\u201D]", "\"").replaceAll("[\u2018\u2019]", "'");

    TreeFactory tf = new LabeledScoredTreeFactory();
    // try-with-resources closes the writer and the readers even when parsing fails.
    try (PrintWriter pw = new PrintWriter(treeFile + ".csv");
            Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), Util.ENCODING));
            TreeReader tr = new PennTreeReader(r, tf)) {
        int index = 0;
        Tree root = tr.readTree();
        int treeNumber = 0;
        while (root != null) {

            List<Tree> leaves = root.getLeaves();

            for (Tree leaf : leaves) {
                int nodeNumber = leaf.nodeNumber(root);
                String parentValue = leaf.parent(root).value();
                if (parentValue.equals("-NONE-")) {
                    continue;
                }
                String word = nodeToString(leaf).trim();
                word = word.replaceAll("`", "'");
                // NOTE(review): same lost character as above; typographic single quotes assumed.
                word = word.replaceAll("[\u2018\u2019]", "'");
                word = word.replaceAll("\\.\\.\\.", ". . .");
                int span = orgText.indexOf(word, index);

                if (span == -1) {
                    continue;
                }
                // The next search resumes at the last character of this match.
                // NOTE(review): confirm that the "- 1" overlap is intentional.
                index = span + word.length() - 1;
                String spanString = (span + ".." + (span + word.length()));
                String key = treeFile.getName() + "," + treeNumber + "," + nodeNumber;
                word = word.trim().replaceAll("\\s+", "");
                // Commas would break the CSV row, so they are spelled out.
                word = word.replaceAll(",", "COMMA");
                pw.println(key + "," + spanString + "," + word);
            }
            root = tr.readTree();
            pw.flush();
            ++treeNumber;
        }
    }

    log.info("Done.");
}

From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java

License:Open Source License

/**
 * Generate the spans of each node in the auto parse trees.
 * /* w  w  w .j  ava2 s.  co  m*/
 * @param treePath
 * @param rawTextPath
 * @throws IOException
 */
@SuppressWarnings("unused")
public static void textToSpanGenAuto(String treePath, String rawTextPath) throws IOException {
    log.info("Generating the spans of each node in the auto parse trees.");
    String folder = "23/";
    File[] files = new File(treePath + folder).listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.startsWith("wsj_") && name.endsWith(".mrg");
        }
    });

    for (File file : files) {

        String fileName = file.getName().replaceAll("\\.mrg", "");
        String orgText = Util.readFile(rawTextPath + folder + fileName);
        orgText = orgText.replaceAll("`", "'");

        PrintWriter pw = new PrintWriter(treePath + folder + fileName + ".csv");

        TreeFactory tf = new LabeledScoredTreeFactory();
        Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        TreeReader tr = new PennTreeReader(r, tf);

        int index = 9;

        Tree root = tr.readTree();
        int treeNumber = 0;

        while (root != null) {
            StringBuilder tmp = new StringBuilder();
            List<Tree> leaves = root.getLeaves();
            for (Tree leaf : leaves) {
                int nodeNumber = leaf.nodeNumber(root);
                String parentValue = leaf.parent(root).value();
                if (parentValue.equals("-NONE-")) {
                    continue;
                }

                String word = Corpus.nodeToString(leaf).trim();

                if (word.equals(".")) {
                    continue;
                }

                word = word.replaceAll("`", "'");

                word = word.replaceAll("^\\p{Punct}*", "");
                word = word.replaceAll("\\p{Punct}*$", "");

                if (fileName.equals("wsj_2300") && index == 1457 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2330") && index == 6344 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2351") && index == 1040 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2360") && index == 2066 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2369") && index == 6434 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2381") && index == 2399 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2386") && index == 3522 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2386") && index == 3647 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2387") && index == 1466 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2387") && index == 5389 && word.equals("n't")) {
                    word = "'t";
                }
                if (fileName.equals("wsj_2397") && index == 1032 && word.equals("n't")) {
                    word = "'t";
                }

                if (fileName.equals("wsj_2306") && index == 5692 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2308") && index == 2373 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2315") && index == 1056 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2321") && index == 1279 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2330") && index == 1563 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2345") && index == 1838 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2350") && index == 699 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2351") && index == 778 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2351") && index == 2391 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2363") && index == 2868 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2367") && index == 1379 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2376") && index == 6687 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2377") && index == 2464 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2379") && index == 4711 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2379") && index == 5174 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2381") && index == 565 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2387") && index == 5430 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2387") && index == 5779 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2394") && index == 179 && word.equals("will")) {
                    word = "wo";
                }
                if (fileName.equals("wsj_2397") && index == 5243 && word.equals("will")) {
                    word = "wo";
                }

                int span = orgText.indexOf(word, index);
                while (span == -1) {
                    span = orgText.indexOf(word, index);

                }

                if (span - index > 1) {
                    String difference = orgText.substring(index, span).trim();
                    boolean isError = true;
                    isError &= !difference.matches("(\\p{Punct}+\\s*)+") && difference.length() > 0;
                    isError &= !difference.equals("");

                }
                index = span + word.length();
                String spanString = (span + ".." + (span + word.length()));
                String key = fileName + "," + treeNumber + "," + nodeNumber;
                word = word.trim().replaceAll("\\s+", "");
                word = word.replaceAll(",", "COMMA");
                tmp.append(key + "," + spanString + "," + word);
                tmp.append('\n');
            }
            root = tr.readTree();
            pw.print(tmp);
            ++treeNumber;
        }
        pw.close();
        tr.close();
    }

    log.info("Done.");
}

From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java

License:Open Source License

/**
 * Generate the spans of each node in the parse trees.
 *
 * <p>For every sub-folder of {@code treePath}, processes each file whose name
 * ends in {@code mrg} and writes a {@code .csv} file next to it with one row per
 * leaf: {@code fileName,treeNumber,nodeNumber,start..end,word}. Leaves whose
 * parent value is {@code "-NONE-"} and words that cannot be found in the raw
 * text at or after the current search position are skipped. A number of
 * file-specific corrections reconcile tree tokens with the raw text.
 *
 * @param treePath    folder containing the section folders with the parse trees
 * @param rawTextPath folder containing the section folders with the raw text
 * @throws IOException if a folder cannot be listed or a file cannot be read or
 *                     written
 */
public static void textToSpanGen(String treePath, String rawTextPath) throws IOException {
    log.info("Generating the spans of each node in the parse trees.");
    String[] topFolders = new File(treePath).list(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return new File(dir, name).isDirectory();
        }
    });
    if (topFolders == null) {
        // list returns null when the path is not a readable directory.
        throw new IOException("Cannot list section folders in " + treePath);
    }

    for (String topFolder : topFolders) {
        String folder = topFolder + "/";
        File tmp = new File(treePath + folder);
        if (!tmp.isDirectory()) {
            continue;
        }
        File[] files = tmp.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.endsWith("mrg");
            }
        });
        if (files == null) {
            throw new IOException("Cannot list parse trees in " + tmp);
        }
        for (File file : files) {
            log.info("Processing tree: " + file.getName());
            String fileName = file.getName().replaceAll("\\.mrg", "");

            String orgText = Util.readFile(rawTextPath + folder + fileName);
            orgText = orgText.replaceAll("`", "'");

            TreeFactory tf = new LabeledScoredTreeFactory();
            // try-with-resources closes the writer and the readers even when parsing fails.
            try (PrintWriter pw = new PrintWriter(treePath + folder + fileName + ".csv");
                    Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
                    TreeReader tr = new PennTreeReader(r, tf)) {

                // Search offset into the raw text. NOTE(review): the start value 9 presumably
                // skips a fixed header at the top of each raw file - confirm.
                int index = 9;

                if (fileName.equals("wsj_0285")) {
                    index = 200;
                }
                if (fileName.equals("wsj_0901")) {
                    index = 14;
                }

                Tree root = tr.readTree();
                int treeNumber = 0;

                while (root != null) {

                    List<Tree> leaves = root.getLeaves();

                    for (Tree leaf : leaves) {
                        int nodeNumber = leaf.nodeNumber(root);
                        String parentValue = leaf.parent(root).value();
                        if (parentValue.equals("-NONE-")) {
                            continue;
                        }

                        String word = Corpus.nodeToString(leaf).trim();

                        if (fileName.equals("wsj_0998") && index == 4644) {
                            continue;
                        }

                        if (word.equals(".") && !fileName.startsWith("wsj_23")) {
                            continue;
                        }

                        // skipping dots after U.S. present in the parse
                        // trees but not present in the original
                        // text
                        if (word.equals(".") && isPeriodMissingFromText(fileName, index)) {
                            continue;
                        }

                        word = word.replaceAll("`", "'");
                        word = word.replaceAll("\\.\\.\\.", ". . .");

                        word = alignWordWithRawText(fileName, index, word);

                        int span = orgText.indexOf(word, index);

                        if (fileName.equals("wsj_2170") && index == 7227 && word.equals("'s")) {
                            // Emits an empty span at the current offset for this token.
                            span = 7227;
                            word = "";
                        }

                        if (span == -1) {
                            continue;
                        }

                        index = span + word.length();

                        String spanString = (span + ".." + (span + word.length()));
                        String key = fileName + "," + treeNumber + "," + nodeNumber;
                        word = word.trim().replaceAll("\\s+", "");
                        // Commas would break the CSV row, so they are spelled out.
                        word = word.replaceAll(",", "COMMA");
                        pw.println(key + "," + spanString + "," + word);
                    }
                    root = tr.readTree();
                    pw.flush();
                    ++treeNumber;
                }
            }
        }
    }
    log.info("Done.");
}

/** Tells whether a "." leaf at this search offset of this file is one of the listed periods to skip. */
private static boolean isPeriodMissingFromText(String fileName, int index) {
    if (fileName.equals("wsj_2303") && index == 1526) {
        return true;
    }
    if (fileName.equals("wsj_2314") && (index == 7625 || index == 7929)) {
        return true;
    }
    if (fileName.equals("wsj_2320") && (index == 474 || index == 3180)) {
        return true;
    }
    if (fileName.equals("wsj_2321") && index == 268) {
        return true;
    }
    if (fileName.equals("wsj_2324") && index == 490) {
        return true;
    }
    if (fileName.equals("wsj_2361") && index == 6563) {
        return true;
    }
    if (fileName.equals("wsj_2397") && (index == 2845 || index == 3273 || index == 3515)) {
        return true;
    }
    return fileName.equals("wsj_2398") && index == 2793;
}

/** Applies the file-specific rewrites that make a tree token match its spelling in the raw text. */
private static String alignWordWithRawText(String fileName, int index, String word) {
    if (fileName.equals("wsj_0004") && word.equals("IBC")) {
        word = "IBC/Donoghue";
    }
    if (fileName.equals("wsj_0032") && word.equals("S.p.A.")) {
        word = "S.p.\nA.";
    }
    if (fileName.equals("wsj_0986") && index == 1804) {
        word = "5/ 16";
    }
    if (fileName.equals("wsj_1737") && index == 689 && word.equals("U.S.")) {
        word = "U. S.";
    }
    if (fileName.equals("wsj_1974") && index == 1802 && word.equals("5/16")) {
        word = "5/ 16";
    }
    if (fileName.equals("wsj_0110") && word.equals("7/16")) {
        word = "7/ 16";
    }
    if (fileName.equals("wsj_0111") && word.equals("Rey/Fawcett")) {
        word = "Rey/ Fawcett";
    }
    if (fileName.equals("wsj_0162") && word.equals("International")) {
        word = "In< ternational";
    }
    if (fileName.equals("wsj_0359") && word.equals("Stovall/Twenty-First")) {
        word = "Stovall/ Twenty-First";
    }
    if (fileName.equals("wsj_0400") && word.equals("16/32")) {
        word = "16/ 32";
    }
    if (fileName.equals("wsj_0463") && word.equals("G.m.b.H.")) {
        word = "G.m.b.\nH.";
    }
    if (fileName.matches("wsj_(0660|1368|1371)") && word.matches("S\\.p\\.A\\.?(-controlled)?")) {
        word = word.replaceAll("S\\.p\\.A", "S.p.\nA");
    }
    if (fileName.equals("wsj_0911") && word.equals("mystery/comedy")) {
        word = "mystery/ comedy";
    }
    if (fileName.matches("wsj_(0917|1329)") && word.equals("G.m.b.H.")) {
        word = "G.m.b.\nH.";
    }
    if (fileName.equals("wsj_0998") && word.equals("Co.")) {
        word = "Co,.";
    }
    if (fileName.equals("wsj_1237") && word.equals("Bard/EMS")) {
        word = "Bard/ EMS";
    }
    if (fileName.equals("wsj_1457")) {
        if (word.equals("fancy'shvartzer")) {
            word = "fancy 'shvartzer";
        } else if (word.equals("the'breakup")) {
            word = "the 'breakup";
        }
    }
    if (fileName.equals("wsj_1503") && word.equals("Gaming")) {
        word = "gaming";
    }
    if (fileName.equals("wsj_1568") && word.equals(". . .")) {
        word = "...";
    }
    if (fileName.equals("wsj_1583") && word.equals("'T-")) {
        word = "'T";
    }
    if (fileName.equals("wsj_1625") && word.equals("staff")) {
        word = "staf";
    }
    if (fileName.equals("wsj_1773") && word.equals("H.F.")) {
        word = "H. F.";
    }
    return word;
}

From source file:wtute.engine.TreeHelper.java

/**
 * Returns the sibling immediately to the right of {@code child}.
 *
 * @param child node whose right neighbour is wanted
 * @param root  root of the tree that contains {@code child}
 * @return the next child of the same parent, or {@code null} when
 *         {@code child} is the last child or has no parent under {@code root}
 */
public static Tree nextSibling(Tree child, Tree root) {
    Tree parent = child.parent(root);
    if (parent == null) {
        return null;
    }
    List<Tree> siblings = parent.getChildrenAsList();
    // Search by identity: an equals()-based indexOf may stop at an earlier sibling
    // that merely looks the same. The bound excludes the last child, which has no
    // right neighbour, so no exception is needed for control flow.
    for (int i = 0; i < siblings.size() - 1; i++) {
        if (siblings.get(i) == child) {
            return siblings.get(i + 1);
        }
    }
    return null;
}

From source file:wtute.engine.TreeHelper.java

/**
 * Returns the sibling immediately to the left of {@code child}.
 *
 * @param child node whose left neighbour is wanted
 * @param root  root of the tree that contains {@code child}
 * @return the previous child of the same parent, or {@code null} when
 *         {@code child} is the first child or has no parent under {@code root}
 */
public static Tree prevSibling(Tree child, Tree root) {
    Tree parent = child.parent(root);
    if (parent == null) {
        return null;
    }
    List<Tree> siblings = parent.getChildrenAsList();
    // Search by identity: an equals()-based indexOf may stop at an earlier sibling
    // that merely looks the same. Starting at 1 excludes the first child, which has
    // no left neighbour, so no exception is needed for control flow.
    for (int i = 1; i < siblings.size(); i++) {
        if (siblings.get(i) == child) {
            return siblings.get(i - 1);
        }
    }
    return null;
}