Example usage for edu.stanford.nlp.trees Tree indentedListPrint

List of usage examples for edu.stanford.nlp.trees Tree indentedListPrint

Introduction

On this page you can find example usages of edu.stanford.nlp.trees Tree indentedListPrint.

Prototype

public void indentedListPrint() 

Source Link

Document

Indented list printing of a tree.

Usage

From source file:qmul.util.parse.CreateTreeFromClarkCurranCcgGrs.java

License:Open Source License

/**
 * Command-line check: builds one tree from an input file, prints it as an indented list, and
 * then reports how many trees {@code makeTrees} finds in the same file.
 *
 * @param args
 *            optional; when at least one argument is given, {@code args[0]} is used as the path
 *            of the file to read, otherwise the original default location is used
 */
public static void main(String[] args) {
    // Example of the kind of input line this was originally tried with:
    // String test =
    // "<c> Right|right|RB|I-ADVP|I-TIM|S/S ,|,|,|O|O|, hello|hello|UH|I-INTJ|O|S/S ,|,|,|O|O|, yeah|yeah|UH|I-INTJ|O|S/S ,|,|,|O|O|, we|we|PRP|I-NP|O|NP 're|'re|VBP|I-VP|O|(S[dcl]\\NP)/(S[adj]\\NP) back|back|RB|I-ADVP|O|S[adj]\\NP .|.|.|O|O|.";

    // The default path only exists on the original author's machine, so allow an override.
    String path = (args != null && args.length > 0) ? args[0]
            : "c:/Documents and Settings/mpurver/My Documents/dyndial/align/parsedno_idnewclean_ks_ksw";
    File test = new File(path);
    Tree t = makeTree(test);
    t.indentedListPrint();
    List<Tree> l = makeTrees(test);
    System.out.println("found " + l.size() + " trees");
}

From source file:qmul.util.parse.CreateTreeFromDCPSE.java

License:Open Source License

/**
 * @param reader/* ww w .  ja  v a  2  s .c o  m*/
 *            a {@link Reader}
 * @return the Stanford {@link Tree}
 */
public static Tree makeTree(Reader reader) {
    if (options == null) {
        setDefaultOptions();
    }
    List<Tree> children = new ArrayList<Tree>();
    Tree t0 = null;
    Tree tPrev = null;
    Tree tAll = null;
    Tree tTemp = null;
    int n = 0;
    int countspace = 0;
    int countspaceprevious = 0;
    int countspacepreviousprevious = 0;
    char c1 = 'x';
    int childWhere = Integer.MAX_VALUE;
    String gads = "";
    String otherStuff = "";
    String[] gadsWord = null;
    boolean isAword = false;// do not change
    boolean processLine = true;// do not change

    try {
        while ((n = reader.read()) != -1) {
            char c = (char) n;
            if (c == '[' && gads.matches("")) {
                processLine = false;
                // System.out.println(otherStuff);
                otherStuff = "";
            }

            if (processLine) {
                if (c == '\n') {
                    if (gads.matches("^\\s+$")) {
                        // we've hit a line containing only whitespace: end of the tree
                        break;
                    }
                    if (options.get(INCLUDE_NO_PAUSE)) {
                        if (gads.contains("PAUSE")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    // remove "ignored" nodes; unless we need to keep them to work out features, in which case we'll
                    // remove them later in DCPSECorpus
                    if (options.get(INCLUDE_NO_IGNORE) && !options.get(PP_LEXICAL_FEATURES)) {
                        if (gads.contains("ignore)")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    if (options.get(INCLUDE_NO_UMM)) {
                        if (gads.contains("DISMK,INTERJEC")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    if (options.get(INCLUDE_NO_REACT)) {
                        if (gads.contains("DISMK,REACT")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    if (options.get(INCLUDE_NO_UNCLEAR)) {
                        if (gads.contains("INDET,?")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    if (gads.contains("{")) {
                        // remove all annoying browser markup
                        gadsWord = gads.replaceAll("\\[.*?\\]", "").split("\\s+");
                        gads = gadsWord[0];
                        isAword = true;
                    }
                    if (options.get(INCLUDE_NO_BRACKETS)) {
                        if (gads.contains("(")) {
                            gads = gads.replaceAll("\\(.+\\)", "");
                        }
                    }
                    if (options.get(CATEGORIES_NOT_FUNCTIONS) && !gads.matches(IGNORE_MARKER)) {
                        gads = gads.replaceFirst(".*?,", "");
                    }
                    if (!gads.matches(IGNORE_MARKER)) {
                        tPrev = t0;
                        t0 = tf.newTreeNode(gads.trim(), children);
                        if (childWhere == Integer.MAX_VALUE) {
                            tAll = t0;
                        } else if (childWhere >= 0) {
                            // up x
                            tTemp = tPrev.ancestor(childWhere + 1, tAll);
                            if (tTemp == null) {
                                System.out.println("c1 = " + c1);
                                System.out.println("gads = " + gads);
                                System.out.println("t0 = ");
                                t0.indentedListPrint();
                                System.out.println("tPrev = ");
                                tPrev.indentedListPrint();
                                System.out.println("tAll = ");
                                tAll.indentedListPrint();
                                System.err.println("ERROR: null ancestor at " + (childWhere + 1) + " " + tAll);
                            }
                            tTemp.addChild(t0);
                        } else if (childWhere < 0) {
                            // down one level
                            tPrev.addChild(t0);
                        }
                        if (isAword) {
                            tPrev = t0;
                            String wordLabel = gadsWord[1];
                            for (int iWord = 2; iWord < gadsWord.length; iWord++) {
                                wordLabel += " " + gadsWord[iWord];
                            }
                            tTemp = tf.newLeaf(wordLabel.trim());
                            tPrev.addChild(tTemp);
                            isAword = false;
                            tTemp = null;
                        }
                    }
                    if (gads.matches(IGNORE_MARKER)) {// reset previous counter if is a line to ignore
                        countspaceprevious = countspaceprevious + childWhere;

                    }
                    gads = "";
                    c1 = 'y';
                } else if (c1 == 'y' && c == ' ') {// was just a return character and is space
                    countspace++;
                } else {// not a leading space or a return character
                    gads += c;
                    c1 = 'x';
                    if (countspace != 0) {
                        childWhere = countspaceprevious - countspace;
                        countspaceprevious = countspace;
                        countspacepreviousprevious = countspaceprevious;
                        countspace = 0;
                    }
                }
            } else if (c == '\n') { // (if not processLine = True)
                processLine = true;
            } else { // processLine = false and not a return character
                otherStuff += c;
            }
        }
    } catch (IOException ioe) {
        System.err.println("IOException: " + ioe.getMessage());
    }
    if (tAll == null && !otherStuff.isEmpty()) {
        Tree tSpec = tf.newTreeNode("EMPTY", children);
        return tSpec;
    } else {
        return tAll;
    }
}

From source file:qmul.util.parse.CreateTreeFromDCPSE.java

License:Open Source License

/**
 * Entry point: builds one tree with the no-argument {@code makeTree()}, prints it as an indented
 * list, and then prints how many trees the no-argument {@code makeTrees()} returns.
 *
 * @param args
 *            not used
 */
public static void main(String[] args) {
    final Tree firstTree = makeTree();
    firstTree.indentedListPrint();

    final List<Tree> allTrees = makeTrees();
    final int treeCount = allTrees.size();
    System.out.println("found " + treeCount + " trees");
}

From source file:qmul.util.parse.CreateTreeFromSWBD.java

License:Open Source License

/**
 * @param reader/*ww w  . ja  va2 s.c  om*/
 *            a {@link Reader}
 * @return the Stanford {@link Tree}
 */
public static Tree makeTree(Reader reader) {
    if (options == null) {
        setDefaultOptions();
    }
    List<Tree> children = new ArrayList<Tree>();
    Tree t0 = null;
    Tree tPrev = null;
    Tree tAll = null;
    Tree tTemp = null;
    int n = 0;
    String funcStr[] = { "", "" };
    int openBrackets = 0;
    int closeBrackets = 0;
    int totalBrackets = 0;
    int childWhere = Integer.MAX_VALUE;
    String gads = "";
    String otherStuff = "";
    String[] gadsWord = null;
    boolean isAword = false;// do not change
    boolean wasAword = false;// do not change
    boolean processLine = false;// do not change

    try {
        while ((n = reader.read()) != -1) {
            char c = (char) n;
            char charsToIgnore[] = { '.', ',', '?', '\n', '\t', '\r' };

            if (gads == IGNORE_MARKER) {
                if (c == '\n') {
                    gads = "";
                }
            } else {
                for (int i = 0; i < charsToIgnore.length; i++) {
                    if (c == charsToIgnore[i]) {
                        c = '~';
                    }
                }
                if (c == '(' || c == ')' || c == ' ' || c == '~') {
                    if (c == '(') {
                        totalBrackets++;
                    } else if (c == ')') {
                        totalBrackets--;
                    }
                    if (gads.matches("") && totalBrackets != 0) {
                        // there is nothing yet to process. Collect brackets
                        funcStr[0] += c;
                        processLine = false;
                    } else if (totalBrackets == 0) {
                        processLine = true;
                    } else {
                        processLine = true;
                        // Something needs to be put on a tree... I think
                        funcStr[1] += c; // start collecting next set of function stuff
                        if (funcStr[0].matches("^\\s$")) {
                            // need to put something here to prevent it having a fit when multiple words and
                            // also to ignore those which are part of the function
                            if (c != ' ' || (c == '~' && openBrackets <= 0)) {
                                isAword = true;
                                if (!wasAword) {
                                    openBrackets++;
                                } else {
                                    openBrackets--;
                                }
                            } else {
                                processLine = false;
                                gads += c;
                            }
                        } else if (openBrackets < 0 && gads.matches("^[a-zA-Z][a-z]+$")) {
                            isAword = true;
                            if (c == ' ') {
                                processLine = false;
                                gads += c;
                            } else {
                                // hold previous brackets and reset own...
                                for (int i = 0; i < closeBrackets; i++) {
                                    funcStr[1] += ')';
                                }
                                for (int j = 0; j < openBrackets; j++) {
                                    funcStr[1] += '(';
                                }
                                closeBrackets = 0;
                                for (int k = 0; k < funcStr[0].length(); k++) {
                                    if (funcStr[0].charAt(k) == '(') {
                                        openBrackets++;
                                    } else if (funcStr[0].charAt(k) == ')') {
                                        closeBrackets++;
                                    }
                                }
                            }
                        } else {
                            for (int j = 0; j < funcStr[0].length(); j++) {
                                if (funcStr[0].charAt(j) == '(') {
                                    openBrackets++;
                                } else if (funcStr[0].charAt(j) == ')') {
                                    closeBrackets++;
                                }
                            }
                        }
                    }
                } else if (c != '~') {
                    gads += c;
                }
                if ((gads.matches("^\\s$") || gads.matches(""))) {
                    if (totalBrackets != 0 || tAll == null) {
                        processLine = false;
                    }
                }
                if (gads.matches("^\\*x\\*")) {
                    gads = IGNORE_MARKER;
                }
                // // this is actually done later in SwitchboardCorpus using a NodeFilter
                // if (options.get(INCLUDE_NO_INTJ)) {
                // if (gads.contains("INTJ")) {
                // gads = IGNORE_MARKER;
                // }
                // }
            }
            if (processLine) {
                if (gads.matches("E\\_S") || totalBrackets == 0) {
                    // we've hit an end of segment; end the tree
                    System.out.println("end of segment");
                    break;
                }
                if (!gads.matches(IGNORE_MARKER)) {
                    // System.out.println("gads is: " + gads);
                    tPrev = t0;
                    if (isAword) {
                        t0 = tf.newLeaf(gads);
                    } else {
                        t0 = tf.newTreeNode(gads, children);
                    }
                    if (childWhere == Integer.MAX_VALUE) {
                        // System.out.println("It is the first in the tree");
                        tAll = t0;// set initially
                        childWhere = 0;
                    } else if (openBrackets <= closeBrackets) {
                        // System.out.println("It should be going up " + (closeBrackets-openBrackets));
                        // up x
                        if (openBrackets < 0) {
                            openBrackets++;
                        }
                        tTemp = tPrev.ancestor((closeBrackets - openBrackets) + 1, tAll);
                        if (tTemp == null) {
                            System.out.println("open = " + openBrackets);
                            System.out.println("close = " + closeBrackets);
                            System.out.println("gads = " + gads);
                            System.out.println("t0 = ");
                            t0.indentedListPrint();
                            System.out.println("tPrev = ");
                            tPrev.indentedListPrint();
                            System.out.println("tAll = ");
                            tAll.indentedListPrint();
                            System.err.println("ERROR: null ancestor at " + (childWhere + 1) + " " + tAll);
                        }
                        tTemp.addChild(t0);
                        if (isAword) {
                            // System.out.println("It is a word");
                            openBrackets = 0;
                            closeBrackets = 0;
                        }
                        // tPrev.addChild(t0);
                    } else if (openBrackets > closeBrackets) {
                        // down one level
                        if (isAword) {
                            // System.out.println("It is a word");
                            openBrackets--;
                        }
                        // System.out.println("It should be going down one");
                        tPrev.addChild(t0);
                    }
                }
                if (!isAword) {
                    openBrackets = 0;
                    closeBrackets = 0;
                    wasAword = false;
                } else {
                    wasAword = true;
                    isAword = false;
                    openBrackets--;
                    // System.out.println("closeBrackets is: "+ closeBrackets);
                }
                gads = "";
                processLine = false;
                funcStr[0] = funcStr[1];
                funcStr[1] = "";
            }
        }
    } catch (IOException ioe) {
        System.err.println("IOException: " + ioe.getMessage());
    }
    if (tAll == null) {
        Tree tSpec = tf.newTreeNode("EMPTY", children);
        return tSpec;
    } else {
        // tAll.indentedListPrint();
        return tAll;
    }
}