List of usage examples for edu.stanford.nlp.trees Tree indentedListPrint
public void indentedListPrint()
From source file:qmul.util.parse.CreateTreeFromClarkCurranCcgGrs.java
License:Open Source License
public static void main(String[] args) { // String test = // "<c> Right|right|RB|I-ADVP|I-TIM|S/S ,|,|,|O|O|, hello|hello|UH|I-INTJ|O|S/S ,|,|,|O|O|, yeah|yeah|UH|I-INTJ|O|S/S ,|,|,|O|O|, we|we|PRP|I-NP|O|NP 're|'re|VBP|I-VP|O|(S[dcl]\\NP)/(S[adj]\\NP) back|back|RB|I-ADVP|O|S[adj]\\NP .|.|.|O|O|."; File test = new File( "c:/Documents and Settings/mpurver/My Documents/dyndial/align/parsedno_idnewclean_ks_ksw"); Tree t = makeTree(test); t.indentedListPrint(); List<Tree> l = makeTrees(test); System.out.println("found " + l.size() + " trees"); }
From source file:qmul.util.parse.CreateTreeFromDCPSE.java
License:Open Source License
/** * @param reader/* ww w . ja v a 2 s .c o m*/ * a {@link Reader} * @return the Stanford {@link Tree} */ public static Tree makeTree(Reader reader) { if (options == null) { setDefaultOptions(); } List<Tree> children = new ArrayList<Tree>(); Tree t0 = null; Tree tPrev = null; Tree tAll = null; Tree tTemp = null; int n = 0; int countspace = 0; int countspaceprevious = 0; int countspacepreviousprevious = 0; char c1 = 'x'; int childWhere = Integer.MAX_VALUE; String gads = ""; String otherStuff = ""; String[] gadsWord = null; boolean isAword = false;// do not change boolean processLine = true;// do not change try { while ((n = reader.read()) != -1) { char c = (char) n; if (c == '[' && gads.matches("")) { processLine = false; // System.out.println(otherStuff); otherStuff = ""; } if (processLine) { if (c == '\n') { if (gads.matches("^\\s+$")) { // we've hit a line containing only whitespace: end of the tree break; } if (options.get(INCLUDE_NO_PAUSE)) { if (gads.contains("PAUSE")) { gads = IGNORE_MARKER; } } // remove "ignored" nodes; unless we need to keep them to work out features, in which case we'll // remove them later in DCPSECorpus if (options.get(INCLUDE_NO_IGNORE) && !options.get(PP_LEXICAL_FEATURES)) { if (gads.contains("ignore)")) { gads = IGNORE_MARKER; } } if (options.get(INCLUDE_NO_UMM)) { if (gads.contains("DISMK,INTERJEC")) { gads = IGNORE_MARKER; } } if (options.get(INCLUDE_NO_REACT)) { if (gads.contains("DISMK,REACT")) { gads = IGNORE_MARKER; } } if (options.get(INCLUDE_NO_UNCLEAR)) { if (gads.contains("INDET,?")) { gads = IGNORE_MARKER; } } if (gads.contains("{")) { // remove all annoying browser markup gadsWord = gads.replaceAll("\\[.*?\\]", "").split("\\s+"); gads = gadsWord[0]; isAword = true; } if (options.get(INCLUDE_NO_BRACKETS)) { if (gads.contains("(")) { gads = gads.replaceAll("\\(.+\\)", ""); } } if (options.get(CATEGORIES_NOT_FUNCTIONS) && !gads.matches(IGNORE_MARKER)) { gads = gads.replaceFirst(".*?,", ""); } if 
(!gads.matches(IGNORE_MARKER)) { tPrev = t0; t0 = tf.newTreeNode(gads.trim(), children); if (childWhere == Integer.MAX_VALUE) { tAll = t0; } else if (childWhere >= 0) { // up x tTemp = tPrev.ancestor(childWhere + 1, tAll); if (tTemp == null) { System.out.println("c1 = " + c1); System.out.println("gads = " + gads); System.out.println("t0 = "); t0.indentedListPrint(); System.out.println("tPrev = "); tPrev.indentedListPrint(); System.out.println("tAll = "); tAll.indentedListPrint(); System.err.println("ERROR: null ancestor at " + (childWhere + 1) + " " + tAll); } tTemp.addChild(t0); } else if (childWhere < 0) { // down one level tPrev.addChild(t0); } if (isAword) { tPrev = t0; String wordLabel = gadsWord[1]; for (int iWord = 2; iWord < gadsWord.length; iWord++) { wordLabel += " " + gadsWord[iWord]; } tTemp = tf.newLeaf(wordLabel.trim()); tPrev.addChild(tTemp); isAword = false; tTemp = null; } } if (gads.matches(IGNORE_MARKER)) {// reset previous counter if is a line to ignore countspaceprevious = countspaceprevious + childWhere; } gads = ""; c1 = 'y'; } else if (c1 == 'y' && c == ' ') {// was just a return character and is space countspace++; } else {// not a leading space or a return character gads += c; c1 = 'x'; if (countspace != 0) { childWhere = countspaceprevious - countspace; countspaceprevious = countspace; countspacepreviousprevious = countspaceprevious; countspace = 0; } } } else if (c == '\n') { // (if not processLine = True) processLine = true; } else { // processLine = false and not a return character otherStuff += c; } } } catch (IOException ioe) { System.err.println("IOException: " + ioe.getMessage()); } if (tAll == null && !otherStuff.isEmpty()) { Tree tSpec = tf.newTreeNode("EMPTY", children); return tSpec; } else { return tAll; } }
From source file:qmul.util.parse.CreateTreeFromDCPSE.java
License:Open Source License
/**
 * Manual smoke test: builds one tree with the no-argument {@code makeTree}, prints it as an indented
 * list, then builds all trees with the no-argument {@code makeTrees} and reports how many were found.
 *
 * @param args
 *            ignored
 */
public static void main(String[] args) {
    // Single tree, dumped in indented-list form.
    makeTree().indentedListPrint();

    // All trees; only the count is reported.
    final List<Tree> allTrees = makeTrees();
    System.out.println("found " + allTrees.size() + " trees");
}
From source file:qmul.util.parse.CreateTreeFromSWBD.java
License:Open Source License
/** * @param reader/*ww w . ja va2 s.c om*/ * a {@link Reader} * @return the Stanford {@link Tree} */ public static Tree makeTree(Reader reader) { if (options == null) { setDefaultOptions(); } List<Tree> children = new ArrayList<Tree>(); Tree t0 = null; Tree tPrev = null; Tree tAll = null; Tree tTemp = null; int n = 0; String funcStr[] = { "", "" }; int openBrackets = 0; int closeBrackets = 0; int totalBrackets = 0; int childWhere = Integer.MAX_VALUE; String gads = ""; String otherStuff = ""; String[] gadsWord = null; boolean isAword = false;// do not change boolean wasAword = false;// do not change boolean processLine = false;// do not change try { while ((n = reader.read()) != -1) { char c = (char) n; char charsToIgnore[] = { '.', ',', '?', '\n', '\t', '\r' }; if (gads == IGNORE_MARKER) { if (c == '\n') { gads = ""; } } else { for (int i = 0; i < charsToIgnore.length; i++) { if (c == charsToIgnore[i]) { c = '~'; } } if (c == '(' || c == ')' || c == ' ' || c == '~') { if (c == '(') { totalBrackets++; } else if (c == ')') { totalBrackets--; } if (gads.matches("") && totalBrackets != 0) { // there is nothing yet to process. Collect brackets funcStr[0] += c; processLine = false; } else if (totalBrackets == 0) { processLine = true; } else { processLine = true; // Something needs to be put on a tree... I think funcStr[1] += c; // start collecting next set of function stuff if (funcStr[0].matches("^\\s$")) { // need to put something here to prevent it having a fit when multiple words and // also to ignore those which are part of the function if (c != ' ' || (c == '~' && openBrackets <= 0)) { isAword = true; if (!wasAword) { openBrackets++; } else { openBrackets--; } } else { processLine = false; gads += c; } } else if (openBrackets < 0 && gads.matches("^[a-zA-Z][a-z]+$")) { isAword = true; if (c == ' ') { processLine = false; gads += c; } else { // hold previous brackets and reset own... 
for (int i = 0; i < closeBrackets; i++) { funcStr[1] += ')'; } for (int j = 0; j < openBrackets; j++) { funcStr[1] += '('; } closeBrackets = 0; for (int k = 0; k < funcStr[0].length(); k++) { if (funcStr[0].charAt(k) == '(') { openBrackets++; } else if (funcStr[0].charAt(k) == ')') { closeBrackets++; } } } } else { for (int j = 0; j < funcStr[0].length(); j++) { if (funcStr[0].charAt(j) == '(') { openBrackets++; } else if (funcStr[0].charAt(j) == ')') { closeBrackets++; } } } } } else if (c != '~') { gads += c; } if ((gads.matches("^\\s$") || gads.matches(""))) { if (totalBrackets != 0 || tAll == null) { processLine = false; } } if (gads.matches("^\\*x\\*")) { gads = IGNORE_MARKER; } // // this is actually done later in SwitchboardCorpus using a NodeFilter // if (options.get(INCLUDE_NO_INTJ)) { // if (gads.contains("INTJ")) { // gads = IGNORE_MARKER; // } // } } if (processLine) { if (gads.matches("E\\_S") || totalBrackets == 0) { // we've hit an end of segment; end the tree System.out.println("end of segment"); break; } if (!gads.matches(IGNORE_MARKER)) { // System.out.println("gads is: " + gads); tPrev = t0; if (isAword) { t0 = tf.newLeaf(gads); } else { t0 = tf.newTreeNode(gads, children); } if (childWhere == Integer.MAX_VALUE) { // System.out.println("It is the first in the tree"); tAll = t0;// set initially childWhere = 0; } else if (openBrackets <= closeBrackets) { // System.out.println("It should be going up " + (closeBrackets-openBrackets)); // up x if (openBrackets < 0) { openBrackets++; } tTemp = tPrev.ancestor((closeBrackets - openBrackets) + 1, tAll); if (tTemp == null) { System.out.println("open = " + openBrackets); System.out.println("close = " + closeBrackets); System.out.println("gads = " + gads); System.out.println("t0 = "); t0.indentedListPrint(); System.out.println("tPrev = "); tPrev.indentedListPrint(); System.out.println("tAll = "); tAll.indentedListPrint(); System.err.println("ERROR: null ancestor at " + (childWhere + 1) + " " + tAll); } 
tTemp.addChild(t0); if (isAword) { // System.out.println("It is a word"); openBrackets = 0; closeBrackets = 0; } // tPrev.addChild(t0); } else if (openBrackets > closeBrackets) { // down one level if (isAword) { // System.out.println("It is a word"); openBrackets--; } // System.out.println("It should be going down one"); tPrev.addChild(t0); } } if (!isAword) { openBrackets = 0; closeBrackets = 0; wasAword = false; } else { wasAword = true; isAword = false; openBrackets--; // System.out.println("closeBrackets is: "+ closeBrackets); } gads = ""; processLine = false; funcStr[0] = funcStr[1]; funcStr[1] = ""; } } } catch (IOException ioe) { System.err.println("IOException: " + ioe.getMessage()); } if (tAll == null) { Tree tSpec = tf.newTreeNode("EMPTY", children); return tSpec; } else { // tAll.indentedListPrint(); return tAll; } }