Example usage for edu.stanford.nlp.trees TreeReader readTree

List of usage examples for edu.stanford.nlp.trees TreeReader readTree

Introduction

On this page you can find example usages of edu.stanford.nlp.trees TreeReader readTree.

Prototype

public Tree readTree() throws IOException;

Source Link

Document

Reads a single tree.

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java

License:Open Source License

/**
 * Parses a Penn Treebank-style bracketed string into a tree.
 *
 * @param pennString
 *            a Penn Treebank-style string as produced by the Stanford parser
 * @return the first tree read from {@code pennString}, built with a
 *         {@link LabeledScoredTreeFactory}
 * @throws IllegalStateException
 *             if reading the string raises an {@link IOException}
 */
public static Tree pennString2Tree(String pennString) {
    TreeReader treeReader = null;
    try {
        StringReader input = new StringReader(pennString);
        treeReader = new PennTreeReader(input, new LabeledScoredTreeFactory());
        Tree parsed = treeReader.readTree();
        return parsed;
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    } finally {
        // The reader is still null here if construction failed before the assignment.
        closeQuietly(treeReader);
    }
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Parses a bracketed parse string into a {@link Tree}, using the
 * {@code tree_factory} defined outside this method.
 *
 * @param parseStr the parse text to read
 * @return the first tree read from {@code parseStr}, or {@code null} if reading
 *         raised an {@link IOException}
 */
public Tree readTreeFromString(String parseStr) {
    StringReader input = new StringReader(parseStr);
    TreeReader parser = new PennTreeReader(input, tree_factory);
    try {
        return parser.readTree();
    } catch (IOException ioe) {
        // Best effort: report the failure and hand back null.
        ioe.printStackTrace();
        return null;
    }
}

From source file:edu.cmu.ark.nlp.question.QuestionUtil.java

License:Open Source License

/**
 * Parses a bracketed parse string into a {@link Tree}, using the factory
 * returned by {@code QuestionUtil.getTreeFactory()}.
 *
 * @param parseStr the parse text to read
 * @return the first tree read from {@code parseStr}, or {@code null} if reading
 *         raised an {@link IOException}
 */
public static Tree readTreeFromString(String parseStr) {
    StringReader source = new StringReader(parseStr);
    TreeReader parser = new PennTreeReader(source, QuestionUtil.getTreeFactory());
    Tree result;
    try {
        result = parser.readTree();
    } catch (IOException ioe) {
        // Best effort: the failure is printed and the caller receives null.
        ioe.printStackTrace();
        result = null;
    }
    return result;
}

From source file:edu.jhu.agiga.StanfordAgigaSentence.java

License:Open Source License

/**
 * Builds a Stanford constituency tree from this sentence's parse text.
 *
 * @return the first tree read from the text returned by {@code getParseText()}
 * @throws RuntimeException
 *             if reading the in-memory text raises an {@link IOException}; the
 *             original exception is attached as the cause
 */
public Tree getStanfordContituencyTree() {
    TreeFactory tf = new LabeledScoredTreeFactory();
    StringReader r = new StringReader(getParseText());
    TreeReader tr = new PennTreeReader(r, tf);
    try {
        return tr.readTree();
    } catch (IOException e) {
        // Preserve the cause so an unexpected failure remains diagnosable.
        throw new RuntimeException("Error: IOException should not be thrown by StringReader", e);
    }
}

From source file:elkfed.expletives.TrainingData.java

License:Apache License

/**
 * Reads every parse tree from {@code file} and adds one {@link ExpletiveInstance}
 * to {@code instances} for each leaf whose text is "it" or "It".
 *
 * <p>Each instance id has the form {@code <file_id>:S<sentence>:<word>-<word>}, with
 * 1-based sentence and word counters; {@code file_id} is the four characters that
 * precede the last four characters of {@code file}. A line describing each instance
 * is also printed to standard output.
 *
 * @param file path of the treebank file to read
 * @param anaphoricPronouns ids of the pronoun occurrences to look up
 * @param instances list that receives the created instances
 * @throws FileNotFoundException if {@code file} cannot be opened
 * @throws IOException if reading or closing the tree reader fails
 */
public static void extractExamples(String file, Set<String> anaphoricPronouns,
        List<ExpletiveInstance> instances) throws FileNotFoundException, IOException {
    TreeReader tr = new PennTreeReader(new FileReader(file), new LabeledScoredTreeFactory(),
            new BobChrisTreeNormalizer());
    try {
        Tree t;
        String file_id = file.substring(file.length() - 8, file.length() - 4);
        int sent_idx = 1;
        while ((t = tr.readTree()) != null) {
            int word_idx = 1;
            for (Tree t1 : t.getLeaves()) {
                String s = t1.toString();
                if ("it".equals(s) || "It".equals(s)) {
                    String id = String.format("%s:S%d:%d-%d", file_id, sent_idx, word_idx, word_idx);
                    ExpletiveInstance inst = new ExpletiveInstance(t, t1, id);
                    boolean is_positive = anaphoricPronouns.contains(id);
                    // NOTE(review): FD_POSITIVE receives the negation of is_positive while the
                    // printed class is "+1" when is_positive is true - confirm the polarity.
                    inst.setFeature(PairInstance.FD_POSITIVE, !is_positive);
                    instances.add(inst);
                    String cls = is_positive ? "+1" : "-1";
                    System.out.format("%s\t%s\t(%s)\n", s, id, cls);
                }
                word_idx++;
            }
            sent_idx++;
        }
    } finally {
        // Release the underlying file handle even when reading fails part-way.
        tr.close();
    }
}

From source file:elkfed.mmax.importer.ImportOntonotes.java

License:Apache License

/**
 * Imports one document by reading three parallel files: {@code fname + ".coref"},
 * {@code fname + ".name"} and {@code fname + ".parse"}.
 *
 * <p>The coref reader drives the main loop. For every token and newline event it
 * advances the names reader in step, and a new parse tree is read from the parse
 * file at each sentence break. Tokens, sentence tags, parse tags, coref tags and
 * enamex tags are accumulated through helpers and fields defined outside this
 * method ({@code add_token}, {@code push_tag}, {@code pop_tag}, {@code tokens},
 * {@code tags}, ...).
 *
 * @param fname path of the document without its file extension
 * @return the result of {@code create()} once the coref reader reports END_DOCUMENT
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
public MiniDiscourse importFile(String fname) {
    try {
        boolean had_space = true;
        // Enabled when the system property elkfed.BuggyOntonotes is "y", "yes" or "true".
        boolean need_bugfix = System.getProperty("elkfed.BuggyOntonotes", "no").matches("y|yes|true");
        // Separate stack for ENAMEX tags coming from the names file.
        List<Tag> names_stack = new ArrayList<Tag>();
        Alphabet<String> sets = new Alphabet<String>();
        // Registers a placeholder entry first; presumably so that real coreference
        // sets never receive the first index - TODO confirm.
        sets.lookupIndex("*DUMMY*");
        int sent_id = 0;
        Tag sentence_tag = null;
        OntonotesReader reader = new OntonotesReader(new File(fname + ".coref"));
        OntonotesReader readerNE = new OntonotesReader(new File(fname + ".name"));
        TreeReader tr = new PennTreeReader(new FileReader(fname + ".parse"), new LabeledScoredTreeFactory(),
                new BobChrisTreeNormalizer());
        // Parse tree of the sentence currently being read; null until the first one is read.
        Tree tree = null;
        int eventType = reader.getNextEvent();
        // Becomes true once the end of the DOCNO element has been seen in the coref file.
        boolean in_text = false;
        do {
            if (eventType == OntonotesReader.START_TAG && "COREF".equals(reader.getName())) {
                // Opening COREF: push a coref tag and, for IDENT links, record its set id.
                Tag t;
                if (need_bugfix) {
                    t = buggy_push_tag("coref", tag_stack);
                } else {
                    t = push_tag("coref");
                }
                if ("IDENT".equals(reader.getAttribute("TYPE"))) {
                    t.attrs.put("coref_set", "set_" + sets.lookupIndex(reader.getAttribute("ID")));
                }
                had_space = true;
            } else if (eventType == OntonotesReader.END_TAG && "COREF".equals(reader.getName())) {
                // Closing COREF: pop the tag and attach min-span information from the current tree.
                Tag t = pop_tag("coref");
                DetermineMinSpan.addMinSpan(sentence_tag.start, tree, t, tokens);
                had_space = true;
            } else if (in_text && eventType == OntonotesReader.TOKEN) {
                if (!reader.isTrace()) {
                    // process up to the next token in the names part
                    int names_event = readerNE.getNextEvent();
                    while (names_event != OntonotesReader.TOKEN) {
                        if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) {
                            Tag t = push_tag("enamex", names_stack);
                            t.attrs.put("tag", readerNE.getAttribute("TYPE"));
                        } else if (names_event == OntonotesReader.END_TAG
                                && "ENAMEX".equals(readerNE.getName())) {
                            Tag t = pop_tag("enamex", names_stack);
                        } else {
                            throw new IllegalStateException("Unexpected event:" + names_event);
                        }
                        names_event = readerNE.getNextEvent();
                    }
                    // Both files are expected to be at the same token here (checked only with -ea).
                    assert (reader.getToken().equals(readerNE.getToken()));
                    String tok = reader.getToken();
                    // Map bracket escape tokens back to the literal bracket characters.
                    if (tok.equals("-LRB-"))
                        tok = "(";
                    if (tok.equals("-RRB-"))
                        tok = ")";
                    if (tok.equals("-LSB-"))
                        tok = "[";
                    if (tok.equals("-RSB-"))
                        tok = "]";
                    if (tok.equals("-LCB-"))
                        tok = "{";
                    if (tok.equals("-RCB-"))
                        tok = "}";
                    add_token(tok);
                }
            } else if (in_text && eventType == OntonotesReader.NEWLINE) {
                // Sentence break: close the current sentence (if it is non-empty) and
                // attach the parse tree that was read for it.
                //System.out.println("sentence break");
                if (sentence_tag != null) {
                    sentence_tag.end = tokens.size() - 1;
                    if (sentence_tag.end >= sentence_tag.start) {
                        tags.add(sentence_tag);
                        if (tree != null) {
                            Tag parse_tag = new Tag();
                            parse_tag.tag = "parse";
                            parse_tag.start = sentence_tag.start;
                            parse_tag.end = sentence_tag.end;
                            parse_tag.attrs.put("tag", tree.toString());
                            tags.add(parse_tag);
                            // The sentence's token count must equal the tree's yield size.
                            assert sentence_tag.end - sentence_tag.start + 1 == tree.yield().size() : String
                                    .format("%s / %s", tokens.subList(sentence_tag.start, sentence_tag.end + 1),
                                            tree.yield());
                            addParseInfo(sentence_tag.start, tree);
                        }
                    }
                }
                // process up to end of sentence in names annotation
                int names_event = readerNE.getNextEvent();
                while (names_event != OntonotesReader.NEWLINE) {
                    if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) {
                        Tag t = push_tag("enamex", names_stack);
                        t.attrs.put("tag", readerNE.getAttribute("TYPE"));
                    } else if (names_event == OntonotesReader.END_TAG && "ENAMEX".equals(readerNE.getName())) {
                        Tag t = pop_tag("enamex", names_stack);
                    } else if (names_event == OntonotesReader.END_TAG && "DOC".equals(readerNE.getName())) {
                        // ignore
                    } else {
                        throw new IllegalStateException(
                                "Unexpected event:" + readerNE.describeEvent(names_event));
                    }
                    names_event = readerNE.getNextEvent();
                }
                // prepare new parse and sentence
                sentence_tag = new Tag();
                sentence_tag.start = tokens.size();
                sentence_tag.tag = "sentence";
                sentence_tag.attrs.put("orderid", "" + sent_id++);
                tree = tr.readTree();
            } else if (eventType == OntonotesReader.END_TAG && "DOCNO".equals(reader.getName())) {
                in_text = true;
                // go to the end of the DOCNO part in name doc
                int names_event = readerNE.getNextEvent();
                // NOTE(review): the name test below uses `reader` (the coref reader), not
                // `readerNE`. This branch is only entered when reader.getName() is "DOCNO",
                // so the loop appears to stop at the first END_TAG of the names reader
                // regardless of its name - confirm whether readerNE.getName() was intended.
                while (names_event != OntonotesReader.END_TAG || !"DOCNO".equals(reader.getName())) {
                    names_event = readerNE.getNextEvent();
                }
            } else if (eventType == OntonotesReader.START_TAG && "TURN".equals(reader.getName())) {
                // A TURN in the coref file must be mirrored by a TURN in the names file.
                int names_event = readerNE.getNextEvent();
                if (names_event != OntonotesReader.START_TAG || !"TURN".equals(readerNE.getName())) {
                    throw new UnsupportedOperationException("TURN in coref but not in names");
                }
                // parse level seems to be inconsistent... so don't check here :-|
                // NOTE(review): tree is still null if no tree has been read before the first
                // TURN, in which case the next line throws NullPointerException - confirm
                // that a NEWLINE always precedes TURN in the input.
                System.err.println("TURN parse:" + tree.toString());
                tree = tr.readTree();
                // Both files must follow the TURN tag with a newline.
                eventType = reader.getNextEvent();
                names_event = readerNE.getNextEvent();
                if (eventType != OntonotesReader.NEWLINE || names_event != OntonotesReader.NEWLINE) {
                    throw new UnsupportedOperationException("No Newline after TURN");
                }
            }
            eventType = reader.getNextEvent();
        } while (eventType != OntonotesReader.END_DOCUMENT);
        return create();
    } catch (IOException ex) {
        throw new RuntimeException("Cannot read file", ex);
    }

}

From source file:qmul.corpus.SwitchboardCorpus.java

License:Open Source License

/**
 * @param dialogueName/*  w  w w  .  java 2 s  .  c  om*/
 * @param genre
 * @param reader
 * @return whether to carry on or not
 */
private boolean getSentences(String dialogueName, String genre, TreeReader reader) {
    Pattern p = Pattern.compile("\\(CODE\\s+(?:\\([A-Z]+\\s+)?Speaker([A-Za-z]+)(\\d+)");
    try {
        Dialogue dialogue = null;
        DialogueSpeaker speaker = null;
        DialogueSpeaker lastSpeaker = null;
        DialogueTurn currentTurn = null;
        int currentSubdialogue = -1;
        int turnNum = -1;
        Tree tree = reader.readTree();
        Filter<Tree> nodeFilter = new NodeFilter();
        while (tree != null) {
            Matcher m = p.matcher(tree.toString());
            if (m.find()) {
                // get the metadata
                turnNum = Integer.parseInt(m.group(2));
                int subDialogue = 0; // apparently no subdialogues in SWBD ...
                String spk = m.group(1).toUpperCase();
                // start new dialogue if subdialogue changed
                if (subDialogue != currentSubdialogue) {
                    if (dialogue != null) {
                        if (!checkDialogue(dialogue)) {
                            return false;
                        }
                    }
                    // dialogue = addDialogue(dialogueName + ":" + subDialogue, genre);
                    dialogue = addDialogue(dialogueName, genre);
                    // TODO genre in SWBD?
                    getGenreMap().put(dialogueName, genre);
                }
                currentSubdialogue = subDialogue;
                // set up speaker
                String spkId = dialogue.getId() + ":" + spk;
                if (!getSpeakerMap().containsKey(spkId)) {
                    // TODO speaker info in SWBD?
                    getSpeakerMap().put(spkId, new DialogueSpeaker(spkId, "", "", "", "", ""));
                    // System.out.println("added new speaker " + spkId);
                }
                speaker = getSpeakerMap().get(spkId);
            } else {
                // get the tree and extract the transcription
                String trans = "";
                // SWBD embeds trees within an extra unlabelled level ((S etc))
                if (((tree.label() == null) || (tree.label().value() == null))
                        && (tree.children().length == 1)) {
                    tree = tree.getChild(0);
                }
                if (tree != null) {
                    tree = tree.prune(nodeFilter);
                    if (tree != null) {
                        for (Tree leaf : tree.getLeaves()) {
                            trans += leaf.label() + " ";
                        }
                        trans = trans.substring(0, trans.length() - 1);
                        // start new turn if speaker has changed
                        if ((lastSpeaker == null) || !speaker.equals(lastSpeaker) || (currentTurn == null)) {
                            currentTurn = dialogue.addTurn(turnNum, speaker);
                            // System.out.println("new turn " + turnNum + ", " + speaker + " " + currentTurn);
                            lastSpeaker = speaker;
                        }
                        // add sentence
                        dialogue.addSent(-1, currentTurn, trans, tree);
                        // DialogueSentence s = dialogue.addSent(-1, currentTurn, trans, tree);
                        // System.out.println("new sent " + s);
                        // System.out.println(s.getSyntax().pennString());
                    }
                }
            }
            tree = reader.readTree();
        }
        return checkDialogue(dialogue);
    } catch (IOException e) {
        System.err.println("Error reading sentence line" + e.getMessage());
        return false;
    }
}

From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java

License:Open Source License

/**
 * Writes a CSV of token spans for every parse tree in {@code treeFile}.
 *
 * <p>The output and token file names are derived from {@code treeFile} by replacing
 * its extension: tokens are read from {@code <base>.tkn} (one line per tree) and the
 * result is written to {@code <base>.csv}. For each leaf one line is written:
 * {@code article,treeNumber,nodeNumber,spanLine[1],word,spanLine[2]}, where
 * {@code article} is the first eight characters of the tree file's name and
 * {@code spanLine} is looked up in the map returned by {@code sharedTaskTokens}.
 *
 * <p>All three streams are closed even when processing fails part-way.
 *
 * @param treeFile file containing the parse trees
 * @throws IOException if any of the files cannot be read or written
 */
private static void sharedTaskSpanGen(File treeFile) throws IOException {
    TreeFactory tf = new LabeledScoredTreeFactory();
    Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), Util.ENCODING));
    TreeReader tr = new PennTreeReader(r, tf);
    try {
        Tree root = tr.readTree();
        String article = treeFile.getName().substring(0, 8);
        String outFileName = treeFile.toString();
        outFileName = outFileName.substring(0, outFileName.lastIndexOf('.'));
        BufferedReader reader = Util.reader(outFileName + ".tkn");
        try {
            PrintWriter printer = new PrintWriter(outFileName + ".csv");
            try {
                int treeNumber = 0;
                while (root != null) {
                    // One token line is consumed per tree, including trees without children.
                    String lineRead = reader.readLine();
                    if (root.children().length > 0) {
                        List<Tree> leaves = root.getLeaves();
                        HashMap<String, String[]> tokens = sharedTaskTokens(lineRead);
                        for (Tree leaf : leaves) {
                            int nodeNumber = leaf.nodeNumber(root);
                            String word = leaf.toString();
                            // The lookup key has '/' and '*' prefixed with a backslash.
                            String wordKey = word.replaceAll("/", "\\\\/");
                            wordKey = wordKey.replaceAll("\\*", "\\\\*");
                            // NOTE(review): spanLine is null when the token map has no entry for
                            // wordKey, which makes the println below throw NullPointerException.
                            String[] spanLine = tokens.get(wordKey);

                            String key = article + "," + treeNumber + "," + nodeNumber;
                            // Strip whitespace and replace commas so the word stays a single CSV field.
                            word = word.trim().replaceAll("\\s+", "");
                            word = word.replaceAll(",", "COMMA");
                            printer.println(key + "," + spanLine[1] + "," + word + "," + spanLine[2]);
                        }
                    }
                    root = tr.readTree();
                    printer.flush();
                    ++treeNumber;
                }
            } finally {
                printer.close();
            }
        } finally {
            reader.close();
        }
    } finally {
        tr.close();
    }
}

From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java

License:Open Source License

/**
 * Writes a CSV ({@code treeFile + ".csv"}) that locates each parse-tree leaf in the
 * original text.
 *
 * <p>For every leaf whose parent is not labelled {@code -NONE-}, the leaf text is
 * searched for in the normalised content of {@code inputFile}, starting from the
 * position reached by the previous match. Leaves that cannot be found are skipped.
 * Each match is written as
 * {@code <treeFile name>,<treeNumber>,<nodeNumber>,<start>..<end>,<word>}.
 *
 * <p>The writer and the tree reader are closed even when processing fails part-way.
 *
 * @param treeFile file containing the parse trees; also determines the output name
 * @param inputFile file containing the original text
 * @throws IOException if a file cannot be read or written
 */
public static void anyTextToSpanGen(File treeFile, File inputFile) throws IOException {
    log.info("Generating the spans of each node in the parse trees.");

    String orgText = Util.readFile(inputFile);
    // NOTE(review): the second and third search patterns on the next line (and the second
    // pattern applied to each word below) appear empty in the reviewed text; presumably
    // non-ASCII quote characters were lost. An empty regex matches between every pair of
    // characters - restore the intended characters from the authoritative source.
    orgText = orgText.replaceAll("`", "'").replaceAll("", "\"").replaceAll("", "'");
    PrintWriter pw = new PrintWriter(treeFile + ".csv");
    try {
        TreeFactory tf = new LabeledScoredTreeFactory();
        Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), Util.ENCODING));
        TreeReader tr = new PennTreeReader(r, tf);
        try {
            // Position in orgText from which the next leaf is searched.
            int index = 0;
            Tree root = tr.readTree();
            int treeNumber = 0;
            while (root != null) {
                List<Tree> leaves = root.getLeaves();
                for (Tree leaf : leaves) {
                    int nodeNumber = leaf.nodeNumber(root);
                    String parentValue = leaf.parent(root).value();
                    if (parentValue.equals("-NONE-")) {
                        continue;
                    }
                    String word = nodeToString(leaf).trim();
                    word = word.replaceAll("`", "'");
                    word = word.replaceAll("", "'");
                    word = word.replaceAll("\\.\\.\\.", ". . .");
                    int span = orgText.indexOf(word, index);

                    if (span == -1) {
                        // Leaf text does not occur at or after the current position: skip it.
                        continue;
                    }
                    index = span + word.length() - 1;
                    String spanString = (span + ".." + (span + word.length()));
                    String key = treeFile.getName() + "," + treeNumber + "," + nodeNumber;
                    // Strip whitespace and replace commas so the word stays a single CSV field.
                    word = word.trim().replaceAll("\\s+", "");
                    word = word.replaceAll(",", "COMMA");
                    pw.println(key + "," + spanString + "," + word);
                }
                root = tr.readTree();
                pw.flush();
                ++treeNumber;
            }
        } finally {
            tr.close();
        }
    } finally {
        pw.close();
    }

    log.info("Done.");
}

From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java

License:Open Source License

/**
 * Generate the spans of each node in the auto parse trees.
 *
 * <p>Processes every {@code wsj_*.mrg} file in {@code treePath + "23/"}. For each
 * file the raw text is read from {@code rawTextPath + "23/" + <name without .mrg>}
 * and a CSV is written next to the tree file. Every leaf whose parent is not
 * {@code -NONE-} and whose text is not {@code "."} is stripped of leading and
 * trailing punctuation, passed through a table of known per-file corrections,
 * and located in the raw text starting from the end of the previous match. Each
 * match is written as {@code <file>,<treeNumber>,<nodeNumber>,<start>..<end>,<word>}.
 *
 * <p>A token that cannot be found in the raw text is logged and skipped; the
 * writer and tree reader are closed even when processing fails part-way.
 *
 * @param treePath directory prefix containing the parse-tree section folder
 * @param rawTextPath directory prefix containing the raw-text section folder
 * @throws IOException if the tree directory cannot be listed or a file cannot be
 *             read or written
 */
public static void textToSpanGenAuto(String treePath, String rawTextPath) throws IOException {
    log.info("Generating the spans of each node in the auto parse trees.");
    String folder = "23/";
    File sectionDir = new File(treePath + folder);
    File[] files = sectionDir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.startsWith("wsj_") && name.endsWith(".mrg");
        }
    });
    if (files == null) {
        // listFiles returns null when the path is not a directory or cannot be read.
        throw new IOException("Cannot list parse tree directory: " + sectionDir);
    }

    for (File file : files) {

        String fileName = file.getName().replaceAll("\\.mrg", "");
        String orgText = Util.readFile(rawTextPath + folder + fileName);
        orgText = orgText.replaceAll("`", "'");

        PrintWriter pw = new PrintWriter(treePath + folder + fileName + ".csv");
        try {
            TreeFactory tf = new LabeledScoredTreeFactory();
            Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
            TreeReader tr = new PennTreeReader(r, tf);
            try {
                // Search start offset in the raw text.
                // NOTE(review): presumably 9 skips a fixed header of the raw file - confirm.
                int index = 9;

                Tree root = tr.readTree();
                int treeNumber = 0;

                while (root != null) {
                    StringBuilder tmp = new StringBuilder();
                    List<Tree> leaves = root.getLeaves();
                    for (Tree leaf : leaves) {
                        int nodeNumber = leaf.nodeNumber(root);
                        String parentValue = leaf.parent(root).value();
                        if (parentValue.equals("-NONE-")) {
                            continue;
                        }

                        String word = Corpus.nodeToString(leaf).trim();

                        if (word.equals(".")) {
                            continue;
                        }

                        word = word.replaceAll("`", "'");

                        word = word.replaceAll("^\\p{Punct}*", "");
                        word = word.replaceAll("\\p{Punct}*$", "");

                        word = applyKnownTokenCorrections(fileName, index, word);

                        int span = orgText.indexOf(word, index);
                        if (span == -1) {
                            // Previously this case spun forever re-running the same search.
                            log.info("Skipping token not found in raw text: file=" + fileName + ", tree="
                                    + treeNumber + ", node=" + nodeNumber + ", token=" + word);
                            continue;
                        }

                        index = span + word.length();
                        String spanString = (span + ".." + (span + word.length()));
                        String key = fileName + "," + treeNumber + "," + nodeNumber;
                        // Strip whitespace and replace commas so the word stays a single CSV field.
                        word = word.trim().replaceAll("\\s+", "");
                        word = word.replaceAll(",", "COMMA");
                        tmp.append(key + "," + spanString + "," + word);
                        tmp.append('\n');
                    }
                    root = tr.readTree();
                    pw.print(tmp);
                    ++treeNumber;
                }
            } finally {
                tr.close();
            }
        } finally {
            pw.close();
        }
    }

    log.info("Done.");
}

/** Sites ("file:offset") at which the tree token "n't" is replaced by "'t". */
private static final String[] SPAN_GEN_NT_SITES = { "wsj_2300:1457", "wsj_2330:6344", "wsj_2351:1040",
        "wsj_2360:2066", "wsj_2369:6434", "wsj_2381:2399", "wsj_2386:3522", "wsj_2386:3647",
        "wsj_2387:1466", "wsj_2387:5389", "wsj_2397:1032" };

/** Sites ("file:offset") at which the tree token "will" is replaced by "wo". */
private static final String[] SPAN_GEN_WILL_SITES = { "wsj_2306:5692", "wsj_2308:2373", "wsj_2315:1056",
        "wsj_2321:1279", "wsj_2330:1563", "wsj_2345:1838", "wsj_2350:699", "wsj_2351:778",
        "wsj_2351:2391", "wsj_2363:2868", "wsj_2367:1379", "wsj_2376:6687", "wsj_2377:2464",
        "wsj_2379:4711", "wsj_2379:5174", "wsj_2381:565", "wsj_2387:5430", "wsj_2387:5779",
        "wsj_2394:179", "wsj_2397:5243" };

/** Applies the hard-coded per-file token corrections; returns {@code word} unchanged otherwise. */
private static String applyKnownTokenCorrections(String fileName, int index, String word) {
    if (word.equals("n't") && isKnownCorrectionSite(SPAN_GEN_NT_SITES, fileName, index)) {
        return "'t";
    }
    if (word.equals("will") && isKnownCorrectionSite(SPAN_GEN_WILL_SITES, fileName, index)) {
        return "wo";
    }
    return word;
}

/** Tells whether {@code fileName} at search offset {@code index} is listed in {@code sites}. */
private static boolean isKnownCorrectionSite(String[] sites, String fileName, int index) {
    String key = fileName + ":" + index;
    for (String site : sites) {
        if (site.equals(key)) {
            return true;
        }
    }
    return false;
}