Example usage for edu.stanford.nlp.trees Tree prune

List of usage examples for edu.stanford.nlp.trees Tree prune

Introduction

On this page you can find example usages of edu.stanford.nlp.trees.Tree.prune.

Prototype

public Tree prune(final Predicate<Tree> filter) 

Source Link

Document

Creates a deep copy of the tree, where all nodes that the filter does not accept and all children of such nodes are pruned.

Usage

From source file:qmul.corpus.DCPSECorpus.java

License:Open Source License

/**
 * @param dialogueName/* w  w  w. j  ava  2  s  .  c om*/
 * @param genre
 * @param reader
 * @return whether to carry on or not
 */
private boolean getSentences(String dialogueName, String genre, BufferedReader reader) {
    Pattern p = Pattern.compile("<#(\\d+):(\\d+):(\\w+)>\\s+<sent>");
    try {
        Dialogue dialogue = null;
        DialogueSpeaker lastSpeaker = null;
        DialogueTurn currentTurn = null;
        int currentSubdialogue = -1;
        Filter<Tree> nodeFilter = new NodeFilter();
        String line = reader.readLine();
        while (line != null) {
            Matcher m = p.matcher(line);
            if (m.find()) {
                // get the metadata
                int sentNum = Integer.parseInt(m.group(1));
                int subDialogue = Integer.parseInt(m.group(2));
                String spk = m.group(3).toUpperCase();
                // start new dialogue if subdialogue changed
                if (subDialogue != currentSubdialogue) {
                    if (dialogue != null) {
                        if (!checkDialogue(dialogue)) {
                            return false;
                        }
                    }
                    dialogue = addDialogue(dialogueName + ":" + subDialogue, genre);
                }
                currentSubdialogue = subDialogue;
                // set up speaker
                String spkId = dialogue.getId() + ":" + spk;
                DialogueSpeaker speaker = getSpeakerMap().get(spkId);
                // System.out.println("Getting tree for sent " + sentNum + " spk [" + spkId + "]=[" + speaker + "] "
                // + line);
                // get the tree and extract the transcription
                Tree tree = CreateTreeFromDCPSE.makeTree(reader);
                String trans = "";
                if (tree != null) {
                    tree = tree.prune(nodeFilter);
                    if (tree != null) {
                        for (Tree leaf : tree.getLeaves()) {
                            String label = leaf.label().toString();
                            label = label.replaceAll("^\\s*\\{(.*)\\}\\s*$", "$1");
                            label = label.replaceAll("^\\s*<([,.:;?!]+)>\\s*$", "$1");
                            trans += label + " ";
                        }
                        trans = trans.substring(0, trans.length() - 1);
                        // start new turn if speaker has changed
                        if ((lastSpeaker == null) || !speaker.equals(lastSpeaker) || (currentTurn == null)) {
                            currentTurn = dialogue.addTurn(-1, speaker);
                            // System.out.println(currentTurn);
                        }
                        // add sentence
                        dialogue.addSent(sentNum, currentTurn, trans, tree);
                        // System.out.println(sent);
                        lastSpeaker = speaker;
                    }
                }
            }
            line = reader.readLine();
        }
        return checkDialogue(dialogue);
    } catch (IOException e) {
        System.err.println("Error reading sentence line" + e.getMessage());
        return false;
    }
}

From source file:qmul.corpus.SwitchboardCorpus.java

License:Open Source License

/**
 * Reads trees from {@code reader} one at a time. A tree whose string form matches the
 * speaker pattern ({@code (CODE ... Speaker<letters><digits>}) updates the current speaker
 * and turn number, starting the dialogue on first sight. Any other tree is treated as a
 * sentence: it is unwrapped from its unlabelled outer level if present, pruned with a
 * {@code NodeFilter}, and added to the current dialogue.
 *
 * @param dialogueName name under which the dialogue is registered via {@code addDialogue}
 *            and under which {@code genre} is stored in the genre map
 * @param genre genre passed to {@code addDialogue}
 * @param reader source of trees
 * @return whether to carry on or not: {@code false} if {@code checkDialogue} rejects a
 *         completed dialogue or an {@link IOException} occurs, otherwise the result of
 *         {@code checkDialogue} on the last dialogue
 */
private boolean getSentences(String dialogueName, String genre, TreeReader reader) {
    Pattern speakerPattern = Pattern.compile("\\(CODE\\s+(?:\\([A-Z]+\\s+)?Speaker([A-Za-z]+)(\\d+)");
    try {
        Dialogue dialogue = null;
        DialogueSpeaker speaker = null;
        DialogueSpeaker lastSpeaker = null;
        DialogueTurn currentTurn = null;
        int currentSubdialogue = -1;
        int turnNum = -1;
        Tree tree = reader.readTree();
        Filter<Tree> nodeFilter = new NodeFilter();
        for (; tree != null; tree = reader.readTree()) {
            Matcher m = speakerPattern.matcher(tree.toString());
            if (m.find()) {
                // get the metadata
                turnNum = Integer.parseInt(m.group(2));
                int subDialogue = 0; // apparently no subdialogues in SWBD ...
                String spk = m.group(1).toUpperCase();
                // start new dialogue if subdialogue changed
                if (subDialogue != currentSubdialogue) {
                    if (dialogue != null && !checkDialogue(dialogue)) {
                        return false;
                    }
                    dialogue = addDialogue(dialogueName, genre);
                    // TODO genre in SWBD?
                    getGenreMap().put(dialogueName, genre);
                }
                currentSubdialogue = subDialogue;
                // set up speaker, registering a placeholder entry on first sight
                String spkId = dialogue.getId() + ":" + spk;
                if (!getSpeakerMap().containsKey(spkId)) {
                    // TODO speaker info in SWBD?
                    getSpeakerMap().put(spkId, new DialogueSpeaker(spkId, "", "", "", "", ""));
                }
                speaker = getSpeakerMap().get(spkId);
                continue;
            }

            // get the tree and extract the transcription
            Tree sentence = tree;
            // SWBD embeds trees within an extra unlabelled level ((S etc))
            if (((sentence.label() == null) || (sentence.label().value() == null))
                    && (sentence.children().length == 1)) {
                sentence = sentence.getChild(0);
            }
            if (sentence == null) {
                continue;
            }
            sentence = sentence.prune(nodeFilter);
            if (sentence == null) {
                continue;
            }
            if (dialogue == null) {
                // No speaker node has been seen yet, so there is no dialogue or speaker to
                // attach this sentence to; previously this dereferenced a null dialogue.
                System.err.println("Skipping sentence tree found before any speaker node in " + dialogueName);
                continue;
            }
            StringBuilder trans = new StringBuilder();
            for (Tree leaf : sentence.getLeaves()) {
                trans.append(leaf.label()).append(' ');
            }
            // drop the trailing separator; guarded so an empty leaf list cannot throw
            if (trans.length() > 0) {
                trans.setLength(trans.length() - 1);
            }
            // start new turn if speaker has changed
            if ((lastSpeaker == null) || !speaker.equals(lastSpeaker) || (currentTurn == null)) {
                currentTurn = dialogue.addTurn(turnNum, speaker);
                lastSpeaker = speaker;
            }
            // add sentence
            dialogue.addSent(-1, currentTurn, trans.toString(), sentence);
        }
        // NOTE(review): dialogue is still null here if no speaker node was ever seen -
        // confirm that checkDialogue tolerates null.
        return checkDialogue(dialogue);
    } catch (IOException e) {
        System.err.println("Error reading sentence line: " + e.getMessage());
        return false;
    }
}