List of usage examples for edu.stanford.nlp.trees Tree prune
public Tree prune(final Predicate<Tree> filter)
From source file:qmul.corpus.DCPSECorpus.java
License:Open Source License
/** * @param dialogueName/* w w w. j ava 2 s . c om*/ * @param genre * @param reader * @return whether to carry on or not */ private boolean getSentences(String dialogueName, String genre, BufferedReader reader) { Pattern p = Pattern.compile("<#(\\d+):(\\d+):(\\w+)>\\s+<sent>"); try { Dialogue dialogue = null; DialogueSpeaker lastSpeaker = null; DialogueTurn currentTurn = null; int currentSubdialogue = -1; Filter<Tree> nodeFilter = new NodeFilter(); String line = reader.readLine(); while (line != null) { Matcher m = p.matcher(line); if (m.find()) { // get the metadata int sentNum = Integer.parseInt(m.group(1)); int subDialogue = Integer.parseInt(m.group(2)); String spk = m.group(3).toUpperCase(); // start new dialogue if subdialogue changed if (subDialogue != currentSubdialogue) { if (dialogue != null) { if (!checkDialogue(dialogue)) { return false; } } dialogue = addDialogue(dialogueName + ":" + subDialogue, genre); } currentSubdialogue = subDialogue; // set up speaker String spkId = dialogue.getId() + ":" + spk; DialogueSpeaker speaker = getSpeakerMap().get(spkId); // System.out.println("Getting tree for sent " + sentNum + " spk [" + spkId + "]=[" + speaker + "] " // + line); // get the tree and extract the transcription Tree tree = CreateTreeFromDCPSE.makeTree(reader); String trans = ""; if (tree != null) { tree = tree.prune(nodeFilter); if (tree != null) { for (Tree leaf : tree.getLeaves()) { String label = leaf.label().toString(); label = label.replaceAll("^\\s*\\{(.*)\\}\\s*$", "$1"); label = label.replaceAll("^\\s*<([,.:;?!]+)>\\s*$", "$1"); trans += label + " "; } trans = trans.substring(0, trans.length() - 1); // start new turn if speaker has changed if ((lastSpeaker == null) || !speaker.equals(lastSpeaker) || (currentTurn == null)) { currentTurn = dialogue.addTurn(-1, speaker); // System.out.println(currentTurn); } // add sentence dialogue.addSent(sentNum, currentTurn, trans, tree); // System.out.println(sent); lastSpeaker = speaker; } } } line = reader.readLine(); } return checkDialogue(dialogue); } catch (IOException e) { System.err.println("Error reading sentence line" + e.getMessage()); return false; } }
From source file:qmul.corpus.SwitchboardCorpus.java
License:Open Source License
/** * @param dialogueName// ww w . j av a 2 s. c om * @param genre * @param reader * @return whether to carry on or not */ private boolean getSentences(String dialogueName, String genre, TreeReader reader) { Pattern p = Pattern.compile("\\(CODE\\s+(?:\\([A-Z]+\\s+)?Speaker([A-Za-z]+)(\\d+)"); try { Dialogue dialogue = null; DialogueSpeaker speaker = null; DialogueSpeaker lastSpeaker = null; DialogueTurn currentTurn = null; int currentSubdialogue = -1; int turnNum = -1; Tree tree = reader.readTree(); Filter<Tree> nodeFilter = new NodeFilter(); while (tree != null) { Matcher m = p.matcher(tree.toString()); if (m.find()) { // get the metadata turnNum = Integer.parseInt(m.group(2)); int subDialogue = 0; // apparently no subdialogues in SWBD ... String spk = m.group(1).toUpperCase(); // start new dialogue if subdialogue changed if (subDialogue != currentSubdialogue) { if (dialogue != null) { if (!checkDialogue(dialogue)) { return false; } } // dialogue = addDialogue(dialogueName + ":" + subDialogue, genre); dialogue = addDialogue(dialogueName, genre); // TODO genre in SWBD? getGenreMap().put(dialogueName, genre); } currentSubdialogue = subDialogue; // set up speaker String spkId = dialogue.getId() + ":" + spk; if (!getSpeakerMap().containsKey(spkId)) { // TODO speaker info in SWBD? getSpeakerMap().put(spkId, new DialogueSpeaker(spkId, "", "", "", "", "")); // System.out.println("added new speaker " + spkId); } speaker = getSpeakerMap().get(spkId); } else { // get the tree and extract the transcription String trans = ""; // SWBD embeds trees within an extra unlabelled level ((S etc)) if (((tree.label() == null) || (tree.label().value() == null)) && (tree.children().length == 1)) { tree = tree.getChild(0); } if (tree != null) { tree = tree.prune(nodeFilter); if (tree != null) { for (Tree leaf : tree.getLeaves()) { trans += leaf.label() + " "; } trans = trans.substring(0, trans.length() - 1); // start new turn if speaker has changed if ((lastSpeaker == null) || !speaker.equals(lastSpeaker) || (currentTurn == null)) { currentTurn = dialogue.addTurn(turnNum, speaker); // System.out.println("new turn " + turnNum + ", " + speaker + " " + currentTurn); lastSpeaker = speaker; } // add sentence dialogue.addSent(-1, currentTurn, trans, tree); // DialogueSentence s = dialogue.addSent(-1, currentTurn, trans, tree); // System.out.println("new sent " + s); // System.out.println(s.getSyntax().pennString()); } } } tree = reader.readTree(); } return checkDialogue(dialogue); } catch (IOException e) { System.err.println("Error reading sentence line" + e.getMessage()); return false; } }