List of usage examples for edu.stanford.nlp.trees.Tree#toString()
@Override
public String toString()
From source file:qmul.corpus.SwitchboardCorpus.java
License:Open Source License
/** * @param dialogueName//w ww . j ava 2 s . co m * @param genre * @param reader * @return whether to carry on or not */ private boolean getSentences(String dialogueName, String genre, TreeReader reader) { Pattern p = Pattern.compile("\\(CODE\\s+(?:\\([A-Z]+\\s+)?Speaker([A-Za-z]+)(\\d+)"); try { Dialogue dialogue = null; DialogueSpeaker speaker = null; DialogueSpeaker lastSpeaker = null; DialogueTurn currentTurn = null; int currentSubdialogue = -1; int turnNum = -1; Tree tree = reader.readTree(); Filter<Tree> nodeFilter = new NodeFilter(); while (tree != null) { Matcher m = p.matcher(tree.toString()); if (m.find()) { // get the metadata turnNum = Integer.parseInt(m.group(2)); int subDialogue = 0; // apparently no subdialogues in SWBD ... String spk = m.group(1).toUpperCase(); // start new dialogue if subdialogue changed if (subDialogue != currentSubdialogue) { if (dialogue != null) { if (!checkDialogue(dialogue)) { return false; } } // dialogue = addDialogue(dialogueName + ":" + subDialogue, genre); dialogue = addDialogue(dialogueName, genre); // TODO genre in SWBD? getGenreMap().put(dialogueName, genre); } currentSubdialogue = subDialogue; // set up speaker String spkId = dialogue.getId() + ":" + spk; if (!getSpeakerMap().containsKey(spkId)) { // TODO speaker info in SWBD? 
getSpeakerMap().put(spkId, new DialogueSpeaker(spkId, "", "", "", "", "")); // System.out.println("added new speaker " + spkId); } speaker = getSpeakerMap().get(spkId); } else { // get the tree and extract the transcription String trans = ""; // SWBD embeds trees within an extra unlabelled level ((S etc)) if (((tree.label() == null) || (tree.label().value() == null)) && (tree.children().length == 1)) { tree = tree.getChild(0); } if (tree != null) { tree = tree.prune(nodeFilter); if (tree != null) { for (Tree leaf : tree.getLeaves()) { trans += leaf.label() + " "; } trans = trans.substring(0, trans.length() - 1); // start new turn if speaker has changed if ((lastSpeaker == null) || !speaker.equals(lastSpeaker) || (currentTurn == null)) { currentTurn = dialogue.addTurn(turnNum, speaker); // System.out.println("new turn " + turnNum + ", " + speaker + " " + currentTurn); lastSpeaker = speaker; } // add sentence dialogue.addSent(-1, currentTurn, trans, tree); // DialogueSentence s = dialogue.addSent(-1, currentTurn, trans, tree); // System.out.println("new sent " + s); // System.out.println(s.getSyntax().pennString()); } } } tree = reader.readTree(); } return checkDialogue(dialogue); } catch (IOException e) { System.err.println("Error reading sentence line" + e.getMessage()); return false; } }
From source file:reck.parser.lexparser.RECKLexicalizedParser.java
License:Open Source License
/**
 * Rebuilds {@code root} bottom-up as a tree of {@code RECKCTTreeNodeImpl} nodes, each carrying
 * a {@code Charseq} character position, and returns the new root.
 *
 * <p>Phase 1 (leaves): every leaf's string form is searched for in {@code content}, starting at
 * a running offset {@code docIndex} (initially {@code startSentence}). A hit is accepted only
 * if it is found and lies no more than {@code maxDistanceBetweenLeaves} past {@code docIndex}.
 * When the direct search fails, a chain of fallbacks is tried in order: an ampersand rewrite,
 * {@code reConvert(st)}, nearest-bracket lookup for the -LRB-/-LCB- and -RRB-/-RCB- tokens,
 * then {@code newStrings}/{@code oldStrings} substitution plus quote and back-tick swapping.
 * Each leaf is then replaced in its parent by a new node positioned at
 * {@code Charseq(start, docIndex)}.
 *
 * <p>Phase 2 (inner nodes): parents are processed repeatedly; a node whose children are all
 * {@code RECKCTTreeNodeImpl} is replaced by a new node spanning from its first child's start to
 * its last child's end, otherwise it is retried on the next pass. The loop ends once only
 * {@code root} remains, at which point the new root is created.
 *
 * <p>Side effects: the tree under {@code root} is modified in place via {@code setChild}, and a
 * leaf label may be rewritten when a trailing "." is split off as the next token. Failed
 * lookups only print "Come here !" to standard output.
 *
 * <p>NOTE(review): both {@code replaceAll("&", "&")} calls replace a string with itself; the
 * second argument was presumably an HTML entity lost in extraction (the {@code + 4} added to
 * {@code docIndex} in that branch would fit a 4-character-longer entity) - confirm against the
 * upstream project.
 * <p>NOTE(review): when every fallback fails, {@code start} keeps its last failing value
 * (possibly -1) and is still used for the position - verify this is intended.
 *
 * @param root parse tree to convert; mutated during conversion
 * @param startSentence offset in {@code content} at which the search for the first leaf starts
 * @param content text in which leaf strings are located
 * @return the converted root, or {@code null} if the second phase ends without reducing the
 *         pending set to {@code root} alone
 */
public RECKCTTreeNodeImpl convertToRECKTree(Tree root, int startSentence, String content) { RECKCTTreeNodeImpl newRoot = null; Charseq pos = null; List nodeList = root.getLeaves(); HashSet parentSet = new HashSet(); int docIndex = startSentence; String st = null; // compute leaves' positions for (int i = 0; i < nodeList.size(); i++) { Tree oldNode = (Tree) nodeList.get(i); st = oldNode.toString(); int start = content.indexOf(st, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) { if (st.indexOf("&") != -1) { String tmp = st.replaceAll("&", "&"); start = content.indexOf(tmp, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) { tmp = st.replaceAll("&", "&"); start = content.indexOf(tmp, docIndex); } } if (start != -1 && start - docIndex <= maxDistanceBetweenLeaves) { docIndex = start + st.length() + 4; } else { st = reConvert(st); start = content.indexOf(st, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) { if (st.equals("-LRB-") || st.equals("-LCB-")) { int i1 = content.indexOf("(", docIndex); int i2 = content.indexOf("[", docIndex); int i3 = content.indexOf("{", docIndex); if (i1 == -1) i1 = content.length(); if (i2 == -1) i2 = content.length(); if (i3 == -1) i3 = content.length(); if ((i1 == i2) && (i1 == i3)) System.out.println("Come here !"); else if (i1 < i2) { if (i3 < i1) { // st = "{"; start = i3; } else { // st = "("; start = i1; } } else { if (i3 < i2) { // st = "{"; start = i3; } else { // st = "["; start = i2; } } docIndex = start + 1; } else if (st.equals("-RRB-") || st.equals("-RCB-")) { int i1 = content.indexOf(")", docIndex); int i2 = content.indexOf("]", docIndex); int i3 = content.indexOf("}", docIndex); if (i1 == -1) i1 = content.length(); if (i2 == -1) i2 = content.length(); if (i3 == -1) i3 = content.length(); if ((i1 == i2) && (i1 == i3)) System.out.println("Come here !"); else if (i1 < i2) { if (i3 < i1) { // st = "}"; start = i3; } else { // 
st = ")"; start = i1; } } else { if (i3 < i2) { // st = "}"; start = i3; } else { // st = "]"; start = i2; } } docIndex = start + 1; } else { for (int k = 0; k < newStrings.length; k++) { st = st.replace(newStrings[k], oldStrings[k]); } String oldSubSt1 = new String(new char[] { (char) 39, (char) 39 }); String oldSubSt2 = new String(new char[] { (char) 96, (char) 96 }); String newSubSt = new String(new char[] { (char) 34 }); if (st.indexOf(oldSubSt1) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1) st = st.replace(oldSubSt1, newSubSt); else if (st.indexOf(oldSubSt2) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1) st = st.replace(oldSubSt2, newSubSt); int i39 = content.indexOf(39, docIndex); int i96 = content.indexOf(96, docIndex); if ((st.indexOf(39) != -1) && (i96 != -1 && i96 - docIndex <= maxDistanceBetweenLeaves)) st = st.replace((char) 39, (char) 96); else if ((st.indexOf(96) != -1) && (i39 != -1 && i39 - docIndex <= maxDistanceBetweenLeaves)) st = st.replace((char) 96, (char) 39); start = content.indexOf(st, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) System.out.println("Come here !"); else docIndex = start + st.length(); } } else docIndex = start + st.length(); } } else docIndex = start + st.length(); // Test if next node is a sentence splitter, means "." 
if (st.endsWith(".") && i < nodeList.size() - 1) { Tree nextNode = (Tree) nodeList.get(i + 1); String nextLabel = nextNode.label().value(); int nextStart = content.indexOf(nextLabel, docIndex); if (nextLabel.equals(".") && (nextStart == -1 || nextStart - docIndex > maxDistanceBetweenLeaves)) { docIndex--; oldNode.setLabel(new StringLabel(st.substring(0, st.length() - 1))); } } pos = new Charseq(start, docIndex); RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(new StringLabel(st), (List) oldNode.getChildrenAsList(), pos); Tree parent = oldNode.parent(root); parent.setChild(parent.indexOf(oldNode), newNode); parentSet.add(parent); } nodeList.clear(); nodeList.addAll(parentSet); // compute upper nodes' positions while (!nodeList.isEmpty()) { parentSet = new HashSet(); for (int i = 0; i < nodeList.size(); i++) { Tree oldNode = (Tree) nodeList.get(i); Iterator nodeIter = oldNode.getChildrenAsList().iterator(); Tree node = (Tree) nodeIter.next(); while (node instanceof RECKCTTreeNodeImpl && nodeIter.hasNext()) { node = (Tree) nodeIter.next(); } if (node instanceof RECKCTTreeNodeImpl) { Long start = ((RECKCTTreeNodeImpl) oldNode.firstChild()).getPosition().getStart(); Long end = ((RECKCTTreeNodeImpl) oldNode.lastChild()).getPosition().getEnd(); pos = new Charseq(start, end); RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(oldNode.label(), (List) oldNode.getChildrenAsList(), pos); Tree parent = oldNode.parent(root); parent.setChild(parent.indexOf(oldNode), newNode); parentSet.add(parent); // if oldNode is in parentSet, remove it if (parentSet.contains(oldNode)) { parentSet.remove(oldNode); } } else { parentSet.add(oldNode); } } nodeList.clear(); if (parentSet.size() == 1 && parentSet.contains(root)) { Long start = ((RECKCTTreeNodeImpl) root.firstChild()).getPosition().getStart(); Long end = ((RECKCTTreeNodeImpl) root.lastChild()).getPosition().getEnd(); pos = new Charseq(start, end); newRoot = new RECKCTTreeNodeImpl(root.label(), (List) root.getChildrenAsList(), 
pos); } else { nodeList.addAll(parentSet); } } return newRoot; }
From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java
License:Open Source License
private static void sharedTaskSpanGen(File treeFile) throws IOException { TreeFactory tf = new LabeledScoredTreeFactory(); Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), Util.ENCODING)); TreeReader tr = new PennTreeReader(r, tf); Tree root = tr.readTree();//from w ww . j a v a2s . com String article = treeFile.getName().substring(0, 8); String outFileName = treeFile.toString(); outFileName = outFileName.substring(0, outFileName.lastIndexOf('.')); BufferedReader reader = Util.reader(outFileName + ".tkn"); PrintWriter printer = new PrintWriter(outFileName + ".csv"); int treeNumber = 0; while (root != null) { String lineRead = reader.readLine(); if (root.children().length > 0) { List<Tree> leaves = root.getLeaves(); HashMap<String, String[]> tokens = sharedTaskTokens(lineRead); for (Tree leaf : leaves) { int nodeNumber = leaf.nodeNumber(root); String word = leaf.toString(); String wordKey = word.replaceAll("/", "\\\\/"); wordKey = wordKey.replaceAll("\\*", "\\\\*"); String[] spanLine = tokens.get(wordKey); String key = article + "," + treeNumber + "," + nodeNumber; word = word.trim().replaceAll("\\s+", ""); word = word.replaceAll(",", "COMMA"); printer.println(key + "," + spanLine[1] + "," + word + "," + spanLine[2]); } } root = tr.readTree(); printer.flush(); ++treeNumber; } printer.close(); tr.close(); }
From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java
License:Open Source License
/**
 * Returns the string form of {@code leaf} with the bracket placeholders
 * -LRB-, -LCB-, -LSB-, -RRB-, -RCB- and -RSB- replaced by the literal characters
 * {@code ( { [ ) } ]} respectively.
 *
 * @param leaf node whose {@code toString()} value is converted
 * @return the converted text
 */
private static String nodeToString(Tree leaf) {
    return leaf.toString()
            .replace("-LRB-", "(")
            .replace("-LCB-", "{")
            .replace("-LSB-", "[")
            .replace("-RRB-", ")")
            .replace("-RCB-", "}")
            .replace("-RSB-", "]");
}
From source file:sg.edu.nus.comp.pdtb.util.Corpus.java
License:Open Source License
/**
 * Returns the string form of {@code leaf} with treebank placeholders normalised.
 *
 * <p>Substitutions are applied in this order: the six bracket placeholders
 * (-LRB-, -LCB-, -LSB-, -RRB-, -RCB-, -RSB-) become their literal bracket characters,
 * double back-ticks and doubled apostrophes become a double quote, "--" is removed, and any
 * remaining single back-tick becomes an apostrophe.
 *
 * @param leaf node whose {@code toString()} value is converted
 * @return the converted text
 */
public static String nodeToString(Tree leaf) {
    // Order matters: the double back-tick rule must run before the single back-tick rule.
    final String[][] substitutions = {
        {"-LRB-", "("},
        {"-LCB-", "{"},
        {"-LSB-", "["},
        {"-RRB-", ")"},
        {"-RCB-", "}"},
        {"-RSB-", "]"},
        {"``", "\""},
        {"''", "\""},
        {"--", ""},
        {"`", "'"},
    };
    String text = leaf.toString();
    for (String[] pair : substitutions) {
        text = text.replace(pair[0], pair[1]);
    }
    return text;
}
From source file:twittersentimentanalysis.StanfordCoreNLPTool.java
public static int findSentiment(String tweet) { int mainSentiment = 0; if (tweet != null && tweet.length() > 0) { int longest = 0; int sumOfSentimentScore = 0; int noOfSentences = 0; Annotation annotation = pipeline.process(processSentence(tweet)); for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); int sentiment = RNNCoreAnnotations.getPredictedClass(tree); String partText = sentence.toString(); sumOfSentimentScore += sentiment; noOfSentences++;//from www . j a v a 2s. c om System.out.println("-------------**********------------------"); System.out.println(sentence.toString() + " = " + sentiment); System.out.println("Tree : " + tree.toString()); System.out.println("-------------**********-----------------"); //StanfordCoreNLP considers the sentiment score of the longest statement in the tweet /*if (partText.length() > longest) { mainSentiment = sentiment; longest = partText.length(); }*/ //Our Action : We are considering the average of all the statements to get the approximate sentiment score } if (noOfSentences != 0) mainSentiment = sumOfSentimentScore / noOfSentences; else mainSentiment = -1; } return mainSentiment; }
From source file:uk.ac.gla.mir.util.TripletExtractor.java
License:Open Source License
/**
 * Reports whether the string form of {@code t} is the word "to", ignoring case.
 *
 * @param t node to inspect
 * @return {@code true} if {@code t.toString()} equals "to" case-insensitively
 */
private static boolean isToDependencies(Tree t) {
    final String text = t.toString();
    return text.equalsIgnoreCase("to");
}
From source file:uk.ac.gla.mir.util.TripletExtractor.java
License:Open Source License
/**
 * Reports whether {@code t} has the value "WP" or "WDT", or has the string form "that";
 * all comparisons ignore case.
 *
 * @param t node to inspect
 * @return {@code true} if any of the three comparisons matches
 */
private static boolean isNoToDependencies(Tree t) {
    return t.value().equalsIgnoreCase("WP")
            || t.value().equalsIgnoreCase("WDT")
            || t.toString().equalsIgnoreCase("that");
}
From source file:uk.ac.gla.mir.util.TripletExtractor.java
License:Open Source License
/**
 * Reports whether the string form of {@code t} is the word "and", ignoring case.
 *
 * @param t node to inspect
 * @return {@code true} if {@code t.toString()} equals "and" case-insensitively
 */
private static boolean isAndDependencies(Tree t) {
    final String text = t.toString();
    return text.equalsIgnoreCase("and");
}
From source file:uk.ac.gla.mir.util.TripletExtractor.java
License:Open Source License
/**
 * Reports whether the string form of {@code t} is one of the contrastive words
 * "but", "though" or "although", ignoring case.
 *
 * @param t node to inspect
 * @return {@code true} on the first word that matches, {@code false} if none does
 */
private static boolean isButDependencies(Tree t) {
    final String[] contrastiveWords = {"but", "though", "although"};
    for (String candidate : contrastiveWords) {
        if (t.toString().equalsIgnoreCase(candidate)) {
            return true;
        }
    }
    return false;
}