Example usage for edu.stanford.nlp.trees Tree toString

List of usage examples for edu.stanford.nlp.trees Tree toString

Introduction

On this page you can find example usages for edu.stanford.nlp.trees Tree toString.

Prototype

@Override
public String toString() 

Source Link

Document

Converts the parse tree to a string in Penn Treebank format.

Usage

From source file:qmul.corpus.SwitchboardCorpus.java

License:Open Source License

/**
 * Reads every tree from {@code reader} and turns it into dialogue turns and sentences.
 * <p>
 * A tree whose string form matches the {@code (CODE ... Speaker<letters><digits>} pattern is
 * treated as metadata: it sets the current speaker and turn number, and the first such tree
 * also creates the dialogue. Every other tree is pruned with a {@code NodeFilter}; the labels
 * of its leaves, joined by single spaces, become the transcription that is added as a
 * sentence to the current turn.
 *
 * @param dialogueName name passed to {@code addDialogue} and used as the key in the genre map
 * @param genre genre passed to {@code addDialogue} and stored in the genre map
 * @param reader source of trees; consumed until {@code readTree()} returns {@code null}
 * @return whether to carry on or not: {@code false} if {@code checkDialogue} rejects a
 *         dialogue or an {@code IOException} occurs while reading, otherwise the result of
 *         the final {@code checkDialogue} call
 */
private boolean getSentences(String dialogueName, String genre, TreeReader reader) {
    Pattern p = Pattern.compile("\\(CODE\\s+(?:\\([A-Z]+\\s+)?Speaker([A-Za-z]+)(\\d+)");
    try {
        Dialogue dialogue = null;
        DialogueSpeaker speaker = null;
        DialogueSpeaker lastSpeaker = null;
        DialogueTurn currentTurn = null;
        int currentSubdialogue = -1;
        int turnNum = -1;
        Tree tree = reader.readTree();
        Filter<Tree> nodeFilter = new NodeFilter();
        while (tree != null) {
            Matcher m = p.matcher(tree.toString());
            if (m.find()) {
                // get the metadata
                turnNum = Integer.parseInt(m.group(2));
                int subDialogue = 0; // apparently no subdialogues in SWBD ...
                String spk = m.group(1).toUpperCase();
                // start new dialogue if subdialogue changed
                if (subDialogue != currentSubdialogue) {
                    if (dialogue != null) {
                        if (!checkDialogue(dialogue)) {
                            return false;
                        }
                    }
                    dialogue = addDialogue(dialogueName, genre);
                    // TODO genre in SWBD?
                    getGenreMap().put(dialogueName, genre);
                }
                currentSubdialogue = subDialogue;
                // set up speaker, keyed by "<dialogue id>:<speaker>"
                String spkId = dialogue.getId() + ":" + spk;
                if (!getSpeakerMap().containsKey(spkId)) {
                    // TODO speaker info in SWBD?
                    getSpeakerMap().put(spkId, new DialogueSpeaker(spkId, "", "", "", "", ""));
                }
                speaker = getSpeakerMap().get(spkId);
            } else {
                // SWBD embeds trees within an extra unlabelled level ((S etc))
                if (((tree.label() == null) || (tree.label().value() == null))
                        && (tree.children().length == 1)) {
                    tree = tree.getChild(0);
                }
                if (tree != null) {
                    tree = tree.prune(nodeFilter);
                    if (tree != null) {
                        // Build the transcription as "label label ... label": append each
                        // leaf label followed by a space, then drop the trailing space.
                        StringBuilder transBuilder = new StringBuilder();
                        for (Tree leaf : tree.getLeaves()) {
                            transBuilder.append(leaf.label()).append(' ');
                        }
                        if (transBuilder.length() > 0) {
                            transBuilder.setLength(transBuilder.length() - 1);
                        }
                        String trans = transBuilder.toString();
                        // start new turn if speaker has changed
                        // NOTE(review): if a sentence tree is read before any CODE tree,
                        // dialogue and speaker are still null here and addTurn will throw a
                        // NullPointerException - confirm the input always starts with CODE.
                        if ((lastSpeaker == null) || !speaker.equals(lastSpeaker) || (currentTurn == null)) {
                            currentTurn = dialogue.addTurn(turnNum, speaker);
                            lastSpeaker = speaker;
                        }
                        // add sentence
                        dialogue.addSent(-1, currentTurn, trans, tree);
                    }
                }
            }
            tree = reader.readTree();
        }
        return checkDialogue(dialogue);
    } catch (IOException e) {
        System.err.println("Error reading sentence line" + e.getMessage());
        return false;
    }
}

From source file:reck.parser.lexparser.RECKLexicalizedParser.java

License:Open Source License

/**
 * Rebuilds the tree under {@code root} out of {@code RECKCTTreeNodeImpl} nodes, each carrying
 * a {@code Charseq} position located in {@code content}.
 * <p>
 * Pass 1 walks the leaves in list order and searches {@code content}, starting at a moving
 * cursor ({@code docIndex}), for each leaf's text. When the literal text is not found within
 * {@code maxDistanceBetweenLeaves} characters of the cursor, fallbacks are tried in order:
 * ampersand entities, {@code reConvert}, bracket tokens, the
 * {@code newStrings}/{@code oldStrings} substitutions and quote-character swaps
 * ({@code maxDistanceBetweenLeaves}, {@code reConvert}, {@code newStrings} and
 * {@code oldStrings} are defined outside this method).
 * <p>
 * Pass 2 repeatedly replaces every node whose children have all been converted, giving it the
 * span from its first child's start to its last child's end, until only {@code root} is left.
 *
 * @param root tree to convert; its nodes are replaced in place via {@code setChild}
 * @param startSentence index in {@code content} at which the search for the first leaf begins
 * @param content text in which the leaf strings are located
 * @return the converted root, or {@code null} if pass 2 ends without reaching the state in
 *         which {@code root} is the only node left to convert
 */
public RECKCTTreeNodeImpl convertToRECKTree(Tree root, int startSentence, String content) {

    RECKCTTreeNodeImpl newRoot = null;

    Charseq pos = null;

    List nodeList = root.getLeaves();
    HashSet parentSet = new HashSet();
    // search cursor into content; advanced past each leaf once it has been located
    int docIndex = startSentence;
    String st = null;

    // compute leaves' positions
    for (int i = 0; i < nodeList.size(); i++) {
        Tree oldNode = (Tree) nodeList.get(i);
        st = oldNode.toString();

        // first attempt: the leaf text exactly as printed by the tree
        int start = content.indexOf(st, docIndex);
        if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) {
            // fallback 1: content may spell '&' as an entity ("&amp;" or "&AMP;")
            if (st.indexOf("&") != -1) {
                String tmp = st.replaceAll("&", "&amp;");
                start = content.indexOf(tmp, docIndex);
                if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) {
                    tmp = st.replaceAll("&", "&AMP;");
                    start = content.indexOf(tmp, docIndex);
                }
            }
            if (start != -1 && start - docIndex <= maxDistanceBetweenLeaves) {
                // NOTE(review): "+ 4" is the extra length of ONE entity over a bare '&';
                // a leaf containing several '&' would leave the cursor short - confirm.
                docIndex = start + st.length() + 4;
            } else {
                // fallback 2: let reConvert (defined elsewhere) rewrite the token and retry
                st = reConvert(st);
                start = content.indexOf(st, docIndex);
                if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) {
                    // fallback 3a: opening-bracket token - take the nearest of '(', '[', '{'
                    if (st.equals("-LRB-") || st.equals("-LCB-")) {
                        int i1 = content.indexOf("(", docIndex);
                        int i2 = content.indexOf("[", docIndex);
                        int i3 = content.indexOf("{", docIndex);
                        // treat "not found" as "beyond the end" so the comparisons below work
                        if (i1 == -1)
                            i1 = content.length();
                        if (i2 == -1)
                            i2 = content.length();
                        if (i3 == -1)
                            i3 = content.length();

                        // all three equal can only happen when none was found
                        if ((i1 == i2) && (i1 == i3))
                            System.out.println("Come here !");
                        else if (i1 < i2) {
                            if (i3 < i1) {
                                // st = "{";
                                start = i3;
                            } else {
                                // st = "(";
                                start = i1;
                            }
                        } else {
                            if (i3 < i2) {
                                // st = "{";
                                start = i3;
                            } else {
                                // st = "[";
                                start = i2;
                            }
                        }
                        // NOTE(review): this also runs when no bracket was found, in which
                        // case start still holds the failed search result - confirm intended.
                        docIndex = start + 1;
                    }

                    // fallback 3b: closing-bracket token - take the nearest of ')', ']', '}'
                    else if (st.equals("-RRB-") || st.equals("-RCB-")) {
                        int i1 = content.indexOf(")", docIndex);
                        int i2 = content.indexOf("]", docIndex);
                        int i3 = content.indexOf("}", docIndex);
                        if (i1 == -1)
                            i1 = content.length();
                        if (i2 == -1)
                            i2 = content.length();
                        if (i3 == -1)
                            i3 = content.length();

                        // all three equal can only happen when none was found
                        if ((i1 == i2) && (i1 == i3))
                            System.out.println("Come here !");
                        else if (i1 < i2) {
                            if (i3 < i1) {
                                // st = "}";
                                start = i3;
                            } else {
                                // st = ")";
                                start = i1;
                            }
                        } else {
                            if (i3 < i2) {
                                // st = "}";
                                start = i3;
                            } else {
                                // st = "]";
                                start = i2;
                            }
                        }
                        // NOTE(review): same caveat as the opening-bracket branch above.
                        docIndex = start + 1;
                    }

                    // fallback 4: string-table substitutions, then quote-character swaps
                    else {

                        for (int k = 0; k < newStrings.length; k++) {
                            st = st.replace(newStrings[k], oldStrings[k]);
                        }

                        // character codes: 39 is ', 96 is `, 34 is "
                        String oldSubSt1 = new String(new char[] { (char) 39, (char) 39 });
                        String oldSubSt2 = new String(new char[] { (char) 96, (char) 96 });
                        String newSubSt = new String(new char[] { (char) 34 });
                        // '' or `` in the token becomes " when content still has a " ahead
                        if (st.indexOf(oldSubSt1) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1)
                            st = st.replace(oldSubSt1, newSubSt);
                        else if (st.indexOf(oldSubSt2) != -1
                                && content.substring(docIndex).indexOf(newSubSt) != -1)
                            st = st.replace(oldSubSt2, newSubSt);

                        int i39 = content.indexOf(39, docIndex);
                        int i96 = content.indexOf(96, docIndex);

                        // swap ' and ` in the token when content has the other one nearby
                        if ((st.indexOf(39) != -1) && (i96 != -1 && i96 - docIndex <= maxDistanceBetweenLeaves))
                            st = st.replace((char) 39, (char) 96);
                        else if ((st.indexOf(96) != -1)
                                && (i39 != -1 && i39 - docIndex <= maxDistanceBetweenLeaves))
                            st = st.replace((char) 96, (char) 39);

                        start = content.indexOf(st, docIndex);
                        // every fallback failed: only a debug marker is printed and the
                        // cursor is left where it was
                        if (start == -1 || start - docIndex > maxDistanceBetweenLeaves)
                            System.out.println("Come here !");
                        else
                            docIndex = start + st.length();
                    }
                } else
                    docIndex = start + st.length();
            }
        } else
            docIndex = start + st.length();

        // Test if next node is a sentence splitter, means "."
        // If this leaf ends with "." and the following "." leaf cannot be found nearby, the
        // trailing period is handed back: the cursor steps back by one and the old leaf's
        // label loses its last character.
        if (st.endsWith(".") && i < nodeList.size() - 1) {
            Tree nextNode = (Tree) nodeList.get(i + 1);
            String nextLabel = nextNode.label().value();
            int nextStart = content.indexOf(nextLabel, docIndex);

            if (nextLabel.equals(".") && (nextStart == -1 || nextStart - docIndex > maxDistanceBetweenLeaves)) {
                docIndex--;
                // NOTE(review): only oldNode is relabelled; st is unchanged, so the new node
                // created below still carries the trailing "." - confirm intended.
                oldNode.setLabel(new StringLabel(st.substring(0, st.length() - 1)));
            }
        }

        // replace the leaf in its parent with a positioned node spanning [start, docIndex)
        pos = new Charseq(start, docIndex);
        RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(new StringLabel(st),
                (List) oldNode.getChildrenAsList(), pos);
        Tree parent = oldNode.parent(root);
        parent.setChild(parent.indexOf(oldNode), newNode);
        parentSet.add(parent);
    }

    // the leaves' parents form the first work list for the second pass
    nodeList.clear();
    nodeList.addAll(parentSet);

    // compute upper nodes' positions
    while (!nodeList.isEmpty()) {
        parentSet = new HashSet();
        for (int i = 0; i < nodeList.size(); i++) {
            Tree oldNode = (Tree) nodeList.get(i);
            // advance to the first child that is not yet converted, or to the last child
            Iterator nodeIter = oldNode.getChildrenAsList().iterator();
            Tree node = (Tree) nodeIter.next();
            while (node instanceof RECKCTTreeNodeImpl && nodeIter.hasNext()) {
                node = (Tree) nodeIter.next();
            }
            if (node instanceof RECKCTTreeNodeImpl) {
                // all children converted: this node spans first child start to last child end
                Long start = ((RECKCTTreeNodeImpl) oldNode.firstChild()).getPosition().getStart();
                Long end = ((RECKCTTreeNodeImpl) oldNode.lastChild()).getPosition().getEnd();
                pos = new Charseq(start, end);
                RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(oldNode.label(),
                        (List) oldNode.getChildrenAsList(), pos);
                Tree parent = oldNode.parent(root);
                parent.setChild(parent.indexOf(oldNode), newNode);
                parentSet.add(parent);

                // if oldNode is in parentSet, remove it
                if (parentSet.contains(oldNode)) {
                    parentSet.remove(oldNode);
                }
            } else {
                // some child is still unconverted: revisit this node in the next round
                parentSet.add(oldNode);
            }
        }

        nodeList.clear();
        if (parentSet.size() == 1 && parentSet.contains(root)) {
            // only the root is left: build it and leave nodeList empty so the loop ends
            Long start = ((RECKCTTreeNodeImpl) root.firstChild()).getPosition().getStart();
            Long end = ((RECKCTTreeNodeImpl) root.lastChild()).getPosition().getEnd();
            pos = new Charseq(start, end);
            newRoot = new RECKCTTreeNodeImpl(root.label(), (List) root.getChildrenAsList(), pos);
        } else {
            nodeList.addAll(parentSet);
        }
    }

    return newRoot;

}

From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java

License:Open Source License

/**
 * Writes one CSV line per leaf for every tree read from {@code treeFile}.
 * <p>
 * The output file has the same path as {@code treeFile} with its extension replaced by
 * {@code .csv}; a sibling {@code .tkn} file is read one line per tree and parsed by
 * {@code sharedTaskTokens} (defined elsewhere) into a map from token to its fields. Each
 * output line is
 * {@code article,treeNumber,nodeNumber,<field 1>,<word>,<field 2>}, where {@code article} is
 * the first eight characters of the file name and {@code word} is the leaf text with
 * whitespace removed and commas replaced by {@code COMMA}.
 *
 * @param treeFile file of Penn Treebank trees; its name must be at least eight characters
 *        long and contain a '.'
 * @throws IOException if any of the files cannot be opened, read or closed
 */
private static void sharedTaskSpanGen(File treeFile) throws IOException {
    TreeFactory tf = new LabeledScoredTreeFactory();
    Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), Util.ENCODING));
    TreeReader tr = new PennTreeReader(r, tf);
    // try/finally so that every stream is released even when parsing or lookup fails
    try {
        Tree root = tr.readTree();
        String article = treeFile.getName().substring(0, 8);
        String outFileName = treeFile.toString();
        outFileName = outFileName.substring(0, outFileName.lastIndexOf('.'));
        BufferedReader reader = Util.reader(outFileName + ".tkn");
        try {
            // NOTE(review): the output uses the platform default charset while the input is
            // read with Util.ENCODING - confirm whether the CSV should use it too.
            PrintWriter printer = new PrintWriter(outFileName + ".csv");
            try {
                int treeNumber = 0;
                while (root != null) {
                    // one token line is consumed per tree, including trees without children
                    String lineRead = reader.readLine();
                    if (root.children().length > 0) {
                        List<Tree> leaves = root.getLeaves();
                        HashMap<String, String[]> tokens = sharedTaskTokens(lineRead);
                        for (Tree leaf : leaves) {
                            int nodeNumber = leaf.nodeNumber(root);
                            String word = leaf.toString();
                            // lookup key escapes '/' as "\/" and '*' as "\*"
                            String wordKey = word.replaceAll("/", "\\\\/");
                            wordKey = wordKey.replaceAll("\\*", "\\\\*");
                            // NOTE(review): a leaf missing from the token map makes the
                            // println below fail with a NullPointerException - confirm the
                            // two files are always aligned.
                            String[] spanLine = tokens.get(wordKey);

                            String key = article + "," + treeNumber + "," + nodeNumber;
                            word = word.trim().replaceAll("\\s+", "");
                            word = word.replaceAll(",", "COMMA");
                            printer.println(key + "," + spanLine[1] + "," + word + "," + spanLine[2]);
                        }
                    }
                    root = tr.readTree();
                    printer.flush();
                    ++treeNumber;
                }
            } finally {
                printer.close();
            }
        } finally {
            reader.close();
        }
    } finally {
        tr.close();
    }
}

From source file:sg.edu.nus.comp.pdtb.runners.SpanTreeExtractor.java

License:Open Source License

/**
 * Returns the string form of {@code leaf} with the tokens {@code -LRB-}, {@code -LCB-},
 * {@code -LSB-}, {@code -RRB-}, {@code -RCB-} and {@code -RSB-} replaced by
 * {@code ( { [ ) } ]} respectively.
 *
 * @param leaf node whose {@code toString()} output is rewritten
 * @return the rewritten string
 */
private static String nodeToString(Tree leaf) {
    // token -> replacement, applied in this order
    final String[][] substitutions = {
            { "-LRB-", "(" },
            { "-LCB-", "{" },
            { "-LSB-", "[" },
            { "-RRB-", ")" },
            { "-RCB-", "}" },
            { "-RSB-", "]" } };

    String text = leaf.toString();
    for (String[] pair : substitutions) {
        // none of the tokens contains regex metacharacters, so a literal replace is equivalent
        text = text.replace(pair[0], pair[1]);
    }
    return text;
}

From source file:sg.edu.nus.comp.pdtb.util.Corpus.java

License:Open Source License

/**
 * Returns the string form of {@code leaf} after a fixed sequence of substitutions: the
 * bracket tokens {@code -LRB- -LCB- -LSB- -RRB- -RCB- -RSB-} become {@code ( { [ ) } ]},
 * doubled backticks and doubled apostrophes become a double quote, {@code --} is removed,
 * and any remaining backtick becomes an apostrophe.
 *
 * @param leaf node whose {@code toString()} output is rewritten
 * @return the rewritten string
 */
public static String nodeToString(Tree leaf) {
    // Order matters: "``" must be handled before the single "`" rule.
    final String[][] substitutions = {
            { "-LRB-", "(" },
            { "-LCB-", "{" },
            { "-LSB-", "[" },
            { "-RRB-", ")" },
            { "-RCB-", "}" },
            { "-RSB-", "]" },
            { "``", "\"" },
            { "''", "\"" },
            { "--", "" },
            { "`", "'" } };

    String text = leaf.toString();
    for (String[] pair : substitutions) {
        // none of the tokens contains regex metacharacters, so a literal replace is equivalent
        text = text.replace(pair[0], pair[1]);
    }
    return text;
}

From source file:twittersentimentanalysis.StanfordCoreNLPTool.java

/**
 * Computes an overall sentiment class for a tweet as the average of the classes predicted
 * for its sentences.
 * <p>
 * The tweet is passed through {@code processSentence} and then the shared {@code pipeline}
 * (both defined elsewhere). For each sentence the predicted class is read from its sentiment
 * tree, and the sentence, its class and the tree are printed to standard output.
 *
 * @param tweet text to analyse; may be {@code null} or empty
 * @return {@code 0} when {@code tweet} is {@code null} or empty, {@code -1} when the pipeline
 *         yields no sentences, otherwise the sum of the per-sentence classes divided by the
 *         number of sentences using integer division
 */
public static int findSentiment(String tweet) {
    if (tweet == null || tweet.length() == 0) {
        return 0;
    }

    int sumOfSentimentScore = 0;
    int noOfSentences = 0;
    Annotation annotation = pipeline.process(processSentence(tweet));
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
        sumOfSentimentScore += sentiment;
        noOfSentences++;
        System.out.println("-------------**********------------------");
        System.out.println(sentence.toString() + " = " + sentiment);
        System.out.println("Tree : " + tree.toString());
        System.out.println("-------------**********-----------------");
    }

    // We average over all sentences rather than taking the score of the longest one.
    if (noOfSentences == 0) {
        return -1;
    }
    return sumOfSentimentScore / noOfSentences;
}

From source file:uk.ac.gla.mir.util.TripletExtractor.java

License:Open Source License

/**
 * Tells whether the string form of {@code t} is "to", ignoring case.
 *
 * @param t node to test
 * @return {@code true} if {@code t.toString()} equals "to" case-insensitively
 */
private static boolean isToDependencies(Tree t) {
    final String text = t.toString();
    return text.equalsIgnoreCase("to");
}

From source file:uk.ac.gla.mir.util.TripletExtractor.java

License:Open Source License

/**
 * Tells whether {@code t} has the value "WP" or "WDT", or the string form "that"; all
 * comparisons ignore case.
 *
 * @param t node to test
 * @return {@code true} if any of the three comparisons matches
 */
private static boolean isNoToDependencies(Tree t) {
    final String tag = t.value();
    final boolean isWhTag = tag.equalsIgnoreCase("WP") || tag.equalsIgnoreCase("WDT");
    return isWhTag || t.toString().equalsIgnoreCase("that");
}

From source file:uk.ac.gla.mir.util.TripletExtractor.java

License:Open Source License

/**
 * Tells whether the string form of {@code t} is "and", ignoring case.
 *
 * @param t node to test
 * @return {@code true} if {@code t.toString()} equals "and" case-insensitively
 */
private static boolean isAndDependencies(Tree t) {
    final String text = t.toString();
    return text.equalsIgnoreCase("and");
}

From source file:uk.ac.gla.mir.util.TripletExtractor.java

License:Open Source License

/**
 * Tells whether the string form of {@code t} is "but", "though" or "although", ignoring
 * case.
 *
 * @param t node to test
 * @return {@code true} if {@code t.toString()} matches one of the three words
 */
private static boolean isButDependencies(Tree t) {
    final String text = t.toString();
    if (text.equalsIgnoreCase("but")) {
        return true;
    }
    if (text.equalsIgnoreCase("though")) {
        return true;
    }
    return text.equalsIgnoreCase("although");
}