List of usage examples for edu.stanford.nlp.trees Tree valueOf
public static Tree valueOf(String str)
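Before the project-specific examples below, a minimal standalone sketch of the call itself: Tree.valueOf parses a Penn Treebank-style bracketed string into a Tree. The class name and sample parse are illustrative only.

import edu.stanford.nlp.trees.Tree;

public class TreeValueOfBasics {
    public static void main(String[] args) {
        // Build a Tree directly from a bracketed parse string
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN cat)) (VP (VBD sat))))");
        System.out.println("Root label: " + tree.label().value());    // ROOT
        System.out.println("Leaf count: " + tree.getLeaves().size()); // 3
        System.out.println("Tree depth: " + tree.depth());
        System.out.println("Node count: " + tree.size());
    }
}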
From source file:org.ets.research.nlp.stanford_thrift.tregex.StanfordTregexThrift.java
License:Open Source License
public List<String> evaluateTregexPattern(String parseTree, String tregexPattern) {
    List<String> foundMatches = new ArrayList<String>();
    TregexPattern pattern = TregexPattern.compile(tregexPattern);
    TregexMatcher matches = pattern.matcher(Tree.valueOf(parseTree));
    Set<String> nodes = matches.getNodeNames();
    while (matches.find()) {
        foundMatches.add(matches.getMatch().pennString());
        for (String node : nodes) {
            foundMatches.add(matches.getNode(node).pennString());
        }
    }
    return foundMatches;
}
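A self-contained sketch of the same idea: build a Tree with Tree.valueOf and run a Tregex pattern over it. The pattern, sentence, and class name here are invented for illustration.

import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;

public class TregexOnStringTree {
    public static void main(String[] args) {
        Tree tree = Tree.valueOf(
                "(ROOT (S (NP (DT The) (NN cat)) (VP (VBD sat) (PP (IN on) (NP (DT the) (NN mat))))))");
        // Match NP nodes that dominate a common noun; bind the NN as "noun"
        TregexPattern pattern = TregexPattern.compile("NP < (NN=noun)");
        TregexMatcher matcher = pattern.matcher(tree);
        while (matcher.find()) {
            System.out.println("NP match:\n" + matcher.getMatch().pennString());
            System.out.println("Bound noun: " + matcher.getNode("noun").nodeString());
        }
    }
}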
From source file:pltag.parser.Example.java
License:Open Source License
/**
 * Read input from Dundee corpus. The format is:
 * RC_label|w_1 id_1 w_2 id_2 ...
 *
 * @param line
 */
private void readDundeeInput(String line) {
    int index = line.indexOf("|");
    if (index > -1)
        sentenceRc = line.substring(0, index);
    line = line.substring(index + 1);
    StringBuilder sent = new StringBuilder();
    boolean usePosTagger = opts.goldPosTags;
    StringBuilder posTagDummy = new StringBuilder();
    if (line.charAt(0) == '(') // some examples may contain already parsed input in tree format
    {
        List<Word> words = Tree.valueOf(treeProcessDundeeIds(line)).yieldWords();
        for (Word word : words) {
            sent.append(word).append(" ");
            if (!usePosTagger)
                posTagDummy.append("N/A ").append(word).append("\t");
        }
    } else {
        // remove quotes
        line = replaceParenthesesDundee(removeQuotesDundee(line).trim()).trim();
        String[] tokens = line.split(" ");
        wordIds = new String[tokens.length / 2];
        for (int i = 0; i < tokens.length - 1; i += 2) {
            String word = !usePosTagger ? wordRemoveDigits(tokens[i]) : tokens[i];
            sent.append(word).append(" ");
            if (!usePosTagger)
                posTagDummy.append("N/A ").append(word).append("\t");
            wordIds[i / 2] = tokens[i + 1];
        }
        sentence = sent.toString().trim();
        if (usePosTagger) {
            Pair<String, String>[] posWords = PosTagger.posTagLineToArray(sentence);
            // One or more words has been expanded due to PTB-compliant splitting.
            // For each new constituent assign the word-id of the original word.
            if (posWords.length != wordIds.length) {
                adjustWordIdsDundee(posWords, sentence.split(" "));
            }
            sentence = sentRemoveDigits(sentence);
            readPosTagged(wordRemoveDigits(PosTagger.tokensToLinePosTagged(posWords)));
        } else
            posTagged = posTagDummy.toString().trim();
    }
}
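The tree branch above relies on Tree.valueOf(...).yieldWords() to recover the token sequence from an already-parsed line. A minimal sketch of just that step; the sample line and class name are invented.

import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class YieldWordsFromBracketing {
    public static void main(String[] args) {
        String line = "(S (NP (DT the) (NN dog)) (VP (VBD barked)))";
        // yieldWords() flattens the tree back into its surface tokens, in order
        List<Word> words = Tree.valueOf(line).yieldWords();
        StringBuilder sent = new StringBuilder();
        for (Word word : words) {
            sent.append(word).append(" ");
        }
        System.out.println(sent.toString().trim()); // the dog barked
    }
}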
From source file:pltag.parser.json.JsonResult.java
License:Open Source License
private String toPtbFormat(String input) {
    Tree tree = Tree.valueOf(input);
    return tree.pennString();
}
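A standalone version of the same conversion: Tree.valueOf accepts a single-line bracketing and pennString() re-serializes it in indented Penn Treebank layout. The class name and sample input are illustrative.

import edu.stanford.nlp.trees.Tree;

public class ToPtbFormat {
    static String toPtbFormat(String input) {
        // Parse the flat bracketing and pretty-print it in Penn Treebank layout
        return Tree.valueOf(input).pennString();
    }

    public static void main(String[] args) {
        System.out.println(toPtbFormat("(S (NP (PRP He)) (VP (VBZ runs)))"));
    }
}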
From source file:pltag.parser.performance.BracketPerformance.java
License:Open Source License
@Override
public double[] add(String predAnalysis, String goldStandard, String name) {
    Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predAnalysis));
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldStandard));
    //        if (evalGold == null || evalGuess == null)
    //        {
    //            LogInfo.error(name + ": Cannot compare against a null gold or guess tree!\n");
    //            return 0.0d;
    //
    //        }
    //        if (evalGuess.yield().size() != evalGold.yield().size())
    //        {
    //            try
    //            {
    //                evalGuess = Utils.removeEmptyNodes(evalGuess);
    //            }
    //            catch(Exception e)
    //            {
    //                LogInfo.error("Example " + name);
    //                e.printStackTrace();
    //            }
    //        }
    evalb.evaluate(evalGuess, evalGold, name);
    totalEvalbF1 = evalb.getEvalbF1();
    return new double[] { evalb.getLastF1() };
}
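The evalb and treeCollinizer fields here belong to the pltag project itself. The collinization step, however, can be approximated with Stanford's own TreeCollinizer, which strips punctuation and other material that standard evalb-style bracket scoring ignores. A sketch under that assumption; the sample trees and class name are invented.

import edu.stanford.nlp.parser.lexparser.TreeCollinizer;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;

public class CollinizeBeforeScoring {
    public static void main(String[] args) {
        TreeCollinizer collinizer = new TreeCollinizer(new PennTreebankLanguagePack());
        Tree gold = Tree.valueOf("(ROOT (S (NP (NNP John)) (VP (VBZ sleeps)) (. .)))");
        Tree guess = Tree.valueOf("(ROOT (S (NP (NNP John)) (VP (VBZ sleeps) (. .))))");
        // Collinization normalizes both trees so they are compared only on the
        // brackets that evalb actually scores
        Tree evalGold = collinizer.transformTree(gold);
        Tree evalGuess = collinizer.transformTree(guess);
        if (evalGold != null && evalGuess != null) {
            System.out.println("Gold (collinized):\n" + evalGold.pennString());
            System.out.println("Guess (collinized):\n" + evalGuess.pennString());
        }
    }
}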
From source file:pltag.parser.performance.BracketPerformanceOracle.java
License:Open Source License
@Override
public double[] add(String[] nBestAnalyses, String[] goldStandard, String name) {
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldStandard[0]));
    int maxPos = 0, curPos = 0;
    double curF1, maxF1 = Double.NEGATIVE_INFINITY;
    String nBestAnalysis = null;
    for (String analysis : nBestAnalyses) {
        Tree curEvalGuess = treeCollinizer.transformTree(Tree.valueOf(analysis));
        curEvalb.evaluate(curEvalGuess, evalGold, name);
        curF1 = curEvalb.getLastF1();
        if (curF1 > maxF1) {
            maxF1 = curF1;
            maxPos = curPos;
            nBestAnalysis = analysis;
        }
        curPos++;
    }
    if (nBestAnalysis != null) {
        Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(nBestAnalysis));
        evalb.evaluate(evalGuess, evalGold, name);
        totalEvalbF1 = evalb.getEvalbF1();
        accuracy += maxPos == 0 ? 1 : 0;
    }
    return new double[] { evalb.getLastF1(), maxPos };
}
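The oracle loop above depends on the project's own evalb wrapper. A rough, self-contained approximation of the same idea scores each n-best candidate by unlabeled bracket overlap with the gold tree, using Tree.constituents(), and keeps the best one. The sample trees and class name are invented, and spans-only F1 is only a crude stand-in for full evalb scoring.

import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.Tree;
import java.util.HashSet;
import java.util.Set;

public class OracleBestParse {
    // Unlabeled bracket F1 between two trees, as a crude stand-in for evalb
    static double bracketF1(Tree guess, Tree gold) {
        Set<Constituent> guessSpans = guess.constituents();
        Set<Constituent> goldSpans = gold.constituents();
        if (guessSpans.isEmpty() || goldSpans.isEmpty()) return 0.0;
        Set<Constituent> common = new HashSet<>(guessSpans);
        common.retainAll(goldSpans);
        double p = (double) common.size() / guessSpans.size();
        double r = (double) common.size() / goldSpans.size();
        return p + r == 0 ? 0.0 : 2 * p * r / (p + r);
    }

    public static void main(String[] args) {
        Tree gold = Tree.valueOf("(S (NP (DT the) (NN cat)) (VP (VBD sat)))");
        String[] nBest = {
            "(S (NP (DT the)) (VP (NN cat) (VBD sat)))",
            "(S (NP (DT the) (NN cat)) (VP (VBD sat)))"
        };
        double maxF1 = Double.NEGATIVE_INFINITY;
        String best = null;
        for (String analysis : nBest) {
            double f1 = bracketF1(Tree.valueOf(analysis), gold);
            if (f1 > maxF1) {
                maxF1 = f1;
                best = analysis;
            }
        }
        System.out.println("Oracle analysis (F1 = " + maxF1 + "):\n" + best);
    }
}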
From source file:pltag.parser.performance.IncrementalBracketPerformance.java
License:Open Source License
public double[] add(IncrementalBracketPerformance performanceIn, String predSent, String goldSent, String name) {
    updateMap(evalbF1Map, performanceIn.evalbF1Map);
    updateMap(evalbNumExamplesMap, performanceIn.evalbNumExamplesMap);
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldSent));
    Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predSent));
    fullSentEvalb.evaluate(evalGuess, evalGold, name);
    processedExamplesCount++;
    totalEvalbF1 = fullSentEvalb.getEvalbF1();
    return new double[] { fullSentEvalb.getLastF1() };
}
From source file:pltag.parser.performance.IncrementalBracketPerformance.java
License:Open Source License
@Override
public double[] add(IncrementalBracketWidget predWidget, IncrementalBracketWidget goldWidget, String name) {
    int timestamp = predWidget.getTimestamp();
    String goldPartialTree = goldWidget.getTreeAt(timestamp);
    String predPartialTree = predWidget.getTreeAt(timestamp);
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldPartialTree));
    Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predPartialTree));
    partialEvalb.evaluate(evalGuess, evalGold, name);
    add(evalbF1Map, timestamp, partialEvalb.getLastF1());
    add(evalbNumExamplesMap, timestamp, 1.0d);
    //        if(predWidget.isFullSentence())
    //        {
    //            fullSentEvalb.evaluate(evalGuess, evalGold, name);
    //            totalEvalbF1 = fullSentEvalb.getEvalbF1();
    //            processedExamplesCount++;
    //        }
    return new double[] { partialEvalb.getLastF1() };
}
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    //        System.out.println(inputTree);
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0); // Subtract the number of terminals, pre-terminals (leaves.size()*2) and root symbol
    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals,
    // but only the ones that have been just completed, hence lie at the rightmost position of the tree.
    // Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // neighbours l1, l2 features
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else // leftmost leaf is at the beginning of the sentence
        {
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }
        // coPar and coLenPar features
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((CC either) (A ...) (CC or) (B...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((A ...) (, ,) (CC but) (B...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);
    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }
    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}
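The right-branch features at the top of this method hinge on two Tree lookups: parent(root) to find a leaf's pre-terminal and dominationPath(node) to measure how deep the rightmost word sits under the root. A stripped-down sketch of just that computation; the sample parse and class name are invented, and the project's feature bookkeeping is omitted.

import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class RightBranchSpine {
    public static void main(String[] args) {
        Tree tree = Tree.valueOf(
                "(ROOT (S (NP (DT The) (NN cat)) (VP (VBD sat) (PP (IN on) (NP (DT the) (NN mat))))))");
        List<Tree> leaves = tree.getLeaves();
        Tree currentWord = leaves.get(leaves.size() - 1); // rightmost leaf ("mat")
        Tree preTerminal = currentWord.parent(tree);      // its pre-terminal ("NN")
        // Nodes on the path from the root down to the rightmost pre-terminal
        int pathSize = tree.dominationPath(preTerminal).size();
        // As in the feature extractor above: drop root and pre-terminal from the count
        int rightBranchSpine = pathSize > 2 ? pathSize - 2 : 0;
        System.out.println("Domination path size: " + pathSize);
        System.out.println("Right-branch spine:   " + rightBranchSpine);
    }
}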
From source file:pltag.parser.semantics.IncrementalSemanticsPerformance.java
License:Open Source License
public double[] add(IncrementalSemanticsPerformance performanceIn, String predSent, String goldSent, String name) {
    updateMap(predResultMap, performanceIn.predResultMap);
    updateMap(predWithSenseResultMap, performanceIn.predWithSenseResultMap);
    updateMap(argWordResultMap, performanceIn.argWordResultMap);
    updateMap(argRoleResultMap, performanceIn.argRoleResultMap);
    updateMap(argPredWordResultMap, performanceIn.argPredWordResultMap);
    updateMap(incompleteTripleMap, performanceIn.incompleteTripleMap);
    updateMap(srlResultMap, performanceIn.srlResultMap);
    predResult.add(performanceIn.predResult);
    argsResult.add(performanceIn.argsResult);
    srlResult.add(performanceIn.srlResult);
    processedExamplesCount += performanceIn.processedExamplesCount;
    updateMapDouble(evalbF1Map, performanceIn.evalbF1Map);
    updateMapDouble(evalbNumExamplesMap, performanceIn.evalbNumExamplesMap);
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldSent));
    Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predSent));
    fullSentEvalb.evaluate(evalGuess, evalGold, name);
    totalEvalbF1 = fullSentEvalb.getEvalbF1();
    return new double[] { fullSentEvalb.getLastF1(), performanceIn.srlResult.f1() };
}
From source file:pltag.parser.semantics.IncrementalSemanticsPerformance.java
License:Open Source License
@Override
public double[] add(IncrementalSemanticsWidget predWidget, IncrementalSemanticsWidget goldWidget, String name) {
    int timestamp = predWidget.getTimestamp();
    EvalResult predicateResult = new EvalResult(), predWithSenseResult = new EvalResult(),
            argWordResult = new EvalResult(), argRoleResult = new EvalResult(),
            incompleteArcResult = new EvalResult(), srlResultLocal = new EvalResult();
    add(getEvalResultAt(timestamp, predResultMap), predicateResult, predWidget.getPredicates(),
            goldWidget.getPredicates());
    add(getEvalResultAt(timestamp, predWithSenseResultMap), predWithSenseResult,
            predWidget.getPredicatesWithSense(), goldWidget.getPredicatesWithSense());
    add(getEvalResultAt(timestamp, argWordResultMap), argWordResult, predWidget.getArgWords(),
            goldWidget.getArgWords());
    add(getEvalResultAt(timestamp, argRoleResultMap), argRoleResult, predWidget.getArgRoles(),
            goldWidget.getArgRoles());
    // add incomplete arc score (predicate-incomplete and argument-incomplete only)
    add(getEvalResultAt(timestamp, incompleteTripleMap), incompleteArcResult, predWidget.getIncompleteArcs(),
            goldWidget.getIncompleteArcs());
    // add UPS (argument-incomplete, predicate-incomplete and complete triples, without role disambiguation)
    // add UAS + predicate identification score
    getEvalResultAt(timestamp, argPredWordResultMap).add(argWordResult);
    getEvalResultAt(timestamp, argPredWordResultMap).add(predicateResult);
    // add full SRL score (complete dependencies)
    srlResultLocal.add(argRoleResult);
    //        srlResultLocal.add(predWithSenseResult);
    srlResultLocal.add(predicateResult);
    getEvalResultAt(timestamp, srlResultMap).add(srlResultLocal);
    if (predWidget.isFullSentence()) {
        //            predResult.add(predWithSenseResult);
        predResult.add(predicateResult);
        argsResult.add(argRoleResult);
        srlResult.add(srlResultLocal);
        processedExamplesCount++;
    }
    if (timestamp > 0) {
        String goldPartialTree = goldWidget.getTreeAt(timestamp);
        String predPartialTree = predWidget.getTreeAt(timestamp);
        Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldPartialTree));
        Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predPartialTree));
        partialEvalb.evaluate(evalGuess, evalGold, name);
        add(evalbF1Map, timestamp, partialEvalb.getLastF1());
        add(evalbNumExamplesMap, timestamp, 1.0d);
    }
    return new double[] { partialEvalb.getLastF1(), srlResultLocal.f1() };
}