List of usage examples for edu.stanford.nlp.trees Tree valueOf
public static Tree valueOf(String str)
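Before the project-specific examples below, a minimal standalone sketch of the call itself: Tree.valueOf parses a Penn Treebank-style bracketed string into a Tree. The class name and sample parse are illustrative only.

import edu.stanford.nlp.trees.Tree;

public class TreeValueOfBasics {
    public static void main(String[] args) {
        // Build a Tree directly from a bracketed parse string
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN cat)) (VP (VBD sat))))");
        System.out.println("Root label: " + tree.label().value());    // ROOT
        System.out.println("Leaf count: " + tree.getLeaves().size()); // 3
        System.out.println("Tree depth: " + tree.depth());
        System.out.println("Node count: " + tree.size());
    }
}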
From source file:org.ets.research.nlp.stanford_thrift.tregex.StanfordTregexThrift.java
License:Open Source License
public List<String> evaluateTregexPattern(String parseTree, String tregexPattern) {
    List<String> foundMatches = new ArrayList<String>();
    TregexPattern pattern = TregexPattern.compile(tregexPattern);
    TregexMatcher matches = pattern.matcher(Tree.valueOf(parseTree));
    Set<String> nodes = matches.getNodeNames();
    while (matches.find()) {
        foundMatches.add(matches.getMatch().pennString());
        for (String node : nodes) {
            foundMatches.add(matches.getNode(node).pennString());
        }
    }
    return foundMatches;
}
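A self-contained sketch of the same idea: build a Tree with Tree.valueOf and run a Tregex pattern over it. The pattern, sentence, and class name here are invented for illustration.

import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;

public class TregexOnStringTree {
    public static void main(String[] args) {
        Tree tree = Tree.valueOf(
                "(ROOT (S (NP (DT The) (NN cat)) (VP (VBD sat) (PP (IN on) (NP (DT the) (NN mat))))))");
        // Match NP nodes that dominate a common noun; bind the NN as "noun"
        TregexPattern pattern = TregexPattern.compile("NP < (NN=noun)");
        TregexMatcher matcher = pattern.matcher(tree);
        while (matcher.find()) {
            System.out.println("NP match:\n" + matcher.getMatch().pennString());
            System.out.println("Bound noun: " + matcher.getNode("noun").nodeString());
        }
    }
}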
From source file:pltag.parser.Example.java
License:Open Source License
/**
 * Read input from Dundee corpus. The format is:
 * RC_label|w_1 id_1 w_2 id_2 ...
 *
 * @param line
 */
private void readDundeeInput(String line) {
    int index = line.indexOf("|");
    if (index > -1)
        sentenceRc = line.substring(0, index);
    line = line.substring(index + 1);
    StringBuilder sent = new StringBuilder();
    boolean usePosTagger = opts.goldPosTags;
    StringBuilder posTagDummy = new StringBuilder();
    if (line.charAt(0) == '(') // some examples may contain already parsed input in tree format
    {
        List<Word> words = Tree.valueOf(treeProcessDundeeIds(line)).yieldWords();
        for (Word word : words) {
            sent.append(word).append(" ");
            if (!usePosTagger)
                posTagDummy.append("N/A ").append(word).append("\t");
        }
    } else {
        // remove quotes
        line = replaceParenthesesDundee(removeQuotesDundee(line).trim()).trim();
        String[] tokens = line.split(" ");
        wordIds = new String[tokens.length / 2];
        for (int i = 0; i < tokens.length - 1; i += 2) {
            String word = !usePosTagger ? wordRemoveDigits(tokens[i]) : tokens[i];
            sent.append(word).append(" ");
            if (!usePosTagger)
                posTagDummy.append("N/A ").append(word).append("\t");
            wordIds[i / 2] = tokens[i + 1];
        }
        sentence = sent.toString().trim();
        if (usePosTagger) {
            Pair<String, String>[] posWords = PosTagger.posTagLineToArray(sentence);
            // One or more words has been expanded due to PTB-compliant splitting.
            // For each new constituent assign the word-id of the original word.
            if (posWords.length != wordIds.length) {
                adjustWordIdsDundee(posWords, sentence.split(" "));
            }
            sentence = sentRemoveDigits(sentence);
            readPosTagged(wordRemoveDigits(PosTagger.tokensToLinePosTagged(posWords)));
        } else
            posTagged = posTagDummy.toString().trim();
    }
}
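The tree branch above relies on Tree.valueOf(...).yieldWords() to recover the token sequence from an already-parsed line. A minimal sketch of just that step; the sample line and class name are invented.

import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class YieldWordsFromBracketing {
    public static void main(String[] args) {
        String line = "(S (NP (DT the) (NN dog)) (VP (VBD barked)))";
        // yieldWords() flattens the tree back into its surface tokens, in order
        List<Word> words = Tree.valueOf(line).yieldWords();
        StringBuilder sent = new StringBuilder();
        for (Word word : words) {
            sent.append(word).append(" ");
        }
        System.out.println(sent.toString().trim()); // the dog barked
    }
}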
From source file:pltag.parser.json.JsonResult.java
License:Open Source License
private String toPtbFormat(String input) {
    Tree tree = Tree.valueOf(input);
    return tree.pennString();
}
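A standalone version of the same conversion: Tree.valueOf accepts a single-line bracketing and pennString() re-serializes it in indented Penn Treebank layout. The class name and sample input are illustrative.

import edu.stanford.nlp.trees.Tree;

public class ToPtbFormat {
    static String toPtbFormat(String input) {
        // Parse the flat bracketing and pretty-print it in Penn Treebank layout
        return Tree.valueOf(input).pennString();
    }

    public static void main(String[] args) {
        System.out.println(toPtbFormat("(S (NP (PRP He)) (VP (VBZ runs)))"));
    }
}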
From source file:pltag.parser.performance.BracketPerformance.java
License:Open Source License
@Override
public double[] add(String predAnalysis, String goldStandard, String name) {
    Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predAnalysis));
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldStandard));
    //        if (evalGold == null || evalGuess == null)
    //        {
    //            LogInfo.error(name + ": Cannot compare against a null gold or guess tree!\n");
    //            return 0.0d;
    //
    //        }
    //        if (evalGuess.yield().size() != evalGold.yield().size())
    //        {
    //            try
    //            {
    //                evalGuess = Utils.removeEmptyNodes(evalGuess);
    //            }
    //            catch(Exception e)
    //            {
    //                LogInfo.error("Example " + name);
    //                e.printStackTrace();
    //            }
    //        }
    evalb.evaluate(evalGuess, evalGold, name);
    totalEvalbF1 = evalb.getEvalbF1();
    return new double[] { evalb.getLastF1() };
}
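The evalb and treeCollinizer fields here belong to the pltag project itself. The collinization step, however, can be approximated with Stanford's own TreeCollinizer, which strips punctuation and other material that standard evalb-style bracket scoring ignores. A sketch under that assumption; the sample trees and class name are invented.

import edu.stanford.nlp.parser.lexparser.TreeCollinizer;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;

public class CollinizeBeforeScoring {
    public static void main(String[] args) {
        TreeCollinizer collinizer = new TreeCollinizer(new PennTreebankLanguagePack());
        Tree gold = Tree.valueOf("(ROOT (S (NP (NNP John)) (VP (VBZ sleeps)) (. .)))");
        Tree guess = Tree.valueOf("(ROOT (S (NP (NNP John)) (VP (VBZ sleeps) (. .))))");
        // Collinization normalizes both trees so they are compared only on the
        // brackets that evalb actually scores
        Tree evalGold = collinizer.transformTree(gold);
        Tree evalGuess = collinizer.transformTree(guess);
        if (evalGold != null && evalGuess != null) {
            System.out.println("Gold (collinized):\n" + evalGold.pennString());
            System.out.println("Guess (collinized):\n" + evalGuess.pennString());
        }
    }
}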
From source file:pltag.parser.performance.BracketPerformanceOracle.java
License:Open Source License
@Override
public double[] add(String[] nBestAnalyses, String[] goldStandard, String name) {
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldStandard[0]));
    int maxPos = 0, curPos = 0;
    double curF1, maxF1 = Double.NEGATIVE_INFINITY;
    String nBestAnalysis = null;
    for (String analysis : nBestAnalyses) {
        Tree curEvalGuess = treeCollinizer.transformTree(Tree.valueOf(analysis));
        curEvalb.evaluate(curEvalGuess, evalGold, name);
        curF1 = curEvalb.getLastF1();
        if (curF1 > maxF1) {
            maxF1 = curF1;
            maxPos = curPos;
            nBestAnalysis = analysis;
        }
        curPos++;
    }
    if (nBestAnalysis != null) {
        Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(nBestAnalysis));
        evalb.evaluate(evalGuess, evalGold, name);
        totalEvalbF1 = evalb.getEvalbF1();
        accuracy += maxPos == 0 ? 1 : 0;
    }
    return new double[] { evalb.getLastF1(), maxPos };
}
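The oracle loop above depends on the project's own evalb wrapper. A rough, self-contained approximation of the same idea scores each n-best candidate by unlabeled bracket overlap with the gold tree, using Tree.constituents(), and keeps the best one. The sample trees and class name are invented, and spans-only F1 is only a crude stand-in for full evalb scoring.

import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.Tree;
import java.util.HashSet;
import java.util.Set;

public class OracleBestParse {
    // Unlabeled bracket F1 between two trees, as a crude stand-in for evalb
    static double bracketF1(Tree guess, Tree gold) {
        Set<Constituent> guessSpans = guess.constituents();
        Set<Constituent> goldSpans = gold.constituents();
        if (guessSpans.isEmpty() || goldSpans.isEmpty()) return 0.0;
        Set<Constituent> common = new HashSet<>(guessSpans);
        common.retainAll(goldSpans);
        double p = (double) common.size() / guessSpans.size();
        double r = (double) common.size() / goldSpans.size();
        return p + r == 0 ? 0.0 : 2 * p * r / (p + r);
    }

    public static void main(String[] args) {
        Tree gold = Tree.valueOf("(S (NP (DT the) (NN cat)) (VP (VBD sat)))");
        String[] nBest = {
            "(S (NP (DT the)) (VP (NN cat) (VBD sat)))",
            "(S (NP (DT the) (NN cat)) (VP (VBD sat)))"
        };
        double maxF1 = Double.NEGATIVE_INFINITY;
        String best = null;
        for (String analysis : nBest) {
            double f1 = bracketF1(Tree.valueOf(analysis), gold);
            if (f1 > maxF1) {
                maxF1 = f1;
                best = analysis;
            }
        }
        System.out.println("Oracle analysis (F1 = " + maxF1 + "):\n" + best);
    }
}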
From source file:pltag.parser.performance.IncrementalBracketPerformance.java
License:Open Source License
public double[] add(IncrementalBracketPerformance performanceIn, String predSent, String goldSent, String name) {
    updateMap(evalbF1Map, performanceIn.evalbF1Map);
    updateMap(evalbNumExamplesMap, performanceIn.evalbNumExamplesMap);
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldSent));
    Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predSent));
    fullSentEvalb.evaluate(evalGuess, evalGold, name);
    processedExamplesCount++;
    totalEvalbF1 = fullSentEvalb.getEvalbF1();
    return new double[] { fullSentEvalb.getLastF1() };
}
From source file:pltag.parser.performance.IncrementalBracketPerformance.java
License:Open Source License
@Override
public double[] add(IncrementalBracketWidget predWidget, IncrementalBracketWidget goldWidget, String name) {
    int timestamp = predWidget.getTimestamp();
    String goldPartialTree = goldWidget.getTreeAt(timestamp);
    String predPartialTree = predWidget.getTreeAt(timestamp);
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldPartialTree));
    Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predPartialTree));
    partialEvalb.evaluate(evalGuess, evalGold, name);
    add(evalbF1Map, timestamp, partialEvalb.getLastF1());
    add(evalbNumExamplesMap, timestamp, 1.0d);
    //        if(predWidget.isFullSentence())
    //        {
    //            fullSentEvalb.evaluate(evalGuess, evalGold, name);
    //            totalEvalbF1 = fullSentEvalb.getEvalbF1();
    //            processedExamplesCount++;
    //        }
    return new double[] { partialEvalb.getLastF1() };
}
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    //        System.out.println(inputTree);
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0); // Subtract the number of terminals, pre-terminals (leaves.size()*2) and root symbol
    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals,
    // but only the ones that have been just completed, hence lie at the rightmost position of the tree.
    // Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // neighbours l1, l2 features
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else // leftmost leaf is at the beginning of the sentence
        {
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }
        // coPar and coLenPar features
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((CC either) (A ...) (CC or) (B...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((A ...) (, ,) (CC but) (B...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);
    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }
    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}
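The right-branch features at the top of this method hinge on two Tree lookups: parent(root) to find a leaf's pre-terminal and dominationPath(node) to measure how deep the rightmost word sits under the root. A stripped-down sketch of just that computation; the sample parse and class name are invented, and the project's feature bookkeeping is omitted.

import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class RightBranchSpine {
    public static void main(String[] args) {
        Tree tree = Tree.valueOf(
                "(ROOT (S (NP (DT The) (NN cat)) (VP (VBD sat) (PP (IN on) (NP (DT the) (NN mat))))))");
        List<Tree> leaves = tree.getLeaves();
        Tree currentWord = leaves.get(leaves.size() - 1); // rightmost leaf ("mat")
        Tree preTerminal = currentWord.parent(tree);      // its pre-terminal ("NN")
        // Nodes on the path from the root down to the rightmost pre-terminal
        int pathSize = tree.dominationPath(preTerminal).size();
        // As in the feature extractor above: drop root and pre-terminal from the count
        int rightBranchSpine = pathSize > 2 ? pathSize - 2 : 0;
        System.out.println("Domination path size: " + pathSize);
        System.out.println("Right-branch spine:   " + rightBranchSpine);
    }
}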
From source file:pltag.parser.semantics.IncrementalSemanticsPerformance.java
License:Open Source License
public double[] add(IncrementalSemanticsPerformance performanceIn, String predSent, String goldSent, String name) {
    updateMap(predResultMap, performanceIn.predResultMap);
    updateMap(predWithSenseResultMap, performanceIn.predWithSenseResultMap);
    updateMap(argWordResultMap, performanceIn.argWordResultMap);
    updateMap(argRoleResultMap, performanceIn.argRoleResultMap);
    updateMap(argPredWordResultMap, performanceIn.argPredWordResultMap);
    updateMap(incompleteTripleMap, performanceIn.incompleteTripleMap);
    updateMap(srlResultMap, performanceIn.srlResultMap);
    predResult.add(performanceIn.predResult);
    argsResult.add(performanceIn.argsResult);
    srlResult.add(performanceIn.srlResult);
    processedExamplesCount += performanceIn.processedExamplesCount;
    updateMapDouble(evalbF1Map, performanceIn.evalbF1Map);
    updateMapDouble(evalbNumExamplesMap, performanceIn.evalbNumExamplesMap);
    Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldSent));
    Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predSent));
    fullSentEvalb.evaluate(evalGuess, evalGold, name);
    totalEvalbF1 = fullSentEvalb.getEvalbF1();
    return new double[] { fullSentEvalb.getLastF1(), performanceIn.srlResult.f1() };
}
From source file:pltag.parser.semantics.IncrementalSemanticsPerformance.java
License:Open Source License
@Override
public double[] add(IncrementalSemanticsWidget predWidget, IncrementalSemanticsWidget goldWidget, String name) {
    int timestamp = predWidget.getTimestamp();
    EvalResult predicateResult = new EvalResult(), predWithSenseResult = new EvalResult(),
            argWordResult = new EvalResult(), argRoleResult = new EvalResult(),
            incompleteArcResult = new EvalResult(), srlResultLocal = new EvalResult();
    add(getEvalResultAt(timestamp, predResultMap), predicateResult, predWidget.getPredicates(),
            goldWidget.getPredicates());
    add(getEvalResultAt(timestamp, predWithSenseResultMap), predWithSenseResult,
            predWidget.getPredicatesWithSense(), goldWidget.getPredicatesWithSense());
    add(getEvalResultAt(timestamp, argWordResultMap), argWordResult, predWidget.getArgWords(),
            goldWidget.getArgWords());
    add(getEvalResultAt(timestamp, argRoleResultMap), argRoleResult, predWidget.getArgRoles(),
            goldWidget.getArgRoles());
    // add incomplete arc score (predicate-incomplete and argument-incomplete only)
    add(getEvalResultAt(timestamp, incompleteTripleMap), incompleteArcResult, predWidget.getIncompleteArcs(),
            goldWidget.getIncompleteArcs());
    // add UPS (argument-incomplete, predicate-incomplete and complete triples, without role disambiguation)
    // add UAS + predicate identification score
    getEvalResultAt(timestamp, argPredWordResultMap).add(argWordResult);
    getEvalResultAt(timestamp, argPredWordResultMap).add(predicateResult);
    // add full SRL score (complete dependencies)
    srlResultLocal.add(argRoleResult);
    //        srlResultLocal.add(predWithSenseResult);
    srlResultLocal.add(predicateResult);
    getEvalResultAt(timestamp, srlResultMap).add(srlResultLocal);
    if (predWidget.isFullSentence()) {
        //            predResult.add(predWithSenseResult);
        predResult.add(predicateResult);
        argsResult.add(argRoleResult);
        srlResult.add(srlResultLocal);
        processedExamplesCount++;
    }
    if (timestamp > 0) {
        String goldPartialTree = goldWidget.getTreeAt(timestamp);
        String predPartialTree = predWidget.getTreeAt(timestamp);
        Tree evalGold = treeCollinizer.transformTree(Tree.valueOf(goldPartialTree));
        Tree evalGuess = treeCollinizer.transformTree(Tree.valueOf(predPartialTree));
        partialEvalb.evaluate(evalGuess, evalGold, name);
        add(evalbF1Map, timestamp, partialEvalb.getLastF1());
        add(evalbNumExamplesMap, timestamp, 1.0d);
    }
    return new double[] { partialEvalb.getLastF1(), srlResultLocal.f1() };
}