List of usage examples for the edu.stanford.nlp.trees.Tree method getLeaves
public <T extends Tree> List<T> getLeaves()
From source file: ConstituencyParse.java
License: Apache License
public int[] constTreeParents(Tree tree) { Tree binarized = binarizer.transformTree(tree); Tree collapsedUnary = transformer.transformTree(binarized); Trees.convertToCoreLabels(collapsedUnary); collapsedUnary.indexSpans();/*from w w w . j av a2 s. c o m*/ List<Tree> leaves = collapsedUnary.getLeaves(); int size = collapsedUnary.size() - leaves.size(); int[] parents = new int[size]; HashMap<Integer, Integer> index = new HashMap<Integer, Integer>(); int idx = leaves.size(); int leafIdx = 0; for (Tree leaf : leaves) { Tree cur = leaf.parent(collapsedUnary); // go to preterminal int curIdx = leafIdx++; boolean done = false; while (!done) { Tree parent = cur.parent(collapsedUnary); if (parent == null) { parents[curIdx] = 0; break; } int parentIdx; int parentNumber = parent.nodeNumber(collapsedUnary); if (!index.containsKey(parentNumber)) { parentIdx = idx++; index.put(parentNumber, parentIdx); } else { parentIdx = index.get(parentNumber); done = true; } parents[curIdx] = parentIdx + 1; cur = parent; curIdx = parentIdx; } } return parents; }
From source file: KleinBilingualParser.java
public static void main(String[] args) { boolean trainF = false; boolean trainE = false; boolean bitrainE = false; boolean bitrainF = false; boolean saveToSerializedFile = false; boolean saveToTextFile = false; String serializedInputFileOrUrl = null; String textInputFileOrUrl = null; String serializedOutputFileOrUrl = null; String textOutputFileOrUrl = null; String treebankPathF = null;//w ww .ja va2 s .co m Treebank testTreebankF = null; Treebank tuneTreebankF = null; String testPathF = null; FileFilter testFilterF = null; String treebankPathE = null; Treebank testTreebankE = null; Treebank tuneTreebankE = null; String testPathE = null; FileFilter testFilterE = null; String tunePath = null; FileFilter tuneFilter = null; FileFilter trainFilterF = null; FileFilter trainFilterE = null; String secondaryTreebankPath = null; double secondaryTreebankWeight = 1.0; FileFilter secondaryTrainFilter = null; String trainAlignFile = null; String testAlignFile = null; String bitrainPathE = null; FileFilter bitrainFilterE = null; String bitrainPathF = null; FileFilter bitrainFilterF = null; Treebank bitrainTreebankF = null; Treebank bitrainTreebankE = null; // variables needed to process the files to be parsed TokenizerFactory<? 
extends HasWord> tokenizerFactory = null; String tokenizerOptions = null; String tokenizerFactoryClass = null; String tokenizerMethod = null; boolean tokenized = false; // whether or not the input file has already been tokenized Function<List<HasWord>, List<HasWord>> escaper = null; String tagDelimiter = null; String sentenceDelimiter = null; String elementDelimiter = null; int argIndex = 0; if (args.length < 1) { log.info( "Basic usage (see Javadoc for more): java edu.stanford.nlp.parser.lexparser.LexicalizedParser parserFileOrUrl filename*"); return; } Options fOp = new Options(); Options eOp = new Options(); List<String> optionArgs = new ArrayList<>(); String encodingF = null; // while loop through option arguments while (!args[argIndex].equals("--") && argIndex < args.length && args[argIndex].charAt(0) == '-') { if (args[argIndex].equalsIgnoreCase("-train") || args[argIndex].equalsIgnoreCase("-trainTreebank")) { trainF = true; Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-train"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; treebankPathF = treebankDescription.first(); trainFilterF = treebankDescription.second(); } else if (args[argIndex].equalsIgnoreCase("-bitrain") || args[argIndex].equalsIgnoreCase("-bitrainTreebank")) { bitrainF = true; Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-bitrain"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; bitrainPathF = treebankDescription.first(); bitrainFilterF = treebankDescription.second(); } else if (args[argIndex].equalsIgnoreCase("-tLPP") && (argIndex + 1 < args.length)) { try { fOp.tlpParams = (TreebankLangParserParams) Class.forName(args[argIndex + 1]).newInstance(); } catch (ClassNotFoundException e) { log.info("Class not found: " + args[argIndex + 1]); throw new RuntimeException(e); } catch (InstantiationException e) { log.info("Couldn't instantiate: " + args[argIndex + 1] + ": " 
+ e.toString()); throw new RuntimeException(e); } catch (IllegalAccessException e) { log.info("Illegal access" + e); throw new RuntimeException(e); } argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-encoding")) { // sets encoding for TreebankLangParserParams // redone later to override any serialized parser one read in encodingF = args[argIndex + 1]; fOp.tlpParams.setInputEncoding(encodingF); fOp.tlpParams.setOutputEncoding(encodingF); argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-treebank") || args[argIndex].equalsIgnoreCase("-testTreebank") || args[argIndex].equalsIgnoreCase("-test")) { Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-test"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; testPathF = treebankDescription.first(); testFilterF = treebankDescription.second(); } else { int oldIndex = argIndex; argIndex = fOp.setOptionOrWarn(args, argIndex); optionArgs.addAll(Arrays.asList(args).subList(oldIndex, argIndex)); } System.out.println(argIndex + " " + args.length); } // end while loop through arguments for french argIndex++;//go to english arguments while (argIndex < args.length && args[argIndex].charAt(0) == '-') { if (args[argIndex].equalsIgnoreCase("-train") || args[argIndex].equalsIgnoreCase("-trainTreebank")) { trainE = true; Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-train"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; treebankPathE = treebankDescription.first(); trainFilterE = treebankDescription.second(); } else if (args[argIndex].equalsIgnoreCase("-bitrain") || args[argIndex].equalsIgnoreCase("-bitrainTreebank")) { bitrainE = true; Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-bitrain"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; bitrainPathE = treebankDescription.first(); bitrainFilterE = treebankDescription.second(); } 
else if (args[argIndex].equalsIgnoreCase("-treebank") || args[argIndex].equalsIgnoreCase("-testTreebank") || args[argIndex].equalsIgnoreCase("-test")) { Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-test"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; testPathE = treebankDescription.first(); testFilterE = treebankDescription.second(); } else if (args[argIndex].equalsIgnoreCase("-trainAlignFile")) { Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-trainAlignFile"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; trainAlignFile = treebankDescription.first(); } else if (args[argIndex].equalsIgnoreCase("-testAlignFile")) { Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-testAlignFile"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; testAlignFile = treebankDescription.first(); } else { int oldIndex = argIndex; argIndex = eOp.setOptionOrWarn(args, argIndex); optionArgs.addAll(Arrays.asList(args).subList(oldIndex, argIndex)); } } // end while loop through arguments for english // if (!train && fOp.testOptions.verbose) { // StringUtils.logInvocationString(log, args); // } LexicalizedParser lpF; // always initialized in next if-then-else block LexicalizedParser lpE; //TRAIN A PARSER // so we train a parser using the treebank GrammarCompactor compactorF = null; GrammarCompactor compactorE = null; if (fOp.trainOptions.compactGrammar() == 3) { compactorF = new ExactGrammarCompactor(fOp, false, false); } if (eOp.trainOptions.compactGrammar() == 3) { compactorE = new ExactGrammarCompactor(eOp, false, false); } Treebank trainTreebankF = makeTreebank(treebankPathF, fOp, trainFilterF); Treebank trainTreebankE = makeTreebank(treebankPathE, eOp, trainFilterE); fOp.testOptions.quietEvaluation = true; eOp.testOptions.quietEvaluation = true; lpF = getParserFromTreebank(trainTreebankF, null, 
secondaryTreebankWeight, compactorF, fOp, tuneTreebankF, null); lpE = getParserFromTreebank(trainTreebankE, null, secondaryTreebankWeight, compactorE, eOp, tuneTreebankE, null); // the following has to go after reading parser to make sure // op and tlpParams are the same for train and test // THIS IS BUTT UGLY BUT IT STOPS USER SPECIFIED ENCODING BEING // OVERWRITTEN BY ONE SPECIFIED IN SERIALIZED PARSER if (encodingF != null) { fOp.tlpParams.setInputEncoding(encodingF); fOp.tlpParams.setOutputEncoding(encodingF); } if (bitrainFilterF != null || bitrainPathF != null) { if (bitrainPathF == null) { //? if (treebankPathF == null) { throw new RuntimeException("No bitrain treebank path specified..."); } else { log.info("No test treebank path specified. Using train path: \"" + treebankPathF + '\"'); bitrainPathF = treebankPathF; } } bitrainTreebankF = fOp.tlpParams.testMemoryTreebank(); bitrainTreebankF.loadPath(bitrainPathF, bitrainFilterF); } if (bitrainFilterE != null || bitrainPathE != null) { if (bitrainPathE == null) { if (treebankPathE == null) { throw new RuntimeException("No test treebank path specified..."); } else { log.info("No test treebank path specified. Using train path: \"" + treebankPathE + '\"'); bitrainPathE = treebankPathE; } } bitrainTreebankE = eOp.tlpParams.testMemoryTreebank(); bitrainTreebankE.loadPath(bitrainPathE, bitrainFilterE); } if (encodingF != null) { fOp.tlpParams.setInputEncoding(encodingF); fOp.tlpParams.setOutputEncoding(encodingF); } if (testFilterF != null || testPathF != null) { if (testPathF == null) { if (treebankPathF == null) { throw new RuntimeException("No test treebank path specified..."); } else { log.info("No test treebank path specified. 
Using train path: \"" + treebankPathF + '\"'); testPathF = treebankPathF; } } testTreebankF = fOp.tlpParams.testMemoryTreebank(); testTreebankF.loadPath(testPathF, testFilterF); } fOp.trainOptions.sisterSplitters = Generics.newHashSet(Arrays.asList(fOp.tlpParams.sisterSplitters())); if (testFilterE != null || testPathE != null) { if (testPathE == null) { if (treebankPathE == null) { throw new RuntimeException("No test treebank path specified..."); } else { log.info("No test treebank path specified. Using train path: \"" + treebankPathE + '\"'); testPathE = treebankPathE; } } testTreebankE = eOp.tlpParams.testMemoryTreebank(); testTreebankE.loadPath(testPathE, testFilterE); } eOp.trainOptions.sisterSplitters = Generics.newHashSet(Arrays.asList(eOp.tlpParams.sisterSplitters())); //PARALLEL ALIGNMENT FEATURE CALCULATION, CALCULATION OF 'A' MATRIX double[] weights = new double[8]; double diff; weights[0] = 0.01; weights[1] = -0.002; weights[2] = 0.002; weights[3] = 0.002; weights[4] = 0.002; weights[5] = 0.002; weights[6] = -0.002; weights[7] = -0.002; ArrayList<HashMap<Integer, ArrayList<Integer>>> bitrainAlignments = null; ArrayList<HashMap<Integer, ArrayList<Integer>>> testAlignments = null; //String alignFile="../../berkeleyaligner/output/test.align"; try { AlignmentProcessor trainAP = new AlignmentProcessor(trainAlignFile); bitrainAlignments = trainAP.createAlignments(); AlignmentProcessor testAP = new AlignmentProcessor(testAlignFile); testAlignments = testAP.createAlignments(); } catch (FileNotFoundException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } int kE = 10; int kF = 10; int numFeatures = 8; int numBigSentences = 0; do { diff = 0.0; Iterator<Tree> eTrees = bitrainTreebankE.iterator(); Iterator<Tree> fTrees = bitrainTreebankF.iterator(); Iterator<HashMap<Integer, ArrayList<Integer>>> alignIterator = bitrainAlignments.iterator(); numBigSentences = 0; //features are used in the order they are defined double 
A[][][][] = new double[bitrainTreebankE.size()][numFeatures][kE][kF]; int ePsGold[] = new int[bitrainTreebankE.size()]; int fPsGold[] = new int[bitrainTreebankF.size()]; int i = 0; while (eTrees.hasNext() && fTrees.hasNext() && alignIterator.hasNext()) { HashMap<Integer, ArrayList<Integer>> alignMap = alignIterator.next(); Tree fTree = fTrees.next(); Tree eTree = eTrees.next(); if (fTree.getLeaves().size() > 70 || fTree.getLeaves().size() > 70) { //System.out.println("Too big : " + i); numBigSentences++; fPsGold[i] = 3; ePsGold[i] = 3; i++; continue; } List<? extends HasWord> sentenceF = Sentence.toCoreLabelList(fTree.yieldWords()); List<? extends HasWord> sentenceE = Sentence.toCoreLabelList(eTree.yieldWords()); LexicalizedParserQuery lpqE = (LexicalizedParserQuery) lpE.parserQuery(); LexicalizedParserQuery lpqF = (LexicalizedParserQuery) lpF.parserQuery(); lpqE.parse(sentenceE); lpqF.parse(sentenceF); List<ScoredObject<Tree>> kBestF = lpqF.getKBestPCFGParses(kF); List<ScoredObject<Tree>> kBestE = lpqE.getKBestPCFGParses(kE); fPsGold[i] = 3; ePsGold[i] = 3; int j = 0; int k = 0; for (ScoredObject<Tree> eScoredObj : kBestE) { k = 0; for (ScoredObject<Tree> fScoredObj : kBestF) { eScoredObj.object().setSpans(); fScoredObj.object().setSpans(); HashMap<Tree, Tree> alignment = getHungarianAlignment(eScoredObj.object(), fScoredObj.object(), weights, alignMap); //had to reduce likelihood scores by factor of 10 to keep the optimizer working A[i][0][j][k] = eScoredObj.score() / 1000; A[i][1][j][k] = fScoredObj.score() / 1000; for (Map.Entry entry : alignment.entrySet()) { Tree nodeF = (Tree) entry.getKey(); Tree nodeE = (Tree) entry.getValue(); A[i][2][j][k] += spanDiff(nodeF, nodeE); A[i][3][j][k] += numChildren(nodeF, nodeE); A[i][4][j][k] += insideBoth(nodeF, nodeE, alignMap); A[i][5][j][k] += insideSrcOutsideTgt(nodeF, nodeE, alignMap); A[i][6][j][k] += insideTgtOutsideSrc(nodeF, nodeE, alignMap); A[i][7][j][k] += bias(nodeF, nodeE); } k++; } j++; } 
//System.out.println("Sentence " + i); i++; } /////////////////////// // // MALLET optimizer // /////////////////////// System.out.println(); System.out.println("*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*"); System.out.println(); System.out.println("Beginning convex optimization..."); System.out.println(); System.out.println("*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*"); System.out.println(); OptimizerExample optimizable = new OptimizerExample(weights, A, ePsGold, fPsGold); Optimizer optimizer = new LimitedMemoryBFGS(optimizable); boolean converged = false; try { converged = optimizer.optimize(); } catch (IllegalArgumentException e) { // This exception may be thrown if L-BFGS // cannot step in the current direction. // This condition does not necessarily mean that // the optimizer has failed, but it doesn't want // to claim to have succeeded... } catch (cc.mallet.optimize.OptimizationException e) { System.out.println(e.getMessage()); } for (int x = 0; x < weights.length; x++) { diff += (optimizable.getParameter(x) - weights[x]) * (optimizable.getParameter(x) - weights[x]); weights[x] = optimizable.getParameter(x); System.out.print(weights[x] + ", "); } System.out.println(); diff /= weights.length; System.out.println("Current difference: " + diff); } while (diff > 0.0005); //TESTING BILINGUAL PARSER Treebank bilingTestTreebankF = testTreebankF; Treebank bilingTestTreebankE = testTreebankE; Iterator<Tree> eTreesBling = testTreebankE.iterator(); Iterator<Tree> fTreesBling = testTreebankF.iterator(); boolean runningAveragesF = Boolean.parseBoolean(fOp.testOptions.evals.getProperty("runningAverages")); boolean runningAveragesE = Boolean.parseBoolean(eOp.testOptions.evals.getProperty("runningAverages")); AbstractEval pcfgLBf = new Evalb("pcfg LP/LR", runningAveragesF); AbstractEval factLBf = new Evalb("factor LP/LR", runningAveragesF); AbstractEval pcfgLBe = new Evalb("pcfg LP/LR", runningAveragesE); AbstractEval factLBe = new Evalb("factor LP/LR", runningAveragesE); int i = 0; 
Iterator<HashMap<Integer, ArrayList<Integer>>> alignIteratorTEST = testAlignments.iterator(); while (eTreesBling.hasNext() && fTreesBling.hasNext() && alignIteratorTEST.hasNext()) { HashMap<Integer, ArrayList<Integer>> alignMap = alignIteratorTEST.next(); Tree fTree = fTreesBling.next(); Tree eTree = eTreesBling.next(); List<? extends HasWord> sentenceF = Sentence.toCoreLabelList(fTree.yieldWords()); List<? extends HasWord> sentenceE = Sentence.toCoreLabelList(eTree.yieldWords()); LexicalizedParserQuery lpqE = (LexicalizedParserQuery) lpE.parserQuery(); LexicalizedParserQuery lpqF = (LexicalizedParserQuery) lpF.parserQuery(); lpqE.parse(sentenceE); lpqF.parse(sentenceF); List<ScoredObject<Tree>> kBestF = lpqF.getKBestPCFGParses(kF); List<ScoredObject<Tree>> kBestE = lpqE.getKBestPCFGParses(kE); int j = 0; int k = 0; double maxScore = -Double.MAX_VALUE; Tree bestFtree = null; Tree bestEtree = null; for (ScoredObject<Tree> eScoredObj : kBestE) { k = 0; for (ScoredObject<Tree> fScoredObj : kBestF) { eScoredObj.object().setSpans(); fScoredObj.object().setSpans(); HashMap<Tree, Tree> alignment = getHungarianAlignment(eScoredObj.object(), fScoredObj.object(), weights, alignMap); double currentScore = 0.0; for (Map.Entry entry : alignment.entrySet()) { Tree nodeF = (Tree) entry.getKey(); Tree nodeE = (Tree) entry.getValue(); currentScore += weights[0] * eScoredObj.score() / 1000; currentScore += weights[1] * fScoredObj.score() / 1000; currentScore += weights[2] * spanDiff(nodeF, nodeE); currentScore += weights[3] * numChildren(nodeF, nodeE); currentScore += weights[4] * insideBoth(nodeF, nodeE, alignMap); currentScore += weights[5] * insideSrcOutsideTgt(nodeF, nodeE, alignMap); currentScore += weights[6] * insideTgtOutsideSrc(nodeF, nodeE, alignMap); currentScore += weights[7] * bias(nodeF, nodeE); } if (currentScore > maxScore) { maxScore = currentScore; bestFtree = fScoredObj.object(); bestEtree = eScoredObj.object(); } k++; } j++; } i++; pcfgLBe.evaluate(bestEtree, 
eTree); factLBe.evaluate(bestEtree, eTree); pcfgLBf.evaluate(bestFtree, fTree); factLBf.evaluate(bestFtree, fTree); } System.out.println("------------------------"); System.out.println(" English Results "); System.out.println("------------------------"); System.out.println("PCFG labeled f1: " + pcfgLBe.getEvalbF1Percent()); System.out.println("Factored labeled f1: " + factLBe.getEvalbF1Percent()); System.out.println("------------------------"); System.out.println(" French Results "); System.out.println("------------------------"); System.out.println("PCFG labeled f1: " + pcfgLBf.getEvalbF1Percent()); System.out.println("Factored labeled f1: " + factLBf.getEvalbF1Percent()); System.out.println("------------------------"); System.out.println("Number of sentences too big: " + numBigSentences); }
From source file: KleinBilingualParser.java
/**
 * Feature: absolute difference between the two nodes' yield lengths
 * (number of leaves), scaled down by 100.
 *
 * @param nodeF French-side tree node
 * @param nodeE English-side tree node
 * @return |span(F) - span(E)| / 100 as a double
 */
private static double spanDiff(Tree nodeF, Tree nodeE) {
    int spanF = nodeF.getLeaves().size();
    int spanE = nodeE.getLeaves().size();
    return Math.abs(spanF - spanE) / 100.0;
}
From source file: KleinBilingualParser.java
private static HashMap<Tree, Tree> getHungarianAlignment(Tree eParseTree, Tree fParseTree, double[] weights, HashMap<Integer, ArrayList<Integer>> alignMap) { // remember to ignore the top two weights because they are monolingual features int numFrenchNodes = fParseTree.size() - fParseTree.getLeaves().size(); int numEnglishNodes = eParseTree.size() - eParseTree.getLeaves().size(); double[][] costMatrix = new double[numFrenchNodes][numEnglishNodes]; int i, j;/*from w w w .j a v a 2 s . c om*/ i = 0; for (Tree fSubTree : fParseTree) { if (!fSubTree.isLeaf()) { j = 0; for (Tree eSubTree : eParseTree) { if (!eSubTree.isLeaf()) { //IF IT GETS TOO SLOW DON'T COMPUTE WORD ALIGNMENT FEATURES FOR LARGE SENTENCES costMatrix[i][j] = weights[2] * spanDiff(fSubTree, eSubTree) + weights[3] * numChildren(fSubTree, eSubTree) + weights[7] * bias(fSubTree, eSubTree); if (numFrenchNodes < 50 && numEnglishNodes < 50) { costMatrix[i][j] += weights[4] * insideBoth(fSubTree, eSubTree, alignMap) + weights[5] * insideSrcOutsideTgt(fSubTree, eSubTree, alignMap) + weights[6] * insideTgtOutsideSrc(fSubTree, eSubTree, alignMap); } costMatrix[i][j] = 0 - costMatrix[i][j]; j++; } } i++; } } HungarianAlgorithm hungAlgSolver = new HungarianAlgorithm(costMatrix); int[] assignments = hungAlgSolver.execute(); HashMap<Tree, Tree> alignment = new HashMap<>(); i = 0; for (Tree fSubTree : fParseTree) { if (!fSubTree.isLeaf()) { j = 0; for (Tree eSubTree : eParseTree) { if (!eSubTree.isLeaf()) { if (j == assignments[i]) { alignment.put(fSubTree, eSubTree); } j++; } } i++; } } return alignment; }
From source file: EddyRoseDomainAdaptation.java
public static void main(String[] args) { boolean trainF = false; boolean trainE = false; boolean bitrainE = false; boolean bitrainF = false; boolean saveToSerializedFile = false; boolean saveToTextFile = false; String serializedInputFileOrUrl = null; String textInputFileOrUrl = null; String serializedOutputFileOrUrl = null; String textOutputFileOrUrl = null; String treebankPathF = null;//from ww w . j ava 2 s .co m Treebank testTreebankF = null; Treebank seqTestTreebank = null; Treebank tuneTreebankF = null; String testPathF = null; FileFilter testFilterF = null; String treebankPathE = null; Treebank testTreebankE = null; Treebank tuneTreebankE = null; String testPathE = null; FileFilter testFilterE = null; String seqTestPath = null; FileFilter seqTestFilter = null; String tunePath = null; FileFilter tuneFilter = null; FileFilter trainFilterF = null; FileFilter trainFilterE = null; String secondaryTreebankPath = null; double secondaryTreebankWeight = 1.0; FileFilter secondaryTrainFilter = null; String trainAlignFile = null; String testAlignFile = null; String bitrainPathE = null; FileFilter bitrainFilterE = null; String bitrainPathF = null; FileFilter bitrainFilterF = null; Treebank bitrainTreebankF = null; Treebank bitrainTreebankE = null; // variables needed to process the files to be parsed TokenizerFactory<? 
extends HasWord> tokenizerFactory = null; String tokenizerOptions = null; String tokenizerFactoryClass = null; String tokenizerMethod = null; boolean tokenized = false; // whether or not the input file has already been tokenized Function<List<HasWord>, List<HasWord>> escaper = null; String tagDelimiter = null; String sentenceDelimiter = null; String elementDelimiter = null; int argIndex = 0; if (args.length < 1) { log.info( "Basic usage (see Javadoc for more): java edu.stanford.nlp.parser.lexparser.LexicalizedParser parserFileOrUrl filename*"); return; } Options fOp = new Options(); Options eOp = new Options(); List<String> optionArgs = new ArrayList<>(); String encodingF = null; // while loop through option arguments while (!args[argIndex].equals("--") && argIndex < args.length && args[argIndex].charAt(0) == '-') { if (args[argIndex].equalsIgnoreCase("-train") || args[argIndex].equalsIgnoreCase("-trainTreebank")) { trainF = true; Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-train"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; treebankPathF = treebankDescription.first(); trainFilterF = treebankDescription.second(); } else if (args[argIndex].equalsIgnoreCase("-bitrain") || args[argIndex].equalsIgnoreCase("-bitrainTreebank")) { bitrainF = true; Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-bitrain"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; bitrainPathF = treebankDescription.first(); bitrainFilterF = treebankDescription.second(); } else if (args[argIndex].equalsIgnoreCase("-tLPP") && (argIndex + 1 < args.length)) { try { fOp.tlpParams = (TreebankLangParserParams) Class.forName(args[argIndex + 1]).newInstance(); } catch (ClassNotFoundException e) { log.info("Class not found: " + args[argIndex + 1]); throw new RuntimeException(e); } catch (InstantiationException e) { log.info("Couldn't instantiate: " + args[argIndex + 1] + ": " 
+ e.toString()); throw new RuntimeException(e); } catch (IllegalAccessException e) { log.info("Illegal access" + e); throw new RuntimeException(e); } argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-encoding")) { // sets encoding for TreebankLangParserParams // redone later to override any serialized parser one read in encodingF = args[argIndex + 1]; fOp.tlpParams.setInputEncoding(encodingF); fOp.tlpParams.setOutputEncoding(encodingF); argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-treebank") || args[argIndex].equalsIgnoreCase("-testTreebank") || args[argIndex].equalsIgnoreCase("-test")) { Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-test"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; testPathF = treebankDescription.first(); testFilterF = treebankDescription.second(); } else { int oldIndex = argIndex; argIndex = fOp.setOptionOrWarn(args, argIndex); optionArgs.addAll(Arrays.asList(args).subList(oldIndex, argIndex)); } System.out.println(argIndex + " " + args.length); } // end while loop through arguments for french argIndex++;//go to english arguments while (argIndex < args.length && args[argIndex].charAt(0) == '-') { if (args[argIndex].equalsIgnoreCase("-train") || args[argIndex].equalsIgnoreCase("-trainTreebank")) { trainE = true; Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-train"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; treebankPathE = treebankDescription.first(); trainFilterE = treebankDescription.second(); } else if (args[argIndex].equalsIgnoreCase("-bitrain") || args[argIndex].equalsIgnoreCase("-bitrainTreebank")) { bitrainE = true; Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-bitrain"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; bitrainPathE = treebankDescription.first(); bitrainFilterE = treebankDescription.second(); } 
else if (args[argIndex].equalsIgnoreCase("-treebank") || args[argIndex].equalsIgnoreCase("-testTreebank") || args[argIndex].equalsIgnoreCase("-test")) { Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-test"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; testPathE = treebankDescription.first(); testFilterE = treebankDescription.second(); } else if (args[argIndex].equalsIgnoreCase("-seqtest")) { Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-test"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; seqTestPath = treebankDescription.first(); seqTestFilter = treebankDescription.second(); } else if (args[argIndex].equalsIgnoreCase("-trainAlignFile")) { Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-trainAlignFile"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; trainAlignFile = treebankDescription.first(); } else if (args[argIndex].equalsIgnoreCase("-testAlignFile")) { Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-testAlignFile"); argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1; testAlignFile = treebankDescription.first(); } else { int oldIndex = argIndex; argIndex = eOp.setOptionOrWarn(args, argIndex); optionArgs.addAll(Arrays.asList(args).subList(oldIndex, argIndex)); } } // end while loop through arguments for english // if (!train && fOp.testOptions.verbose) { // StringUtils.logInvocationString(log, args); // } LexicalizedParser lpF; // always initialized in next if-then-else block LexicalizedParser lpE; //TRAIN A PARSER // so we train a parser using the treebank GrammarCompactor compactorF = null; if (fOp.trainOptions.compactGrammar() == 3) { compactorF = new ExactGrammarCompactor(fOp, false, false); } Treebank trainTreebankF = makeTreebank(treebankPathF, fOp, trainFilterF); fOp.testOptions.quietEvaluation = 
true; GrammarCompactor compactorE = null; if (eOp.trainOptions.compactGrammar() == 3) { compactorE = new ExactGrammarCompactor(eOp, false, false); } Treebank trainTreebankE = makeTreebank(treebankPathE, eOp, trainFilterE); eOp.testOptions.quietEvaluation = true; lpF = getParserFromTreebank(trainTreebankF, null, secondaryTreebankWeight, compactorF, fOp, tuneTreebankF, null); lpE = getParserFromTreebank(trainTreebankE, null, secondaryTreebankWeight, compactorE, eOp, tuneTreebankE, null); // the following has to go after reading parser to make sure // op and tlpParams are the same for train and test // THIS IS BUTT UGLY BUT IT STOPS USER SPECIFIED ENCODING BEING // OVERWRITTEN BY ONE SPECIFIED IN SERIALIZED PARSER if (encodingF != null) { fOp.tlpParams.setInputEncoding(encodingF); fOp.tlpParams.setOutputEncoding(encodingF); } if (bitrainFilterF != null || bitrainPathF != null) { if (bitrainPathF == null) { //? if (treebankPathF == null) { throw new RuntimeException("No bitrain treebank path specified..."); } else { log.info("No test treebank path specified. Using train path: \"" + treebankPathF + '\"'); bitrainPathF = treebankPathF; } } bitrainTreebankF = fOp.tlpParams.testMemoryTreebank(); bitrainTreebankF.loadPath(bitrainPathF, bitrainFilterF); } if (bitrainFilterE != null || bitrainPathE != null) { if (bitrainPathE == null) { if (treebankPathE == null) { throw new RuntimeException("No test treebank path specified..."); } else { log.info("No test treebank path specified. 
Using train path: \"" + treebankPathE + '\"'); bitrainPathE = treebankPathE; } } bitrainTreebankE = eOp.tlpParams.testMemoryTreebank(); bitrainTreebankE.loadPath(bitrainPathE, bitrainFilterE); } if (encodingF != null) { fOp.tlpParams.setInputEncoding(encodingF); fOp.tlpParams.setOutputEncoding(encodingF); } if (testFilterF != null || testPathF != null) { if (testPathF == null) { if (treebankPathF == null) { throw new RuntimeException("No test treebank path specified..."); } else { log.info("No test treebank path specified. Using train path: \"" + treebankPathF + '\"'); testPathF = treebankPathF; } } testTreebankF = fOp.tlpParams.testMemoryTreebank(); testTreebankF.loadPath(testPathF, testFilterF); } //generate sequioa treebank seqTestTreebank = fOp.tlpParams.testMemoryTreebank(); seqTestTreebank.loadPath(seqTestPath, seqTestFilter); fOp.trainOptions.sisterSplitters = Generics.newHashSet(Arrays.asList(fOp.tlpParams.sisterSplitters())); if (testFilterE != null || testPathE != null) { if (testPathE == null) { if (treebankPathE == null) { throw new RuntimeException("No test treebank path specified..."); } else { log.info("No test treebank path specified. 
Using train path: \"" + treebankPathE + '\"'); testPathE = treebankPathE; } } testTreebankE = eOp.tlpParams.testMemoryTreebank(); testTreebankE.loadPath(testPathE, testFilterE); } eOp.trainOptions.sisterSplitters = Generics.newHashSet(Arrays.asList(eOp.tlpParams.sisterSplitters())); ////////////////////// // // Self-Training // ////////////////////// MemoryTreebank selfTrainInitTreebank = new MemoryTreebank(); MemoryTreebank selfTrainFinalTreebank = new MemoryTreebank(); selfTrainInitTreebank.addAll(trainTreebankF); selfTrainFinalTreebank.addAll(trainTreebankF); LexicalizedParser fSelfTrainInit = getParserFromTreebank(selfTrainInitTreebank, null, secondaryTreebankWeight, compactorF, fOp, tuneTreebankF, null); int z = 0; boolean runningAveragesF = Boolean.parseBoolean(fOp.testOptions.evals.getProperty("runningAverages")); AbstractEval factLBf = new Evalb("factor LP/LR", runningAveragesF); for (Tree goldTree : testTreebankF) { List<? extends HasWord> sentence = Sentence.toCoreLabelList(goldTree.yieldWords()); Tree guessTree = fSelfTrainInit.parseTree(sentence); selfTrainFinalTreebank.add(guessTree); factLBf.evaluate(guessTree, goldTree); System.out.println("Self-training : " + (++z)); } LexicalizedParser fSelfTrainFinal = getParserFromTreebank(selfTrainFinalTreebank, null, secondaryTreebankWeight, compactorF, fOp, tuneTreebankF, null); EvaluateTreebank evaluatorH = new EvaluateTreebank(fSelfTrainFinal); double scoreF1 = evaluatorH.testOnTreebank(seqTestTreebank); System.out.println("------------------------"); System.out.println(" Self Train Results "); System.out.println("------------------------"); System.out.println("Test set F1: " + scoreF1); System.out.println("F1 on projected training data: " + factLBf.getEvalbF1Percent()); ////////////////////// ////////////////////// //PARALLEL ALIGNMENT FEATURE CALCULATION, CALCULATION OF 'A' MATRIX double[] weights = new double[8]; double diff; weights[0] = 0.01; weights[1] = -0.002; weights[2] = 0.002; weights[3] = 0.002; 
weights[4] = 0.002; weights[5] = 0.002; weights[6] = -0.002; weights[7] = -0.002; ArrayList<HashMap<Integer, ArrayList<Integer>>> bitrainAlignments = null; ArrayList<HashMap<Integer, ArrayList<Integer>>> testAlignments = null; //String alignFile="../../berkeleyaligner/output/test.align"; try { AlignmentProcessor trainAP = new AlignmentProcessor(trainAlignFile); bitrainAlignments = trainAP.createAlignments(); AlignmentProcessor testAP = new AlignmentProcessor(testAlignFile); testAlignments = testAP.createAlignments(); } catch (FileNotFoundException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } int kE = 10; int kF = 10; int numFeatures = 8; int numBigSentences = 0; do { diff = 0.0; Iterator<Tree> eTrees = bitrainTreebankE.iterator(); Iterator<Tree> fTrees = bitrainTreebankF.iterator(); Iterator<HashMap<Integer, ArrayList<Integer>>> alignIterator = bitrainAlignments.iterator(); numBigSentences = 0; //features are used in the order they are defined double A[][][][] = new double[bitrainTreebankE.size()][numFeatures][kE][kF]; int ePsGold[] = new int[bitrainTreebankE.size()]; int fPsGold[] = new int[bitrainTreebankF.size()]; int i = 0; while (eTrees.hasNext() && fTrees.hasNext() && alignIterator.hasNext()) { HashMap<Integer, ArrayList<Integer>> alignMap = alignIterator.next(); Tree fTree = fTrees.next(); Tree eTree = eTrees.next(); if (fTree.getLeaves().size() > 70 || fTree.getLeaves().size() > 70) { //System.out.println("Too big : " + i); numBigSentences++; fPsGold[i] = 3; ePsGold[i] = 3; i++; continue; } List<? extends HasWord> sentenceF = Sentence.toCoreLabelList(fTree.yieldWords()); List<? 
extends HasWord> sentenceE = Sentence.toCoreLabelList(eTree.yieldWords()); LexicalizedParserQuery lpqE = (LexicalizedParserQuery) lpE.parserQuery(); LexicalizedParserQuery lpqF = (LexicalizedParserQuery) lpF.parserQuery(); lpqE.parse(sentenceE); lpqF.parse(sentenceF); List<ScoredObject<Tree>> kBestF = lpqF.getKBestPCFGParses(kF); List<ScoredObject<Tree>> kBestE = lpqE.getKBestPCFGParses(kE); fPsGold[i] = 3; ePsGold[i] = 3; int j = 0; int k = 0; for (ScoredObject<Tree> eScoredObj : kBestE) { k = 0; for (ScoredObject<Tree> fScoredObj : kBestF) { eScoredObj.object().setSpans(); fScoredObj.object().setSpans(); HashMap<Tree, Tree> alignment = getHungarianAlignment(eScoredObj.object(), fScoredObj.object(), weights, alignMap); //had to reduce likelihood scores by factor of 10 to keep the optimizer working A[i][0][j][k] = eScoredObj.score() / 1000; A[i][1][j][k] = fScoredObj.score() / 1000; for (Map.Entry entry : alignment.entrySet()) { Tree nodeF = (Tree) entry.getKey(); Tree nodeE = (Tree) entry.getValue(); A[i][2][j][k] += spanDiff(nodeF, nodeE); A[i][3][j][k] += numChildren(nodeF, nodeE); A[i][4][j][k] += insideBoth(nodeF, nodeE, alignMap); A[i][5][j][k] += insideSrcOutsideTgt(nodeF, nodeE, alignMap); A[i][6][j][k] += insideTgtOutsideSrc(nodeF, nodeE, alignMap); A[i][7][j][k] += bias(nodeF, nodeE); } k++; } j++; } //System.out.println("Sentence " + i); i++; } /////////////////////// // // MALLET optimizer // /////////////////////// System.out.println(); System.out.println("*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*"); System.out.println(); System.out.println("Beginning convex optimization..."); System.out.println(); System.out.println("*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*"); System.out.println(); OptimizerExample optimizable = new OptimizerExample(weights, A, ePsGold, fPsGold); Optimizer optimizer = new LimitedMemoryBFGS(optimizable); boolean converged = false; try { converged = optimizer.optimize(); } catch (IllegalArgumentException e) { // This exception may be thrown if 
L-BFGS // cannot step in the current direction. // This condition does not necessarily mean that // the optimizer has failed, but it doesn't want // to claim to have succeeded... } catch (cc.mallet.optimize.OptimizationException e) { System.out.println(e.getMessage()); } for (int x = 0; x < weights.length; x++) { diff += (optimizable.getParameter(x) - weights[x]) * (optimizable.getParameter(x) - weights[x]); weights[x] = optimizable.getParameter(x); System.out.print(weights[x] + ", "); } System.out.println(); diff /= weights.length; System.out.println("Current difference: " + diff); } while (diff > 0.0005); //GENERATE TRAINING DATA USING KLEIN RERANKER //assumes the 'test' data from KleinBilingualParser.java is the unannotated data //that the reranker has to annotate. factLBf = new Evalb("factor LP/LR", runningAveragesF); MemoryTreebank eddyRoseFullTrainTreebank = new MemoryTreebank(); eddyRoseFullTrainTreebank.addAll(trainTreebankF); eddyRoseFullTrainTreebank.addAll(bitrainTreebankF); Treebank unannotTreebankF = testTreebankF; Treebank annotTreebankE = testTreebankE; Iterator<Tree> eTreesBling = unannotTreebankF.iterator(); Iterator<Tree> fTreesBling = annotTreebankE.iterator(); int i = 0; Iterator<HashMap<Integer, ArrayList<Integer>>> alignIteratorTEST = testAlignments.iterator(); while (eTreesBling.hasNext() && fTreesBling.hasNext() && alignIteratorTEST.hasNext()) { HashMap<Integer, ArrayList<Integer>> alignMap = alignIteratorTEST.next(); Tree fTree = fTreesBling.next(); Tree eTree = eTreesBling.next(); List<? 
extends HasWord> sentenceF = Sentence.toCoreLabelList(fTree.yieldWords()); LexicalizedParserQuery lpqF = (LexicalizedParserQuery) fSelfTrainFinal.parserQuery(); lpqF.parse(sentenceF); List<ScoredObject<Tree>> kBestF = lpqF.getKBestPCFGParses(kF); int j = 0; int k = 0; double maxScore = -Double.MAX_VALUE; Tree bestFtree = null; for (ScoredObject<Tree> fScoredObj : kBestF) { eTree.setSpans(); fScoredObj.object().setSpans(); HashMap<Tree, Tree> alignment = getHungarianAlignment(eTree, fScoredObj.object(), weights, alignMap); double currentScore = 0.0; for (Map.Entry entry : alignment.entrySet()) { Tree nodeF = (Tree) entry.getKey(); Tree nodeE = (Tree) entry.getValue(); currentScore += weights[0] * 0.0;//because gold standard tree is assumed to have probability 1 currentScore += weights[1] * fScoredObj.score() / 1000; currentScore += weights[2] * spanDiff(nodeF, nodeE); currentScore += weights[3] * numChildren(nodeF, nodeE); currentScore += weights[4] * insideBoth(nodeF, nodeE, alignMap); currentScore += weights[5] * insideSrcOutsideTgt(nodeF, nodeE, alignMap); currentScore += weights[6] * insideTgtOutsideSrc(nodeF, nodeE, alignMap); currentScore += weights[7] * bias(nodeF, nodeE); } if (currentScore > maxScore) { maxScore = currentScore; bestFtree = fScoredObj.object(); } k++; } i++; System.out.println("Reranker " + i); eddyRoseFullTrainTreebank.add(bestFtree); factLBf.evaluate(bestFtree, fTree); } LexicalizedParser lpEddyRose = getParserFromTreebank(eddyRoseFullTrainTreebank, null, secondaryTreebankWeight, compactorF, fOp, tuneTreebankF, null); EvaluateTreebank evaluator = new EvaluateTreebank(lpEddyRose); double eddyRoseF1 = evaluator.testOnTreebank(seqTestTreebank); System.out.println("------------------------"); System.out.println(" EddyRose Results "); System.out.println("------------------------"); System.out.println("Test set F1: " + eddyRoseF1); System.out.println("F1 on projected training data: " + factLBf.getEvalbF1Percent()); }
From source file:artinex.TypDep.java
public static void main(String[] args) { String str = "What is index in array"; TypDep parser = new TypDep(); Tree tree = parser.parse(str); List<Tree> leaves = tree.getLeaves(); // Print words and Pos Tags for (Tree leaf : leaves) { Tree parent = leaf.parent(tree); System.out.print(leaf.label().value() + "-" + parent.label().value() + " "); }/*from ww w. j av a 2s.co m*/ System.out.println(); //Type dependencies // Tree tree1 = str.get(TreeAnnotation.class); // Get dependency tree TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); GrammaticalStructure gs = gsf.newGrammaticalStructure(tree); Collection<TypedDependency> td = gs.typedDependenciesCollapsed(); System.out.println(td); }
From source file:at.ac.tuwien.inso.subcat.utility.sentiment.SentimentAnalyser.java
License:Open Source License
public SentimentBlock get(String str) { if (pipeline == null) { Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, parse, sentiment"); pipeline = new StanfordCoreNLP(props); }//from ww w . ja v a 2 s. c o m LinkedList<SentenceSentiment> sentiments = new LinkedList<SentenceSentiment>(); int[] classes = new int[5]; double positiveSum = 0; double somewhatPositiveSum = 0; double neutralSum = 0; double somewhatNegativeSum = 0; double negativeSum = 0; double positiveWSum = 0; double somewhatPositiveWSum = 0; double neutralWSum = 0; double somewhatNegativeWSum = 0; double negativeWSum = 0; int words = 0; Annotation annotation = pipeline.process(str); List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); for (CoreMap sentence : sentences) { Tree tree = sentence.get(SentimentCoreAnnotations.AnnotatedTree.class); int sentenceWordCount = tree.getLeaves().size(); // TODO: calculate it instead int predictedClass = RNNCoreAnnotations.getPredictedClass(tree); SimpleMatrix matrix = RNNCoreAnnotations.getPredictions(tree); classes[predictedClass]++; SentenceSentiment sentiment = new SentenceSentiment(matrix.get(0, 0), matrix.get(1, 0), matrix.get(2, 0), matrix.get(3, 0), matrix.get(4, 0), predictedClass, sentenceWordCount); positiveSum += sentiment.getPositive(); somewhatPositiveSum += sentiment.getSomewhatPositive(); neutralSum += sentiment.getNeutral(); somewhatNegativeSum += sentiment.getSomewhatNegative(); negativeSum += sentiment.getNegative(); positiveWSum += sentiment.getPositive() * sentenceWordCount; somewhatPositiveWSum += sentiment.getSomewhatPositive() * sentenceWordCount; neutralWSum += sentiment.getNeutral() * sentenceWordCount; somewhatNegativeWSum += sentiment.getSomewhatNegative() * sentenceWordCount; negativeWSum += sentiment.getNegative() * sentenceWordCount; words += sentenceWordCount; sentiments.add(sentiment); } double positiveMean = 0; double somewhatPositiveMean = 0; double neutralMean = 0; 
double somewhatNegativeMean = 0; double negativeMean = 0; double positiveWMean = 0; double somewhatPositiveWMean = 0; double neutralWMean = 0; double somewhatNegativeWMean = 0; double negativeWMean = 0; if (sentiments.size() > 0) { positiveMean = positiveSum / sentiments.size(); somewhatPositiveMean = somewhatPositiveSum / sentiments.size(); neutralMean = neutralSum / sentiments.size(); somewhatNegativeMean = somewhatNegativeSum / sentiments.size(); negativeMean = negativeSum / sentiments.size(); } if (words > 0) { positiveWMean = positiveWSum / words; somewhatPositiveWMean = somewhatPositiveWSum / words; neutralWMean = neutralWSum / words; somewhatNegativeWMean = somewhatNegativeWSum / words; negativeWMean = negativeWSum / words; } //System.out.println ("n:" + positiveMean + "," + somewhatPositiveMean + "," + neutralMean + "," + somewhatNegativeMean + "," + negativeMean); //System.out.println ("w:" + positiveWMean + "," + somewhatPositiveWMean + "," + neutralWMean + "," + somewhatNegativeWMean + "," + negativeWMean); SentimentBlock block = new SentimentBlock(sentiments, classes, positiveMean, somewhatPositiveMean, neutralMean, somewhatNegativeMean, negativeMean, positiveWMean, somewhatPositiveWMean, neutralWMean, somewhatNegativeWMean, negativeWMean, words); return block; }
From source file:cc.vidr.parseviz.ParseViz.java
License:Open Source License
public static void printTreeDot(Tree tree, StringBuilder sb) { sb.append("graph{\nnode[shape=none];\n"); printTreeDot(tree, sb, tree);//w w w . j a v a 2 s . c o m sb.append("{rank=same;\n"); for (Tree leaf : tree.getLeaves()) sb.append("n").append(leaf.nodeNumber(tree)).append(";\n"); sb.append("};\n"); sb.append("}\n"); }
From source file:Ceist.CeistView.java
License:Open Source License
/** * Displays the match results in a table with the matched parts * formatted./*from w ww . ja va 2s. com*/ * * @param m the matcher containing the match results * @param matchedTree the tree which was matched * @param showTagged whether to show POS tags or not * @return the HTML to be displayed in the table row */ private String[] getMatcherTableRow(TregexMatcher m, Tree matchedTree, boolean showTagged) { //List<Tree> allMatches = new ArrayList<Tree>(); // Find matches for templates String strQuestion = QuestionTemplate.getQuestionString(m, txtQuestionTemplate.getText()); String strAnswer = AnswerTemplate.getAnswerString(m, txtAnswerTemplate.getText()); // Display the full tree in which the match was found String strMatchAll = "<html>"; String lastRef = ""; for (Tree t : matchedTree.getLeaves()) { String nodeValue = t.nodeString(); if (nodeValue.startsWith("{Q")) { // This is a match for the question string String ref = nodeValue.substring(2, nodeValue.indexOf("}")); nodeValue = nodeValue.substring(nodeValue.indexOf("}") + 1); t.setValue(nodeValue); if (!ref.equals(lastRef)) lastRef = ref; else ref = ""; if (!showTagged) strMatchAll += "<sup>" + ref + "</sup><b><font color=green>" + nodeValue + "</font></b> "; else strMatchAll += "<sup>" + ref + "</sup><b><font color=green>" + nodeValue + "</font><font color=gray>/" + t.parent(matchedTree).nodeString() + "</font></b> "; } else if (nodeValue.startsWith("{A")) { // This is a match for the answer string String ref = nodeValue.substring(2, nodeValue.indexOf("}")); nodeValue = nodeValue.substring(nodeValue.indexOf("}") + 1); t.setValue(nodeValue); if (!ref.equals(lastRef)) lastRef = ref; else ref = ""; if (!showTagged) strMatchAll += "<sup>" + ref + "</sup><b>" + nodeValue + "</b> "; else strMatchAll += "<sup>" + ref + "</sup><b>" + nodeValue + "<font color=gray>/" + t.parent(matchedTree).nodeString() + "</font></b> "; } else { // Normal unmatched text if (!showTagged) strMatchAll += nodeValue + " "; else strMatchAll 
+= nodeValue + "<font color=gray>/" + t.parent(matchedTree).nodeString() + "</font> "; } } strMatchAll += "</html>"; return new String[] { strMatchAll, strQuestion, strAnswer }; }
From source file:com.project.NLP.Requirement.PhrasesIdentification.java
/** * method to get the negative characters from the sentence * * @return arrayList of negative words in a sentence which are denoted by RB * and CC//from w ww . jav a2s . com */ public ArrayList NegativeSentenceDetection() { String phraseNotation = "RB|CC";//@" + phrase + "! << @" + phrase; TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(sTree); ArrayList negativeLists = new ArrayList(); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); Tree[] innerChild = match.children(); for (Tree inChild : innerChild) { negativeLists.add(inChild.getLeaves().get(0).yieldWords().get(0).word()); } } return negativeLists; }