List of usage examples for org.apache.commons.collections4.map.MultiValueMap#containsValue
public boolean containsValue(final Object key, final Object value)
From source file: pltag.corpus.ConnectionPathCalculator.java
/** * From all the nodes that are in the set of connection nodes, find those that have * larger smallest origins than the current leaf number, and sort them by their smallest * origins, and put them into a MultiValueHashMap. This map will now contain as many keys * as prediction trees needed to connect the current word. The values of these keys contain * the nodes that are needed in the prediction tree to achieve connectivity. * (+ one additional node for the head node???, all arguments before spine? that should be * in prediction tree build procedure.)/*from w w w.ja va 2 s . c o m*/ * * @param currentLeafNumber * @return */ private MultiValueMap findNodesWithGreaterLeafnumbers(int currentLeafNumber) { MultiValueMap<Integer, Integer> connectionNodes = new MultiValueMap(); //deleteUnary(connectedNodes); int leafToBeConnectedNumber = currentLeafNumber; // int leafToBeConnectedNumber = Integer.parseInt(currentLeafNumber); for (Integer node : connectedNodes) { Integer innerNodeLowestUp = stringTree.getLowestOrigin(node, stringTree.originUp); // int innerNodeLowestUp = Integer.parseInt(stringTree.getLowestOrigin(node, stringTree.originUp)); Integer inld = stringTree.getLowestOrigin(node, stringTree.originDown); if (innerNodeLowestUp > leafToBeConnectedNumber && !connectionNodes.containsValue(innerNodeLowestUp, node)) { //check whether that node can be accounted for by one of the connection trees that were already found? 
connectionNodes.put(innerNodeLowestUp, node); } if (inld != null) { Integer innerNodeLowestDown = stringTree.getLowestOrigin(node, stringTree.originDown); // int innerNodeLowestDown = Integer.parseInt(stringTree.getLowestOrigin(node, stringTree.originDown)); if (innerNodeLowestDown > leafToBeConnectedNumber && !connectionNodes.containsValue(innerNodeLowestDown, node)) // && !connectionNodes.containsValue(innerNodeLowestDown + "", node)) { connectionNodes.put(innerNodeLowestDown, node); // connectionNodes.put(innerNodeLowestDown + "", node); } } } return connectionNodes; }
From source file: pltag.parser.Lexicon.java
/** * Reads the lexicon file and sorts entries by their type (arg or mod). * For each of those types, it creates a MultiValueMap that's keyed on the lexeme, and whose * values are the Strings that represent the trees. * //from www . j ava 2s. co m * @param lines * @return a MultiValueMap Array, with the arg string lexicon in first position, and * mod string lexicon in second position. */ protected MultiValueMap<String, ?>[] read(String[] lines) { MultiValueMap<String, String> modLexentriesString = new MultiValueMap(); MultiValueMap<String, String> argLexentriesString = new MultiValueMap(); for (String line : lines) { String[] lexcontent = Utils.getCatInventory(line.trim(), opts.combineNNVBcats).split("\t+"); int freq = Integer.parseInt(lexcontent[0]); if (lexcontent[3].contains("<>")) { String endswithLex = lexcontent[3].substring(0, lexcontent[3].indexOf("<>")); String anchor = endswithLex.substring(endswithLex.lastIndexOf(" ") + 1); if (!biWordMap.containsKey(anchor)) { biWordMap.put(anchor, freq); } else { biWordMap.put(anchor, biWordMap.get(anchor) + freq);//*/ } if (lexcontent[3].contains("1_1)")) { endswithLex = lexcontent[3].substring(0, lexcontent[3].indexOf("1_1)") - 1); anchor += "%" + endswithLex.substring(endswithLex.lastIndexOf(" ") + 1); if (!biWordMap.containsKey(anchor)) { biWordMap.put(anchor, freq); } else { biWordMap.put(anchor, biWordMap.get(anchor) + freq);//*/ } anchor = "UNK%" + endswithLex.substring(endswithLex.lastIndexOf(" ") + 1); if (!biWordMap.containsKey(anchor)) { biWordMap.put(anchor, 1); } else { biWordMap.put(anchor, biWordMap.get(anchor) + 1);//*/ } if (!biWordMap.containsKey("UNK")) { biWordMap.put("UNK", 1); } else { biWordMap.put("UNK", biWordMap.get("UNK") + 1); } } } if (lexcontent.length < 4) { if (opts.verbose) { System.out.println("wrong lex"); } } lexcontent = lexEntryRemoveDigits(lexcontent); int baumAnz = Integer.parseInt(lexcontent[0]); if (lexcontent[0].equals("1")) { lexcontent[0] = "0"; //continue; } else if 
(!opts.freqBaseline) { lexcontent[0] = "1"; } String word = lexcontent[1]; // String wordNoSemantics = stripSemanticFrame(lexcontent[1]); String val = lexcontent[0] + "\t" + lexcontent[3]; if (lexcontent[2].equals("ARG")) { // if (!lexcontent[1].equals("NUM") || !argLexentriesString.containsValue(lexcontent[1], lexcontent[0].toString() + "\t" + lexcontent[3].toString())) if (!lexcontent[1].equals("NUM") || !argLexentriesString.containsValue(word, lexcontent[0].toString() + "\t" + lexcontent[3].toString())) { // argLexentriesString.put(lexcontent[1], val); argLexentriesString.put(word, val); } } else if (lexcontent[2].equals("MOD")) { // if (!lexcontent[1].equals("NUM") || !modLexentriesString.containsValue(lexcontent[1], lexcontent[0].toString() + "\t" + lexcontent[3].toString())) if (!lexcontent[1].equals("NUM") || !modLexentriesString.containsValue(word, lexcontent[0].toString() + "\t" + lexcontent[3].toString())) { modLexentriesString.put(word, val); // modLexentriesString.put(lexcontent[1], val); } } else { System.err.println("Incorrect Lexicon format: line " + line); } // String posword = getPosFromTreeString(lexcontent[3], lexcontent[1]).toLowerCase(); String posword = getPosFromTreeString(lexcontent[3], word).toLowerCase(); if (posTagNo.containsKey(posword)) { posTagNo.put(posword, posTagNo.get(posword) + baumAnz); } else { posTagNo.put(posword, baumAnz); } if (noOfTrees.containsKey(val)) { this.noOfTrees.put(val, noOfTrees.get(val) + baumAnz); } else { this.noOfTrees.put(val, baumAnz); } } MultiValueMap<String, String>[] entries = new MultiValueMap[2]; entries[0] = argLexentriesString; entries[1] = modLexentriesString; return entries; }
From source file: pltag.parser.ParsingTask.java
/**
 * Combines candidate trees with the prefix trees of the previous chart slice and returns the
 * resulting chart entries that survive beam pruning.
 * <p>
 * For every chart entry obtained from {@code getLastSlice(chartindex, chart)}:
 * <ul>
 * <li>the candidate trees are either used as given or, when not training and
 *     {@code posOfNextWord} is non-empty (prediction/shadow trees), filtered through
 *     {@code superTagger.superTag(trees, chartEntry, posOfNextWord)};</li>
 * <li>the tree states of each candidate tree are computed once via
 *     {@code getTreeStateForTree} and cached in a local map;</li>
 * <li>a candidate carrying a shadow index is skipped when the chart entry's tree state
 *     already holds more than two shadow trees, and integration is skipped when
 *     {@code combineTwoShadowTreesInARow} reports true;</li>
 * <li>otherwise {@code integrate(...)} is called, each result is expanded with
 *     {@code makeExpansions}, and every build block is either dropped (when
 *     {@code didNotUseShadow} is true) or linked to the previous chart entry and, unless
 *     training without the probability model, given its probability.</li>
 * </ul>
 * Pruning: with {@code opts.pruneUsingScores} an entry is kept when its best score exceeds
 * the running best minus {@code beamWidth}; otherwise the comparison uses the best
 * probability plus {@code computeLAP(tree)} (added only for shadow trees outside training),
 * and during training every entry is kept.
 * <p>
 * NOTE(review): in this excerpt the method signature is only visible in commented-out form;
 * the parameter descriptions below cover the names the body actually reads -- confirm
 * against the real declaration.
 * NOTE(review): {@code combinations} is created anew for every tree state and queried with
 * {@code containsKey} before anything is put into it, so the "already known fringe" branch
 * looks unreachable as written -- confirm whether the map was meant to live in an outer scope.
 * NOTE(review): {@code newEntries.add(ce)} sits inside the build-block loop, so an entry
 * with several build blocks may be added more than once -- confirm this is intended.
 *
 * @param trees the candidate trees to combine with the previous slice
 * @param chart the chart, passed on to {@code getLastSlice}, {@code match} and {@code integrate}
 * @param chartindex index of the slice being built; also passed to the integration helpers
 * @param origPosTags passed through to {@code match}, {@code integrate} and
 *        {@code ParserOperation.combine}
 * @param posOfNextWord empty string for elementary trees; non-empty marks prediction trees
 *        and enables the supertagger outside training
 * @param superTagger used to pre-select prediction trees
 * @param beamWidth width of the pruning beam relative to the best value seen so far
 * @return the list of newly created chart entries that passed pruning
 */
@SuppressWarnings("unchecked") // private ArrayList<ChartEntry> combineTrees(Collection<ElementaryStringTree> trees, Chart chart, // Collection<ChartEntry> lastSliceCopy, short chartindex, String[] origPosTags, double startTime, String posOfNextWord, SuperTagger superTagger, int beamWidth, boolean endfOfSent, int timestamp) { ArrayList<ChartEntry> newEntries = new ArrayList<ChartEntry>(); // try to integrate each tree (some may have two alternative fringes) // with prefix trees from prev slice. // add successful combinations to chart. ArrayList<ChartEntry> lastSliceCopy = getLastSlice(chartindex, chart); HashMap<ElementaryStringTree, ArrayList<TreeState>> treeStateMap = new HashMap<ElementaryStringTree, ArrayList<TreeState>>(); double totalTimeSuperTag = 0.0; int a = 0; int b = 0; double bestprob = Double.NEGATIVE_INFINITY; for (ChartEntry chartEntry : lastSliceCopy) { a++; Collection<ElementaryStringTree> selectedTrees; double start = System.currentTimeMillis(); //System.out.print(trees.size()+" "); // call the supertagger only on prediction trees (when trees are elementary trees, posOfNextWord is always "") boolean shadowTree = !posOfNextWord.equals(""); // selectedTrees = opts.train || shadowTree ? superTagger.superTag(trees, chartEntry, posOfNextWord) : trees ; selectedTrees = opts.train || !shadowTree ? 
trees : superTagger.superTag(trees, chartEntry, posOfNextWord); // selectedTrees = trees; //System.out.println(selectedTrees.size()); totalTimeSuperTag += (System.currentTimeMillis() - start); for (ElementaryStringTree tree : selectedTrees) { b++; ArrayList<TreeState> elemtreeStates; if (treeStateMap.containsKey(tree)) { elemtreeStates = treeStateMap.get(tree); } else { elemtreeStates = getTreeStateForTree(tree, chartindex); treeStateMap.put(tree, elemtreeStates); } for (TreeState elemtreetreeState : elemtreeStates) { MultiValueMap<String, ParserOperation> combinations = new MultiValueMap(); // if (System.currentTimeMillis() - startTime > 60000) return newEntries; if (tree.hasShadowInd() && chartEntry.getTreeState().getShadowTrees().size() > 2) { continue; } TreeState treeState = chartEntry.getTreeState(); if (opts.verbose && treeState.getUnaccessibles().isEmpty() && treeState.getFutureFringe().getNext() != null) { LogInfo.error("why not expanded???"); } String fringeString = treeState.getFringe().toString(); List<ChartEntry> ces = new ArrayList<ChartEntry>(); if (combinations.containsKey(fringeString)) { Collection<ParserOperation> operations = combinations.getCollection(fringeString); if (!elemtreetreeState.getRootNode().isShadow()) { ces.addAll(match(tree, elemtreetreeState, chart, chartEntry, origPosTags, chartindex)); } if (operations.size() == 1 && operations.contains(null)) { //don't need to do anything because this prefix tree does not integrate with prefix tree //need to do this at tree level. 
} else { for (ParserOperation operation : operations) { if (operation == null || operation == ParserOperation.verify) { continue; } else { ces.addAll(operation.combine(model, opts, words, origPosTags, treeState, elemtreetreeState, tree, chartindex)); } } } } else {//*/ combinations.put(fringeString, null); if (combineTwoShadowTreesInARow(chartEntry, elemtreetreeState)) { continue; } //***************** ces = integrate(treeState, elemtreetreeState, tree, chart, chartEntry, origPosTags, chartindex); //***************** } // clean up results for (ChartEntry cefirst : ces) { ArrayList<ChartEntry> expandedCEs = makeExpansions(cefirst); for (ChartEntry ce : expandedCEs) { for (BuildBlock bb : ce.getBuildBlocks()) { //System.out.println(bb.toString()+ bb.getIpi()+"ELEM:" + tree); if (didNotUseShadow(chartEntry, bb, bb.getIpi(), chartindex, tree)) { LinkedList<BuildBlock> list = new LinkedList<BuildBlock>(); list.add(bb); // ce.getTreeState().getFutureFringe().getBBHist().remove(bb); // TODO: FIX ce.getTreeState().getFutureFringe().getBBHist().remove(list); // TODO: FIX continue; } bb.setPrevChartEntry(chartEntry); if (!opts.train || opts.useProbabilityModel) { bb.retrieveProbability(model.getParams(), opts.freqBaseline); bb.removeElemTree(); // bb.removeFreqCounter(); } if (!elemtreetreeState.getShadowTrees().isEmpty()) { ShadowStringTree shadowt = ce.getTreeState().getShadowTrees() .get(ce.getTreeState().getShadowTrees().size() - 1); shadowt.setPredictProb(bb.getProbability()); } if (!combinations.containsValue(fringeString, bb.getOperation())) { combinations.put(fringeString, bb.getOperation()); } //System.out.print("\n"+ce.getTreeState().getFringe()); if (!opts.train || opts.useProbabilityModel) { // double vlap = computeLAP(tree); ce.updateProbs(ce.getTreeState().getFutureFringe(), 0); } //System.out.println(ce.getBestProbability()+"\t"+ce+"\n"+elemtreetreeState+"\n"+tree+a+" "+b+"\n---\n"); if (opts.pruneUsingScores) { if (ce.getBestScore() > bestprob) { bestprob = 
ce.getBestScore(); } if (ce.getBestScore() > bestprob - beamWidth) { newEntries.add(ce); } } else { double bestPropWithVlap = ce.getBestProbability() + (opts.train || !shadowTree ? 0.0d : computeLAP(tree)); if (bestPropWithVlap > bestprob) { bestprob = bestPropWithVlap; } if (opts.train || bestPropWithVlap > bestprob - beamWidth) { newEntries.add(ce); } } } // for each build block } // for each expanded chart entry } // for each new chart entry } // for each elementary tree state (fringes) ALWAYS ONE } // for each elementary tree } // for each chart entry // if (opts.timeProfile && totalTimeSuperTag > 100) // { // Utils.log("supertagtime: " + totalTimeSuperTag / 1000 + "\t"); // } return newEntries; }