List of usage examples for org.apache.commons.collections4.map MultiValueMap put
@Override @SuppressWarnings("unchecked") public Object put(final K key, final Object value)
From source file:com.farhad.ngram.lang.detector.util.FileTools.java
public MultiValueMap readFile(String path) { //Map<String, String> corpus=new HashMap<>(); MultiValueMap corpus = new MultiValueMap(); try {/*from w w w .j ava2 s .com*/ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF8")); String sCurrentLine; while ((sCurrentLine = br.readLine()) != null) { //System.out.println(sCurrentLine); String[] parts = sCurrentLine.split(",'"); String key = parts[0]; String text = parts[1]; text = text.substring(0, text.length() - 1); // replace spaces with _ // text=text.replaceAll("\\s+", "_").toLowerCase(); //replace number and punctuations with blankspace text = text.replaceAll("\\p{Punct}+", ""); text = text.replaceAll("\\d+", ""); text = text.replaceAll("\\s+", " "); // System.err.println(key + "| "+ text); corpus.put(key, text.toLowerCase()); // } } catch (IOException e) { e.printStackTrace(); } return corpus; }
From source file:info.rmapproject.webapp.service.DataDisplayServiceImpl.java
/**
 * Builds a visualization Graph for a DiSCO: the DiSCO node itself, its creator,
 * its provider, its aggregated resources, and (if the total edge count stays
 * within maxObjGraphRelationships) the non-literal related statements.
 * Returns null when the graph would exceed the size limit.
 */
@Override
public Graph getDiSCOGraph(DiSCODTO discoDTO) throws Exception {
    Graph graph = graphFactory.newGraph();
    String sDiscoUri = discoDTO.getUri().toString();
    graph.addNode(sDiscoUri, sDiscoUri, discoNodeType);
    // creator edge (only when the creator value is a URI)
    if (WebappUtils.isUri(discoDTO.getCreator())) {
        graph.addNode(discoDTO.getCreator(), agentNodeType);
        graph.addEdge(sDiscoUri, discoDTO.getCreator(), DCTERMS.CREATOR.toString());
    }
    // provider edge; node type derived from the provider's rdf:types in this DiSCO's context
    if (WebappUtils.isUri(discoDTO.getProviderId())) {
        List<URI> rdfTypes = rmapService.getResourceRdfTypesInContext(new URI(discoDTO.getProviderId()),
                discoDTO.getUri());
        String targetNodeType = WebappUtils.getNodeType(rdfTypes);
        graph.addNode(discoDTO.getProviderId(), targetNodeType);
        graph.addEdge(sDiscoUri, discoDTO.getProviderId(), RMAP.PROVIDERID.toString());
    }
    // one ore:aggregates edge per aggregated resource
    for (URI aggregate : discoDTO.getAggregatedResources()) {
        List<URI> rdfTypes = rmapService.getResourceRdfTypesInContext(aggregate, discoDTO.getUri());
        String targetNodeType = WebappUtils.getNodeType(rdfTypes);
        graph.addNode(aggregate.toString(), targetNodeType);
        graph.addEdge(sDiscoUri, aggregate.toString(), Terms.ORE_AGGREGATES_PATH);
    }
    List<RMapTriple> triples = discoDTO.getRelatedStatements();
    List<RMapTriple> filteredTriples = new ArrayList<RMapTriple>();
    List<RMapTriple> labelTriples = new ArrayList<RMapTriple>();
    List<String> labelFlds = Arrays.asList(this.labelTypes.split(","));
    MultiValueMap<String, URI> typeMap = new MultiValueMap<String, URI>();
    // sort the triples into lists that will be used for graph creation:
    // - filteredTriples: non-rdf:type, non-literal statements (become edges)
    // - typeMap: rdf:type statements keyed by subject (used to pick node types)
    // - labelTriples: literal statements whose predicate is a configured label field
    for (RMapTriple triple : triples) {
        RMapResource subj = triple.getSubject();
        RMapIri pred = triple.getPredicate();
        RMapValue obj = triple.getObject();
        boolean isType = pred.toString().equals(RDF.TYPE.toString());
        if (!isType && !(obj instanceof RMapLiteral)) {
            filteredTriples.add(triple);
        }
        if (isType && obj instanceof RMapIri) {
            typeMap.put(subj.toString(), new URI(obj.toString()));
        }
        if (obj instanceof RMapLiteral && labelFlds.contains(pred.toString())) {
            labelTriples.add(triple);
        }
    }
    triples = null; // release; the source list may be large
    if ((filteredTriples.size() + graph.getEdges().size()) <= maxObjGraphRelationships) {
        for (RMapTriple triple : filteredTriples) {
            String subject = triple.getSubject().toString();
            String predicate = triple.getPredicate().toString();
            String object = triple.getObject().toString();
            List<URI> subjTypes = null;
            List<URI> objTypes = null;
            // NOTE(review): typeMap is assigned above and cannot be null here; the
            // typeMap != null guards are dead but preserved verbatim.
            if (typeMap != null && typeMap.getCollection(subject) != null) {
                subjTypes = new ArrayList<URI>(typeMap.getCollection(subject));
            }
            if (typeMap != null && typeMap.getCollection(object) != null) {
                objTypes = new ArrayList<URI>(typeMap.getCollection(object));
            }
            graph.addNode(subject, WebappUtils.getNodeType(subjTypes));
            graph.addNode(object, WebappUtils.getNodeType(objTypes));
            graph.addEdge(subject, object, predicate);
        }
        // make node labels from label triples within the graph
        for (RMapTriple triple : labelTriples) {
            if (graph.getNodes().containsKey(triple.getSubject().toString())) {
                graph.getNodes().get(triple.getSubject().toString()).setLabel(triple.getObject().toString());
            }
        }
    } else {
        // don't do graph because it's too large and will just be an unbearable mess!!
        graph = null;
    }
    return graph;
}
From source file:org.lockss.plugin.silverchair.PostHttpClientUrlConnection.java
public void storeResponseHeaderInto(Properties props, String prefix) { // store all header properties (this is the only way to iterate) // first collect all values for any repeated headers. MultiValueMap<String, String> map = new MultiValueMap<String, String>(); Header[] headers = method.getResponseHeaders(); for (int ix = 0; ix < headers.length; ix++) { Header hdr = headers[ix];/*w ww . ja v a 2 s .co m*/ String key = hdr.getName(); String value = hdr.getValue(); if (value != null) { // only store headers with values // qualify header names to avoid conflict with our properties if (key == null) { key = "header_" + ix; } String propKey = (prefix == null) ? key : prefix + key; if (!singleValuedHdrs.isEmpty() && singleValuedHdrs.contains(key.toLowerCase())) { map.remove(propKey); } map.put(propKey, value); } } // now combine multiple values into comma-separated string for (String key : map.keySet()) { Collection<String> val = map.getCollection(key); props.setProperty(key, ((val.size() > 1) ? StringUtil.separatedString(val, ",") : CollectionUtil.getAnElement(val))); } }
From source file:pltag.corpus.ConnectionPathCalculator.java
/**
 * From all the nodes that are in the set of connection nodes, find those that have
 * larger smallest origins than the current leaf number, and sort them by their smallest
 * origins into a MultiValueMap. The map then has one key per prediction tree needed to
 * connect the current word; each key's values are the nodes required in that prediction
 * tree to achieve connectivity.
 *
 * @param currentLeafNumber the leaf (word position) currently being connected
 * @return a MultiValueMap from smallest-origin number to the node ids needing it
 */
private MultiValueMap findNodesWithGreaterLeafnumbers(int currentLeafNumber) {
    MultiValueMap<Integer, Integer> connectionNodes = new MultiValueMap();
    int leafToBeConnectedNumber = currentLeafNumber;
    for (Integer node : connectedNodes) {
        Integer innerNodeLowestUp = stringTree.getLowestOrigin(node, stringTree.originUp);
        Integer inld = stringTree.getLowestOrigin(node, stringTree.originDown);
        // NOTE(review): innerNodeLowestUp is unboxed in the comparison below without a
        // null check, while the originDown value IS null-checked — presumably originUp
        // is always present for connected nodes; confirm, otherwise this can NPE.
        if (innerNodeLowestUp > leafToBeConnectedNumber
                && !connectionNodes.containsValue(innerNodeLowestUp, node)) {
            // check whether that node can be accounted for by one of the connection
            // trees that were already found?
            connectionNodes.put(innerNodeLowestUp, node);
        }
        if (inld != null) {
            Integer innerNodeLowestDown = stringTree.getLowestOrigin(node, stringTree.originDown);
            if (innerNodeLowestDown > leafToBeConnectedNumber
                    && !connectionNodes.containsValue(innerNodeLowestDown, node)) {
                connectionNodes.put(innerNodeLowestDown, node);
            }
        }
    }
    return connectionNodes;
}
From source file:pltag.corpus.TagCorpus.java
/**
 * Reconstructs the lexentries back into a sentence tree, calculates the connection
 * path, and generates the predictive lexicon entries. Also associates traces with
 * fillers and emits the gold-standard / lexicon output (to a single file or
 * individual files depending on options). Too much responsibility!
 *
 * @param filename     source corpus file name (used in example labels)
 * @param treeno       sentence number within the file
 * @param tree         the original Penn treebank tree (for error checking)
 * @param elementTrees the elementary trees extracted for this sentence
 * @return the list of corpus StringTrees built (currently just the sentence tree)
 * @throws IOException if writing the output fails
 */
private List<StringTree> reconstructAndBuildLexicon(String filename, int treeno, PennTree tree,
        List<ElementaryStringTree> elementTrees) throws IOException {
    StopWatchSet.begin("reconstructAndBuildLexicon");
    CompositeStringTree sentenceTree = new CompositeStringTree(treeno, opts.useSemantics);
    // indexed by leaf number; allow for a max of 15 deleted words
    ElementaryStringTree[] sentenceWordLex2 = new ElementaryStringTree[elementTrees.size() + 15];
    List<ElementaryStringTree> elementaryTrees = new ArrayList<ElementaryStringTree>();
    HashMap<String, ElementaryStringTree> fillers = new HashMap<String, ElementaryStringTree>();
    MultiValueMap<String, ElementaryStringTree> traces = new MultiValueMap<String, ElementaryStringTree>();
    ArrayList<StringTree> allCorpus = new ArrayList<StringTree>();
    for (ElementaryStringTree elementaryTree : elementTrees) {
        elementaryTree.simplifyCats();
        if (elementaryTree.isVerificTree()) {
            sentenceTree.treeString += " " + elementaryTree.treeString;
        } else {
            // integrate the elementary tree into the growing sentence tree
            sentenceTree.integrate(elementaryTree);
        }
        // record which elementary tree covers each leaf position
        for (Integer leaf : elementaryTree.getLeaves(elementaryTree.getRoot(), new ArrayList<Integer>())) {
            if (leaf >= sentenceWordLex2.length) {
                // NOTE(review): only logs — the out-of-range assignment below still executes
                LogInfo.error("Error while reconstructing and building lexicon");
            }
            sentenceWordLex2[leaf] = elementaryTree;
        }
        elementaryTrees.add(elementaryTree);
        // associate and store traces and fillers
        String fillerPosition = elementaryTree.getFiller();
        if (!fillerPosition.equals("")) {
            fillers.put(fillerPosition, elementaryTree);
        }
        String traceposition = elementaryTree.getTrace();
        if (!traceposition.equals("")) {
            traces.put(traceposition, elementaryTree);
        }
    } // for each elementaryTree
    sentenceTree.joinUnConnected();
    // massage lexicon entries
    elementaryTrees = traceAndFillerTreatment(elementaryTrees, sentenceTree, sentenceWordLex2, traces, fillers);
    allCorpus.add(sentenceTree);
    List<String> errors = errorTracker.leafCheck(sentenceTree, tree, filename);
    listErrors.addAll(errors);
    attachTraces(elementaryTrees, sentenceTree, sentenceWordLex2);
    if (errors.isEmpty()) {
        sentenceTree.removeUnaryNodes(sentenceTree.getRoot());
    }
    // calculate connection path; bail out early if the tree never connected
    if (sentenceTree.treeString.startsWith("Unconnected")) {
        if (opts.outputEmptyExamples) {
            if (opts.examplesInSingleFile) {
                PltagExample example = new PltagExample(String.format("Example:%s-sent_%s", filename, treeno));
                example.setGoldStandard("NOT PARSED\nNOT PARSED\nNOT PARSED");
                example.setPredLexicon("\n");
                printOutput(example.toString());
            } else {
                printGoldStandard(String.format("Example_%s-sent_%s\n%s\n", filename, treeno,
                        "NOT PARSED\nNOT PARSED\nNOT PARSED"));
            }
        }
        return allCorpus;
    }
    ConnectionPathCalculator cpc = new ConnectionPathCalculator(sentenceTree, sentenceWordLex2,
            opts.useSemantics);
    cpc.calculateConnectionPath(sentenceTree.root, Integer.MIN_VALUE, lexicon);
    HashMap<Integer, Integer> newList = cpc.getNoOfSources();
    cpc.combinePredictedTreesFromSameOrigin();
    // accumulate prediction-source counts for buckets 0..5
    for (int i = 0; i < 6; i++) {
        fullList.put(i, fullList.get(i) + newList.get(i));
    }
    // add predictive lexicon entries
    StringBuilder predLexStr = new StringBuilder();
    for (StringTree predTree : cpc.getPredictedLexEntries()) {
        if (verbose) {
            LogInfo.logs("TagCorpus : reconstructAndBuildLex: " + predTree.print());
        }
        predLexStr.append(predTree.print()).append("\n");
    }
    if (predLexStr.length() > 0)
        predLexStr.deleteCharAt(predLexStr.length() - 1); // remove last \n
    // output the example (we assume there were no errors in creating the gold standard tree)
    if (errors.isEmpty()) {
        if (opts.examplesInSingleFile) { // do the necessary conversions
            PltagExample example = new PltagExample(String.format("Example:%s-sent_%s", filename, treeno));
            example.setGoldStandard(sentenceTree.goldStandardToString());
            // shell out to extract the lexicon; single quotes in the input are escaped for sh
            String cmd = String.format("printf \"%%b\" '%s' | resources/extractLexicon.sh",
                    getNiceLexicon(elementaryTrees).replaceAll("'", "'\\\\''"));
            String cmdArray[] = { "/bin/sh", "-c", cmd };
            example.setLexicon(Utils.executeCmd(cmdArray));
            cmd = String.format("printf \"%%b\" '%s' | resources/extractPredLexicon.sh",
                    predLexStr.toString().replaceAll("'", "'\\\\''"));
            cmdArray[2] = cmd;
            example.setPredLexicon(Utils.executeCmd(cmdArray));
            printOutput(example.toString());
        } else {
            if (opts.outputIndividualFiles)
                printGoldStandard(sentenceTree.goldStandardToString());
            else
                printGoldStandard(String.format("Example_%s-sent_%s\n%s\n", filename, treeno,
                        sentenceTree.goldStandardToString()));
            printLex(predLexStr.toString());
            printLex(getNiceLexicon(elementaryTrees));
        }
    } else if (opts.outputEmptyExamples) {
        if (opts.examplesInSingleFile) {
            PltagExample example = new PltagExample(String.format("Example:%s-sent_%s", filename, treeno));
            example.setGoldStandard("NOT PARSED\nNOT PARSED\nNOT PARSED");
            example.setPredLexicon("\n");
            printOutput(example.toString());
        } else {
            printGoldStandard(String.format("Example_%s-sent_%s\n%s\n", filename, treeno,
                    "NOT PARSED\nNOT PARSED\nNOT PARSED"));
        }
    }
    StopWatchSet.end();
    return allCorpus;
}
From source file:pltag.parser.Lexicon.java
/** * Converts a MultiValueMap with String values to one with StringTree values. * @param treetype /*ww w . j a va2s . c om*/ * * @param MultiValueMap lexString * @return MultiValueMap lexTree */ @SuppressWarnings("unchecked") private MultiValueMap makeLexTrees(MultiValueMap lexString, String treetype) { MultiValueMap<String, ElementaryStringTree> lexTree = new MultiValueMap(); Set<String> keys = lexString.keySet(); for (String key : keys) { Collection<String> values = lexString.getCollection(key); HashMap<String, ElementaryStringTree> treelist = new HashMap<String, ElementaryStringTree>(); for (String treeString : values) { ElementaryStringTree tree = makeToStringTreeOld(treeString, treetype); if (tree == null) { continue; } String POSword = tree.getPOStagged(tree.getRoot()).trim(); if (key.equals("prediction: ")) { POSword = key; } // for lexentries for "put up" etc, add three times into Map: as "put up", "put" and "up". String[] words = POSword.split("\t"); ElementaryStringTree sametree = null; if (!treelist.containsKey(POSword + "@@" + tree.toString())) { lexTree.put(POSword, tree); } treelist.put(POSword + "@@" + tree.toString(), tree); if (words.length > 1) { for (String word : words) { if (sametree == null) { lexTree.put(word, tree); } } } } } return lexTree; }
From source file:pltag.parser.Lexicon.java
/** * Converts a MultiValueMap with String values to one with StringTree values. * @param lexString//from w ww.j a v a 2s . c o m * @param treetype * @return MultiValueMap lexTree */ @SuppressWarnings("unchecked") protected MultiValueMap makeLexStrings(MultiValueMap lexString, String treetype) { MultiValueMap lexTree = new MultiValueMap(); Set<String> keys = lexString.keySet(); HashSet<String> treelist = new HashSet<String>(); for (String key : keys) { Collection<String> values = lexString.getCollection(key); for (String treeString : values) { //if (tree == null) continue; //need to deal with errors at different point. //need to extract POS tag from treestring and unlexicalize tree. String posWord = key; String tree = treeString; if (!key.equals("prediction: ")) { if (opts.goldPosTags || opts.treeFamilies) {//pos and word given // if (this.getClass() == UnkLexicon.class) // FIX: Unnecessary check // { // posWord = UnkLexicon.getPosFromTreeString(treeString, key); // } // else // { // posWord = getPosFromTreeString(treeString, key); // } posWord = getPosFromTreeString(treeString, key); if (opts.posOnly) {//only pos tag given String[] words = posWord.split("\t"); posWord = ""; for (String w : words) { if (w.contains("*") || w.equals("0")) { continue; } else { posWord += w.substring(0, w.indexOf(" ")) + "\t"; } } posWord = posWord.trim(); } } else {// only word posWord = Utils.getCutOffCorrectedMainLex(key.toLowerCase(), listOfFreqWords, opts.train, opts.fullLex); if (key.contains(" ")) { posWord = posWord.replace(" ", "\t"); } } tree = makeUnlex(treeString, key); } if (noOfTrees.containsKey(treeString) && posWord.contains(" ") && (opts.goldPosTags || opts.treeFamilies) && !opts.posOnly) { String pos = posWord.substring(0, posWord.indexOf(" ")); String puretree = pos + "\t" + tree.substring(2); if (noOfTrees.containsKey(puretree)) { noOfTrees.put(puretree, noOfTrees.get(puretree) + noOfTrees.get(treeString)); } else { noOfTrees.put(puretree, 
noOfTrees.get(treeString)); } noOfTrees.remove(treeString); } // for lexentries for "put up" etc, add three times into Map: as "put up", "put" and "up". String[] words = posWord.split("\t"); ElementaryStringTree sametree = null; if (!treelist.contains(posWord + "@@" + tree) && words.length == 1) { String lc = posWord.toLowerCase(); // String wlc = lc.substring(lc.indexOf(" ") + 1); if (!opts.goldPosTags && opts.treeFamilies && !lc.equals("prediction: ")) { lexTree.put(wlc, tree); } else { lexTree.put(lc, tree); } if (!wordPosMap.containsValue(wlc, lc)) { wordPosMap.put(wlc, lc); } trees.put(tree.substring(tree.indexOf("\t") + 1), lc); } treelist.add(posWord + "@@" + tree); if (words.length > 1) { for (String word : words) { if (sametree == null && !word.startsWith(" *T*") && !word.startsWith(" *?*") && !word.startsWith(" *-") && !word.equals(" *") && !word.equals(" 0")) { String lc = word.toLowerCase(); String wlc = lc.substring(lc.indexOf(" ") + 1); if (!opts.goldPosTags && opts.treeFamilies) { lexTree.put(wlc, tree); } else { lexTree.put(lc, tree); } if (!wordPosMap.containsValue(wlc, lc)) { wordPosMap.put(wlc, lc); } trees.put(tree.substring(tree.indexOf("\t") + 1), lc); } // if } // for } // if } // for (values) } // for (keys) return lexTree; }
From source file:pltag.parser.Lexicon.java
/**
 * Reads the lexicon file and sorts entries by their type (arg or mod).
 * For each of those types, it creates a MultiValueMap that's keyed on the lexeme,
 * and whose values are the Strings that represent the trees. Also updates the
 * biWordMap anchor-frequency table, posTagNo and noOfTrees counters as side effects.
 *
 * @param lines the raw lexicon lines ("freq \t lexeme \t ARG|MOD \t tree")
 * @return a MultiValueMap array: arg string lexicon in position 0, mod string
 *         lexicon in position 1
 */
protected MultiValueMap<String, ?>[] read(String[] lines) {
    MultiValueMap<String, String> modLexentriesString = new MultiValueMap();
    MultiValueMap<String, String> argLexentriesString = new MultiValueMap();
    for (String line : lines) {
        String[] lexcontent = Utils.getCatInventory(line.trim(), opts.combineNNVBcats).split("\t+");
        int freq = Integer.parseInt(lexcontent[0]);
        // "<>" marks a multi-word anchor: track anchor (and anchor-pair) frequencies
        if (lexcontent[3].contains("<>")) {
            String endswithLex = lexcontent[3].substring(0, lexcontent[3].indexOf("<>"));
            String anchor = endswithLex.substring(endswithLex.lastIndexOf(" ") + 1);
            if (!biWordMap.containsKey(anchor)) {
                biWordMap.put(anchor, freq);
            } else {
                biWordMap.put(anchor, biWordMap.get(anchor) + freq);
            }
            if (lexcontent[3].contains("1_1)")) {
                endswithLex = lexcontent[3].substring(0, lexcontent[3].indexOf("1_1)") - 1);
                anchor += "%" + endswithLex.substring(endswithLex.lastIndexOf(" ") + 1);
                if (!biWordMap.containsKey(anchor)) {
                    biWordMap.put(anchor, freq);
                } else {
                    biWordMap.put(anchor, biWordMap.get(anchor) + freq);
                }
                // also count the unknown-word variant of the pair
                anchor = "UNK%" + endswithLex.substring(endswithLex.lastIndexOf(" ") + 1);
                if (!biWordMap.containsKey(anchor)) {
                    biWordMap.put(anchor, 1);
                } else {
                    biWordMap.put(anchor, biWordMap.get(anchor) + 1);
                }
                if (!biWordMap.containsKey("UNK")) {
                    biWordMap.put("UNK", 1);
                } else {
                    biWordMap.put("UNK", biWordMap.get("UNK") + 1);
                }
            }
        }
        // NOTE(review): this length check is dead — lexcontent[3] is already accessed
        // above, so a short line would have thrown before reaching it.
        if (lexcontent.length < 4) {
            if (opts.verbose) {
                System.out.println("wrong lex");
            }
        }
        lexcontent = lexEntryRemoveDigits(lexcontent);
        int baumAnz = Integer.parseInt(lexcontent[0]);
        // collapse frequencies to a binary seen-once / seen-more flag unless freqBaseline
        if (lexcontent[0].equals("1")) {
            lexcontent[0] = "0";
        } else if (!opts.freqBaseline) {
            lexcontent[0] = "1";
        }
        String word = lexcontent[1];
        String val = lexcontent[0] + "\t" + lexcontent[3];
        if (lexcontent[2].equals("ARG")) {
            // NUM entries are deduplicated; everything else is always added
            if (!lexcontent[1].equals("NUM") || !argLexentriesString.containsValue(word,
                    lexcontent[0].toString() + "\t" + lexcontent[3].toString())) {
                argLexentriesString.put(word, val);
            }
        } else if (lexcontent[2].equals("MOD")) {
            if (!lexcontent[1].equals("NUM") || !modLexentriesString.containsValue(word,
                    lexcontent[0].toString() + "\t" + lexcontent[3].toString())) {
                modLexentriesString.put(word, val);
            }
        } else {
            System.err.println("Incorrect Lexicon format: line " + line);
        }
        // accumulate POS-word and tree frequency tables
        String posword = getPosFromTreeString(lexcontent[3], word).toLowerCase();
        if (posTagNo.containsKey(posword)) {
            posTagNo.put(posword, posTagNo.get(posword) + baumAnz);
        } else {
            posTagNo.put(posword, baumAnz);
        }
        if (noOfTrees.containsKey(val)) {
            this.noOfTrees.put(val, noOfTrees.get(val) + baumAnz);
        } else {
            this.noOfTrees.put(val, baumAnz);
        }
    }
    MultiValueMap<String, String>[] entries = new MultiValueMap[2];
    entries[0] = argLexentriesString;
    entries[1] = modLexentriesString;
    return entries;
}
From source file:pltag.parser.Lexicon.java
public MultiValueMap<String, ElementaryStringTree> getLexEntriesContaining(String category) { MultiValueMap<String, ElementaryStringTree> out = new MultiValueMap(); for (String key : wordPosMap.keySet()) { String firstWordPos = wordPosMap.getCollection(key).iterator().next(); Collection<ElementaryStringTree> col = getEntries(key, firstWordPos, firstWordPos.split(" ")[0], false, 0);//from ww w .j a va 2 s . c o m for (ElementaryStringTree e : col) { if (e.toString().contains(" " + category + "^")) out.put(key, e); } } return out; }
From source file:pltag.parser.ParsingTask.java
/**
 * Combines two trees by first checking whether they can be combined (no two shadow
 * trees, or unused shadow tree) and then calling the integration function and
 * inserting results into the chart.
 *
 * @param trees         candidate elementary trees for the next word
 * @param chart         the parse chart
 * @param chartindex    index of the current chart slice
 * @param origPosTags   gold POS tags for the sentence
 * @param startTime     parse start time (ms) — currently unused for cutoffs
 * @param posOfNextWord non-empty only when supertagging prediction trees
 * @param superTagger   supertagger used to prune prediction trees
 * @param beamWidth     beam width for probability-based pruning
 * @return the new chart entries produced for this slice
 */
@SuppressWarnings("unchecked")
//    private ArrayList<ChartEntry> combineTrees(Collection<ElementaryStringTree> trees, Chart chart,
//            Collection<ChartEntry> lastSliceCopy, short chartindex, ...  (old signature)
private ArrayList<ChartEntry> combineTrees(Collection<ElementaryStringTree> trees, Chart chart,
        short chartindex, String[] origPosTags, double startTime, String posOfNextWord,
        SuperTagger superTagger, int beamWidth, boolean endfOfSent, int timestamp) {
    ArrayList<ChartEntry> newEntries = new ArrayList<ChartEntry>();
    // try to integrate each tree (some may have two alternative fringes)
    // with prefix trees from prev slice; add successful combinations to chart.
    ArrayList<ChartEntry> lastSliceCopy = getLastSlice(chartindex, chart);
    // cache of fringes per elementary tree, so they are computed once per tree
    HashMap<ElementaryStringTree, ArrayList<TreeState>> treeStateMap = new HashMap<ElementaryStringTree, ArrayList<TreeState>>();
    double totalTimeSuperTag = 0.0;
    int a = 0; // chart entries visited (diagnostics)
    int b = 0; // (entry, tree) pairs visited (diagnostics)
    double bestprob = Double.NEGATIVE_INFINITY; // running best score for beam pruning
    for (ChartEntry chartEntry : lastSliceCopy) {
        a++;
        Collection<ElementaryStringTree> selectedTrees;
        double start = System.currentTimeMillis();
        // call the supertagger only on prediction trees
        // (when trees are elementary trees, posOfNextWord is always "")
        boolean shadowTree = !posOfNextWord.equals("");
        selectedTrees = opts.train || !shadowTree ? trees
                : superTagger.superTag(trees, chartEntry, posOfNextWord);
        totalTimeSuperTag += (System.currentTimeMillis() - start);
        for (ElementaryStringTree tree : selectedTrees) {
            b++;
            ArrayList<TreeState> elemtreeStates;
            if (treeStateMap.containsKey(tree)) {
                elemtreeStates = treeStateMap.get(tree);
            } else {
                elemtreeStates = getTreeStateForTree(tree, chartindex);
                treeStateMap.put(tree, elemtreeStates);
            }
            for (TreeState elemtreetreeState : elemtreeStates) {
                // fringe string -> parser operations already attempted for it
                MultiValueMap<String, ParserOperation> combinations = new MultiValueMap();
                // at most two outstanding shadow (prediction) trees allowed
                if (tree.hasShadowInd() && chartEntry.getTreeState().getShadowTrees().size() > 2) {
                    continue;
                }
                TreeState treeState = chartEntry.getTreeState();
                if (opts.verbose && treeState.getUnaccessibles().isEmpty()
                        && treeState.getFutureFringe().getNext() != null) {
                    LogInfo.error("why not expanded???");
                }
                String fringeString = treeState.getFringe().toString();
                List<ChartEntry> ces = new ArrayList<ChartEntry>();
                if (combinations.containsKey(fringeString)) {
                    // this fringe was seen before: replay only the operations that worked
                    Collection<ParserOperation> operations = combinations.getCollection(fringeString);
                    if (!elemtreetreeState.getRootNode().isShadow()) {
                        ces.addAll(match(tree, elemtreetreeState, chart, chartEntry, origPosTags, chartindex));
                    }
                    if (operations.size() == 1 && operations.contains(null)) {
                        // don't need to do anything because this prefix tree does not
                        // integrate with the prefix tree; need to do this at tree level.
                    } else {
                        for (ParserOperation operation : operations) {
                            if (operation == null || operation == ParserOperation.verify) {
                                continue;
                            } else {
                                ces.addAll(operation.combine(model, opts, words, origPosTags, treeState,
                                        elemtreetreeState, tree, chartindex));
                            }
                        }
                    }
                } else {
                    // first time for this fringe: record it and try full integration
                    combinations.put(fringeString, null);
                    if (combineTwoShadowTreesInARow(chartEntry, elemtreetreeState)) {
                        continue;
                    }
                    ces = integrate(treeState, elemtreetreeState, tree, chart, chartEntry, origPosTags,
                            chartindex);
                }
                // clean up results
                for (ChartEntry cefirst : ces) {
                    ArrayList<ChartEntry> expandedCEs = makeExpansions(cefirst);
                    for (ChartEntry ce : expandedCEs) {
                        for (BuildBlock bb : ce.getBuildBlocks()) {
                            // discard derivations that never used their shadow tree
                            if (didNotUseShadow(chartEntry, bb, bb.getIpi(), chartindex, tree)) {
                                LinkedList<BuildBlock> list = new LinkedList<BuildBlock>();
                                list.add(bb);
                                // ce.getTreeState().getFutureFringe().getBBHist().remove(bb); // TODO: FIX
                                ce.getTreeState().getFutureFringe().getBBHist().remove(list); // TODO: FIX
                                continue;
                            }
                            bb.setPrevChartEntry(chartEntry);
                            if (!opts.train || opts.useProbabilityModel) {
                                bb.retrieveProbability(model.getParams(), opts.freqBaseline);
                                bb.removeElemTree();
                            }
                            if (!elemtreetreeState.getShadowTrees().isEmpty()) {
                                ShadowStringTree shadowt = ce.getTreeState().getShadowTrees()
                                        .get(ce.getTreeState().getShadowTrees().size() - 1);
                                shadowt.setPredictProb(bb.getProbability());
                            }
                            // remember which operation succeeded for this fringe
                            if (!combinations.containsValue(fringeString, bb.getOperation())) {
                                combinations.put(fringeString, bb.getOperation());
                            }
                            if (!opts.train || opts.useProbabilityModel) {
                                ce.updateProbs(ce.getTreeState().getFutureFringe(), 0);
                            }
                            // beam pruning: keep entries within beamWidth of the best so far
                            if (opts.pruneUsingScores) {
                                if (ce.getBestScore() > bestprob) {
                                    bestprob = ce.getBestScore();
                                }
                                if (ce.getBestScore() > bestprob - beamWidth) {
                                    newEntries.add(ce);
                                }
                            } else {
                                // lookahead probability (LAP) is added for prediction trees only
                                double bestPropWithVlap = ce.getBestProbability()
                                        + (opts.train || !shadowTree ? 0.0d : computeLAP(tree));
                                if (bestPropWithVlap > bestprob) {
                                    bestprob = bestPropWithVlap;
                                }
                                if (opts.train || bestPropWithVlap > bestprob - beamWidth) {
                                    newEntries.add(ce);
                                }
                            }
                        } // for each build block
                    } // for each expanded chart entry
                } // for each new chart entry
            } // for each elementary tree state (fringes) ALWAYS ONE
        } // for each elementary tree
    } // for each chart entry
    return newEntries;
}