Example usage for org.apache.commons.collections4.map MultiValueMap put

List of usage examples for org.apache.commons.collections4.map MultiValueMap put

Introduction

On this page you can find example usages of the put method of org.apache.commons.collections4.map.MultiValueMap.

Prototype

@Override
@SuppressWarnings("unchecked")
public Object put(final K key, final Object value) 

Source Link

Document

Adds the value to the collection associated with the specified key.

Usage

From source file:com.farhad.ngram.lang.detector.util.FileTools.java

/**
 * Reads a corpus file into a multi-valued map of key to normalised text.
 *
 * <p>Each line is split on the two-character separator {@code ,'}. The part
 * before the separator is the key, the part after it is the text. The last
 * character of the text is dropped (presumably a closing quote - confirm
 * against the corpus format), punctuation and digits are removed, whitespace
 * runs are collapsed to a single space and the result is lower-cased before it
 * is added to the values stored under the key.
 *
 * <p>Lines that do not contain the separator, or whose text part is empty, are
 * skipped instead of aborting the whole read with an unchecked exception.
 * I/O failures are printed and whatever was read so far is returned.
 *
 * @param path path of the UTF-8 encoded corpus file
 * @return the map built from the readable lines; never {@code null}
 */
public MultiValueMap readFile(String path) {
    MultiValueMap corpus = new MultiValueMap();

    // try-with-resources closes the reader (and the wrapped streams) on every exit path
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF8"))) {

        String sCurrentLine;

        while ((sCurrentLine = br.readLine()) != null) {
            String[] parts = sCurrentLine.split(",'");
            if (parts.length < 2 || parts[1].isEmpty()) {
                // blank or malformed line: nothing usable to store
                continue;
            }

            String key = parts[0];
            String text = parts[1];
            // drop the trailing character of the text field
            text = text.substring(0, text.length() - 1);
            // strip punctuation and digits, then collapse whitespace runs
            text = text.replaceAll("\\p{Punct}+", "");
            text = text.replaceAll("\\d+", "");
            text = text.replaceAll("\\s+", " ");

            corpus.put(key, text.toLowerCase());
        }

    } catch (IOException e) {
        e.printStackTrace();
    }
    return corpus;
}

From source file:info.rmapproject.webapp.service.DataDisplayServiceImpl.java

/**
 * Builds the display graph for a DiSCO.
 *
 * <p>The DiSCO itself becomes a node; its creator and provider (when they are
 * URIs) and every aggregated resource become nodes linked to it. The related
 * statements are then sorted into non-literal relationships, rdf:type
 * assertions and label candidates. If the number of relationships plus the
 * edges already present exceeds {@code maxObjGraphRelationships}, no graph is
 * produced.
 *
 * @param discoDTO the DiSCO to visualise
 * @return the populated graph, or {@code null} when it would be too large
 * @throws Exception propagated from the service calls and URI construction
 */
@Override
public Graph getDiSCOGraph(DiSCODTO discoDTO) throws Exception {
    Graph graph = graphFactory.newGraph();
    String discoUri = discoDTO.getUri().toString();

    graph.addNode(discoUri, discoUri, discoNodeType);

    // creator
    if (WebappUtils.isUri(discoDTO.getCreator())) {
        graph.addNode(discoDTO.getCreator(), agentNodeType);
        graph.addEdge(discoUri, discoDTO.getCreator(), DCTERMS.CREATOR.toString());
    }

    // provider id
    if (WebappUtils.isUri(discoDTO.getProviderId())) {
        List<URI> providerTypes = rmapService.getResourceRdfTypesInContext(new URI(discoDTO.getProviderId()),
                discoDTO.getUri());
        String providerNodeType = WebappUtils.getNodeType(providerTypes);
        graph.addNode(discoDTO.getProviderId(), providerNodeType);
        graph.addEdge(discoUri, discoDTO.getProviderId(), RMAP.PROVIDERID.toString());
    }

    // aggregated resources
    for (URI aggregate : discoDTO.getAggregatedResources()) {
        List<URI> aggregateTypes = rmapService.getResourceRdfTypesInContext(aggregate, discoDTO.getUri());
        String aggregateNodeType = WebappUtils.getNodeType(aggregateTypes);
        graph.addNode(aggregate.toString(), aggregateNodeType);
        graph.addEdge(discoUri, aggregate.toString(), Terms.ORE_AGGREGATES_PATH);
    }

    List<RMapTriple> relationshipTriples = new ArrayList<RMapTriple>();
    List<RMapTriple> labelTriples = new ArrayList<RMapTriple>();
    List<String> labelFlds = Arrays.asList(this.labelTypes.split(","));
    MultiValueMap<String, URI> typeMap = new MultiValueMap<String, URI>();

    // split the statements into the three lists used for graph creation
    for (RMapTriple triple : discoDTO.getRelatedStatements()) {
        RMapResource subj = triple.getSubject();
        RMapIri pred = triple.getPredicate();
        RMapValue obj = triple.getObject();

        String predicate = pred.toString();
        boolean isType = predicate.equals(RDF.TYPE.toString());
        boolean isLiteral = obj instanceof RMapLiteral;

        if (isType) {
            if (obj instanceof RMapIri) {
                typeMap.put(subj.toString(), new URI(obj.toString()));
            }
        } else if (!isLiteral) {
            relationshipTriples.add(triple);
        }

        if (isLiteral && labelFlds.contains(predicate)) {
            labelTriples.add(triple);
        }
    }

    // too many relationships would produce an unreadable graph, so give up
    if ((relationshipTriples.size() + graph.getEdges().size()) > maxObjGraphRelationships) {
        return null;
    }

    for (RMapTriple triple : relationshipTriples) {
        String subject = triple.getSubject().toString();
        String predicate = triple.getPredicate().toString();
        String object = triple.getObject().toString();

        // a resource without any recorded rdf:type is passed on as null
        List<URI> subjTypes = (typeMap.getCollection(subject) == null) ? null
                : new ArrayList<URI>(typeMap.getCollection(subject));
        List<URI> objTypes = (typeMap.getCollection(object) == null) ? null
                : new ArrayList<URI>(typeMap.getCollection(object));

        graph.addNode(subject, WebappUtils.getNodeType(subjTypes));
        graph.addNode(object, WebappUtils.getNodeType(objTypes));
        graph.addEdge(subject, object, predicate);
    }

    // label the nodes that are present in the graph from the label statements
    for (RMapTriple triple : labelTriples) {
        String nodeKey = triple.getSubject().toString();
        if (graph.getNodes().containsKey(nodeKey)) {
            graph.getNodes().get(nodeKey).setLabel(triple.getObject().toString());
        }
    }

    return graph;
}

From source file:org.lockss.plugin.silverchair.PostHttpClientUrlConnection.java

/**
 * Copies the response headers into {@code props}.
 *
 * <p>Headers without a value are ignored. A header without a name is stored
 * as {@code header_<index>}. Repeated headers are joined into one
 * comma-separated property value, except for names listed in
 * {@code singleValuedHdrs}, where a later occurrence replaces earlier ones.
 *
 * @param props  the properties to populate
 * @param prefix prepended to every header name to form the property key;
 *               may be {@code null} for no prefix
 */
public void storeResponseHeaderInto(Properties props, String prefix) {
    // iterating the header array is the only way to see every header,
    // so gather the values of repeated headers under one key first
    MultiValueMap<String, String> collected = new MultiValueMap<String, String>();
    Header[] headers = method.getResponseHeaders();
    for (int i = 0; i < headers.length; i++) {
        String value = headers[i].getValue();
        if (value == null) {
            // only headers that carry a value are stored
            continue;
        }
        String name = headers[i].getName();
        if (name == null) {
            name = "header_" + i;
        }
        // qualify the header name so it cannot clash with our own properties
        String propKey = (prefix != null) ? prefix + name : name;
        boolean singleValued = !singleValuedHdrs.isEmpty() && singleValuedHdrs.contains(name.toLowerCase());
        if (singleValued) {
            collected.remove(propKey);
        }
        collected.put(propKey, value);
    }

    // collapse multiple values into one comma-separated string
    for (String propKey : collected.keySet()) {
        Collection<String> values = collected.getCollection(propKey);
        String joined = (values.size() <= 1) ? CollectionUtil.getAnElement(values)
                : StringUtil.separatedString(values, ",");
        props.setProperty(propKey, joined);
    }
}

From source file:pltag.corpus.ConnectionPathCalculator.java

/**
 * Groups the connected nodes by their lowest origin, keeping only origins that
 * are greater than the given leaf number.
 *
 * <p>For every node in {@code connectedNodes} the lowest "up" origin and, when
 * present, the lowest "down" origin are looked up. Each origin that is larger
 * than {@code currentLeafNumber} becomes a key of the result, with the node
 * added to that key's values unless it is already there. According to the
 * original author, each key corresponds to one prediction tree needed to
 * connect the current word, and its values are the nodes that tree requires.
 *
 * @param currentLeafNumber leaf number of the word currently being connected
 * @return map from lowest origin to the nodes having that origin
 */
private MultiValueMap findNodesWithGreaterLeafnumbers(int currentLeafNumber) {
    MultiValueMap<Integer, Integer> nodesByOrigin = new MultiValueMap<Integer, Integer>();

    for (Integer node : connectedNodes) {
        Integer lowestUp = stringTree.getLowestOrigin(node, stringTree.originUp);
        Integer lowestDownProbe = stringTree.getLowestOrigin(node, stringTree.originDown);

        boolean upIsNewAndLater = lowestUp > currentLeafNumber
                && !nodesByOrigin.containsValue(lowestUp, node);
        if (upIsNewAndLater) {
            nodesByOrigin.put(lowestUp, node);
        }

        if (lowestDownProbe == null) {
            // no "down" origin recorded for this node
            continue;
        }

        Integer lowestDown = stringTree.getLowestOrigin(node, stringTree.originDown);
        boolean downIsNewAndLater = lowestDown > currentLeafNumber
                && !nodesByOrigin.containsValue(lowestDown, node);
        if (downIsNewAndLater) {
            nodesByOrigin.put(lowestDown, node);
        }
    }
    return nodesByOrigin;
}

From source file:pltag.corpus.TagCorpus.java

/**
 * Rebuilds the sentence tree from its elementary trees, computes the
 * connection path, and emits the gold standard, the lexicon and the
 * predictive lexicon entries. Traces and fillers are collected and handed to
 * {@code traceAndFillerTreatment} along the way.
 * (Original author's remark: too much responsibility for one method.)
 *
 * <p>Side effects visible in this method: appends to {@code listErrors},
 * updates {@code fullList}, and writes output through {@code printOutput},
 * {@code printGoldStandard} and {@code printLex}.
 *
 * @param filename name used in the example headers and passed to the leaf check
 * @param treeno number of the sentence tree, used in the example headers
 * @param tree the Penn tree the reconstructed sentence tree is checked against
 * @param elementTrees the elementary trees of the sentence
 * @return a list whose only element is the reconstructed sentence tree
 * @throws IOException declared by the signature; presumably raised by the output helpers - confirm
 */
private List<StringTree> reconstructAndBuildLexicon(String filename, int treeno, PennTree tree,
        List<ElementaryStringTree> elementTrees) throws IOException {
    StopWatchSet.begin("reconstructAndBuildLexicon");
    CompositeStringTree sentenceTree = new CompositeStringTree(treeno, opts.useSemantics);
    // leaf index -> elementary tree that contains the leaf
    ElementaryStringTree[] sentenceWordLex2 = new ElementaryStringTree[elementTrees.size() + 15];//allow for a max of 15 deleted words
    List<ElementaryStringTree> elementaryTrees = new ArrayList<ElementaryStringTree>();
    HashMap<String, ElementaryStringTree> fillers = new HashMap<String, ElementaryStringTree>();
    MultiValueMap<String, ElementaryStringTree> traces = new MultiValueMap<String, ElementaryStringTree>();
    ArrayList<StringTree> allCorpus = new ArrayList<StringTree>();
    for (ElementaryStringTree elementaryTree : elementTrees) {
        elementaryTree.simplifyCats();
        if (elementaryTree.isVerificTree()) {
            // verification trees are not integrated; only their string is appended
            sentenceTree.treeString += " " + elementaryTree.treeString;
        } else {
            sentenceTree.integrate(elementaryTree);
        }
        for (Integer leaf : elementaryTree.getLeaves(elementaryTree.getRoot(), new ArrayList<Integer>())) {
            // NOTE(review): an out-of-range leaf is only logged; the array store below
            // still executes and would throw ArrayIndexOutOfBoundsException unless
            // LogInfo.error aborts - confirm the intended behaviour.
            if (leaf >= sentenceWordLex2.length)
            {
                LogInfo.error("Error while reconstructing and building lexicon");
            }
            sentenceWordLex2[leaf] = elementaryTree;
        }
        elementaryTrees.add(elementaryTree);
        // associate and store traces and fillers
        String fillerPosition = elementaryTree.getFiller();
        if (!fillerPosition.equals("")) {
            fillers.put(fillerPosition, elementaryTree);
        }
        String traceposition = elementaryTree.getTrace();
        if (!traceposition.equals("")) {
            // several trees may share a trace position, hence the multi-valued map
            traces.put(traceposition, elementaryTree);
        }
    } // for each elementaryTree
    sentenceTree.joinUnConnected();
    // massage lexicon entries
    elementaryTrees = traceAndFillerTreatment(elementaryTrees, sentenceTree, sentenceWordLex2, traces, fillers);
    allCorpus.add(sentenceTree);

    List<String> errors = errorTracker.leafCheck(sentenceTree, tree, filename);
    listErrors.addAll(errors);
    attachTraces(elementaryTrees, sentenceTree, sentenceWordLex2);

    // unary nodes are only removed when the leaf check reported no errors
    if (errors.isEmpty()) {
        sentenceTree.removeUnaryNodes(sentenceTree.getRoot());
    }
    // an unconnected sentence tree gets no connection path; optionally emit a placeholder example
    if (sentenceTree.treeString.startsWith("Unconnected")) {
        if (opts.outputEmptyExamples) {
            if (opts.examplesInSingleFile) {
                PltagExample example = new PltagExample(String.format("Example:%s-sent_%s", filename, treeno));
                example.setGoldStandard("NOT PARSED\nNOT PARSED\nNOT PARSED");
                printOutput(example.toString());
            } else {
                printGoldStandard(String.format("Example_%s-sent_%s\n%s\n", filename, treeno,
                        "NOT PARSED\nNOT PARSED\nNOT PARSED"));
            }
        }
        return allCorpus;
    }
    // calculate connection path
    ConnectionPathCalculator cpc = new ConnectionPathCalculator(sentenceTree, sentenceWordLex2,
            opts.useSemantics);
    cpc.calculateConnectionPath(sentenceTree.root, Integer.MIN_VALUE, lexicon);
    HashMap<Integer, Integer> newList = cpc.getNoOfSources();
    cpc.combinePredictedTreesFromSameOrigin();
    // accumulate the per-sentence source counts into the running totals;
    // assumes both maps hold entries for keys 0..5 (a missing key would fail on unboxing) - TODO confirm
    for (int i = 0; i < 6; i++) {
        fullList.put(i, fullList.get(i) + newList.get(i));
    }
    // add predictive lexicon entries, one printed tree per line
    StringBuilder predLexStr = new StringBuilder();
    for (StringTree predTree : cpc.getPredictedLexEntries()) {
        if (verbose) {
            LogInfo.logs("TagCorpus : reconstructAndBuildLex: " + predTree.print());
        }
        predLexStr.append(predTree.print()).append("\n");
    }
    if (predLexStr.length() > 0)
        predLexStr.deleteCharAt(predLexStr.length() - 1); // remove last \n
    // output the example (we assume there were no errors in creating the gold standard tree)
    if (errors.isEmpty()) {
        if (opts.examplesInSingleFile) // do the necessary conversions
        {
            PltagExample example = new PltagExample(String.format("Example:%s-sent_%s", filename, treeno));
            example.setGoldStandard(sentenceTree.goldStandardToString());
            // NOTE(review): the lexicon text is spliced into a /bin/sh -c command line.
            // Single quotes are escaped by the replaceAll below; confirm that this is
            // sufficient for every character the lexicon can contain.
            String cmd = String.format("printf \"%%b\" '%s' | resources/extractLexicon.sh",
                    getNiceLexicon(elementaryTrees).replaceAll("'", "'\\\\''"));
            String cmdArray[] = { "/bin/sh", "-c", cmd };
            example.setLexicon(Utils.executeCmd(cmdArray));
            cmd = String.format("printf \"%%b\" '%s' | resources/extractPredLexicon.sh",
                    predLexStr.toString().replaceAll("'", "'\\\\''"));
            cmdArray[2] = cmd;
            example.setPredLexicon(Utils.executeCmd(cmdArray));
            printOutput(example.toString());
        } else {
            if (opts.outputIndividualFiles)
                printGoldStandard(sentenceTree.goldStandardToString());
            else
                printGoldStandard(String.format("Example_%s-sent_%s\n%s\n", filename, treeno,
                        sentenceTree.goldStandardToString()));
            printLex(predLexStr.toString());
            printLex(getNiceLexicon(elementaryTrees));
        }
    } else if (opts.outputEmptyExamples) {
        // the leaf check failed: emit a placeholder example instead of the real one
        if (opts.examplesInSingleFile) {
            PltagExample example = new PltagExample(String.format("Example:%s-sent_%s", filename, treeno));
            example.setGoldStandard("NOT PARSED\nNOT PARSED\nNOT PARSED");
            printOutput(example.toString());
        } else {
            printGoldStandard(String.format("Example_%s-sent_%s\n%s\n", filename, treeno,
                    "NOT PARSED\nNOT PARSED\nNOT PARSED"));
        }
    }
    StopWatchSet.end();
    return allCorpus;
}

From source file:pltag.parser.Lexicon.java

/**
 * Converts a MultiValueMap with String values to one with StringTree values.
 * @param treetype /*ww  w  .  j a va2s  . c  om*/
 * 
 * @param MultiValueMap lexString
 * @return MultiValueMap lexTree
 */
@SuppressWarnings("unchecked")
private MultiValueMap makeLexTrees(MultiValueMap lexString, String treetype) {
    MultiValueMap<String, ElementaryStringTree> lexTree = new MultiValueMap();
    Set<String> keys = lexString.keySet();
    for (String key : keys) {
        Collection<String> values = lexString.getCollection(key);
        HashMap<String, ElementaryStringTree> treelist = new HashMap<String, ElementaryStringTree>();
        for (String treeString : values) {
            ElementaryStringTree tree = makeToStringTreeOld(treeString, treetype);
            if (tree == null) {
                continue;
            }
            String POSword = tree.getPOStagged(tree.getRoot()).trim();
            if (key.equals("prediction: ")) {
                POSword = key;
            }
            // for lexentries for "put up" etc, add three times into Map: as "put up", "put" and "up".
            String[] words = POSword.split("\t");
            ElementaryStringTree sametree = null;
            if (!treelist.containsKey(POSword + "@@" + tree.toString())) {
                lexTree.put(POSword, tree);
            }
            treelist.put(POSword + "@@" + tree.toString(), tree);
            if (words.length > 1) {
                for (String word : words) {
                    if (sametree == null) {
                        lexTree.put(word, tree);
                    }
                }
            }
        }
    }
    return lexTree;
}

From source file:pltag.parser.Lexicon.java

/**
 * Builds the string lexicon: a MultiValueMap from lower-cased lexeme (with or
 * without POS tag, depending on the options) to tree strings.
 *
 * <p>For every key other than {@code "prediction: "} the stored tree string is
 * the result of {@code makeUnlex(treeString, key)}; for the prediction key the
 * tree string is stored as is. Besides filling the returned map, the method
 * updates the fields {@code noOfTrees}, {@code wordPosMap} and {@code trees}.
 *
 * @param lexString map from lexeme to the tree strings of that lexeme
 * @param treetype not read anywhere in this method
 * @return MultiValueMap from lexeme to tree strings
 */
@SuppressWarnings("unchecked")
protected MultiValueMap makeLexStrings(MultiValueMap lexString, String treetype) {
    MultiValueMap lexTree = new MultiValueMap();
    Set<String> keys = lexString.keySet();
    // "posWord@@tree" combinations seen so far, across all keys
    HashSet<String> treelist = new HashSet<String>();
    for (String key : keys) {
        Collection<String> values = lexString.getCollection(key);
        for (String treeString : values) {
            // errors need to be dealt with at a different point;
            // here the POS tag is extracted from the tree string and the tree is unlexicalized.
            String posWord = key;
            String tree = treeString;
            if (!key.equals("prediction: ")) {
                if (opts.goldPosTags || opts.treeFamilies) {//pos and word given
                    posWord = getPosFromTreeString(treeString, key);
                    if (opts.posOnly) {//only pos tag given
                        String[] words = posWord.split("\t");
                        posWord = "";
                        for (String w : words) {
                            if (w.contains("*") || w.equals("0")) {
                                continue;
                            } else {
                                // keep only the part before the first space;
                                // NOTE(review): throws if w contains no space (indexOf returns -1) - confirm inputs
                                posWord += w.substring(0, w.indexOf(" ")) + "\t";
                            }
                        }
                        posWord = posWord.trim();
                    }
                } else {// only word
                    posWord = Utils.getCutOffCorrectedMainLex(key.toLowerCase(), listOfFreqWords, opts.train,
                            opts.fullLex);
                    if (key.contains(" ")) {
                        posWord = posWord.replace(" ", "\t");
                    }
                }
                tree = makeUnlex(treeString, key);
            }
            // move the count stored under the lexicalized tree string to the
            // "pos<TAB>tree-without-its-first-two-characters" key
            if (noOfTrees.containsKey(treeString) && posWord.contains(" ")
                    && (opts.goldPosTags || opts.treeFamilies) && !opts.posOnly) {
                String pos = posWord.substring(0, posWord.indexOf(" "));
                String puretree = pos + "\t" + tree.substring(2);
                if (noOfTrees.containsKey(puretree)) {
                    noOfTrees.put(puretree, noOfTrees.get(puretree) + noOfTrees.get(treeString));
                } else {
                    noOfTrees.put(puretree, noOfTrees.get(treeString));
                }
                noOfTrees.remove(treeString);

            }
            // for lexentries for "put up" etc, add three times into Map: as "put up", "put" and "up".
            String[] words = posWord.split("\t");
            // NOTE(review): sametree is never assigned, so the "sametree == null" test below is always true
            ElementaryStringTree sametree = null;

            // single-word entry: store once per distinct posWord/tree combination
            if (!treelist.contains(posWord + "@@" + tree) && words.length == 1) {
                String lc = posWord.toLowerCase();
                // wlc is lc without everything up to and including the first space
                String wlc = lc.substring(lc.indexOf(" ") + 1);
                if (!opts.goldPosTags && opts.treeFamilies && !lc.equals("prediction: ")) {
                    lexTree.put(wlc, tree);
                } else {
                    lexTree.put(lc, tree);
                }
                if (!wordPosMap.containsValue(wlc, lc)) {
                    wordPosMap.put(wlc, lc);
                }
                trees.put(tree.substring(tree.indexOf("\t") + 1), lc);
            }
            treelist.add(posWord + "@@" + tree);
            // multi-word entry: store the tree under every word except the trace/empty-element markers
            if (words.length > 1) {
                for (String word : words) {
                    if (sametree == null && !word.startsWith(" *T*") && !word.startsWith(" *?*")
                            && !word.startsWith(" *-") && !word.equals(" *") && !word.equals(" 0")) {
                        String lc = word.toLowerCase();
                        String wlc = lc.substring(lc.indexOf(" ") + 1);
                        if (!opts.goldPosTags && opts.treeFamilies) {
                            lexTree.put(wlc, tree);
                        } else {
                            lexTree.put(lc, tree);
                        }
                        if (!wordPosMap.containsValue(wlc, lc)) {
                            wordPosMap.put(wlc, lc);
                        }
                        trees.put(tree.substring(tree.indexOf("\t") + 1), lc);
                    } // if
                } // for
            } // if
        } // for (values)
    } // for (keys)
    return lexTree;
}

From source file:pltag.parser.Lexicon.java

/**
 * Reads the lexicon file and sorts entries by their type (arg or mod).
 * For each of those types, it creates a MultiValueMap that's keyed on the lexeme, and whose
 * values are the Strings that represent the trees.
 * /*from   w  w  w  .j  a  v a  2s.co m*/
 * @param lines
 * @return a MultiValueMap Array, with the arg string lexicon in first position, and 
 * mod string lexicon in second position. 
 */
protected MultiValueMap<String, ?>[] read(String[] lines) {
    MultiValueMap<String, String> modLexentriesString = new MultiValueMap();
    MultiValueMap<String, String> argLexentriesString = new MultiValueMap();
    for (String line : lines) {
        String[] lexcontent = Utils.getCatInventory(line.trim(), opts.combineNNVBcats).split("\t+");
        int freq = Integer.parseInt(lexcontent[0]);
        if (lexcontent[3].contains("<>")) {
            String endswithLex = lexcontent[3].substring(0, lexcontent[3].indexOf("<>"));
            String anchor = endswithLex.substring(endswithLex.lastIndexOf(" ") + 1);
            if (!biWordMap.containsKey(anchor)) {
                biWordMap.put(anchor, freq);
            } else {
                biWordMap.put(anchor, biWordMap.get(anchor) + freq);//*/
            }
            if (lexcontent[3].contains("1_1)")) {
                endswithLex = lexcontent[3].substring(0, lexcontent[3].indexOf("1_1)") - 1);
                anchor += "%" + endswithLex.substring(endswithLex.lastIndexOf(" ") + 1);
                if (!biWordMap.containsKey(anchor)) {
                    biWordMap.put(anchor, freq);
                } else {
                    biWordMap.put(anchor, biWordMap.get(anchor) + freq);//*/
                }
                anchor = "UNK%" + endswithLex.substring(endswithLex.lastIndexOf(" ") + 1);
                if (!biWordMap.containsKey(anchor)) {
                    biWordMap.put(anchor, 1);
                } else {
                    biWordMap.put(anchor, biWordMap.get(anchor) + 1);//*/
                }
                if (!biWordMap.containsKey("UNK")) {
                    biWordMap.put("UNK", 1);
                } else {
                    biWordMap.put("UNK", biWordMap.get("UNK") + 1);
                }
            }

        }
        if (lexcontent.length < 4) {
            if (opts.verbose) {
                System.out.println("wrong lex");
            }
        }
        lexcontent = lexEntryRemoveDigits(lexcontent);
        int baumAnz = Integer.parseInt(lexcontent[0]);
        if (lexcontent[0].equals("1")) {
            lexcontent[0] = "0";
            //continue;
        } else if (!opts.freqBaseline) {
            lexcontent[0] = "1";
        }
        String word = lexcontent[1];
        //            String wordNoSemantics = stripSemanticFrame(lexcontent[1]);
        String val = lexcontent[0] + "\t" + lexcontent[3];
        if (lexcontent[2].equals("ARG")) {
            //                if (!lexcontent[1].equals("NUM") || !argLexentriesString.containsValue(lexcontent[1], lexcontent[0].toString() + "\t" + lexcontent[3].toString()))
            if (!lexcontent[1].equals("NUM") || !argLexentriesString.containsValue(word,
                    lexcontent[0].toString() + "\t" + lexcontent[3].toString())) {
                //                    argLexentriesString.put(lexcontent[1], val);
                argLexentriesString.put(word, val);
            }
        } else if (lexcontent[2].equals("MOD")) {
            //                if (!lexcontent[1].equals("NUM") || !modLexentriesString.containsValue(lexcontent[1], lexcontent[0].toString() + "\t" + lexcontent[3].toString()))
            if (!lexcontent[1].equals("NUM") || !modLexentriesString.containsValue(word,
                    lexcontent[0].toString() + "\t" + lexcontent[3].toString())) {
                modLexentriesString.put(word, val);
                //                    modLexentriesString.put(lexcontent[1], val);
            }
        } else {
            System.err.println("Incorrect Lexicon format: line " + line);
        }
        //            String posword = getPosFromTreeString(lexcontent[3], lexcontent[1]).toLowerCase();
        String posword = getPosFromTreeString(lexcontent[3], word).toLowerCase();
        if (posTagNo.containsKey(posword)) {
            posTagNo.put(posword, posTagNo.get(posword) + baumAnz);
        } else {
            posTagNo.put(posword, baumAnz);
        }
        if (noOfTrees.containsKey(val)) {
            this.noOfTrees.put(val, noOfTrees.get(val) + baumAnz);
        } else {
            this.noOfTrees.put(val, baumAnz);
        }
    }

    MultiValueMap<String, String>[] entries = new MultiValueMap[2];
    entries[0] = argLexentriesString;
    entries[1] = modLexentriesString;
    return entries;
}

From source file:pltag.parser.Lexicon.java

/**
 * Collects, per word, the lexicon entries whose string form mentions the given
 * category, i.e. contains {@code " " + category + "^"}.
 *
 * <p>For each word in {@code wordPosMap} only the first word/POS value is used
 * to look up the entries.
 *
 * @param category the category to search for
 * @return map from word to the matching entries; empty if nothing matches
 */
public MultiValueMap<String, ElementaryStringTree> getLexEntriesContaining(String category) {
    final String marker = " " + category + "^";
    MultiValueMap<String, ElementaryStringTree> matches = new MultiValueMap<String, ElementaryStringTree>();
    for (String word : wordPosMap.keySet()) {
        String wordPos = wordPosMap.getCollection(word).iterator().next();
        // the POS tag is the part of the word/POS value before the first space
        String pos = wordPos.split(" ")[0];
        for (ElementaryStringTree entry : getEntries(word, wordPos, pos, false, 0)) {
            if (!entry.toString().contains(marker)) {
                continue;
            }
            matches.put(word, entry);
        }
    }
    return matches;
}

From source file:pltag.parser.ParsingTask.java

/**
 * Combines two trees by first checking whether they can be combined (no two shadow trees, or unused shadow tree)
 * and then calling the integration function and inserting results into the chart.
 * @param trees/* w  w  w .ja v a2  s  . c om*/
 * @param chart
 * @param chartindex
 * @param posOfNextWord 
 * @return
 */
@SuppressWarnings("unchecked") //
private ArrayList<ChartEntry> combineTrees(Collection<ElementaryStringTree> trees, Chart chart, // Collection<ChartEntry> lastSliceCopy, 
        short chartindex, String[] origPosTags, double startTime, String posOfNextWord, SuperTagger superTagger,
        int beamWidth, boolean endfOfSent, int timestamp) {
    ArrayList<ChartEntry> newEntries = new ArrayList<ChartEntry>();
    // try to integrate each tree (some may have two alternative fringes) 
    // with prefix trees from prev slice.
    // add successful combinations to chart.
    ArrayList<ChartEntry> lastSliceCopy = getLastSlice(chartindex, chart);
    HashMap<ElementaryStringTree, ArrayList<TreeState>> treeStateMap = new HashMap<ElementaryStringTree, ArrayList<TreeState>>();
    double totalTimeSuperTag = 0.0;
    int a = 0;
    int b = 0;
    double bestprob = Double.NEGATIVE_INFINITY;
    for (ChartEntry chartEntry : lastSliceCopy) {
        a++;
        Collection<ElementaryStringTree> selectedTrees;
        double start = System.currentTimeMillis();
        //System.out.print(trees.size()+" ");
        // call the supertagger only on prediction trees (when trees are elementary trees, posOfNextWord is always  "")
        boolean shadowTree = !posOfNextWord.equals("");
        //            selectedTrees = opts.train || shadowTree ? superTagger.superTag(trees, chartEntry, posOfNextWord) : trees ;
        selectedTrees = opts.train || !shadowTree ? trees
                : superTagger.superTag(trees, chartEntry, posOfNextWord);
        //  selectedTrees = trees;
        //System.out.println(selectedTrees.size());
        totalTimeSuperTag += (System.currentTimeMillis() - start);
        for (ElementaryStringTree tree : selectedTrees) {
            b++;
            ArrayList<TreeState> elemtreeStates;
            if (treeStateMap.containsKey(tree)) {
                elemtreeStates = treeStateMap.get(tree);
            } else {
                elemtreeStates = getTreeStateForTree(tree, chartindex);
                treeStateMap.put(tree, elemtreeStates);
            }
            for (TreeState elemtreetreeState : elemtreeStates) {
                MultiValueMap<String, ParserOperation> combinations = new MultiValueMap();
                //      if (System.currentTimeMillis() - startTime > 60000) return newEntries;
                if (tree.hasShadowInd() && chartEntry.getTreeState().getShadowTrees().size() > 2) {
                    continue;
                }
                TreeState treeState = chartEntry.getTreeState();
                if (opts.verbose && treeState.getUnaccessibles().isEmpty()
                        && treeState.getFutureFringe().getNext() != null) {
                    LogInfo.error("why not expanded???");
                }
                String fringeString = treeState.getFringe().toString();
                List<ChartEntry> ces = new ArrayList<ChartEntry>();
                if (combinations.containsKey(fringeString)) {
                    Collection<ParserOperation> operations = combinations.getCollection(fringeString);
                    if (!elemtreetreeState.getRootNode().isShadow()) {
                        ces.addAll(match(tree, elemtreetreeState, chart, chartEntry, origPosTags, chartindex));
                    }
                    if (operations.size() == 1 && operations.contains(null)) {
                        //don't need to do anything because this prefix tree does not integrate with prefix tree
                        //need to do this at tree level.
                    } else {
                        for (ParserOperation operation : operations) {
                            if (operation == null || operation == ParserOperation.verify) {
                                continue;
                            } else {
                                ces.addAll(operation.combine(model, opts, words, origPosTags, treeState,
                                        elemtreetreeState, tree, chartindex));
                            }
                        }
                    }
                } else {//*/
                    combinations.put(fringeString, null);
                    if (combineTwoShadowTreesInARow(chartEntry, elemtreetreeState)) {
                        continue;
                    }
                    //*****************
                    ces = integrate(treeState, elemtreetreeState, tree, chart, chartEntry, origPosTags,
                            chartindex);
                    //*****************
                }
                // clean up results
                for (ChartEntry cefirst : ces) {
                    ArrayList<ChartEntry> expandedCEs = makeExpansions(cefirst);
                    for (ChartEntry ce : expandedCEs) {
                        for (BuildBlock bb : ce.getBuildBlocks()) {
                            //System.out.println(bb.toString()+ bb.getIpi()+"ELEM:" + tree);
                            if (didNotUseShadow(chartEntry, bb, bb.getIpi(), chartindex, tree)) {
                                LinkedList<BuildBlock> list = new LinkedList<BuildBlock>();
                                list.add(bb);
                                //                                    ce.getTreeState().getFutureFringe().getBBHist().remove(bb); // TODO: FIX
                                ce.getTreeState().getFutureFringe().getBBHist().remove(list); // TODO: FIX
                                continue;
                            }
                            bb.setPrevChartEntry(chartEntry);
                            if (!opts.train || opts.useProbabilityModel) {
                                bb.retrieveProbability(model.getParams(), opts.freqBaseline);
                                bb.removeElemTree();
                                //                        bb.removeFreqCounter();
                            }
                            if (!elemtreetreeState.getShadowTrees().isEmpty()) {
                                ShadowStringTree shadowt = ce.getTreeState().getShadowTrees()
                                        .get(ce.getTreeState().getShadowTrees().size() - 1);
                                shadowt.setPredictProb(bb.getProbability());
                            }
                            if (!combinations.containsValue(fringeString, bb.getOperation())) {
                                combinations.put(fringeString, bb.getOperation());
                            }
                            //System.out.print("\n"+ce.getTreeState().getFringe());
                            if (!opts.train || opts.useProbabilityModel) {
                                //                                    double vlap = computeLAP(tree);
                                ce.updateProbs(ce.getTreeState().getFutureFringe(), 0);
                            }
                            //System.out.println(ce.getBestProbability()+"\t"+ce+"\n"+elemtreetreeState+"\n"+tree+a+" "+b+"\n---\n");
                            if (opts.pruneUsingScores) {
                                if (ce.getBestScore() > bestprob) {
                                    bestprob = ce.getBestScore();
                                }
                                if (ce.getBestScore() > bestprob - beamWidth) {
                                    newEntries.add(ce);
                                }
                            } else {
                                double bestPropWithVlap = ce.getBestProbability()
                                        + (opts.train || !shadowTree ? 0.0d : computeLAP(tree));
                                if (bestPropWithVlap > bestprob) {
                                    bestprob = bestPropWithVlap;
                                }
                                if (opts.train || bestPropWithVlap > bestprob - beamWidth) {
                                    newEntries.add(ce);
                                }
                            }
                        } // for each build block
                    } // for each expanded chart entry
                } // for each new chart entry
            } // for each elementary tree state (fringes) ALWAYS ONE
        } // for each elementary tree
    } // for each chart entry
    //        if (opts.timeProfile && totalTimeSuperTag > 100)
    //        {
    //            Utils.log("supertagtime: " + totalTimeSuperTag / 1000 + "\t");
    //        }
    return newEntries;
}