List of usage examples for org.apache.commons.collections4.map.MultiValueMap#keySet()
Set<K> keySet();
From source file:com.farhad.ngram.lang.detector.profile.LanguageProfile.java
public void construct(String path) { MultiValueMap corpus = filetools.readFile(path); Iterator<String> it = corpus.keySet().iterator(); Map<String, Map> profile = new ConcurrentHashMap<>(); //iterate over each class while (it.hasNext()) { String theKey = (String) it.next(); List<String> texts = (List<String>) corpus.get(theKey); Num_Docs = texts.size();// w w w . jav a 2 s.co m Map<Integer, Map> ngrams_lang = new HashMap<Integer, Map>(); Map<String, MyPair> ngrams_profile = new ConcurrentHashMap<>(); //iterate over each text for (int i = 0; i < texts.size(); i++) { String text = texts.get(i); Map<String, Integer> grams = new HashMap<>(); for (int n = 1; n <= ngrams; n++) { grams.putAll(NgramExtractor.gramLength(n).extractCountedGrams(text)); } ngrams_lang.put(i, grams); } Iterator<Integer> itt = ngrams_lang.keySet().iterator(); while (itt.hasNext()) { Map<String, Integer> ngram = ngrams_lang.get(itt.next()); Iterator<String> ittt = ngram.keySet().iterator(); while (ittt.hasNext()) { String ng = ittt.next(); if (ngrams_profile.containsKey(ng)) { MyPair pair = ngrams_profile.get(ng); pair.setFirst(pair.getFirst() + ngram.get(ng)); pair.setSecond(pair.getSecond() + 1); ngrams_profile.put(ng, pair); } else { MyPair pair = new MyPair(ngram.get(ng), 1); ngrams_profile.put(ng, pair); } } } profile.put(theKey, ngrams_profile); } //filter based on doc_frequency and term_frequency Iterator<String> p_it = profile.keySet().iterator(); while (p_it.hasNext()) { String lang = p_it.next(); List<String> texts = (List<String>) corpus.get(lang); Num_Docs = texts.size(); Map<String, MyPair> ngram = profile.get(lang); Iterator<String> ngram_it = ngram.keySet().iterator(); while (ngram_it.hasNext()) { String key = ngram_it.next(); MyPair freq = ngram.get(key); if (freq.getFirst() <= MIN_TERM_FREQUENCY | freq.getSecond() >= Num_Docs) { ngram.remove(key); } } } //computer the term frequecny for each n-gram p_it = profile.keySet().iterator(); while (p_it.hasNext()) { String lang = 
p_it.next(); List<String> texts = (List<String>) corpus.get(lang); Num_Docs = texts.size(); Map<String, MyPair> ngram = profile.get(lang); int N = ngram.keySet().size(); Iterator<String> ngram_it = ngram.keySet().iterator(); Map<String, Double> ngram_tfidf = new HashMap<>(); while (ngram_it.hasNext()) { String key = ngram_it.next(); MyPair freq = ngram.get(key); double tf = (double) freq.getFirst() / N; ngram_tfidf.put(key, tf); } //write the language profile String filename = lang + ".profile"; saveProfile(filename, ngram_tfidf); } }
From source file:org.lockss.plugin.silverchair.PostHttpClientUrlConnection.java
public void storeResponseHeaderInto(Properties props, String prefix) { // store all header properties (this is the only way to iterate) // first collect all values for any repeated headers. MultiValueMap<String, String> map = new MultiValueMap<String, String>(); Header[] headers = method.getResponseHeaders(); for (int ix = 0; ix < headers.length; ix++) { Header hdr = headers[ix];/*from w w w . j ava 2 s .c o m*/ String key = hdr.getName(); String value = hdr.getValue(); if (value != null) { // only store headers with values // qualify header names to avoid conflict with our properties if (key == null) { key = "header_" + ix; } String propKey = (prefix == null) ? key : prefix + key; if (!singleValuedHdrs.isEmpty() && singleValuedHdrs.contains(key.toLowerCase())) { map.remove(propKey); } map.put(propKey, value); } } // now combine multiple values into comma-separated string for (String key : map.keySet()) { Collection<String> val = map.getCollection(key); props.setProperty(key, ((val.size() > 1) ? StringUtil.separatedString(val, ",") : CollectionUtil.getAnElement(val))); } }
From source file:pltag.corpus.ConnectionPathCalculator.java
/** * Calculates which prediction trees need to be generated from the set of connectionNodes. * And combines prediction trees if possible. * //from w ww . j a v a 2 s.c o m * @param currentLeafNumber * @param lexicon * @return */ @SuppressWarnings("unchecked") // cast to Collection<String> private List<PredictionStringTree> generatePredictionTrees(int currentLeafNumber, List<StringTree> lexicon) {//, String leaf) { //if (currentLeafNumber.equals("6")) //System.out.print(""); MultiValueMap predictionTreeNodeMap = findNodesWithGreaterLeafnumbers(currentLeafNumber); ArrayList<PredictionStringTree> localPredictedTrees = new ArrayList<PredictionStringTree>(); int sourcetreeno = 0; ArrayList<PredictionStringTree> unhelpfulpredtrees = new ArrayList<PredictionStringTree>(); HashMap<Integer, Integer> translations = new HashMap<Integer, Integer>(); //System.out.println("prediction tree to connect leaf " + currentLeafNumber + " number of pred trees needed: "+ predictionTreeNodeMap.size()); //System.out.println("nodes needed: "+predictionTreeNodeMap); for (Integer predictionTreeOrigin : (Collection<Integer>) predictionTreeNodeMap.keySet()) { ElementaryStringTree originalStringTree = (ElementaryStringTree) sentenceWordLex[predictionTreeOrigin]; // ElementaryStringTree originalStringTree = (ElementaryStringTree) sentenceWordLex[Integer.parseInt(predictionTreeOrigin)]; if (originalStringTree == null) { continue; } originalStringTree.makeLexiconEntry(); if (originalStringTree.categories[originalStringTree.root] != null) // if (originalStringTree.categories[Integer.parseInt(originalStringTree.root)] != null) { translations.putAll(originalStringTree.removeUnaryNodes(originalStringTree.root)); } else { translations.putAll(originalStringTree.removeUnaryNodes(originalStringTree.coordanchor)); } if (originalStringTree.getAnchor() == Integer.MIN_VALUE && originalStringTree.treeString.startsWith("*")) // if (originalStringTree.getAnchor().equals("") && 
originalStringTree.treeString.startsWith("*")) { continue; } Collection<Integer> cn = predictionTreeNodeMap.getCollection(predictionTreeOrigin); PredictionStringTree predictionTree = buildPredictionTree(originalStringTree, cn, currentLeafNumber); // PredictionStringTree predictionTree = buildPredictionTree(originalStringTree, cn, Integer.parseInt(currentLeafNumber)); predictionTree.cutTail(cn); if (predictionTree.hasUsefulNodes(cn, translations)) { predictionTree = buildPredictionTree(originalStringTree, cn, currentLeafNumber); // predictionTree = buildPredictionTree(originalStringTree, cn, Integer.parseInt(currentLeafNumber)); sourcetreeno++; //System.out.println(predictionTree.print()); ArrayList<PredictionStringTree> newlist = new ArrayList<PredictionStringTree>(); ArrayList<PredictionStringTree> removelist = new ArrayList<PredictionStringTree>(); // combine prediction trees (trees can always be combined! I think.) for (PredictionStringTree otherTree : localPredictedTrees) { PredictionStringTree ct = combinePredTrees(predictionTree, otherTree, predictionTreeNodeMap.values(), translations);//.copyPred(); if (ct != null) { removelist.add(otherTree); removelist.add(predictionTree); newlist.remove(predictionTree); newlist.add(ct); predictionTree = ct; } } if (predictionTree.isAuxtree()) { localPredictedTrees.add(predictionTree); } else { localPredictedTrees.add(0, predictionTree); } localPredictedTrees.removeAll(removelist); // might add too much here. 
for (PredictionStringTree npt : newlist) { if (predictionTree.isAuxtree()) { localPredictedTrees.add(npt); } else { localPredictedTrees.add(0, npt); } } } else { predictionTree = buildPredictionTree(originalStringTree, cn, currentLeafNumber); // predictionTree = buildPredictionTree(originalStringTree, cn, Integer.parseInt(currentLeafNumber)); unhelpfulpredtrees.add(predictionTree); } } if (localPredictedTrees.isEmpty() & unhelpfulpredtrees.size() > 0) { PredictionStringTree first = null; int min = Integer.MAX_VALUE; // String others = ""; for (PredictionStringTree pt : unhelpfulpredtrees) { if (pt.isAuxtree()) { continue; } int anchorpos = pt.getAnchorList().get(0); // int anchorpos = Integer.parseInt(pt.getAnchorList().get(0)); if (anchorpos < min) { min = anchorpos; if (first != null) // others += " @ "+first.toString(); { first = pt; } } // else{ // if (first !=null) // others += " @ "+first.toString(); // } } if (first != null) { // System.out.println(first+"\t"+others); localPredictedTrees.add(first); } } //*/ if (localPredictedTrees.size() > 1) { PredictionStringTree predictionTree = localPredictedTrees.get(0); ArrayList<PredictionStringTree> newlist = new ArrayList<PredictionStringTree>(); ArrayList<PredictionStringTree> removelist = new ArrayList<PredictionStringTree>(); for (int i = 1; i < localPredictedTrees.size(); i++) { PredictionStringTree otherTree = localPredictedTrees.get(i); PredictionStringTree ct = combinePredTrees(predictionTree, otherTree, predictionTreeNodeMap.values(), translations);//.copyPred(); if (ct != null) { removelist.add(otherTree); removelist.add(predictionTree); newlist.remove(predictionTree); newlist.add(ct); predictionTree = ct; } } if (predictionTree.isAuxtree()) { localPredictedTrees.add(predictionTree); } else { localPredictedTrees.add(0, predictionTree); } localPredictedTrees.removeAll(removelist); // might add too much here. 
for (PredictionStringTree npt : newlist) { if (predictionTree.isAuxtree()) { localPredictedTrees.add(npt); } else { localPredictedTrees.add(0, npt); } } } for (PredictionStringTree pst : localPredictedTrees) { pst.cutTail(predictionTreeNodeMap.values()); } if (localPredictedTrees.size() > 1) { LogInfo.error("unaccounted case! combination of prediction trees; number of trees: " + sourcetreeno + " to connect leaf " + currentLeafNumber); } // noOfSources.put(sourcetreeno + "", noOfSources.get(sourcetreeno + "").intValue() + 1); noOfSources.put(sourcetreeno, noOfSources.get(sourcetreeno) + 1); return localPredictedTrees; }
From source file:pltag.corpus.TagCorpus.java
/** * /* ww w . jav a2 s . co m*/ * @param elementaryTrees * @param sentenceTree * @param sentenceWordLex2 * @param traces * @param fillers * @return */ private List<ElementaryStringTree> traceAndFillerTreatment(List<ElementaryStringTree> elementaryTrees, CompositeStringTree sentenceTree, ElementaryStringTree[] sentenceWordLex2, MultiValueMap<String, ElementaryStringTree> traces, HashMap<String, ElementaryStringTree> fillers) { for (String traceId : traces.keySet()) { for (ElementaryStringTree traceTree : traces.getCollection(traceId)) { // ElementaryStringTree traceTree = traces.get(traceId); ElementaryStringTree tracefather = null; if (traceTree.getTraceFather() != null) { //String rootNodeId = tracetree.getTraceFather().getRoot().getNodeID(); //String leafOrigin = sentenceTree.getLowestOrigin(rootNodeId, sentenceTree.originDown); Integer leafOrigin = traceTree.getTraceFather().getMainLeafNode().getLeafNo(); Integer leafOrigincoord = sentenceTree.getLowestOrigin(traceTree.getRoot(), sentenceTree.originUp); tracefather = sentenceWordLex2[leafOrigin]; if (leafOrigincoord != null) { ElementaryStringTree tracefathercoord = sentenceWordLex2[leafOrigincoord]; if (!leafOrigin.equals(leafOrigincoord)) { if (tracefather.getRoot() == traceTree.getTraceFather().getRoot().getNodeID()) // if (tracefather.getRoot().equals(tracetree.getTraceFather().getRoot().getNodeID())) { } else if (tracefathercoord.getRoot() == traceTree.getTraceFather().getRoot() .getNodeID()) // else if (tracefathercoord.getRoot().equals(tracetree.getTraceFather().getRoot().getNodeID())) { tracefather = tracefathercoord; } else { LogInfo.error( "stuck for ideas for where to stick trace TagCorpus:traceAndFillerTreatment."); } } } } ElementaryStringTree fillertree = fillers.get(traceId); if (fillertree == null) { return elementaryTrees; } if (fillertree.isAuxtree() && traceTree.isAuxtree() && tracefather != null && tracefather.categories[traceTree.getRoot()] != null && 
tracefather.categories[fillertree.getRoot()] != null) // if (fillertree.isAuxtree() && tracetree.isAuxtree() && tracefather != null // && tracefather.categories[Integer.parseInt(tracetree.getRoot())] != null // && tracefather.categories[Integer.parseInt(fillertree.getRoot())] != null) { fillertree.setMCTAG(traceTree); traceTree.setMCTAG(fillertree); } //else if (fillertree.getSubstNodes().contains(tracefather) && !tracetree.isAuxtree()){ //MCTAG //} else if (tracefather != null) { if (traceTree.isAuxtree()) { Integer rootid = traceTree.getRoot(); Integer traceFatherOldRootDownIndex = tracefather.getLowestOrigin(rootid, tracefather.originDown); Integer footid = traceTree.getFoot(); Integer traceOldFootUpIndex = traceTree.getLowestOrigin(footid, traceTree.originUp); tracefather.integrate(traceTree); while (tracefather.originDown.containsValue(rootid, traceFatherOldRootDownIndex)) { tracefather.originDown.remove(rootid, traceFatherOldRootDownIndex); } while (tracefather.originUp.containsValue(footid, traceFatherOldRootDownIndex)) { tracefather.originUp.remove(footid, traceOldFootUpIndex); } } else { tracefather.integrate(traceTree); } elementaryTrees.remove(traceTree); } else if (traceTree.getNodeType(fillertree.getRoot()) == TagNodeType.subst) { fillertree.integrate(traceTree); //tracetree.integrate(fillertree); elementaryTrees.remove(traceTree); traceTree = fillertree; } else if (fillertree.isAuxtree()) { fillertree.integrate(traceTree); elementaryTrees.remove(traceTree); sentenceWordLex2[traceTree.getLowestOrigin(traceTree.root, traceTree.originDown)] = fillertree; // sentenceWordLex2[Integer.parseInt(tracetree.getLowestOrigin(tracetree.root, tracetree.originDown))] = fillertree; } } } //for each trace id return elementaryTrees; }
From source file:pltag.parser.Lexicon.java
/** * Converts a MultiValueMap with String values to one with StringTree values. * @param treetype //from www .j a v a 2 s. c o m * * @param MultiValueMap lexString * @return MultiValueMap lexTree */ @SuppressWarnings("unchecked") private MultiValueMap makeLexTrees(MultiValueMap lexString, String treetype) { MultiValueMap<String, ElementaryStringTree> lexTree = new MultiValueMap(); Set<String> keys = lexString.keySet(); for (String key : keys) { Collection<String> values = lexString.getCollection(key); HashMap<String, ElementaryStringTree> treelist = new HashMap<String, ElementaryStringTree>(); for (String treeString : values) { ElementaryStringTree tree = makeToStringTreeOld(treeString, treetype); if (tree == null) { continue; } String POSword = tree.getPOStagged(tree.getRoot()).trim(); if (key.equals("prediction: ")) { POSword = key; } // for lexentries for "put up" etc, add three times into Map: as "put up", "put" and "up". String[] words = POSword.split("\t"); ElementaryStringTree sametree = null; if (!treelist.containsKey(POSword + "@@" + tree.toString())) { lexTree.put(POSword, tree); } treelist.put(POSword + "@@" + tree.toString(), tree); if (words.length > 1) { for (String word : words) { if (sametree == null) { lexTree.put(word, tree); } } } } } return lexTree; }
From source file:pltag.parser.Lexicon.java
/** * Converts a MultiValueMap with String values to one with StringTree values. * @param lexString/*w ww. ja v a2 s .c om*/ * @param treetype * @return MultiValueMap lexTree */ @SuppressWarnings("unchecked") protected MultiValueMap makeLexStrings(MultiValueMap lexString, String treetype) { MultiValueMap lexTree = new MultiValueMap(); Set<String> keys = lexString.keySet(); HashSet<String> treelist = new HashSet<String>(); for (String key : keys) { Collection<String> values = lexString.getCollection(key); for (String treeString : values) { //if (tree == null) continue; //need to deal with errors at different point. //need to extract POS tag from treestring and unlexicalize tree. String posWord = key; String tree = treeString; if (!key.equals("prediction: ")) { if (opts.goldPosTags || opts.treeFamilies) {//pos and word given // if (this.getClass() == UnkLexicon.class) // FIX: Unnecessary check // { // posWord = UnkLexicon.getPosFromTreeString(treeString, key); // } // else // { // posWord = getPosFromTreeString(treeString, key); // } posWord = getPosFromTreeString(treeString, key); if (opts.posOnly) {//only pos tag given String[] words = posWord.split("\t"); posWord = ""; for (String w : words) { if (w.contains("*") || w.equals("0")) { continue; } else { posWord += w.substring(0, w.indexOf(" ")) + "\t"; } } posWord = posWord.trim(); } } else {// only word posWord = Utils.getCutOffCorrectedMainLex(key.toLowerCase(), listOfFreqWords, opts.train, opts.fullLex); if (key.contains(" ")) { posWord = posWord.replace(" ", "\t"); } } tree = makeUnlex(treeString, key); } if (noOfTrees.containsKey(treeString) && posWord.contains(" ") && (opts.goldPosTags || opts.treeFamilies) && !opts.posOnly) { String pos = posWord.substring(0, posWord.indexOf(" ")); String puretree = pos + "\t" + tree.substring(2); if (noOfTrees.containsKey(puretree)) { noOfTrees.put(puretree, noOfTrees.get(puretree) + noOfTrees.get(treeString)); } else { noOfTrees.put(puretree, noOfTrees.get(treeString)); } 
noOfTrees.remove(treeString); } // for lexentries for "put up" etc, add three times into Map: as "put up", "put" and "up". String[] words = posWord.split("\t"); ElementaryStringTree sametree = null; if (!treelist.contains(posWord + "@@" + tree) && words.length == 1) { String lc = posWord.toLowerCase(); // String wlc = lc.substring(lc.indexOf(" ") + 1); if (!opts.goldPosTags && opts.treeFamilies && !lc.equals("prediction: ")) { lexTree.put(wlc, tree); } else { lexTree.put(lc, tree); } if (!wordPosMap.containsValue(wlc, lc)) { wordPosMap.put(wlc, lc); } trees.put(tree.substring(tree.indexOf("\t") + 1), lc); } treelist.add(posWord + "@@" + tree); if (words.length > 1) { for (String word : words) { if (sametree == null && !word.startsWith(" *T*") && !word.startsWith(" *?*") && !word.startsWith(" *-") && !word.equals(" *") && !word.equals(" 0")) { String lc = word.toLowerCase(); String wlc = lc.substring(lc.indexOf(" ") + 1); if (!opts.goldPosTags && opts.treeFamilies) { lexTree.put(wlc, tree); } else { lexTree.put(lc, tree); } if (!wordPosMap.containsValue(wlc, lc)) { wordPosMap.put(wlc, lc); } trees.put(tree.substring(tree.indexOf("\t") + 1), lc); } // if } // for } // if } // for (values) } // for (keys) return lexTree; }
From source file:pltag.parser.semantics.classifier.ArgumentClassifier.java
public void extractFeatures(Map<Predicate, Proposition> predProps, Map<Predicate, Proposition> goldProps, Lexicon lexicon, boolean train) { MultiValueMap<Predicate, Argument> identifiedArcs = new MultiValueMap<Predicate, Argument>(); // Get things in common (Arguments) Iterator<Map.Entry<Predicate, Proposition>> iterProps = goldProps.entrySet().iterator(); while (iterProps.hasNext()) { Map.Entry<Predicate, Proposition> goldEntry = iterProps.next(); Proposition predProp = predProps.get(goldEntry.getKey()); if (predProp == null) // if we didn't find the correct predicate {// w ww . j ava 2 s.co m Predicate predPredIgnoreCase = Predicate.getPredicateIgnoreSense(goldEntry.getKey(), predProps.keySet()); if (predPredIgnoreCase != null) // found correct predicate with wrong sense { predProp = predProps.get(predPredIgnoreCase); } } if (predProp != null) // continue with identifying correct arguments { List<Argument> goldArgs = goldEntry.getValue().getArguments(); List<Argument> predArgs = predProp.getArguments(); Iterator<Argument> iterGoldArgs = goldArgs.iterator(); while (iterGoldArgs.hasNext()) { Argument goldArg = iterGoldArgs.next(); Iterator<Argument> iterPredArgs = predArgs.iterator(); while (iterPredArgs.hasNext()) // True Positive (gold role label, +1 identifier label) { Argument predArg = iterPredArgs.next(); if (predArg.getTimestamp() == goldArg.getTimestamp()) { addFeatureVecs(predProp.getPredicate(), predArg, goldArg.getRole(), true, identifiedArcs, train); iterGoldArgs.remove(); iterPredArgs.remove(); break; } } } // while if (predArgs.isEmpty()) predProps.remove(goldEntry.getKey()); if (goldArgs.isEmpty()) iterProps.remove(); } // if } // while // Mark the differences between the two maps (entries left) for (Proposition predProp : predProps.values()) { for (Argument predArg : predProp.getArguments()) // False Positive { addFeatureVecs(predProp.getPredicate(), predArg, null, false, identifiedArcs, train); } } for (Proposition goldProp : goldProps.values()) { 
for (Argument goldArg : goldProp.getArguments()) // False Negatives (inaccurate) { Predicate goldPred = goldProp.getPredicate(); // try to find a predicate we've already correctly predicted earlier Predicate predPred = Predicate.getPredicateIgnoreSense(goldPred, identifiedArcs.keySet()); if (predPred != null) { // try to find argument in list of identified arcs (attached to a wrong predicate) boolean foundInIdentified = false; for (Object predObj : identifiedArcs.values()) { Argument predArg = (Argument) predObj; if (predArg.getTimestamp() == goldArg.getTimestamp()) { addFeatureVecs(predPred, predArg, goldArg.getRole(), true, identifiedArcs, train); foundInIdentified = true; break; } // if } // for if (!foundInIdentified) // reconstruct argument (fairly inaccurate) from lexicon and info from rest predictions { Argument reconstructedArgument = reconstructArgument(lexicon, predPred, goldArg, identifiedArcs); if (reconstructedArgument != null) addFeatureVecs(predPred, reconstructedArgument, goldArg.getRole(), true, identifiedArcs, train); } } // if } // for } // for }
From source file:pltag.parser.semantics.DepTreeState.java
/** * /*from w ww .j a v a 2 s . com*/ // * @param coveredNodes map of verification node ids to (verified) shadow tree node ids * @param elemTreeState * @param coveredNodes map of (verified) shadow tree node ids to verification node ids * @param tree * @param ipIdOffset * @param offsetNodeIdsOfShadowTree * @param shadowTreeRoot * @param words * @param origPosTags * @param operation the parsing operation applied (adjunction, substitution, verification or initial) * @param timestamp */ public void updateDependencies(TreeState elemTreeState, DualHashBidiMap<Integer, Integer> coveredNodes, ElementaryStringTree tree, short ipIdOffset, DualHashBidiMap<Short, Short> offsetNodeIdsOfShadowTree, Node shadowTreeRoot, String[] words, String[] origPosTags, String operation, int timestamp) { retainInfoFromTreeHeuristics(elemTreeState, shadowTreeRoot, tree, ipIdOffset, false, timestamp, true, coveredNodes, offsetNodeIdsOfShadowTree); DepNode anchorNode = getAnchorNode(tree, ipIdOffset, timestamp); Set<Integer> processedNodes = new HashSet<>(); // First check if the verifying tree has roles if (tree.hasRoles()) { MultiValueMap<Integer, Role> rolesPerNode = getRolesPerNode(tree, ipIdOffset); // Browse through all candidate integration points for (Integer ipCandidate : rolesPerNode.keySet()) { processedNodes.add(ipCandidate); // find the ones that are part of the verification tree, hence exist on the prefix tree as part of shadow trees Integer shadowNodeId = coveredNodes.getKey(ipCandidate - ipIdOffset); if (shadowNodeId != null) { // find the arc with the partially complete dependency Short offsetShadowId = offsetNodeIdsOfShadowTree.get(shadowNodeId.shortValue()); if (offsetShadowId != null) { Collection<DependencyArc> arcs = dependencies .getDependenciesByIntegPoint(new DepNode(offsetShadowId.shortValue(), timestamp)); if (arcs != null) { // for(DependencyArc arc : arcs) Iterator<DependencyArc> iterator = arcs.iterator(); while (iterator.hasNext()) { DependencyArc arc = 
iterator.next(); if (arc.isIncomplete()) { if (tree.isRelation() && arc.isRelationIncomplete()) // avoid filling in an already complete relation entry { filterRoles(arc, rolesPerNode.getCollection(ipCandidate)); setRelation(arc, anchorNode, iterator, words, origPosTags, operation, true, false); // possibly created a complete arc, so we can identify and disambiguate role labels discriminatively boolean keepArc = identifyArcAndDisambiguateRoles(model, arc, words, origPosTags); if (!keepArc) removeArcSafe(arc, arc.getIntegrationPoint(), iterator); } // else if(!tree.isRelation() && arc.isArgumentIncomplete()) // removed restriction of above if statement: a relation can be an argument to another relation else if (arc.isArgumentIncomplete()) { filterRoles(arc, rolesPerNode.getCollection(ipCandidate)); if (applyConllHeuristics) // Apply infinitive marker heuristic, if necessary applyInfinitiveMarkerHeuristic(arc, anchorNode, iterator, words, origPosTags, operation, true, false); else { // Apply PP heuristic, if neceessary boolean keepArc = applyPPArgumentHeuristic(arc, tree, anchorNode, ipIdOffset, words, origPosTags, operation, true, false); // Apply infinitive marker heuristic, if necessary if (keepArc) applyInfinitiveMarkerHeuristic(arc, anchorNode, iterator, words, origPosTags, operation, true, false); } // setArgument(arc, anchorNode); } } // if } //for } // if } // if else // the shadow sub-tree of the prefix tree doesn't have a role. Proceed with the normal addition of a new dependency { addDependency(new DepNode(ipCandidate, timestamp), tree, (short) (ipCandidate.shortValue() - ipIdOffset), ipIdOffset, rolesPerNode, shadowTreeRoot, false, words, origPosTags, operation, true, timestamp); // System.out.println("Check! If we never end up here, consider dropping the if statement above"); } } // if // integration points on the elementary tree that are not verifying a shadow sub-tree on the prefix tree. 
// we need to consider it as an incomplete dependency else { addIncompleteDependency(ipCandidate, tree, ipIdOffset, false, tree.isRelation(), rolesPerNode.getCollection(ipCandidate), null, words, origPosTags, operation, true, timestamp); } } // for all candidate ips } // if hasRoles // Process the rest of the covered nodes in the shadow subtree of the prefix tree that have a role, hence a dependency arc already observed for (Entry<Integer, Integer> e : coveredNodes.entrySet()) { DependencyArc arc; Short offsetShadowId = offsetNodeIdsOfShadowTree.get(e.getValue().shortValue()); if (offsetShadowId != null && !processedNodes.contains(e.getValue() + ipIdOffset)) { Collection<DependencyArc> arcsByIp = dependencies .getDependenciesByIntegPoint(new DepNode(offsetShadowId.shortValue(), timestamp)); // if((arc = dependencies.pollArcWithShadowArg(new DepNode(offsetShadowId.shortValue(), timestamp))) != null) // if((arc = dependencies.getArcWithShadowArg(new DepNode(offsetShadowId.shortValue(), timestamp))) != null) // { // updateDependenciesUnderCoveredNode(tree, coveredNodes, offsetNodeIdsOfShadowTree, timestamp, anchorNode, arc, null, origPosTags); // } // else if(arcsByIp != null) if (arcsByIp != null) { // for(DependencyArc arcByIp : arcsByIp) Iterator<DependencyArc> iterator = arcsByIp.iterator(); while (iterator.hasNext()) { DependencyArc arcByIp = iterator.next(); updateDependenciesUnderCoveredNode(tree, coveredNodes, offsetNodeIdsOfShadowTree, timestamp, anchorNode, arcByIp, iterator, words, origPosTags); } } } } if (tree.isRelation()) { addHangingRelation(tree, ipIdOffset, words, origPosTags, operation, true, false, timestamp); } dependencies.postProcessArcs(operation, true, false); }
From source file:pltag.parser.semantics.DepTreeState.java
private void addIncompleteDependencies(ElementaryStringTree tree, short offset, boolean isShadowTree, boolean isRelation, MultiValueMap<Integer, Role> rolesPerNode, Node prefixTreeNode, String[] words, String[] origPosTags, String operation, boolean direction, int timestamp)//, Set<DepNode> excludeIntegPoints) { for (Integer nodeId : rolesPerNode.keySet()) { addIncompleteDependency(nodeId, tree, offset, isShadowTree, isRelation, rolesPerNode.getCollection(nodeId), prefixTreeNode, words, origPosTags, operation, direction, timestamp);/*w w w .j a va 2 s . co m*/ } }
From source file:pltag.parser.semantics.SemanticLexicon.java
/** * Converts a MultiValueMap with String values to one with StringTree values. * @param lexEntriesWithOrigWordsAsKeys//from www. j av a 2s .c om * @param treetype * @return MultiValueMap lexTree */ @SuppressWarnings("unchecked") protected MultiValueMap makeLexTrees(MultiValueMap<String, ?> lexEntriesWithOrigWordsAsKeys, String treetype) { MultiValueMap<String, LexiconEntryWithRoles> unlexEntriesWithNormWordsAsKeys = new MultiValueMap(); // HashSet<String> unlexTreeList = new HashSet<String>(); Map<String, String> origWordsToNormWordsMap = new HashMap<String, String>(); for (String key : lexEntriesWithOrigWordsAsKeys.keySet()) { for (LexiconEntryWithRoles e : (Collection<LexiconEntryWithRoles>) lexEntriesWithOrigWordsAsKeys .getCollection(key)) { //need to extract POS tag from treestring and unlexicalize tree. LexiconEntryWithRoles entry = new LexiconEntryWithRoles(e); String posWord = key; // String treeString = entry.getLexEntry(); String treeString = entry.toString(); String unlexTree = treeString; if (!key.equals("prediction: ")) { posWord = processKey(treeString, key); unlexTree = makeUnlex(treeString, key); // entry.setUnlexEntry(unlexTree.substring(unlexTree.indexOf("\t") + 1)); } entry.setUnlexEntry(unlexTree.substring(unlexTree.indexOf("\t") + 1)); updateNoOfTreesMap(treeString, posWord, unlexTree); String[] words = posWord.split("\t"); // if (!unlexTreeList.contains(posWord + "@@" + unlexTree) && words.length == 1) if (words.length == 1) { int tempIndex = key.indexOf(" "); String origWord = tempIndex != -1 ? 
key.substring(0, tempIndex) : key; // key with original case String posWordLowerCase = posWord.toLowerCase(); String wordLowerCase = posWordLowerCase.substring(posWordLowerCase.indexOf(" ") + 1); if (!opts.goldPosTags && opts.treeFamilies && !posWordLowerCase.equals("prediction: ")) { // lexTree.put(wlc, unlexTree); updateEntryWithRolesMap(unlexEntriesWithNormWordsAsKeys, wordLowerCase, entry); // origWordsToNormWordsMap.put(posWord.substring(posWord.indexOf(" ") + 1), wordLowerCase); origWordsToNormWordsMap.put(origWord, wordLowerCase); } else { // lexTree.put(lc, unlexTree); updateEntryWithRolesMap(unlexEntriesWithNormWordsAsKeys, posWordLowerCase, entry); origWordsToNormWordsMap.put(posWord.substring(0, posWord.indexOf(" ") + 1) + origWord, posWordLowerCase); } if (!wordPosMap.containsValue(wordLowerCase, posWordLowerCase)) { wordPosMap.put(wordLowerCase, posWordLowerCase); } trees.put(unlexTree.substring(unlexTree.indexOf("\t") + 1), posWordLowerCase); } // if // unlexTreeList.add(posWord + "@@" + unlexTree); // for lexentries for "put up" etc, add three times into Map: as "put up", "put" and "up". 
if (words.length > 1) { for (String word : words) { if (!(word.startsWith(" *T*") || word.startsWith(" *?*") || word.startsWith(" *-") || word.equals(" *") || word.equals(" 0"))) { String lc = word.toLowerCase(); String wlc = lc.substring(lc.indexOf(" ") + 1); if (!opts.goldPosTags && opts.treeFamilies) { // lexTree.put(wlc, unlexTree); updateEntryWithRolesMap(unlexEntriesWithNormWordsAsKeys, wlc, entry); origWordsToNormWordsMap.put(word.substring(word.indexOf(" ") + 1), wlc); } else { // lexTree.put(lc, unlexTree); updateEntryWithRolesMap(unlexEntriesWithNormWordsAsKeys, lc, entry); origWordsToNormWordsMap.put(word, lc); // TODO: FIX Not handling potential uppercase partial lexemes correctly } if (!wordPosMap.containsValue(wlc, lc)) { wordPosMap.put(wlc, lc); } trees.put(unlexTree.substring(unlexTree.indexOf("\t") + 1), lc); } // if } // for } // if } // for (values) } // for (keys) // MultiValueMap<String, ElementaryStringTree> stringTreeEntriesWithOrigWordsAsKeys = new MultiValueMap<String, ElementaryStringTree>(); // for (Map.Entry<String, String> e : origWordsToNormWordsMap.entrySet()) // { // Collection<EntryWithRoles> unlexEntries = unlexEntriesWithNormWordsAsKeys.getCollection(e.getValue()); // // we need to convert to StringTree instances as well // for(LexiconEntryWithRoles entry : unlexEntries) // { // String unlexTreeString = entry.toString(); // String treeString = insertLex(e.getKey(), unlexTreeString); // ElementaryStringTree tree = makeToStringTree(treeString, unlexTreeString, entry.getUnlexEntriesWithSemantics()); // stringTreeEntriesWithOrigWordsAsKeys.put(e.getKey(), tree); // } // } // return stringTreeEntriesWithOrigWordsAsKeys; //printEntriesWithComplexRolesStats(unlexEntriesWithNormWordsAsKeys); return unlexEntriesWithNormWordsAsKeys; }