List of usage examples for com.google.common.collect MinMaxPriorityQueue size
int size()
To view the source code for com.google.common.collect MinMaxPriorityQueue size, click the Source Link on each example.
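Before the project examples, here is a minimal standalone sketch of the method itself: size() reports how many elements are currently in the queue, and shrinks as elements are removed from either end (the class name and values below are made up for illustration).

import com.google.common.collect.MinMaxPriorityQueue;

public class SizeExample {
    public static void main(String[] args) {
        MinMaxPriorityQueue<Integer> queue = MinMaxPriorityQueue.create();
        queue.add(5);
        queue.add(1);
        queue.add(9);
        System.out.println(queue.size());   // 3
        queue.pollFirst();                   // removes the smallest element (1)
        queue.pollLast();                    // removes the largest element (9)
        System.out.println(queue.size());   // 1
    }
}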
From source file:org.commoncrawl.mapred.pipelineV3.domainmeta.blogs.feedurlid.FeedUrlIdStep.java
public static ArrayList<URLCandidate> drainToArrayList(MinMaxPriorityQueue<URLCandidate> queue) {
    int queueSize = queue.size();
    ArrayList<URLCandidate> list = new ArrayList<URLCandidate>(queueSize);
    for (int i = 0; i < queueSize; ++i) {
        list.add(queue.removeFirst());
    }
    return list;
}
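Note that queue.size() is read once into queueSize before draining: every removeFirst() call shrinks the queue, so using queue.size() directly as the loop bound would stop after only half the elements. A hypothetical call site might look like the fragment below (URLCandidate comes from the same project; it is assumed here to be Comparable or to have a comparator supplied when the queue is built).

MinMaxPriorityQueue<URLCandidate> best = MinMaxPriorityQueue.create();
// ... add candidates while crawling ...
ArrayList<URLCandidate> inOrder = FeedUrlIdStep.drainToArrayList(best); // least element first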
From source file:kungfu.algdesign.ds.MovingMedian.java
public static void calculate(Queue<Integer> data, Queue<Integer> medians) {
    MinMaxPriorityQueue<Integer> minHeap = MinMaxPriorityQueue.create();
    MinMaxPriorityQueue<Integer> maxHeap = MinMaxPriorityQueue.create();

    minHeap.add(Integer.MIN_VALUE);
    maxHeap.add(Integer.MAX_VALUE);

    Integer item = null;
    Integer median = null;

    while ((item = data.poll()) != null) {
        if (median == null) {
            maxHeap.add(item);
        } else if (item >= median) {
            maxHeap.add(item);
        } else {
            minHeap.add(item);
        }

        if (maxHeap.size() - minHeap.size() == 2) {
            minHeap.add(maxHeap.pollFirst());
        } else if (minHeap.size() - maxHeap.size() == 2) {
            maxHeap.add(minHeap.pollLast());
        }

        if (minHeap.size() == maxHeap.size() || minHeap.size() > maxHeap.size()) {
            median = minHeap.peekLast();
        } else {
            median = maxHeap.peekFirst();
        }

        medians.add(median);
    }
}
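Here two MinMaxPriorityQueues act as the lower and upper halves of the stream, and the size() comparisons keep them within one element of each other so the running median can be read from the boundary. A hypothetical driver for the method above (MovingMedian comes from the source file listed; everything else is standard JDK):

import java.util.Arrays;
import java.util.LinkedList;
import java.util.Queue;

import kungfu.algdesign.ds.MovingMedian;

public class MovingMedianDriver {
    public static void main(String[] args) {
        Queue<Integer> data = new LinkedList<>(Arrays.asList(5, 15, 1, 3, 8));
        Queue<Integer> medians = new LinkedList<>();

        MovingMedian.calculate(data, medians);

        // One running median is appended per input element consumed.
        System.out.println(medians);
    }
}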
From source file:edu.brandeis.wisedb.scheduler.BestNFirstGraphSearch.java
@Override
public List<Action> schedule(Set<ModelQuery> toSched) {
    FullGraphState first = new FullGraphState(new TreeSet<ModelVM>(), toSched, sla, qtp);
    MinMaxPriorityQueue<StateCost> frontier = MinMaxPriorityQueue.create();
    frontier.add(new StateCost(first, 0, null, null));

    while (!frontier.isEmpty()) {
        log.fine("Frontier size: " + frontier.size());

        PriorityQueue<Action> pq = new PriorityQueue<Action>(new ActionComparator());
        StateCost next = frontier.poll();

        if (next.s.isGoalState()) {
            // we're done
            List<Action> toR = new LinkedList<Action>();
            StateCost last = next;
            while (last.action != null) {
                toR.add(0, last.action);
                last = last.prev;
            }
            log.fine("Reached goal state with following actions: " + toR);
            return toR;
        }

        for (Action a : next.s.getPossibleActions()) {
            int cost = 0;
            FullGraphState nextState = next.s.getNewStateForAction(a);
            cost += h.predictCostToEnd(nextState);
            //cost += nextState.getExecutionCost();
            a.computedCost = cost;
            log.finer("Added action " + a + " to the frontier");
            pq.add(a);
        }

        if (pq.isEmpty()) {
            log.severe("There was no selectable action for state: " + next);
            return null;
        }

        for (int i = 0; i < toTry; i++) {
            Action nextBest = pq.poll();
            if (nextBest == null) {
                log.fine("Unable to get " + (i + 1) + "th action for state " + next);
                break;
            }
            FullGraphState c = next.s.getNewStateForAction(nextBest);
            StateCost candidate = new StateCost(c, c.getExecutionCost(), nextBest, next);
            frontier.add(candidate);
        }

        while (frontier.size() > maxFrontierSize) {
            frontier.removeLast();
        }
    }

    return null;
}
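The size()-related piece here is the final loop: while the frontier exceeds maxFrontierSize, removeLast() discards the highest-cost state, giving a beam-search-style bound. A minimal standalone sketch of that capping idiom (all names and values below are made up):

import com.google.common.collect.MinMaxPriorityQueue;

public class FrontierCapSketch {
    public static void main(String[] args) {
        MinMaxPriorityQueue<Integer> frontier = MinMaxPriorityQueue.create();
        int maxFrontierSize = 3; // hypothetical cap

        for (int cost : new int[] { 7, 2, 9, 4, 1 }) {
            frontier.add(cost);
            // Cap the frontier: while it is too large, drop the worst (largest) entry.
            while (frontier.size() > maxFrontierSize) {
                frontier.removeLast();
            }
        }

        System.out.println(frontier.size()); // 3 -- only the cheapest entries survive
    }
}

The same bound can also be declared up front with MinMaxPriorityQueue.maximumSize(n).create(), which several of the examples below use.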
From source file:com.griddynamics.jagger.diagnostics.thread.sampling.RuntimeGraph.java
private List<MethodProfile> getHotSpots(int maxSpots, Comparator<MethodStatistics> comparator) {
    List<MethodProfile> result = Lists.newArrayList();

    MinMaxPriorityQueue<MethodStatistics> hotSpots = MinMaxPriorityQueue.orderedBy(comparator)
            .maximumSize(maxSpots).create(graph.getVertices());

    int queueSize = hotSpots.size();
    for (int i = 0; i < queueSize; i++) {
        result.add(assembleProfile(hotSpots.removeFirst()));
    }

    return result;
}
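This is the generic "top-N" idiom that also recurs in the Lucene-based feature extractors below: orderedBy(comparator).maximumSize(n).create(...) keeps only the n best elements, and size() (which can be less than n if fewer elements were offered) bounds the drain loop. A minimal sketch with plain strings (class name and data invented for illustration):

import java.util.Comparator;

import com.google.common.collect.MinMaxPriorityQueue;

public class TopNSketch {
    public static void main(String[] args) {
        // Keep only the 2 longest strings; shorter ones are evicted automatically.
        MinMaxPriorityQueue<String> topN = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingInt(String::length).reversed())
                .maximumSize(2)
                .create();

        topN.add("a");
        topN.add("abcd");
        topN.add("ab");
        topN.add("abc");

        int size = topN.size(); // 2, never more than maximumSize
        for (int i = 0; i < size; i++) {
            System.out.println(topN.removeFirst()); // "abcd", then "abc"
        }
    }
}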
From source file:de.tudarmstadt.ukp.dkpro.tc.features.ngram.base.LuceneFeatureExtractorBase.java
@Override
protected FrequencyDistribution<String> getTopNgrams() throws ResourceInitializationException {
    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();

    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(getTopN()).create();
    long ngramVocabularySize = 0;

    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(getFieldName());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    if (passesScreening(term)) {
                        topN.add(new TermFreqTuple(term, freq));
                        ngramVocabularySize += freq;
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        long absCount = tuple.getFreq();
        double relFrequency = ((double) absCount) / ngramVocabularySize;

        if (relFrequency >= ngramFreqThreshold)
            topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
    }

    getLogger().log(Level.INFO, "+++ SELECTING THE " + topNGrams.getB() + " MOST FREQUENT NGRAMS");

    return topNGrams;
}
From source file:org.dkpro.tc.features.ngram.base.LuceneFeatureExtractorBase.java
@Override
protected FrequencyDistribution<String> getTopNgrams() throws ResourceInitializationException {
    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();

    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(getTopN()).create();
    long ngramVocabularySize = 0;

    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(getFieldName());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    if (passesScreening(term)) {
                        topN.add(new TermFreqTuple(term, freq));
                        ngramVocabularySize += freq;
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        long absCount = tuple.getFreq();
        double relFrequency = ((double) absCount) / ngramVocabularySize;

        if (relFrequency >= ngramFreqThreshold) {
            topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
        }
    }

    logSelectionProcess(topNGrams.getB());

    return topNGrams;
}
From source file:org.dkpro.tc.features.pair.core.ngram.LuceneNGramPFE.java
private FrequencyDistribution<String> getTopNgrams(int topNgramThreshold, String fieldName)
        throws ResourceInitializationException {
    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();

    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(topNgramThreshold).create();

    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(fieldName);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    topN.add(new TermFreqTuple(term, freq));
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        // System.out.println(tuple.getTerm() + " - " + tuple.getFreq());
        topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
    }

    return topNGrams;
}
From source file:co.cask.cdap.common.zookeeper.coordination.BalancedAssignmentStrategy.java
@Override
public <T> void assign(ResourceRequirement requirement, Set<T> handlers, ResourceAssigner<T> assigner) {
    MinMaxPriorityQueue<HandlerSize<T>> handlerQueue = MinMaxPriorityQueue.create();
    Multimap<T, PartitionReplica> assignments = assigner.get();

    // Compute for each handler how many partition replicas are already assigned
    for (T handler : handlers) {
        handlerQueue.add(new HandlerSize<>(handler, assignments));
    }

    // For each unassigned partition replica in the requirement, assign it to the handler
    // with the fewest partition replicas assigned. It's just a heuristic to make the later
    // balance phase do less work.
    int totalPartitionReplica = 0;
    for (ResourceRequirement.Partition partition : requirement.getPartitions()) {
        totalPartitionReplica += partition.getReplicas();
        for (int replica = 0; replica < partition.getReplicas(); replica++) {
            if (assigner.getHandler(partition.getName(), replica) == null) {
                HandlerSize<T> handlerSize = handlerQueue.removeFirst();
                assigner.set(handlerSize.getHandler(), partition.getName(), replica);

                // After assignment the size has changed, hence put it back into the queue
                // for the next round.
                handlerQueue.add(handlerSize);
            }
        }
    }

    // Balance
    if (totalPartitionReplica > handlers.size()) {
        balance(handlerQueue, assigner, 1);
    } else {
        // Evenly distribute to the first N handlers.
        while (handlerQueue.size() > totalPartitionReplica) {
            // If the number of handlers is > total partition replicas,
            // there must be at least 1 handler that has nothing assigned.
            handlerQueue.removeFirst();
        }
        // Balance evenly; there should be no difference in the number of partition replicas
        // assigned to each handler.
        balance(handlerQueue, assigner, 0);
    }
}
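The idiom worth noting here is "poll the least-loaded element with removeFirst(), mutate it, then re-add it" so the queue's ordering stays consistent with the new load, plus size() comparisons to drop handlers that will never receive work. A minimal sketch of the same pattern with an invented Worker class standing in for HandlerSize:

import java.util.Comparator;

import com.google.common.collect.MinMaxPriorityQueue;

public class LeastLoadedSketch {
    static class Worker {
        final String name;
        int load;
        Worker(String name) { this.name = name; }
    }

    public static void main(String[] args) {
        MinMaxPriorityQueue<Worker> queue = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingInt((Worker w) -> w.load))
                .create();
        queue.add(new Worker("a"));
        queue.add(new Worker("b"));

        for (int task = 0; task < 5; task++) {
            Worker least = queue.removeFirst(); // currently least-loaded worker
            least.load++;                        // assign one unit of work
            queue.add(least);                    // re-insert so the ordering stays correct
        }

        System.out.println(queue.size()); // still 2 workers in the queue
    }
}

Mutating an element only while it is outside the queue, as above, is what keeps the heap invariants intact.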
From source file:org.dkpro.tc.features.pair.core.ngram.LuceneNGramCPFE.java
private FrequencyDistribution<String> getTopNgramsCombo(int topNgramThreshold, String fieldName)
        throws ResourceInitializationException {
    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();

    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(topNgramThreshold).create();

    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(fieldName);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    // add conditions here, like ngram1 is in most freq ngrams1...
                    String combo1 = term.split(ComboUtils.JOINT)[0];
                    String combo2 = term.split(ComboUtils.JOINT)[1];
                    int combinedSize = combo1.split("_").length + combo2.split("_").length;
                    if (topKSetView1.contains(combo1) && topKSet.contains(combo1)
                            && topKSetView2.contains(combo2) && topKSet.contains(combo2)
                            && combinedSize <= ngramMaxNCombo && combinedSize >= ngramMinNCombo) {
                        // print out here for testing
                        topN.add(new TermFreqTuple(term, freq));
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        // System.out.println(tuple.getTerm() + " - " + tuple.getFreq());
        topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
    }

    return topNGrams;
}
From source file:org.apache.hadoop.hbase.master.balancer.DefaultLoadBalancer.java
/** * Generate a global load balancing plan according to the specified map of * server information to the most loaded regions of each server. * * The load balancing invariant is that all servers are within 1 region of the * average number of regions per server. If the average is an integer number, * all servers will be balanced to the average. Otherwise, all servers will * have either floor(average) or ceiling(average) regions. * * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that * we can fetch from both ends of the queue. * At the beginning, we check whether there was empty region server * just discovered by Master. If so, we alternately choose new / old * regions from head / tail of regionsToMove, respectively. This alternation * avoids clustering young regions on the newly discovered region server. * Otherwise, we choose new regions from head of regionsToMove. * //from ww w .ja v a 2 s . c om * Another improvement from HBASE-3609 is that we assign regions from * regionsToMove to underloaded servers in round-robin fashion. * Previously one underloaded server would be filled before we move onto * the next underloaded server, leading to clustering of young regions. * * Finally, we randomly shuffle underloaded servers so that they receive * offloaded regions relatively evenly across calls to balanceCluster(). * * The algorithm is currently implemented as such: * * <ol> * <li>Determine the two valid numbers of regions each server should have, * <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average). * * <li>Iterate down the most loaded servers, shedding regions from each so * each server hosts exactly <b>MAX</b> regions. Stop once you reach a * server that already has <= <b>MAX</b> regions. * <p> * Order the regions to move from most recent to least. * * <li>Iterate down the least loaded servers, assigning regions so each server * has exactly </b>MIN</b> regions. Stop once you reach a server that * already has >= <b>MIN</b> regions. * * Regions being assigned to underloaded servers are those that were shed * in the previous step. It is possible that there were not enough * regions shed to fill each underloaded server to <b>MIN</b>. If so we * end up with a number of regions required to do so, <b>neededRegions</b>. * * It is also possible that we were able to fill each underloaded but ended * up with regions that were unassigned from overloaded servers but that * still do not have assignment. * * If neither of these conditions hold (no regions needed to fill the * underloaded servers, no regions leftover from overloaded servers), * we are done and return. Otherwise we handle these cases below. * * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers), * we iterate the most loaded servers again, shedding a single server from * each (this brings them from having <b>MAX</b> regions to having * <b>MIN</b> regions). * * <li>We now definitely have more regions that need assignment, either from * the previous step or from the original shedding from overloaded servers. * Iterate the least loaded servers filling each to <b>MIN</b>. * * <li>If we still have more regions that need assignment, again iterate the * least loaded servers, this time giving each one (filling them to * </b>MAX</b>) until we run out. * * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions. * * In addition, any server hosting >= <b>MAX</b> regions is guaranteed * to end up with <b>MAX</b> regions at the end of the balancing. 
This * ensures the minimal number of regions possible are moved. * </ol> * * TODO: We can at-most reassign the number of regions away from a particular * server to be how many they report as most loaded. * Should we just keep all assignment in memory? Any objections? * Does this mean we need HeapSize on HMaster? Or just careful monitor? * (current thinking is we will hold all assignments in memory) * * @param clusterMap Map of regionservers and their load/region information to * a list of their most loaded regions * @return a list of regions to be moved, including source and destination, * or null if cluster is already balanced */ public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) { boolean emptyRegionServerPresent = false; long startTime = System.currentTimeMillis(); ClusterLoadState cs = new ClusterLoadState(clusterMap); if (!this.needsBalance(cs)) return null; int numServers = cs.getNumServers(); NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad(); int numRegions = cs.getNumRegions(); int min = numRegions / numServers; int max = numRegions % numServers == 0 ? min : min + 1; // Using to check balance result. StringBuilder strBalanceParam = new StringBuilder(); strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=") .append(numServers).append(", max=").append(max).append(", min=").append(min); LOG.debug(strBalanceParam.toString()); // Balance the cluster // TODO: Look at data block locality or a more complex load to do this MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create(); List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>(); // Walk down most loaded, pruning each to the max int serversOverloaded = 0; // flag used to fetch regions from head and tail of list, alternately boolean fetchFromTail = false; Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>(); for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) { ServerAndLoad sal = server.getKey(); int regionCount = sal.getLoad(); if (regionCount <= max) { serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0)); break; } serversOverloaded++; List<HRegionInfo> regions = server.getValue(); int numToOffload = Math.min(regionCount - max, regions.size()); // account for the out-of-band regions which were assigned to this server // after some other region server crashed Collections.sort(regions, riComparator); int numTaken = 0; for (int i = 0; i <= numToOffload;) { HRegionInfo hri = regions.get(i); // fetch from head if (fetchFromTail) { hri = regions.get(regions.size() - 1 - i); } i++; // Don't rebalance meta regions. 
if (hri.isMetaRegion()) continue; regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null)); numTaken++; if (numTaken >= numToOffload) break; // fetch in alternate order if there is new region server if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } } serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken)); } int totalNumMoved = regionsToMove.size(); // Walk down least loaded, filling each to the min int neededRegions = 0; // number of regions needed to bring all up to min fetchFromTail = false; Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>(); float average = (float) numRegions / numServers; // for logging int maxToTake = numRegions - (int) average; for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) { if (maxToTake == 0) break; // no more to take int regionCount = server.getKey().getLoad(); if (regionCount >= min && regionCount > 0) { continue; // look for other servers which haven't reached min } int regionsToPut = min - regionCount; if (regionsToPut == 0) { regionsToPut = 1; maxToTake--; } underloadedServers.put(server.getKey().getServerName(), regionsToPut); } // number of servers that get new regions int serversUnderloaded = underloadedServers.size(); int incr = 1; List<ServerName> sns = Arrays .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded])); Collections.shuffle(sns, RANDOM); while (regionsToMove.size() > 0) { int cnt = 0; int i = incr > 0 ? 0 : underloadedServers.size() - 1; for (; i >= 0 && i < underloadedServers.size(); i += incr) { if (regionsToMove.isEmpty()) break; ServerName si = sns.get(i); int numToTake = underloadedServers.get(si); if (numToTake == 0) continue; addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn); if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } underloadedServers.put(si, numToTake - 1); cnt++; BalanceInfo bi = serverBalanceInfo.get(si); if (bi == null) { bi = new BalanceInfo(0, 0); serverBalanceInfo.put(si, bi); } bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1); } if (cnt == 0) break; // iterates underloadedServers in the other direction incr = -incr; } for (Integer i : underloadedServers.values()) { // If we still want to take some, increment needed neededRegions += i; } // If none needed to fill all to min and none left to drain all to max, // we are done if (neededRegions == 0 && regionsToMove.isEmpty()) { long endTime = System.currentTimeMillis(); LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return regionsToReturn; } // Need to do a second pass. // Either more regions to assign out or servers that are still underloaded // If we need more to fill min, grab one from each most loaded until enough if (neededRegions != 0) { // Walk down most loaded, grabbing one from each until we get enough for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) { BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload(); if (idx >= server.getValue().size()) break; HRegionInfo region = server.getValue().get(idx); if (region.isMetaRegion()) continue; // Don't move meta regions. 
regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null)); totalNumMoved++; if (--neededRegions == 0) { // No more regions needed, done shedding break; } } } // Now we have a set of regions that must be all assigned out // Assign each underloaded up to the min, then if leftovers, assign to max // Walk down least loaded, assigning to each to fill up to min for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) { int regionCount = server.getKey().getLoad(); if (regionCount >= min) break; BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); if (balanceInfo != null) { regionCount += balanceInfo.getNumRegionsAdded(); } if (regionCount >= min) { continue; } int numToTake = min - regionCount; int numTaken = 0; while (numTaken < numToTake && 0 < regionsToMove.size()) { addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn); numTaken++; if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } } } // If we still have regions to dish out, assign underloaded to max if (0 < regionsToMove.size()) { for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) { int regionCount = server.getKey().getLoad(); if (regionCount >= max) { break; } addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn); if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } if (regionsToMove.isEmpty()) { break; } } } long endTime = System.currentTimeMillis(); if (!regionsToMove.isEmpty() || neededRegions != 0) { // Emit data so can diagnose how balancer went astray. LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded); StringBuilder sb = new StringBuilder(); for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) { if (sb.length() > 0) sb.append(", "); sb.append(e.getKey().toString()); sb.append(" "); sb.append(e.getValue().size()); } LOG.warn("Input " + sb.toString()); } // All done! LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers"); return regionsToReturn; }
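In this balancer, regionsToMove.size() drives both the bookkeeping (totalNumMoved) and the main distribution loops, and the double-ended nature of MinMaxPriorityQueue is what the HBASE-3609 javadoc above refers to: region plans can be taken alternately from the head and the tail so that young and old regions are interleaved across servers. A minimal standalone sketch of that head/tail alternation, with integers standing in for RegionPlan entries (class name and data invented for illustration):

import com.google.common.collect.MinMaxPriorityQueue;

public class AlternatingDrainSketch {
    public static void main(String[] args) {
        MinMaxPriorityQueue<Integer> regionsToMove = MinMaxPriorityQueue.create();
        for (int i = 1; i <= 6; i++) {
            regionsToMove.add(i);
        }

        boolean fetchFromTail = false;
        while (regionsToMove.size() > 0) {
            // Alternate between the smallest (head) and largest (tail) remaining entry.
            int next = fetchFromTail ? regionsToMove.removeLast() : regionsToMove.removeFirst();
            System.out.println(next); // 1, 6, 2, 5, 3, 4
            fetchFromTail = !fetchFromTail;
        }
    }
}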