List of usage examples for com.google.common.collect MinMaxPriorityQueue add
@Override public boolean add(E element)
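Before the project examples, a minimal sketch (not taken from any of the projects below) of how add interacts with a size bound: when the queue is built with maximumSize, adding an element past the limit immediately evicts the greatest element under the queue's ordering, so the queue retains the N least elements.

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Comparator;

public class MinMaxAddSketch {
    public static void main(String[] args) {
        // Keep only the 3 smallest values; add() evicts the current greatest once the bound is exceeded.
        MinMaxPriorityQueue<Integer> smallest = MinMaxPriorityQueue.maximumSize(3).create();
        for (int value : new int[] { 7, 2, 9, 4, 1, 8 }) {
            smallest.add(value);
        }
        System.out.println(smallest.peekFirst()); // 1 (least)
        System.out.println(smallest.peekLast());  // 4 (greatest of the retained 3)

        // To keep the 3 largest instead, order the queue descending.
        MinMaxPriorityQueue<Integer> largest =
                MinMaxPriorityQueue.orderedBy(Comparator.<Integer>reverseOrder()).maximumSize(3).create();
        for (int value : new int[] { 7, 2, 9, 4, 1, 8 }) {
            largest.add(value);
        }
        System.out.println(largest.peekFirst()); // 9
    }
}

Most of the examples below are variations of this: a bounded queue for top-N selection, or an unbounded one used as a heap via removeFirst/add.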
From source file:co.cask.cdap.common.zookeeper.coordination.BalancedAssignmentStrategy.java
@Override
public <T> void assign(ResourceRequirement requirement, Set<T> handlers, ResourceAssigner<T> assigner) {
  MinMaxPriorityQueue<HandlerSize<T>> handlerQueue = MinMaxPriorityQueue.create();
  Multimap<T, PartitionReplica> assignments = assigner.get();

  // Compute for each handler how many partition replicas are already assigned
  for (T handler : handlers) {
    handlerQueue.add(new HandlerSize<>(handler, assignments));
  }

  // For each unassigned partition replica in the requirement, assign it to the handler
  // with the fewest partition replicas assigned. This is just a heuristic so that the
  // later balance phase has less work to do.
  int totalPartitionReplica = 0;
  for (ResourceRequirement.Partition partition : requirement.getPartitions()) {
    totalPartitionReplica += partition.getReplicas();
    for (int replica = 0; replica < partition.getReplicas(); replica++) {
      if (assigner.getHandler(partition.getName(), replica) == null) {
        HandlerSize<T> handlerSize = handlerQueue.removeFirst();
        assigner.set(handlerSize.getHandler(), partition.getName(), replica);

        // After assignment, the size should get updated, hence put it back to the queue for the next round.
        handlerQueue.add(handlerSize);
      }
    }
  }

  // Balance
  if (totalPartitionReplica > handlers.size()) {
    balance(handlerQueue, assigner, 1);
  } else {
    // Evenly distribute it to the first N handlers.
    while (handlerQueue.size() > totalPartitionReplica) {
      // If the number of handlers is greater than the total partition replicas,
      // there must be at least one handler that has nothing assigned.
      handlerQueue.removeFirst();
    }
    // Balance it evenly; there should be no difference in the number of partition replicas
    // assigned to each handler.
    balance(handlerQueue, assigner, 0);
  }
}
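The pattern here, pull the least-loaded entry with removeFirst, mutate its ordering key by assigning work, then add it back so the queue re-sorts it, is a common greedy load-balancing use of add. A stripped-down sketch of the same idea; the Worker class and its counts are illustrative stand-ins, not CDAP types:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Comparator;

class Worker {
    final String name;
    int assigned; // number of items currently assigned

    Worker(String name) { this.name = name; }
}

public class GreedyAssignSketch {
    public static void main(String[] args) {
        MinMaxPriorityQueue<Worker> queue =
                MinMaxPriorityQueue.orderedBy(Comparator.comparingInt((Worker w) -> w.assigned)).create();
        queue.add(new Worker("a"));
        queue.add(new Worker("b"));

        for (int task = 0; task < 5; task++) {
            Worker least = queue.removeFirst(); // least-loaded worker
            least.assigned++;                   // assign the task, changing its ordering key
            queue.add(least);                   // re-add so the queue re-sorts it
        }
        for (Worker w : queue) {
            System.out.println(w.name + " -> " + w.assigned); // roughly even split: 3 and 2
        }
    }
}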
From source file:de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution.java
/**
 * Returns the n most frequent samples in the distribution. The ordering within a group of
 * samples with the same frequency is undefined.
 *
 * @param n
 *            the number of most frequent samples to return.
 * @return the n most frequent samples in the distribution.
 */
public List<T> getMostFrequentSamples(int n) {
    MinMaxPriorityQueue<TermFreqTuple<T>> topN = MinMaxPriorityQueue.maximumSize(n).create();

    for (T key : this.getKeys()) {
        topN.add(new TermFreqTuple<T>(key, this.getCount(key)));
    }

    List<T> topNList = new ArrayList<T>();
    while (!topN.isEmpty()) {
        topNList.add(topN.poll().getKey());
    }

    return topNList;
}
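One caveat with this top-N idiom: a queue built with maximumSize(n) evicts its greatest element on add, so it ends up holding the n least elements under its ordering. For getMostFrequentSamples to work, TermFreqTuple presumably compares in descending frequency order; with a plain ascending comparator the queue would keep the least frequent samples instead. A self-contained illustration of the descending-order variant; the word counts are made up:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.AbstractMap.SimpleEntry;
import java.util.Comparator;
import java.util.Map;

public class TopNByCountSketch {
    public static void main(String[] args) {
        // Descending by count, so that add() evicts the lowest counts once the bound is hit.
        MinMaxPriorityQueue<Map.Entry<String, Long>> top2 = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingLong((Map.Entry<String, Long> e) -> e.getValue()).reversed())
                .maximumSize(2)
                .create();

        top2.add(new SimpleEntry<>("the", 42L));
        top2.add(new SimpleEntry<>("cat", 3L));
        top2.add(new SimpleEntry<>("sat", 17L)); // "cat" (count 3) is evicted

        while (!top2.isEmpty()) {
            System.out.println(top2.poll().getKey()); // "the", then "sat"
        }
    }
}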
From source file:com.davidbracewell.ml.classification.lazy.SparseKNN.java
@Override
protected ClassificationResult classifyImpl(Instance instance) {
  final MinMaxPriorityQueue<Pair<Double, Double>> neighbors = MinMaxPriorityQueue
      .orderedBy(Ordering.from(Sorting.<Double, Double>mapEntryComparator(false, true)))
      .maximumSize(K)
      .create();
  for (Instance inst : index.query(instance)) {
    double distance = distanceMeasure.calculate(inst, instance, VectorMap.VALID_VALUES.FINITE);
    neighbors.add(Pair.of(inst.getTargetValue(), distance));
  }
  double[] p = new double[getTargetFeature().alphabetSize()];
  for (Pair<Double, Double> pair : neighbors) {
    p[pair.getFirst().intValue()] += 1d / (pair.getSecond() + 0.00000001);
  }
  return new ClassificationResult(getTargetFeature(), p);
}
From source file:com.davidbracewell.ml.sequence.decoder.LinearViterbi.java
@Override
public double[] decode(SequenceModel<V> raw, Sequence<V> sequence) {
  LinearSequenceModel<V> model = Val.of(raw).cast();
  final Feature classFeature = model.getTargetFeature();
  final int numStates = classFeature.alphabetSize();

  MinMaxPriorityQueue<State> beam = MinMaxPriorityQueue.maximumSize(beamSize).create();
  ClassificationResult result = model.classifyItem(0, sequence, new double[0]);
  for (int ci = 0; ci < numStates; ci++) {
    if (isValidStartTag(classFeature.valueAtIndex(ci))
        && isValidTag(classFeature.valueAtIndex(ci), sequence.getData(0))) {
      beam.add(new State(Math.log(result.getConfidence(ci)), ci, null, 0));
    }
  }

  MinMaxPriorityQueue<State> tempBeam = MinMaxPriorityQueue.maximumSize(beamSize).create();
  for (int i = 1; i < sequence.length(); i++) {
    while (!beam.isEmpty()) { // go through all the previous states
      State state = beam.removeFirst();
      String previousTag = classFeature.valueAtIndex(state.tag);
      result = model.classifyItem(i, sequence, state.labels());
      for (int ci = 0; ci < numStates; ci++) {
        if (isValidTransition(previousTag, classFeature.valueAtIndex(ci))
            && ((i + 1 < sequence.length()) || isValidEndTag(classFeature.valueAtIndex(ci)))
            && isValidTag(classFeature.valueAtIndex(ci), sequence.getData(i))) {
          tempBeam.add(new State(state.probability + Math.log(result.getConfidence(ci)), ci, state, i));
        }
      }
    }
    beam.addAll(tempBeam);
    tempBeam.clear();
  }

  return beam.remove().labels();
}
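The beam here is simply a size-bounded MinMaxPriorityQueue: because maximumSize(beamSize) drops the greatest element on every add, only the beamSize best-scoring states survive each step, assuming State's comparator orders higher log-probabilities first (which the final beam.remove() call suggests). A toy sketch of that pruning behaviour, with an invented Candidate type and scores:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Comparator;

public class BeamPruneSketch {
    // A minimal "state": a tag plus a log-probability score (higher is better).
    static final class Candidate {
        final String tag;
        final double logProb;
        Candidate(String tag, double logProb) { this.tag = tag; this.logProb = logProb; }
    }

    public static void main(String[] args) {
        int beamSize = 2;
        // Best-first ordering, so add() silently drops the worst candidate once the beam is full.
        MinMaxPriorityQueue<Candidate> beam = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingDouble((Candidate c) -> c.logProb).reversed())
                .maximumSize(beamSize)
                .create();

        beam.add(new Candidate("NN", -0.4));
        beam.add(new Candidate("VB", -1.9));
        beam.add(new Candidate("JJ", -0.9)); // "VB" is evicted; only the top 2 survive

        System.out.println(beam.removeFirst().tag); // NN (best)
        System.out.println(beam.removeFirst().tag); // JJ
    }
}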
From source file:com.griddynamics.jagger.storage.fs.logging.ChronologyLogAggregator.java
@Override
public AggregationInfo chronology(String dir, String targetFile) throws IOException {
    log.info("Try to aggregate {} into file {}", dir, targetFile);

    Collection<Iterable<LogEntry>> readers = new ArrayList<Iterable<LogEntry>>();
    Set<String> fileNameList = fileStorage.getFileNameList(dir);
    if (fileNameList.isEmpty()) {
        log.info("Nothing to aggregate. Directory {} is empty.", dir);
        fileStorage.create(targetFile);
        return new AggregationInfo(0, 0, 0);
    }
    for (String fileName : fileNameList) {
        try {
            readers.add(logReader.read(fileName, LogEntry.class));
        } catch (Exception e) {
            // TODO
            log.warn(e.getMessage(), e);
        }
    }

    int count = 0;
    long minTime = 0;
    long maxTime = 0;
    BufferedLogWriter.LogWriterOutput objectOutput = null;
    try {
        if (fileStorage.delete(targetFile, false)) {
            log.warn("Target file {} was not deleted!", targetFile);
        }
        objectOutput = logWriter.getOutput(fileStorage.create(targetFile));

        MinMaxPriorityQueue<StreamInfo> queue = MinMaxPriorityQueue.create();
        for (Iterable<LogEntry> inputStream : readers) {
            LogEntry logEntry;
            Iterator<LogEntry> it = inputStream.iterator();
            if (it.hasNext()) {
                logEntry = it.next();
            } else {
                continue;
            }
            queue.add(new StreamInfo(it, logEntry));
        }

        while (!queue.isEmpty()) {
            StreamInfo<LogEntry> streamInfo = queue.removeFirst();
            objectOutput.writeObject(streamInfo.lastLogEntry);
            if (count == 0) {
                minTime = streamInfo.lastLogEntry.getTime();
                maxTime = streamInfo.lastLogEntry.getTime();
            } else {
                maxTime = streamInfo.lastLogEntry.getTime();
            }
            count++;

            LogEntry logEntry;
            if (streamInfo.stream.hasNext()) {
                logEntry = streamInfo.stream.next();
            } else {
                continue;
            }
            streamInfo.lastLogEntry = logEntry;
            queue.add(streamInfo);
        }
    } finally {
        Closeables.closeQuietly(objectOutput);
    }
    return new AggregationInfo(minTime, maxTime, count);
}
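The queue in this example drives a classic k-way merge: each StreamInfo pairs an iterator with its most recently read entry, the queue orders the streams by that entry, and after writing the head entry the stream is advanced and added back. A compact sketch of the same merge over plain iterators of longs; the Source class here is a made-up stand-in for StreamInfo:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

public class KWayMergeSketch {
    // Pairs an input iterator with the last value read from it.
    static final class Source {
        final Iterator<Long> it;
        long current;
        Source(Iterator<Long> it) { this.it = it; this.current = it.next(); }
    }

    public static void main(String[] args) {
        List<List<Long>> sortedInputs = Arrays.asList(
                Arrays.asList(1L, 4L, 9L),
                Arrays.asList(2L, 3L, 8L),
                Arrays.asList(5L, 6L, 7L));

        MinMaxPriorityQueue<Source> queue =
                MinMaxPriorityQueue.orderedBy(Comparator.comparingLong((Source s) -> s.current)).create();
        for (List<Long> input : sortedInputs) {
            Iterator<Long> it = input.iterator();
            if (it.hasNext()) {
                queue.add(new Source(it));
            }
        }

        // Repeatedly emit the globally smallest element, then advance and re-add that source.
        while (!queue.isEmpty()) {
            Source source = queue.removeFirst();
            System.out.print(source.current + " "); // prints 1 2 3 4 5 6 7 8 9
            if (source.it.hasNext()) {
                source.current = source.it.next();
                queue.add(source);
            }
        }
    }
}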
From source file:com.davidbracewell.ml.sequence.decoder.HMMViterbi.java
@Override
public double[] decode(SequenceModel<V> raw, Sequence<V> seq) {
  FirstOrderHMM<V> model = Val.of(raw).cast();
  int len = seq.length();
  Feature classFeature = model.getTargetFeature();
  int NC = classFeature.alphabetSize();

  MinMaxPriorityQueue<State> beam = MinMaxPriorityQueue.maximumSize(beamSize).create();
  MinMaxPriorityQueue<State> tempBeam = MinMaxPriorityQueue.maximumSize(beamSize).create();

  Instance firstInstance = seq.generateInstance(0, new double[] { 0 });
  for (int ci = 0; ci < NC; ci++) {
    if (isValidStartTag(classFeature.valueAtIndex(ci))
        && isValidTag(classFeature.valueAtIndex(ci), seq.getData(0))) {
      beam.add(new State(model.pi(ci) + model.beta(ci, firstInstance), ci, null));
    }
  }

  for (int i = 1; i < len; i++) {
    for (int ci = 0; ci < NC; ci++) {
      String thisTag = classFeature.valueAtIndex(ci);
      if (!isValidTag(thisTag, seq.getData(i))) {
        continue;
      }
      if ((i + 1 < seq.length()) || isValidEndTag(thisTag)) {
        double pInst = model.beta(ci, seq.generateInstance(i, new double[] { ci }));
        for (State state : beam) {
          if (isValidTransition(classFeature.valueAtIndex(state.tag), thisTag)) {
            tempBeam.add(new State(state.probability         // previous probability
                + model.alpha(state.tag, ci)                  // transition probability
                + pInst,                                      // probability of the vector given the tag
                ci, state));
          }
        }
      }
    }
    MinMaxPriorityQueue<State> t = beam;
    beam = tempBeam;
    tempBeam = t;
    tempBeam.clear();
  }

  State max = beam.remove();
  double[] prediction = new double[len];
  for (int i = len - 1; i >= 0; i--) {
    prediction[i] = max.tag;
    max = max.prev;
  }
  return prediction;
}
From source file:org.apache.tephra.hbase.txprune.InvalidListPruningDebugTool.java
/**
 * Return a list of RegionPruneInfo. These regions are the ones that have the lowest prune upper bounds.
 * If -1 is passed in, all the regions and their prune upper bound will be returned. Note that only the regions
 * that are known to be live will be returned.
 *
 * @param numRegions number of regions
 * @param time time in milliseconds or relative time, regions recorded before the given time are returned
 * @return Map of region name and its prune upper bound
 */
@Override
@SuppressWarnings("WeakerAccess")
public SortedSet<RegionPruneInfoPretty> getIdleRegions(Integer numRegions, String time) throws IOException {
  List<RegionPruneInfo> regionPruneInfos = dataJanitorState.getPruneInfoForRegions(null);
  if (regionPruneInfos.isEmpty()) {
    return new TreeSet<>();
  }

  // Create a set with region names
  Set<String> pruneRegionNameSet = new HashSet<>();
  for (RegionPruneInfo regionPruneInfo : regionPruneInfos) {
    pruneRegionNameSet.add(regionPruneInfo.getRegionNameAsString());
  }

  // Fetch the latest live regions
  RegionsAtTime latestRegions = getRegionsOnOrBeforeTime(NOW);

  // Fetch the regions at the given time
  RegionsAtTime timeRegions = getRegionsOnOrBeforeTime(time);
  Set<String> liveRegions = Sets.intersection(latestRegions.getRegions(), timeRegions.getRegions());
  Set<String> liveRegionsWithPruneInfo = Sets.intersection(liveRegions, pruneRegionNameSet);
  List<RegionPruneInfo> liveRegionWithPruneInfoList = new ArrayList<>();
  for (RegionPruneInfo regionPruneInfo : regionPruneInfos) {
    if (liveRegionsWithPruneInfo.contains(regionPruneInfo.getRegionNameAsString())) {
      liveRegionWithPruneInfoList.add(regionPruneInfo);
    }
  }
  // Use the subset of live regions and prune regions
  regionPruneInfos = liveRegionWithPruneInfoList;

  if (numRegions < 0) {
    numRegions = regionPruneInfos.size();
  }

  Comparator<RegionPruneInfo> comparator = new Comparator<RegionPruneInfo>() {
    @Override
    public int compare(RegionPruneInfo o1, RegionPruneInfo o2) {
      int result = Long.compare(o1.getPruneUpperBound(), o2.getPruneUpperBound());
      if (result == 0) {
        return o1.getRegionNameAsString().compareTo(o2.getRegionNameAsString());
      }
      return result;
    }
  };

  MinMaxPriorityQueue<RegionPruneInfoPretty> lowestPrunes =
      MinMaxPriorityQueue.orderedBy(comparator).maximumSize(numRegions).create();
  for (RegionPruneInfo pruneInfo : regionPruneInfos) {
    lowestPrunes.add(new RegionPruneInfoPretty(pruneInfo));
  }

  SortedSet<RegionPruneInfoPretty> regions = new TreeSet<>(comparator);
  regions.addAll(lowestPrunes);
  return regions;
}
From source file:gobblin.source.extractor.extract.kafka.workunit.packer.KafkaWorkUnitPacker.java
/**
 * Pack a list of {@link WorkUnit}s into a smaller number of {@link MultiWorkUnit}s,
 * using the worst-fit-decreasing algorithm.
 *
 * Each {@link WorkUnit} is assigned to the {@link MultiWorkUnit} with the smallest load.
 */
protected List<WorkUnit> worstFitDecreasingBinPacking(List<WorkUnit> groups, int numOfMultiWorkUnits) {

  // Sort workunit groups by data size desc
  Collections.sort(groups, LOAD_DESC_COMPARATOR);

  MinMaxPriorityQueue<MultiWorkUnit> pQueue =
      MinMaxPriorityQueue.orderedBy(LOAD_ASC_COMPARATOR).expectedSize(numOfMultiWorkUnits).create();
  for (int i = 0; i < numOfMultiWorkUnits; i++) {
    MultiWorkUnit multiWorkUnit = MultiWorkUnit.createEmpty();
    setWorkUnitEstSize(multiWorkUnit, 0);
    pQueue.add(multiWorkUnit);
  }

  for (WorkUnit group : groups) {
    MultiWorkUnit lightestMultiWorkUnit = pQueue.poll();
    addWorkUnitToMultiWorkUnit(group, lightestMultiWorkUnit);
    pQueue.add(lightestMultiWorkUnit);
  }

  logMultiWorkUnitInfo(pQueue);

  double minLoad = getWorkUnitEstLoad(pQueue.peekFirst());
  double maxLoad = getWorkUnitEstLoad(pQueue.peekLast());
  LOG.info(String.format("Min load of multiWorkUnit = %f; Max load of multiWorkUnit = %f; Diff = %f%%",
      minLoad, maxLoad, (maxLoad - minLoad) / maxLoad * 100.0));

  this.state.setProp(MIN_MULTIWORKUNIT_LOAD, minLoad);
  this.state.setProp(MAX_MULTIWORKUNIT_LOAD, maxLoad);

  List<WorkUnit> multiWorkUnits = Lists.newArrayList();
  multiWorkUnits.addAll(pQueue);
  return multiWorkUnits;
}
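Stripped of the Gobblin types, the poll-then-add loop above is the core of worst-fit-decreasing packing: items are taken in descending size, the emptiest bin is polled off the head of a load-ordered queue, the item is placed, and the bin is added back. A small illustrative sketch with plain numbers; the bin count and item sizes are made up:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class WorstFitSketch {
    static final class Bin {
        final List<Double> items = new ArrayList<>();
        double load;
    }

    public static void main(String[] args) {
        double[] sizes = { 9, 7, 6, 5, 3, 2 }; // already sorted descending
        int numBins = 3;

        MinMaxPriorityQueue<Bin> bins =
                MinMaxPriorityQueue.orderedBy(Comparator.comparingDouble((Bin b) -> b.load)).create();
        for (int i = 0; i < numBins; i++) {
            bins.add(new Bin());
        }

        for (double size : sizes) {
            Bin lightest = bins.poll(); // bin with the smallest load so far
            lightest.items.add(size);
            lightest.load += size;
            bins.add(lightest);         // re-add so its new load is taken into account
        }

        System.out.println("min load = " + bins.peekFirst().load); // 10.0
        System.out.println("max load = " + bins.peekLast().load);  // 11.0
    }
}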
From source file:org.apache.phoenix.schema.PMetaDataCache.java
/**
 * Used when the cache is growing past its max size to clone in a single pass.
 * Removes least recently used tables to get the size of the cache below its max size by
 * the overage amount.
 */
public PMetaDataCache cloneMinusOverage(long overage) {
    assert (overage > 0);
    int nToRemove = Math.max(MIN_REMOVAL_SIZE,
            (int) Math.ceil((currentByteSize - maxByteSize) / ((double) currentByteSize / size())) + 1);
    MinMaxPriorityQueue<PTableRef> toRemove = BUILDER.expectedSize(nToRemove).create();
    PMetaDataCache newCache = new PMetaDataCache(this.size(), this.maxByteSize, this.timeKeeper,
            this.tableRefFactory);

    long toRemoveBytes = 0;
    // Add to new cache, but track references to remove when done
    // to bring the cache at least the overage amount below its max size.
    for (PTableRef tableRef : this.tables.values()) {
        newCache.put(tableRef.getTable().getKey(), tableRefFactory.makePTableRef(tableRef));
        toRemove.add(tableRef);
        toRemoveBytes += tableRef.getEstimatedSize();
        while (toRemoveBytes - toRemove.peekLast().getEstimatedSize() >= overage) {
            PTableRef removedRef = toRemove.removeLast();
            toRemoveBytes -= removedRef.getEstimatedSize();
        }
    }
    for (PTableRef toRemoveRef : toRemove) {
        newCache.remove(toRemoveRef.getTable().getKey());
    }
    return newCache;
}
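In this example add works together with peekLast and removeLast: every table becomes an eviction candidate, but whenever the tracked candidates would free more than the required overage, the greatest one under the ordering behind BUILDER (presumably the most recently used) is dropped again, so toRemove ends up holding a minimal set of least-recently-used entries. A simplified sketch of that trimming loop, with made-up entry names, sizes, and access times:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Comparator;

public class EvictionCandidateSketch {
    static final class Entry {
        final String name;
        final long lastAccess;
        final long bytes;
        Entry(String name, long lastAccess, long bytes) {
            this.name = name; this.lastAccess = lastAccess; this.bytes = bytes;
        }
    }

    public static void main(String[] args) {
        long overage = 120; // bytes we need to free
        // Oldest access time first, so removeLast() drops the most recently used candidate.
        MinMaxPriorityQueue<Entry> toRemove =
                MinMaxPriorityQueue.orderedBy(Comparator.comparingLong((Entry e) -> e.lastAccess)).create();

        Entry[] entries = {
                new Entry("t1", 100, 80), new Entry("t2", 300, 50),
                new Entry("t3", 200, 60), new Entry("t4", 400, 40) };

        long toRemoveBytes = 0;
        for (Entry e : entries) {
            toRemove.add(e);
            toRemoveBytes += e.bytes;
            // Shrink the candidate set while it still frees enough without its most recently used member.
            while (toRemoveBytes - toRemove.peekLast().bytes >= overage) {
                toRemoveBytes -= toRemove.removeLast().bytes;
            }
        }

        for (Entry e : toRemove) {
            System.out.println("evict " + e.name); // the surviving candidates are t1 and t3
        }
    }
}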
From source file:com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByOperatorService.java
/** * Translate the reducedGroupByResults (output of broker's reduce) to AggregationResult object * to be used to build the BrokerResponse. * * @param reducedGroupByResults//from ww w . ja v a 2 s . co m * @return */ public List<AggregationResult> renderAggregationGroupByResult( List<Map<String, Serializable>> reducedGroupByResults) { if (reducedGroupByResults == null || reducedGroupByResults.size() != _aggregationFunctionList.size()) { return null; } List<AggregationResult> aggregationResults = new ArrayList<AggregationResult>(); for (int i = 0; i < _aggregationFunctionList.size(); ++i) { int groupSize = _groupByColumns.size(); Map<String, Serializable> reducedGroupByResult = reducedGroupByResults.get(i); AggregationFunction aggregationFunction = _aggregationFunctionList.get(i); String functionName = aggregationFunction.getFunctionName(); List<GroupByResult> groupByResults = new ArrayList<GroupByResult>(); if (!reducedGroupByResult.isEmpty()) { /* Reverse sort order for min functions. */ boolean reverseOrder = aggregationFunction.getFunctionName().startsWith(MIN_PREFIX); // The MinMaxPriorityQueue will only add TOP N MinMaxPriorityQueue<ImmutablePair<Serializable, String>> minMaxPriorityQueue = getMinMaxPriorityQueue( reducedGroupByResult.values().iterator().next(), _groupByTopN, reverseOrder); if (minMaxPriorityQueue != null) { for (String groupedKey : reducedGroupByResult.keySet()) { minMaxPriorityQueue .add(new ImmutablePair(reducedGroupByResult.get(groupedKey), groupedKey)); } ImmutablePair res; while ((res = (ImmutablePair) minMaxPriorityQueue.pollFirst()) != null) { String groupByColumnsString = (String) res.getRight(); List<String> groupByColumns = Arrays.asList(groupByColumnsString.split( GroupByConstants.GroupByDelimiter.groupByMultiDelimeter.toString(), groupSize)); Serializable value = (Serializable) res.getLeft(); GroupByResult groupValue = new GroupByResult(); groupValue.setGroup(groupByColumns); groupValue.setValue(formatValue(value)); groupByResults.add(groupValue); } } } AggregationResult aggregationResult = new AggregationResult(groupByResults, _groupByColumns, functionName); aggregationResults.add(aggregationResult); } return aggregationResults; }