Example usage for com.google.common.collect MinMaxPriorityQueue orderedBy

Introduction

This page lists example usages of the com.google.common.collect MinMaxPriorityQueue orderedBy method.

Prototype

public static <B> Builder<B> orderedBy(Comparator<B> comparator) 

Document

Creates and returns a new builder, configured to build MinMaxPriorityQueue instances that use comparator to determine the least and greatest elements.
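
Before the usage listings, here is a minimal, self-contained sketch of the call (the class name and string data are illustrative only, not taken from the examples below). orderedBy returns a Builder whose create() produces a queue from which both the least and the greatest element can be inspected or removed.

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Comparator;

public class OrderedByExample {
    public static void main(String[] args) {
        // Order strings by length; the shortest is the "least" element, the longest the "greatest".
        MinMaxPriorityQueue<String> queue = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingInt(String::length))
                .create();

        queue.add("pear");
        queue.add("fig");
        queue.add("banana");

        System.out.println(queue.peekFirst()); // "fig"    (least by length)
        System.out.println(queue.peekLast());  // "banana" (greatest by length)
    }
}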

Usage

From source file:io.druid.server.coordinator.DruidCoordinatorBalancerProfiler.java

public void profileRun() {
    Stopwatch watch = Stopwatch.createUnstarted();
    LoadQueuePeonTester fromPeon = new LoadQueuePeonTester();
    LoadQueuePeonTester toPeon = new LoadQueuePeonTester();

    EasyMock.expect(druidServer1.getName()).andReturn("from").atLeastOnce();
    EasyMock.expect(druidServer1.getCurrSize()).andReturn(30L).atLeastOnce();
    EasyMock.expect(druidServer1.getMaxSize()).andReturn(100L).atLeastOnce();
    EasyMock.expect(druidServer1.getSegments()).andReturn(segments).anyTimes();
    EasyMock.expect(druidServer1.getSegment(EasyMock.<String>anyObject())).andReturn(null).anyTimes();
    EasyMock.replay(druidServer1);

    EasyMock.expect(druidServer2.getName()).andReturn("to").atLeastOnce();
    EasyMock.expect(druidServer2.getTier()).andReturn("normal").anyTimes();
    EasyMock.expect(druidServer2.getCurrSize()).andReturn(0L).atLeastOnce();
    EasyMock.expect(druidServer2.getMaxSize()).andReturn(100L).atLeastOnce();
    EasyMock.expect(druidServer2.getSegments()).andReturn(new HashMap<String, DataSegment>()).anyTimes();
    EasyMock.expect(druidServer2.getSegment(EasyMock.<String>anyObject())).andReturn(null).anyTimes();
    EasyMock.replay(druidServer2);

    coordinator.moveSegment(EasyMock.<ImmutableDruidServer>anyObject(),
            EasyMock.<ImmutableDruidServer>anyObject(), EasyMock.<String>anyObject(),
            EasyMock.<LoadPeonCallback>anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.replay(coordinator);

    DruidCoordinatorRuntimeParams params = DruidCoordinatorRuntimeParams.newBuilder()
            .withDruidCluster(
                    new DruidCluster(ImmutableMap.<String, MinMaxPriorityQueue<ServerHolder>>of("normal",
                            MinMaxPriorityQueue.orderedBy(DruidCoordinatorBalancerTester.percentUsedComparator)
                                    .create(Arrays.asList(new ServerHolder(druidServer1, fromPeon),
                                            new ServerHolder(druidServer2, toPeon))))))
            .withLoadManagementPeons(ImmutableMap.<String, LoadQueuePeon>of("from", fromPeon, "to", toPeon))
            .withAvailableSegments(segments.values())
            .withDynamicConfigs(
                    new CoordinatorDynamicConfig.Builder().withMaxSegmentsToMove(MAX_SEGMENTS_TO_MOVE).build())
            .withBalancerReferenceTimestamp(new DateTime("2013-01-01")).build();
    DruidCoordinatorBalancerTester tester = new DruidCoordinatorBalancerTester(coordinator);
    watch.start();
    DruidCoordinatorRuntimeParams balanceParams = tester.run(params);
    System.out.println(watch.stop());
}

From source file:com.linkedin.pinot.controller.helix.core.relocation.RealtimeSegmentRelocator.java

/**
 * Given a realtime tag config and an ideal state, relocate the segments
 * which are completed but still hanging around on consuming servers, one replica at a time
 * @param realtimeTagConfig
 * @param idealState
 */
protected void relocateSegments(RealtimeTagConfig realtimeTagConfig, IdealState idealState) {

    List<String> consumingServers = _helixAdmin.getInstancesInClusterWithTag(_helixManager.getClusterName(),
            realtimeTagConfig.getConsumingServerTag());
    if (consumingServers.isEmpty()) {
        throw new IllegalStateException(
                "Found no realtime consuming servers with tag " + realtimeTagConfig.getConsumingServerTag());
    }
    List<String> completedServers = _helixAdmin.getInstancesInClusterWithTag(_helixManager.getClusterName(),
            realtimeTagConfig.getCompletedServerTag());
    if (completedServers.isEmpty()) {
        throw new IllegalStateException(
                "Found no realtime completed servers with tag " + realtimeTagConfig.getCompletedServerTag());
    }

    if (completedServers.size() < Integer.valueOf(idealState.getReplicas())) {
        throw new IllegalStateException("Number of completed servers: " + completedServers.size()
                + " is less than num replicas: " + idealState.getReplicas());
    }
    // TODO: use segment assignment strategy to decide where to place relocated segment

    // create map of completed server name to num segments it holds.
    // This will help us decide which completed server to choose for replacing a consuming server
    Map<String, Integer> completedServerToNumSegments = new HashMap<>(completedServers.size());
    for (String server : completedServers) {
        completedServerToNumSegments.put(server, 0);
    }
    for (String segmentName : idealState.getPartitionSet()) {
        Map<String, String> instanceStateMap = idealState.getInstanceStateMap(segmentName);
        for (String instance : instanceStateMap.keySet()) {
            if (completedServers.contains(instance)) {
                completedServerToNumSegments.put(instance, completedServerToNumSegments.get(instance) + 1);
            }
        }
    }
    MinMaxPriorityQueue<Map.Entry<String, Integer>> completedServersQueue = MinMaxPriorityQueue
            .orderedBy(new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> entry1, Map.Entry<String, Integer> entry2) {
                    return Integer.compare(entry1.getValue(), entry2.getValue());
                }
            }).maximumSize(completedServers.size()).create();
    completedServersQueue.addAll(completedServerToNumSegments.entrySet());

    // get new mapping for segments that need relocation
    createNewIdealState(idealState, consumingServers, completedServersQueue);
}
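
The example above chains orderedBy with maximumSize, which bounds the queue: once the bound is exceeded, the queue evicts its greatest element according to the comparator, so only the smallest entries remain. A minimal sketch of that pattern (the capacity of 3 and the integer values are illustrative only):

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Comparator;

public class BoundedQueueSketch {
    public static void main(String[] args) {
        // Keep at most the 3 smallest values seen so far.
        MinMaxPriorityQueue<Integer> smallest = MinMaxPriorityQueue
                .orderedBy(Comparator.<Integer>naturalOrder())
                .maximumSize(3)
                .create();

        for (int value : new int[] {5, 1, 4, 2, 3}) {
            smallest.add(value); // once full, the greatest element is evicted
        }

        while (!smallest.isEmpty()) {
            System.out.println(smallest.poll()); // prints 1, 2, 3 (ascending)
        }
    }
}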

From source file:com.metamx.druid.utils.DruidMasterBalancerProfiler.java

public void profileRun() {
    Stopwatch watch = new Stopwatch();
    LoadQueuePeonTester fromPeon = new LoadQueuePeonTester();
    LoadQueuePeonTester toPeon = new LoadQueuePeonTester();

    EasyMock.expect(druidServer1.getName()).andReturn("from").atLeastOnce();
    EasyMock.expect(druidServer1.getCurrSize()).andReturn(30L).atLeastOnce();
    EasyMock.expect(druidServer1.getMaxSize()).andReturn(100L).atLeastOnce();
    EasyMock.expect(druidServer1.getSegments()).andReturn(segments).anyTimes();
    EasyMock.expect(druidServer1.getSegment(EasyMock.<String>anyObject())).andReturn(null).anyTimes();
    EasyMock.replay(druidServer1);

    EasyMock.expect(druidServer2.getName()).andReturn("to").atLeastOnce();
    EasyMock.expect(druidServer2.getTier()).andReturn("normal").anyTimes();
    EasyMock.expect(druidServer2.getCurrSize()).andReturn(0L).atLeastOnce();
    EasyMock.expect(druidServer2.getMaxSize()).andReturn(100L).atLeastOnce();
    EasyMock.expect(druidServer2.getSegments()).andReturn(new HashMap<String, DataSegment>()).anyTimes();
    EasyMock.expect(druidServer2.getSegment(EasyMock.<String>anyObject())).andReturn(null).anyTimes();
    EasyMock.replay(druidServer2);

    master.moveSegment(EasyMock.<String>anyObject(), EasyMock.<String>anyObject(), EasyMock.<String>anyObject(),
            EasyMock.<LoadPeonCallback>anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.replay(master);

    DruidMasterRuntimeParams params = DruidMasterRuntimeParams.newBuilder()
            .withDruidCluster(
                    new DruidCluster(ImmutableMap.<String, MinMaxPriorityQueue<ServerHolder>>of("normal",
                            MinMaxPriorityQueue.orderedBy(DruidMasterBalancerTester.percentUsedComparator)
                                    .create(Arrays.asList(new ServerHolder(druidServer1, fromPeon),
                                            new ServerHolder(druidServer2, toPeon))))))
            .withLoadManagementPeons(ImmutableMap.<String, LoadQueuePeon>of("from", fromPeon, "to", toPeon))
            .withAvailableSegments(segments.values())
            .withMasterSegmentSettings(
                    new MasterSegmentSettings.Builder().withMaxSegmentsToMove(MAX_SEGMENTS_TO_MOVE).build())
            .withBalancerReferenceTimestamp(new DateTime("2013-01-01")).build();
    DruidMasterBalancerTester tester = new DruidMasterBalancerTester(master);
    watch.start();
    DruidMasterRuntimeParams balanceParams = tester.run(params);
    System.out.println(watch.stop());
}

From source file:it.uniroma3.mat.extendedset.intset.ImmutableConciseSet.java

private static ImmutableConciseSet doUnion(Iterator<ImmutableConciseSet> sets) {
    IntList retVal = new IntList();

    // lhs = current word position, rhs = the iterator
    // Comparison is first by index, then one fills > literals > zero fills
    // one fills are sorted by length (longer one fills have priority)
    // similarly, shorter zero fills have priority
    MinMaxPriorityQueue<WordHolder> theQ = MinMaxPriorityQueue.orderedBy(new Comparator<WordHolder>() {
        @Override
        public int compare(WordHolder h1, WordHolder h2) {
            int w1 = h1.getWord();
            int w2 = h2.getWord();
            int s1 = h1.getIterator().startIndex;
            int s2 = h2.getIterator().startIndex;

            if (s1 != s2) {
                return compareInts(s1, s2);
            }

            if (ConciseSetUtils.isOneSequence(w1)) {
                if (ConciseSetUtils.isOneSequence(w2)) {
                    return -compareInts(ConciseSetUtils.getSequenceNumWords(w1),
                            ConciseSetUtils.getSequenceNumWords(w2));
                }
                return -1;
            } else if (ConciseSetUtils.isLiteral(w1)) {
                if (ConciseSetUtils.isOneSequence(w2)) {
                    return 1;
                } else if (ConciseSetUtils.isLiteral(w2)) {
                    return 0;
                }
                return -1;
            } else {
                if (!ConciseSetUtils.isZeroSequence(w2)) {
                    return 1;
                }
                return compareInts(ConciseSetUtils.getSequenceNumWords(w1),
                        ConciseSetUtils.getSequenceNumWords(w2));
            }
        }
    }).create();

    // populate priority queue
    while (sets.hasNext()) {
        ImmutableConciseSet set = sets.next();

        if (set != null && !set.isEmpty()) {
            WordIterator itr = set.newWordIterator();
            theQ.add(new WordHolder(itr.next(), itr));
        }
    }

    int currIndex = 0;

    while (!theQ.isEmpty()) {
        // create a temp list to hold everything that will get pushed back into the priority queue after each run
        List<WordHolder> wordsToAdd = Lists.newArrayList();

        // grab the top element from the priority queue
        WordHolder curr = theQ.poll();
        int word = curr.getWord();
        WordIterator itr = curr.getIterator();

        // if the next word in the queue starts at a different point than where we left off, we need to create a zero gap
        // to fill the space
        if (currIndex < itr.startIndex) {
            addAndCompact(retVal, itr.startIndex - currIndex - 1);
            currIndex = itr.startIndex;
        }

        if (ConciseSetUtils.isOneSequence(word)) {
            // extract a literal from the flip bits of the one sequence
            int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);

            // advance everything past the longest ones sequence
            WordHolder nextVal = theQ.peek();
            while (nextVal != null && nextVal.getIterator().startIndex < itr.wordsWalked) {
                WordHolder entry = theQ.poll();
                int w = entry.getWord();
                WordIterator i = entry.getIterator();

                if (i.startIndex == itr.startIndex) {
                    // if a literal was created from a flip bit, OR it with other literals or literals from flip bits in the same
                    // position
                    if (ConciseSetUtils.isOneSequence(w)) {
                        flipBitLiteral |= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
                    } else if (ConciseSetUtils.isLiteral(w)) {
                        flipBitLiteral |= w;
                    } else {
                        flipBitLiteral |= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                    }
                }

                i.advanceTo(itr.wordsWalked);
                if (i.hasNext()) {
                    wordsToAdd.add(new WordHolder(i.next(), i));
                }
                nextVal = theQ.peek();
            }

            // advance longest one literal forward and push result back to priority queue
            // if a flip bit is still needed, put it in the correct position
            int newWord = word & 0xC1FFFFFF;
            if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
                flipBitLiteral ^= ConciseSetUtils.ALL_ONES_LITERAL;
                int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
                newWord |= (position << 25);
            }
            addAndCompact(retVal, newWord);
            currIndex = itr.wordsWalked;

            if (itr.hasNext()) {
                wordsToAdd.add(new WordHolder(itr.next(), itr));
            }
        } else if (ConciseSetUtils.isLiteral(word)) {
            // advance all other literals
            WordHolder nextVal = theQ.peek();
            while (nextVal != null && nextVal.getIterator().startIndex == itr.startIndex) {

                WordHolder entry = theQ.poll();
                int w = entry.getWord();
                WordIterator i = entry.getIterator();

                // if we still have zero fills with flipped bits, OR them here
                if (ConciseSetUtils.isLiteral(w)) {
                    word |= w;
                } else {
                    int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                    if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                        word |= flipBitLiteral;
                        i.advanceTo(itr.wordsWalked);
                    }
                }

                if (i.hasNext()) {
                    wordsToAdd.add(new WordHolder(i.next(), i));
                }

                nextVal = theQ.peek();
            }

            // advance the set with the current literal forward and push result back to priority queue
            addAndCompact(retVal, word);
            currIndex++;

            if (itr.hasNext()) {
                wordsToAdd.add(new WordHolder(itr.next(), itr));
            }
        } else { // zero fills
            int flipBitLiteral;
            WordHolder nextVal = theQ.peek();

            while (nextVal != null && nextVal.getIterator().startIndex == itr.startIndex) {
                // check if a literal can be created from the flip bits of other zero sequences
                WordHolder entry = theQ.poll();
                int w = entry.getWord();
                WordIterator i = entry.getIterator();

                flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                    wordsToAdd.add(new WordHolder(flipBitLiteral, i));
                } else if (i.hasNext()) {
                    wordsToAdd.add(new WordHolder(i.next(), i));
                }
                nextVal = theQ.peek();
            }

            // check if a literal needs to be created from the flipped bits of this sequence
            flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);
            if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                wordsToAdd.add(new WordHolder(flipBitLiteral, itr));
            } else if (itr.hasNext()) {
                wordsToAdd.add(new WordHolder(itr.next(), itr));
            }
        }

        theQ.addAll(wordsToAdd);
    }

    if (retVal.isEmpty()) {
        return new ImmutableConciseSet();
    }
    return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
}

From source file:org.apache.tephra.hbase.txprune.InvalidListPruningDebugTool.java

/**
 * Return a list of RegionPruneInfo. These regions are the ones that have the lowest prune upper bounds.
 * If -1 is passed in, all the regions and their prune upper bound will be returned. Note that only the regions
 * that are known to be live will be returned.
 *
 * @param numRegions number of regions
 * @param time time in milliseconds or relative time, regions recorded before the given time are returned
 * @return Map of region name and its prune upper bound
 */
@Override
@SuppressWarnings("WeakerAccess")
public SortedSet<RegionPruneInfoPretty> getIdleRegions(Integer numRegions, String time) throws IOException {
    List<RegionPruneInfo> regionPruneInfos = dataJanitorState.getPruneInfoForRegions(null);
    if (regionPruneInfos.isEmpty()) {
        return new TreeSet<>();
    }

    // Create a set with region names
    Set<String> pruneRegionNameSet = new HashSet<>();
    for (RegionPruneInfo regionPruneInfo : regionPruneInfos) {
        pruneRegionNameSet.add(regionPruneInfo.getRegionNameAsString());
    }

    // Fetch the latest live regions
    RegionsAtTime latestRegions = getRegionsOnOrBeforeTime(NOW);

    // Fetch the regions at the given time
    RegionsAtTime timeRegions = getRegionsOnOrBeforeTime(time);
    Set<String> liveRegions = Sets.intersection(latestRegions.getRegions(), timeRegions.getRegions());
    Set<String> liveRegionsWithPruneInfo = Sets.intersection(liveRegions, pruneRegionNameSet);
    List<RegionPruneInfo> liveRegionWithPruneInfoList = new ArrayList<>();
    for (RegionPruneInfo regionPruneInfo : regionPruneInfos) {
        if (liveRegionsWithPruneInfo.contains(regionPruneInfo.getRegionNameAsString())) {
            liveRegionWithPruneInfoList.add(regionPruneInfo);
        }

        // Use the subset of live regions and prune regions
        regionPruneInfos = liveRegionWithPruneInfoList;
    }

    if (numRegions < 0) {
        numRegions = regionPruneInfos.size();
    }

    Comparator<RegionPruneInfo> comparator = new Comparator<RegionPruneInfo>() {
        @Override
        public int compare(RegionPruneInfo o1, RegionPruneInfo o2) {
            int result = Long.compare(o1.getPruneUpperBound(), o2.getPruneUpperBound());
            if (result == 0) {
                return o1.getRegionNameAsString().compareTo(o2.getRegionNameAsString());
            }
            return result;
        }
    };
    MinMaxPriorityQueue<RegionPruneInfoPretty> lowestPrunes = MinMaxPriorityQueue.orderedBy(comparator)
            .maximumSize(numRegions).create();

    for (RegionPruneInfo pruneInfo : regionPruneInfos) {
        lowestPrunes.add(new RegionPruneInfoPretty(pruneInfo));
    }

    SortedSet<RegionPruneInfoPretty> regions = new TreeSet<>(comparator);
    regions.addAll(lowestPrunes);
    return regions;
}

From source file:org.apache.hadoop.hbase.master.balancer.DefaultLoadBalancer.java

/**
 * Generate a global load balancing plan according to the specified map of
 * server information to the most loaded regions of each server.
 *
 * The load balancing invariant is that all servers are within 1 region of the
 * average number of regions per server.  If the average is an integer number,
 * all servers will be balanced to the average.  Otherwise, all servers will
 * have either floor(average) or ceiling(average) regions.
 *
 * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that
 *   we can fetch from both ends of the queue. 
 * At the beginning, we check whether there was empty region server 
 *   just discovered by Master. If so, we alternately choose new / old
 *   regions from head / tail of regionsToMove, respectively. This alternation
 *   avoids clustering young regions on the newly discovered region server.
 *   Otherwise, we choose new regions from head of regionsToMove.
 *
 * Another improvement from HBASE-3609 is that we assign regions from
 *   regionsToMove to underloaded servers in round-robin fashion.
 *   Previously one underloaded server would be filled before we move onto
 *   the next underloaded server, leading to clustering of young regions.
 *   
 * Finally, we randomly shuffle underloaded servers so that they receive
 *   offloaded regions relatively evenly across calls to balanceCluster().
 *         
 * The algorithm is currently implemented as such:
 *
 * <ol>
 * <li>Determine the two valid numbers of regions each server should have,
 *     <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
 *
 * <li>Iterate down the most loaded servers, shedding regions from each so
 *     each server hosts exactly <b>MAX</b> regions.  Stop once you reach a
 *     server that already has &lt;= <b>MAX</b> regions.
 *     <p>
 *     Order the regions to move from most recent to least.
 *
 * <li>Iterate down the least loaded servers, assigning regions so each server
 *     has exactly <b>MIN</b> regions.  Stop once you reach a server that
 *     already has &gt;= <b>MIN</b> regions.
 *
 *     Regions being assigned to underloaded servers are those that were shed
 *     in the previous step.  It is possible that there were not enough
 *     regions shed to fill each underloaded server to <b>MIN</b>.  If so we
 *     end up with a number of regions required to do so, <b>neededRegions</b>.
 *
 *     It is also possible that we were able to fill each underloaded but ended
 *     up with regions that were unassigned from overloaded servers but that
 *     still do not have assignment.
 *
 *     If neither of these conditions hold (no regions needed to fill the
 *     underloaded servers, no regions leftover from overloaded servers),
 *     we are done and return.  Otherwise we handle these cases below.
 *
 * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers),
 *     we iterate the most loaded servers again, shedding a single server from
 *     each (this brings them from having <b>MAX</b> regions to having
 *     <b>MIN</b> regions).
 *
 * <li>We now definitely have more regions that need assignment, either from
 *     the previous step or from the original shedding from overloaded servers.
 *     Iterate the least loaded servers filling each to <b>MIN</b>.
 *
 * <li>If we still have more regions that need assignment, again iterate the
 *     least loaded servers, this time giving each one (filling them to
 *     <b>MAX</b>) until we run out.
 *
 * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
 *
 *     In addition, any server hosting &gt;= <b>MAX</b> regions is guaranteed
 *     to end up with <b>MAX</b> regions at the end of the balancing.  This
 *     ensures the minimal number of regions possible are moved.
 * </ol>
 *
 * TODO: We can at-most reassign the number of regions away from a particular
 *       server to be how many they report as most loaded.
 *       Should we just keep all assignment in memory?  Any objections?
 *       Does this mean we need HeapSize on HMaster?  Or just careful monitor?
 *       (current thinking is we will hold all assignments in memory)
 *
 * @param clusterMap Map of regionservers and their load/region information to
 *                   a list of their most loaded regions
 * @return a list of regions to be moved, including source and destination,
 *         or null if cluster is already balanced
 */
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) {
    boolean emptyRegionServerPresent = false;
    long startTime = System.currentTimeMillis();

    ClusterLoadState cs = new ClusterLoadState(clusterMap);

    if (!this.needsBalance(cs))
        return null;

    int numServers = cs.getNumServers();
    NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
    int numRegions = cs.getNumRegions();
    int min = numRegions / numServers;
    int max = numRegions % numServers == 0 ? min : min + 1;

    // Using to check balance result.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=")
            .append(numServers).append(", max=").append(max).append(", min=").append(min);
    LOG.debug(strBalanceParam.toString());

    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create();
    List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>();

    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch regions from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
        ServerAndLoad sal = server.getKey();
        int regionCount = sal.getLoad();
        if (regionCount <= max) {
            serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
            break;
        }
        serversOverloaded++;
        List<HRegionInfo> regions = server.getValue();
        int numToOffload = Math.min(regionCount - max, regions.size());
        // account for the out-of-band regions which were assigned to this server
        // after some other region server crashed 
        Collections.sort(regions, riComparator);
        int numTaken = 0;
        for (int i = 0; i <= numToOffload;) {
            HRegionInfo hri = regions.get(i); // fetch from head
            if (fetchFromTail) {
                hri = regions.get(regions.size() - 1 - i);
            }
            i++;
            // Don't rebalance meta regions.
            if (hri.isMetaRegion())
                continue;
            regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
            numTaken++;
            if (numTaken >= numToOffload)
                break;
            // fetch in alternate order if there is new region server
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
    }
    int totalNumMoved = regionsToMove.size();

    // Walk down least loaded, filling each to the min
    int neededRegions = 0; // number of regions needed to bring all up to min
    fetchFromTail = false;

    Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
    float average = (float) numRegions / numServers; // for logging
    int maxToTake = numRegions - (int) average;
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        if (maxToTake == 0)
            break; // no more to take
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min && regionCount > 0) {
            continue; // look for other servers which haven't reached min
        }
        int regionsToPut = min - regionCount;
        if (regionsToPut == 0) {
            regionsToPut = 1;
            maxToTake--;
        }
        underloadedServers.put(server.getKey().getServerName(), regionsToPut);
    }
    // number of servers that get new regions
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns = Arrays
            .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    while (regionsToMove.size() > 0) {
        int cnt = 0;
        int i = incr > 0 ? 0 : underloadedServers.size() - 1;
        for (; i >= 0 && i < underloadedServers.size(); i += incr) {
            if (regionsToMove.isEmpty())
                break;
            ServerName si = sns.get(i);
            int numToTake = underloadedServers.get(si);
            if (numToTake == 0)
                continue;

            addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }

            underloadedServers.put(si, numToTake - 1);
            cnt++;
            BalanceInfo bi = serverBalanceInfo.get(si);
            if (bi == null) {
                bi = new BalanceInfo(0, 0);
                serverBalanceInfo.put(si, bi);
            }
            bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
        }
        if (cnt == 0)
            break;
        // iterates underloadedServers in the other direction
        incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
        // If we still want to take some, increment needed
        neededRegions += i;
    }

    // If none needed to fill all to min and none left to drain all to max,
    // we are done
    if (neededRegions == 0 && regionsToMove.isEmpty()) {
        long endTime = System.currentTimeMillis();
        LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
                + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
                + " less loaded servers");
        return regionsToReturn;
    }

    // Need to do a second pass.
    // Either more regions to assign out or servers that are still underloaded

    // If we need more to fill min, grab one from each most loaded until enough
    if (neededRegions != 0) {
        // Walk down most loaded, grabbing one from each until we get enough
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
            if (idx >= server.getValue().size())
                break;
            HRegionInfo region = server.getValue().get(idx);
            if (region.isMetaRegion())
                continue; // Don't move meta regions.
            regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
            totalNumMoved++;
            if (--neededRegions == 0) {
                // No more regions needed, done shedding
                break;
            }
        }
    }

    // Now we have a set of regions that must be all assigned out
    // Assign each underloaded up to the min, then if leftovers, assign to max

    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min)
            break;
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
            regionCount += balanceInfo.getNumRegionsAdded();
        }
        if (regionCount >= min) {
            continue;
        }
        int numToTake = min - regionCount;
        int numTaken = 0;
        while (numTaken < numToTake && 0 < regionsToMove.size()) {
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            numTaken++;
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
    }

    // If we still have regions to dish out, assign underloaded to max
    if (0 < regionsToMove.size()) {
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
            int regionCount = server.getKey().getLoad();
            if (regionCount >= max) {
                break;
            }
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
            if (regionsToMove.isEmpty()) {
                break;
            }
        }
    }

    long endTime = System.currentTimeMillis();

    if (!regionsToMove.isEmpty() || neededRegions != 0) {
        // Emit data so can diagnose how balancer went astray.
        LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded="
                + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) {
            if (sb.length() > 0)
                sb.append(", ");
            sb.append(e.getKey().toString());
            sb.append(" ");
            sb.append(e.getValue().size());
        }
        LOG.warn("Input " + sb.toString());
    }

    // All done!
    LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
            + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
            + " less loaded servers");

    return regionsToReturn;
}

From source file:org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer.java

/**
 * Generate a global load balancing plan according to the specified map of
 * server information to the most loaded regions of each server.
 *
 * The load balancing invariant is that all servers are within 1 region of the
 * average number of regions per server.  If the average is an integer number,
 * all servers will be balanced to the average.  Otherwise, all servers will
 * have either floor(average) or ceiling(average) regions.
 *
 * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that
 *   we can fetch from both ends of the queue. 
 * At the beginning, we check whether there was empty region server 
 *   just discovered by Master. If so, we alternately choose new / old
 *   regions from head / tail of regionsToMove, respectively. This alternation
 *   avoids clustering young regions on the newly discovered region server.
 *   Otherwise, we choose new regions from head of regionsToMove.
 *
 * Another improvement from HBASE-3609 is that we assign regions from
 *   regionsToMove to underloaded servers in round-robin fashion.
 *   Previously one underloaded server would be filled before we move onto
 *   the next underloaded server, leading to clustering of young regions.
 *   
 * Finally, we randomly shuffle underloaded servers so that they receive
 *   offloaded regions relatively evenly across calls to balanceCluster().
 *         
 * The algorithm is currently implemented as such:
 *
 * <ol>
 * <li>Determine the two valid numbers of regions each server should have,
 *     <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
 *
 * <li>Iterate down the most loaded servers, shedding regions from each so
 *     each server hosts exactly <b>MAX</b> regions.  Stop once you reach a
 *     server that already has &lt;= <b>MAX</b> regions.
 *     <p>
 *     Order the regions to move from most recent to least.
 *
 * <li>Iterate down the least loaded servers, assigning regions so each server
 *     has exactly <b>MIN</b> regions.  Stop once you reach a server that
 *     already has &gt;= <b>MIN</b> regions.
 *
 *     Regions being assigned to underloaded servers are those that were shed
 *     in the previous step.  It is possible that there were not enough
 *     regions shed to fill each underloaded server to <b>MIN</b>.  If so we
 *     end up with a number of regions required to do so, <b>neededRegions</b>.
 *
 *     It is also possible that we were able to fill each underloaded but ended
 *     up with regions that were unassigned from overloaded servers but that
 *     still do not have assignment.
 *
 *     If neither of these conditions hold (no regions needed to fill the
 *     underloaded servers, no regions leftover from overloaded servers),
 *     we are done and return.  Otherwise we handle these cases below.
 *
 * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers),
 *     we iterate the most loaded servers again, shedding a single server from
 *     each (this brings them from having <b>MAX</b> regions to having
 *     <b>MIN</b> regions).
 *
 * <li>We now definitely have more regions that need assignment, either from
 *     the previous step or from the original shedding from overloaded servers.
 *     Iterate the least loaded servers filling each to <b>MIN</b>.
 *
 * <li>If we still have more regions that need assignment, again iterate the
 *     least loaded servers, this time giving each one (filling them to
 *     <b>MAX</b>) until we run out.
 *
 * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
 *
 *     In addition, any server hosting &gt;= <b>MAX</b> regions is guaranteed
 *     to end up with <b>MAX</b> regions at the end of the balancing.  This
 *     ensures the minimal number of regions possible are moved.
 * </ol>
 *
 * TODO: We can at-most reassign the number of regions away from a particular
 *       server to be how many they report as most loaded.
 *       Should we just keep all assignment in memory?  Any objections?
 *       Does this mean we need HeapSize on HMaster?  Or just careful monitor?
 *       (current thinking is we will hold all assignments in memory)
 *
 * @param clusterMap Map of regionservers and their load/region information to
 *                   a list of their most loaded regions
 * @return a list of regions to be moved, including source and destination,
 *         or null if cluster is already balanced
 */
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) {
    List<RegionPlan> regionsToReturn = balanceMasterRegions(clusterMap);
    if (regionsToReturn != null) {
        return regionsToReturn;
    }
    filterExcludedServers(clusterMap);
    boolean emptyRegionServerPresent = false;
    long startTime = System.currentTimeMillis();

    Collection<ServerName> backupMasters = getBackupMasters();
    ClusterLoadState cs = new ClusterLoadState(masterServerName, backupMasters, backupMasterWeight, clusterMap);

    if (!this.needsBalance(cs))
        return null;

    int numServers = cs.getNumServers();
    NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
    int numRegions = cs.getNumRegions();
    float average = cs.getLoadAverage();
    int max = (int) Math.ceil(average);
    int min = (int) average;

    // Using to check balance result.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=")
            .append(numServers).append(", numBackupMasters=").append(cs.getNumBackupMasters())
            .append(", backupMasterWeight=").append(backupMasterWeight).append(", max=").append(max)
            .append(", min=").append(min);
    LOG.debug(strBalanceParam.toString());

    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create();
    regionsToReturn = new ArrayList<RegionPlan>();

    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch regions from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
        ServerAndLoad sal = server.getKey();
        int load = sal.getLoad();
        if (load <= max) {
            serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
            break;
        }
        serversOverloaded++;
        List<HRegionInfo> regions = server.getValue();
        int w = 1; // Normal region server has weight 1
        if (backupMasters != null && backupMasters.contains(sal.getServerName())) {
            w = backupMasterWeight; // Backup master has heavier weight
        }
        int numToOffload = Math.min((load - max) / w, regions.size());
        // account for the out-of-band regions which were assigned to this server
        // after some other region server crashed 
        Collections.sort(regions, riComparator);
        int numTaken = 0;
        for (int i = 0; i <= numToOffload;) {
            HRegionInfo hri = regions.get(i); // fetch from head
            if (fetchFromTail) {
                hri = regions.get(regions.size() - 1 - i);
            }
            i++;
            // Don't rebalance special regions.
            if (shouldBeOnMaster(hri) && masterServerName.equals(sal.getServerName()))
                continue;
            regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
            numTaken++;
            if (numTaken >= numToOffload)
                break;
            // fetch in alternate order if there is new region server
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
    }
    int totalNumMoved = regionsToMove.size();

    // Walk down least loaded, filling each to the min
    int neededRegions = 0; // number of regions needed to bring all up to min
    fetchFromTail = false;

    Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
    int maxToTake = numRegions - min;
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        if (maxToTake == 0)
            break; // no more to take
        int load = server.getKey().getLoad();
        if (load >= min && load > 0) {
            continue; // look for other servers which haven't reached min
        }
        int w = 1; // Normal region server has weight 1
        if (backupMasters != null && backupMasters.contains(server.getKey().getServerName())) {
            w = backupMasterWeight; // Backup master has heavier weight
        }
        int regionsToPut = (min - load) / w;
        if (regionsToPut == 0) {
            regionsToPut = 1;
        }
        maxToTake -= regionsToPut;
        underloadedServers.put(server.getKey().getServerName(), regionsToPut);
    }
    // number of servers that get new regions
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns = Arrays
            .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    while (regionsToMove.size() > 0) {
        int cnt = 0;
        int i = incr > 0 ? 0 : underloadedServers.size() - 1;
        for (; i >= 0 && i < underloadedServers.size(); i += incr) {
            if (regionsToMove.isEmpty())
                break;
            ServerName si = sns.get(i);
            int numToTake = underloadedServers.get(si);
            if (numToTake == 0)
                continue;

            addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }

            underloadedServers.put(si, numToTake - 1);
            cnt++;
            BalanceInfo bi = serverBalanceInfo.get(si);
            if (bi == null) {
                bi = new BalanceInfo(0, 0);
                serverBalanceInfo.put(si, bi);
            }
            bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
        }
        if (cnt == 0)
            break;
        // iterates underloadedServers in the other direction
        incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
        // If we still want to take some, increment needed
        neededRegions += i;
    }

    // If none needed to fill all to min and none left to drain all to max,
    // we are done
    if (neededRegions == 0 && regionsToMove.isEmpty()) {
        long endTime = System.currentTimeMillis();
        LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
                + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
                + " less loaded servers");
        return regionsToReturn;
    }

    // Need to do a second pass.
    // Either more regions to assign out or servers that are still underloaded

    // If we need more to fill min, grab one from each most loaded until enough
    if (neededRegions != 0) {
        // Walk down most loaded, grabbing one from each until we get enough
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
            if (idx >= server.getValue().size())
                break;
            HRegionInfo region = server.getValue().get(idx);
            if (region.isMetaRegion())
                continue; // Don't move meta regions.
            regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
            totalNumMoved++;
            if (--neededRegions == 0) {
                // No more regions needed, done shedding
                break;
            }
        }
    }

    // Now we have a set of regions that must be all assigned out
    // Assign each underloaded up to the min, then if leftovers, assign to max

    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min)
            break;
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
            regionCount += balanceInfo.getNumRegionsAdded();
        }
        if (regionCount >= min) {
            continue;
        }
        int numToTake = min - regionCount;
        int numTaken = 0;
        while (numTaken < numToTake && 0 < regionsToMove.size()) {
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            numTaken++;
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
    }

    // If we still have regions to dish out, assign underloaded to max
    if (0 < regionsToMove.size()) {
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
            int regionCount = server.getKey().getLoad();
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            if (balanceInfo != null) {
                regionCount += balanceInfo.getNumRegionsAdded();
            }
            if (regionCount >= max) {
                break;
            }
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
            if (regionsToMove.isEmpty()) {
                break;
            }
        }
    }

    long endTime = System.currentTimeMillis();

    if (!regionsToMove.isEmpty() || neededRegions != 0) {
        // Emit data so can diagnose how balancer went astray.
        LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded="
                + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) {
            if (sb.length() > 0)
                sb.append(", ");
            sb.append(e.getKey().toString());
            sb.append(" ");
            sb.append(e.getValue().size());
        }
        LOG.warn("Input " + sb.toString());
    }

    // All done!
    LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
            + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
            + " less loaded servers");

    return regionsToReturn;
}

From source file:com.alibaba.wasp.master.balancer.DefaultLoadBalancer.java

/**
 * Generate a global load balancing plan according to the specified map of
 * server information to the most loaded entityGroups of each server.
 *
 * The load balancing invariant is that all servers are within 1 entityGroup of the
 * average number of entityGroups per server. If the average is an integer number,
 * all servers will be balanced to the average. Otherwise, all servers will
 * have either floor(average) or ceiling(average) entityGroups.
 * 
 * HBASE-3609 Modeled entityGroupsToMove using Guava's MinMaxPriorityQueue so that
 * we can fetch from both ends of the queue. At the beginning, we check
 * whether there was empty entityGroup server just discovered by Master. If so, we
 * alternately choose new / old entityGroups from head / tail of entityGroupsToMove,
 * respectively. This alternation avoids clustering young entityGroups on the newly
 * discovered entityGroup server. Otherwise, we choose new entityGroups from head of
 * entityGroupsToMove.
 * 
 * Another improvement from HBASE-3609 is that we assign entityGroups from
 * entityGroupsToMove to underloaded servers in round-robin fashion. Previously one
 * underloaded server would be filled before we move onto the next underloaded
 * server, leading to clustering of young entityGroups.
 * 
 * Finally, we randomly shuffle underloaded servers so that they receive
 * offloaded entityGroups relatively evenly across calls to balanceCluster().
 * 
 * The algorithm is currently implemented as such:
 * 
 * <ol>
 * <li>Determine the two valid numbers of entityGroups each server should have,
 * <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
 * 
 * <li>Iterate down the most loaded servers, shedding entityGroups from each so
 * each server hosts exactly <b>MAX</b> entityGroups. Stop once you reach a server
 * that already has &lt;= <b>MAX</b> entityGroups.
 * <p>
 * Order the entityGroups to move from most recent to least.
 * 
 * <li>Iterate down the least loaded servers, assigning entityGroups so each server
 * has exactly <b>MIN</b> entityGroups. Stop once you reach a server that already
 * has &gt;= <b>MIN</b> entityGroups.
 * 
 * EntityGroups being assigned to underloaded servers are those that were shed in
 * the previous step. It is possible that there were not enough entityGroups shed
 * to fill each underloaded server to <b>MIN</b>. If so we end up with a
 * number of entityGroups required to do so, <b>neededEntityGroups</b>.
 * 
 * It is also possible that we were able to fill each underloaded but ended up
 * with entityGroups that were unassigned from overloaded servers but that still do
 * not have assignment.
 * 
 * If neither of these conditions hold (no entityGroups needed to fill the
 * underloaded servers, no entityGroups leftover from overloaded servers), we are
 * done and return. Otherwise we handle these cases below.
 * 
 * <li>If <b>neededEntityGroups</b> is non-zero (still have underloaded servers),
 * we iterate the most loaded servers again, shedding a single server from
 * each (this brings them from having <b>MAX</b> entityGroups to having <b>MIN</b>
 * entityGroups).
 * 
 * <li>We now definitely have more entityGroups that need assignment, either from
 * the previous step or from the original shedding from overloaded servers.
 * Iterate the least loaded servers filling each to <b>MIN</b>.
 * 
 * <li>If we still have more entityGroups that need assignment, again iterate the
 * least loaded servers, this time giving each one (filling them to
 * <b>MAX</b>) until we run out.
 * 
 * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> entityGroups.
 * 
 * In addition, any server hosting &gt;= <b>MAX</b> entityGroups is guaranteed to
 * end up with <b>MAX</b> entityGroups at the end of the balancing. This ensures
 * the minimal number of entityGroups possible are moved.
 * </ol>
 * 
 * TODO: We can at-most reassign the number of entityGroups away from a particular
 * server to be how many they report as most loaded. Should we just keep all
 * assignment in memory? Any objections? Does this mean we need HeapSize on
 * HMaster? Or just careful monitor? (current thinking is we will hold all
 * assignments in memory)
 * 
 * @param clusterState Map of entityGroupservers and their load/entityGroup information
 *          to a list of their most loaded entityGroups
 * @return a list of entityGroups to be moved, including source and destination, or
 *         null if cluster is already balanced
 */
public List<EntityGroupPlan> balanceCluster(Map<ServerName, List<EntityGroupInfo>> clusterMap) {
    boolean emptyFServerPresent = false;
    long startTime = System.currentTimeMillis();

    ClusterLoadState cs = new ClusterLoadState(clusterMap);

    int numServers = cs.getNumServers();
    if (numServers == 0) {
        LOG.debug("numServers=0 so skipping load balancing");
        return null;
    }
    NavigableMap<ServerAndLoad, List<EntityGroupInfo>> serversByLoad = cs.getServersByLoad();

    int numEntityGroups = cs.getNumEntityGroups();

    if (!this.needsBalance(cs)) {
        // Skipped because no server outside (min,max) range
        float average = cs.getLoadAverage(); // for logging
        LOG.info("Skipping load balancing because balanced cluster; " + "servers=" + numServers + " "
                + "entityGroups=" + numEntityGroups + " average=" + average + " " + "mostloaded="
                + serversByLoad.lastKey().getLoad() + " leastloaded=" + serversByLoad.firstKey().getLoad());
        return null;
    }

    int min = numEntityGroups / numServers;
    int max = numEntityGroups % numServers == 0 ? min : min + 1;

    // Using to check balance result.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numEntityGroups=").append(numEntityGroups)
            .append(", numServers=").append(numServers).append(", max=").append(max).append(", min=")
            .append(min);
    LOG.debug(strBalanceParam.toString());

    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<EntityGroupPlan> entityGroupsToMove = MinMaxPriorityQueue.orderedBy(rpComparator)
            .create();
    List<EntityGroupPlan> entityGroupsToReturn = new ArrayList<EntityGroupPlan>();

    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch entityGroups from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
    for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.descendingMap().entrySet()) {
        ServerAndLoad sal = server.getKey();
        int entityGroupCount = sal.getLoad();
        if (entityGroupCount <= max) {
            serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
            break;
        }
        serversOverloaded++;
        List<EntityGroupInfo> entityGroups = server.getValue();
        int numToOffload = Math.min(entityGroupCount - max, entityGroups.size());
        // account for the out-of-band entityGroups which were assigned to this server
        // after some other entityGroup server crashed
        Collections.sort(entityGroups, riComparator);
        int numTaken = 0;
        for (int i = 0; i <= numToOffload;) {
            EntityGroupInfo egInfo = entityGroups.get(i); // fetch from head
            if (fetchFromTail) {
                egInfo = entityGroups.get(entityGroups.size() - 1 - i);
            }
            i++;
            entityGroupsToMove.add(new EntityGroupPlan(egInfo, sal.getServerName(), null));
            numTaken++;
            if (numTaken >= numToOffload)
                break;
            // fetch in alternate order if there is a new entityGroup server
            if (emptyFServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
    }
    int totalNumMoved = entityGroupsToMove.size();

    // Walk down least loaded, filling each to the min
    int neededEntityGroups = 0; // number of entityGroups needed to bring all up to min
    fetchFromTail = false;

    Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
    for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) {
        int entityGroupCount = server.getKey().getLoad();
        if (entityGroupCount >= min) {
            break;
        }
        underloadedServers.put(server.getKey().getServerName(), min - entityGroupCount);
    }
    // number of servers that get new entityGroups
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns = Arrays
            .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    while (entityGroupsToMove.size() > 0) {
        int cnt = 0;
        int i = incr > 0 ? 0 : underloadedServers.size() - 1;
        for (; i >= 0 && i < underloadedServers.size(); i += incr) {
            if (entityGroupsToMove.isEmpty())
                break;
            ServerName si = sns.get(i);
            int numToTake = underloadedServers.get(si);
            if (numToTake == 0)
                continue;

            addEntityGroupPlan(entityGroupsToMove, fetchFromTail, si, entityGroupsToReturn);
            if (emptyFServerPresent) {
                fetchFromTail = !fetchFromTail;
            }

            underloadedServers.put(si, numToTake - 1);
            cnt++;
            BalanceInfo bi = serverBalanceInfo.get(si);
            if (bi == null) {
                bi = new BalanceInfo(0, 0);
                serverBalanceInfo.put(si, bi);
            }
            bi.setNumEntityGroupsAdded(bi.getNumEntityGroupsAdded() + 1);
        }
        if (cnt == 0)
            break;
        // iterates underloadedServers in the other direction
        incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
        // If we still want to take some, increment needed
        neededEntityGroups += i;
    }

    // If none needed to fill all to min and none left to drain all to max,
    // we are done
    if (neededEntityGroups == 0 && entityGroupsToMove.isEmpty()) {
        long endTime = System.currentTimeMillis();
        LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
                + " entityGroups off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
                + " less loaded servers");
        return entityGroupsToReturn;
    }

    // Need to do a second pass.
    // Either more entityGroups to assign out or servers that are still underloaded

    // If we need more to fill min, grab one from each most loaded until enough
    if (neededEntityGroups != 0) {
        // Walk down most loaded, grabbing one from each until we get enough
        for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.descendingMap()
                .entrySet()) {
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            int idx = balanceInfo == null ? 0 : balanceInfo.getNextEntityGroupForUnload();
            if (idx >= server.getValue().size())
                break;
            EntityGroupInfo entityGroup = server.getValue().get(idx);
            entityGroupsToMove.add(new EntityGroupPlan(entityGroup, server.getKey().getServerName(), null));
            totalNumMoved++;
            if (--neededEntityGroups == 0) {
                // No more entityGroups needed, done shedding
                break;
            }
        }
    }

    // Now we have a set of entityGroups that must be all assigned out
    // Assign each underloaded up to the min, then if leftovers, assign to max

    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) {
        int entityGroupCount = server.getKey().getLoad();
        if (entityGroupCount >= min)
            break;
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
            entityGroupCount += balanceInfo.getNumEntityGroupsAdded();
        }
        if (entityGroupCount >= min) {
            continue;
        }
        int numToTake = min - entityGroupCount;
        int numTaken = 0;
        while (numTaken < numToTake && 0 < entityGroupsToMove.size()) {
            addEntityGroupPlan(entityGroupsToMove, fetchFromTail, server.getKey().getServerName(),
                    entityGroupsToReturn);
            numTaken++;
            if (emptyFServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
    }

    // If we still have entityGroups to dish out, assign underloaded to max
    if (0 < entityGroupsToMove.size()) {
        for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) {
            int entityGroupCount = server.getKey().getLoad();
            if (entityGroupCount >= max) {
                break;
            }
            addEntityGroupPlan(entityGroupsToMove, fetchFromTail, server.getKey().getServerName(),
                    entityGroupsToReturn);
            if (emptyFServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
            if (entityGroupsToMove.isEmpty()) {
                break;
            }
        }
    }

    long endTime = System.currentTimeMillis();

    if (!entityGroupsToMove.isEmpty() || neededEntityGroups != 0) {
        // Emit data so can diagnose how balancer went astray.
        LOG.warn("entityGroupsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded="
                + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ServerName, List<EntityGroupInfo>> e : clusterMap.entrySet()) {
            if (sb.length() > 0)
                sb.append(", ");
            sb.append(e.getKey().toString());
            sb.append(" ");
            sb.append(e.getValue().size());
        }
        LOG.warn("Input " + sb.toString());
    }

    // All done!
    LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
            + " entityGroups off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
            + " less loaded servers");

    return entityGroupsToReturn;
}
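
The loop above hands plans out through addEntityGroupPlan(...), whose body does not appear in this excerpt. A minimal sketch of such a helper, assuming EntityGroupPlan exposes a setDestination(ServerName) setter analogous to HBase's RegionPlan (an assumption made only for this illustration), shows how the fetchFromTail flag maps onto the two ends of the MinMaxPriorityQueue:

// Sketch only: drain the next plan from either end of the double-ended
// priority queue and route it to the chosen destination server.
private void addEntityGroupPlan(MinMaxPriorityQueue<EntityGroupPlan> entityGroupsToMove,
        boolean fetchFromTail, ServerName destination, List<EntityGroupPlan> entityGroupsToReturn) {
    // removeFirst() returns the least plan under rpComparator, removeLast() the greatest;
    // the caller alternates the flag when an empty server has just joined.
    EntityGroupPlan plan = fetchFromTail ? entityGroupsToMove.removeLast()
            : entityGroupsToMove.removeFirst();
    plan.setDestination(destination); // assumed setter, mirroring RegionPlan.setDestination
    entityGroupsToReturn.add(plan);
}

Because both ends are reachable in O(log n), the balancer can switch between newest and oldest pending moves without re-sorting the queue.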

From source file:org.apache.hadoop.hbase.master.DefaultLoadBalancer.java

/**
 * Generate a global load balancing plan according to the specified map of
 * server information to the most loaded regions of each server.
 *
 * The load balancing invariant is that all servers are within 1 region of the
 * average number of regions per server.  If the average is an integer number,
 * all servers will be balanced to the average.  Otherwise, all servers will
 * have either floor(average) or ceiling(average) regions.
 *
 * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that
 *   we can fetch from both ends of the queue. 
 * At the beginning, we check whether there is an empty region server
 *   just discovered by Master. If so, we alternately choose new / old
 *   regions from head / tail of regionsToMove, respectively. This alternation
 *   avoids clustering young regions on the newly discovered region server.
 *   Otherwise, we choose new regions from head of regionsToMove.
 *
 * Another improvement from HBASE-3609 is that we assign regions from
 *   regionsToMove to underloaded servers in round-robin fashion.
 *   Previously one underloaded server would be filled before we move onto
 *   the next underloaded server, leading to clustering of young regions.
 *   
 * Finally, we randomly shuffle underloaded servers so that they receive
 *   offloaded regions relatively evenly across calls to balanceCluster().
 *         
 * The algorithm is currently implemented as follows:
 *
 * <ol>
 * <li>Determine the two valid numbers of regions each server should have,
 *     <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
 *
 * <li>Iterate down the most loaded servers, shedding regions from each so
 *     each server hosts exactly <b>MAX</b> regions.  Stop once you reach a
 *     server that already has &lt;= <b>MAX</b> regions.
 *     <p>
 *     Order the regions to move from most recent to least.
 *
 * <li>Iterate down the least loaded servers, assigning regions so each server
 *     has exactly <b>MIN</b> regions.  Stop once you reach a server that
 *     already has &gt;= <b>MIN</b> regions.
 *
 *     Regions being assigned to underloaded servers are those that were shed
 *     in the previous step.  It is possible that there were not enough
 *     regions shed to fill each underloaded server to <b>MIN</b>.  If so we
 *     end up with a number of regions required to do so, <b>neededRegions</b>.
 *
 *     It is also possible that we were able to fill each underloaded server but
 *     ended up with regions that were unassigned from overloaded servers and
 *     still do not have an assignment.
 *
 *     If neither of these conditions hold (no regions needed to fill the
 *     underloaded servers, no regions leftover from overloaded servers),
 *     we are done and return.  Otherwise we handle these cases below.
 *
 * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers),
 *     we iterate the most loaded servers again, shedding a single region from
 *     each (this brings them from having <b>MAX</b> regions to having
 *     <b>MIN</b> regions).
 *
 * <li>We now definitely have more regions that need assignment, either from
 *     the previous step or from the original shedding from overloaded servers.
 *     Iterate the least loaded servers filling each to <b>MIN</b>.
 *
 * <li>If we still have more regions that need assignment, again iterate the
 *     least loaded servers, this time giving each one (filling them to
 *     <b>MAX</b>) until we run out.
 *
 * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
 *
 *     In addition, any server hosting &gt;= <b>MAX</b> regions is guaranteed
 *     to end up with <b>MAX</b> regions at the end of the balancing.  This
 *     ensures the minimal number of regions possible are moved.
 * </ol>
 *
 * TODO: We can at-most reassign the number of regions away from a particular
 *       server to be how many they report as most loaded.
 *       Should we just keep all assignment in memory?  Any objections?
 *       Does this mean we need HeapSize on HMaster?  Or just careful monitor?
 *       (current thinking is we will hold all assignments in memory)
 *
 * @param clusterState Map of regionservers and their load/region information to
 *                   a list of their most loaded regions
 * @return a list of regions to be moved, including source and destination,
 *         or null if cluster is already balanced
 */
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
    boolean emptyRegionServerPresent = false;
    long startTime = System.currentTimeMillis();

    int numServers = clusterState.size();
    if (numServers == 0) {
        LOG.debug("numServers=0 so skipping load balancing");
        return null;
    }
    NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = new TreeMap<ServerAndLoad, List<HRegionInfo>>();
    int numRegions = 0;
    // Iterate so we can count regions as we build the map
    for (Map.Entry<ServerName, List<HRegionInfo>> server : clusterState.entrySet()) {
        List<HRegionInfo> regions = server.getValue();
        int sz = regions.size();
        if (sz == 0)
            emptyRegionServerPresent = true;
        numRegions += sz;
        serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions);
    }
    // Check if we even need to do any load balancing
    float average = (float) numRegions / numServers; // for logging
    // HBASE-3681 check sloppiness first
    int floor = (int) Math.floor(average * (1 - slop));
    int ceiling = (int) Math.ceil(average * (1 + slop));
    if (serversByLoad.lastKey().getLoad() <= ceiling && serversByLoad.firstKey().getLoad() >= floor) {
        // Skipped because no server outside (min,max) range
        LOG.info("Skipping load balancing because balanced cluster; " + "servers=" + numServers + " "
                + "regions=" + numRegions + " average=" + average + " " + "mostloaded="
                + serversByLoad.lastKey().getLoad() + " leastloaded=" + serversByLoad.firstKey().getLoad());
        return null;
    }
    int min = numRegions / numServers;
    int max = numRegions % numServers == 0 ? min : min + 1;

    // Log the balance parameters so the balance result can be checked.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=")
            .append(numServers).append(", max=").append(max).append(", min=").append(min);
    LOG.debug(strBalanceParam.toString());

    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create();
    List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>();

    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch regions from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
        ServerAndLoad sal = server.getKey();
        int regionCount = sal.getLoad();
        if (regionCount <= max) {
            serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
            break;
        }
        serversOverloaded++;
        List<HRegionInfo> regions = server.getValue();
        int numToOffload = Math.min(regionCount - max, regions.size());
        // account for the out-of-band regions which were assigned to this server
        // after some other region server crashed 
        Collections.sort(regions, riComparator);
        int numTaken = 0;
        for (int i = 0; i <= numToOffload;) {
            HRegionInfo hri = regions.get(i); // fetch from head
            if (fetchFromTail) {
                hri = regions.get(regions.size() - 1 - i);
            }
            i++;
            // Don't rebalance meta regions.
            if (hri.isMetaRegion())
                continue;
            regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
            numTaken++;
            if (numTaken >= numToOffload)
                break;
            // fetch in alternate order if there is a new region server
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
    }
    int totalNumMoved = regionsToMove.size();

    // Walk down least loaded, filling each to the min
    int neededRegions = 0; // number of regions needed to bring all up to min
    fetchFromTail = false;

    Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min) {
            break;
        }
        underloadedServers.put(server.getKey().getServerName(), min - regionCount);
    }
    // number of servers that get new regions
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns = Arrays
            .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    while (regionsToMove.size() > 0) {
        int cnt = 0;
        int i = incr > 0 ? 0 : underloadedServers.size() - 1;
        for (; i >= 0 && i < underloadedServers.size(); i += incr) {
            if (regionsToMove.isEmpty())
                break;
            ServerName si = sns.get(i);
            int numToTake = underloadedServers.get(si);
            if (numToTake == 0)
                continue;

            addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }

            underloadedServers.put(si, numToTake - 1);
            cnt++;
            BalanceInfo bi = serverBalanceInfo.get(si);
            if (bi == null) {
                bi = new BalanceInfo(0, 0);
                serverBalanceInfo.put(si, bi);
            }
            bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
        }
        if (cnt == 0)
            break;
        // iterates underloadedServers in the other direction
        incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
        // If we still want to take some, increment needed
        neededRegions += i;
    }

    // If none needed to fill all to min and none left to drain all to max,
    // we are done
    if (neededRegions == 0 && regionsToMove.isEmpty()) {
        long endTime = System.currentTimeMillis();
        LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
                + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
                + " less loaded servers");
        return regionsToReturn;
    }

    // Need to do a second pass.
    // Either more regions to assign out or servers that are still underloaded

    // If we need more to fill min, grab one from each most loaded until enough
    if (neededRegions != 0) {
        // Walk down most loaded, grabbing one from each until we get enough
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
            if (idx >= server.getValue().size())
                break;
            HRegionInfo region = server.getValue().get(idx);
            if (region.isMetaRegion())
                continue; // Don't move meta regions.
            regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
            totalNumMoved++;
            if (--neededRegions == 0) {
                // No more regions needed, done shedding
                break;
            }
        }
    }

    // Now we have a set of regions that must be all assigned out
    // Assign each underloaded up to the min, then if leftovers, assign to max

    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min)
            break;
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
            regionCount += balanceInfo.getNumRegionsAdded();
        }
        if (regionCount >= min) {
            continue;
        }
        int numToTake = min - regionCount;
        int numTaken = 0;
        while (numTaken < numToTake && 0 < regionsToMove.size()) {
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            numTaken++;
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
    }

    // If we still have regions to dish out, assign underloaded to max
    if (0 < regionsToMove.size()) {
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
            int regionCount = server.getKey().getLoad();
            if (regionCount >= max) {
                break;
            }
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
            if (regionsToMove.isEmpty()) {
                break;
            }
        }
    }

    long endTime = System.currentTimeMillis();

    if (!regionsToMove.isEmpty() || neededRegions != 0) {
        // Emit data so can diagnose how balancer went astray.
        LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded="
                + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterState.entrySet()) {
            if (sb.length() > 0)
                sb.append(", ");
            sb.append(e.getKey().toString());
            sb.append(" ");
            sb.append(e.getValue().size());
        }
        LOG.warn("Input " + sb.toString());
    }

    // All done!
    LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
            + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
            + " less loaded servers");

    return regionsToReturn;
}
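
To see the HBASE-3609 double-ended access pattern in isolation, the following self-contained snippet (the class name, sample loads, and alternation flag are invented for illustration) builds a queue with MinMaxPriorityQueue.orderedBy(...) and alternately polls the head and the tail, just as the balancer does when an empty region server is present:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Comparator;

public class AlternatingDrainDemo {
    public static void main(String[] args) {
        MinMaxPriorityQueue<Integer> toMove =
                MinMaxPriorityQueue.orderedBy(Comparator.<Integer>naturalOrder()).create();
        for (int load : new int[] { 5, 1, 9, 3, 7 }) {
            toMove.add(load);
        }
        boolean fetchFromTail = false;
        while (!toMove.isEmpty()) {
            // pollFirst() yields the smallest element, pollLast() the largest.
            int next = fetchFromTail ? toMove.pollLast() : toMove.pollFirst();
            System.out.println((fetchFromTail ? "tail -> " : "head -> ") + next);
            fetchFromTail = !fetchFromTail; // mimic the emptyRegionServerPresent alternation
        }
    }
}

Both pollFirst() and pollLast() run in O(log n), which is why keeping the pending moves in a MinMaxPriorityQueue is cheaper than re-sorting a plain list every time the balancer switches ends.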

From source file:gobblin.source.extractor.extract.kafka.workunit.packer.KafkaWorkUnitPacker.java

/**
 * Pack a list of {@link WorkUnit}s into a smaller number of {@link MultiWorkUnit}s,
 * using the worst-fit-decreasing algorithm.
 *
 * Each {@link WorkUnit} is assigned to the {@link MultiWorkUnit} with the smallest load.
 */
protected List<WorkUnit> worstFitDecreasingBinPacking(List<WorkUnit> groups, int numOfMultiWorkUnits) {

    // Sort workunit groups by data size desc
    Collections.sort(groups, LOAD_DESC_COMPARATOR);

    MinMaxPriorityQueue<MultiWorkUnit> pQueue = MinMaxPriorityQueue.orderedBy(LOAD_ASC_COMPARATOR)
            .expectedSize(numOfMultiWorkUnits).create();
    for (int i = 0; i < numOfMultiWorkUnits; i++) {
        MultiWorkUnit multiWorkUnit = MultiWorkUnit.createEmpty();
        setWorkUnitEstSize(multiWorkUnit, 0);
        pQueue.add(multiWorkUnit);
    }

    for (WorkUnit group : groups) {
        MultiWorkUnit lightestMultiWorkUnit = pQueue.poll();
        addWorkUnitToMultiWorkUnit(group, lightestMultiWorkUnit);
        pQueue.add(lightestMultiWorkUnit);
    }

    logMultiWorkUnitInfo(pQueue);

    double minLoad = getWorkUnitEstLoad(pQueue.peekFirst());
    double maxLoad = getWorkUnitEstLoad(pQueue.peekLast());
    LOG.info(String.format("Min load of multiWorkUnit = %f; Max load of multiWorkUnit = %f; Diff = %f%%",
            minLoad, maxLoad, (maxLoad - minLoad) / maxLoad * 100.0));

    this.state.setProp(MIN_MULTIWORKUNIT_LOAD, minLoad);
    this.state.setProp(MAX_MULTIWORKUNIT_LOAD, maxLoad);

    List<WorkUnit> multiWorkUnits = Lists.newArrayList();
    multiWorkUnits.addAll(pQueue);
    return multiWorkUnits;
}
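
The same worst-fit-decreasing pattern can be exercised without Gobblin's WorkUnit machinery. In the following self-contained sketch (the Bin class, the sample sizes, and the bin count are invented for illustration), the bins sit in a MinMaxPriorityQueue ordered by ascending load, so poll() always hands back the lightest bin and peekFirst()/peekLast() expose the min and max loads at the end:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class WorstFitDecreasingDemo {
    static final class Bin {
        long load;
        final List<Long> items = new ArrayList<>();
    }

    public static void main(String[] args) {
        long[] sizes = { 90, 70, 50, 40, 30, 20, 10 }; // already sorted descending
        int numBins = 3;

        MinMaxPriorityQueue<Bin> bins = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingLong((Bin b) -> b.load))
                .expectedSize(numBins)
                .create();
        for (int i = 0; i < numBins; i++) {
            bins.add(new Bin());
        }

        for (long size : sizes) {
            Bin lightest = bins.poll();  // bin with the smallest current load
            lightest.items.add(size);
            lightest.load += size;
            bins.add(lightest);          // re-insert so the queue sees the updated load
        }

        System.out.println("min load = " + bins.peekFirst().load
                + ", max load = " + bins.peekLast().load);
    }
}

Re-inserting the bin after each assignment is what keeps the ordering current; MinMaxPriorityQueue does not re-sort elements whose comparison keys change in place, which is why the Gobblin code above also polls and re-adds the lightest MultiWorkUnit on every iteration.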