Example usage for com.google.common.collect MinMaxPriorityQueue add

List of usage examples for com.google.common.collect MinMaxPriorityQueue add

Introduction

This page shows example usage of the com.google.common.collect MinMaxPriorityQueue add method.

Prototype

@Override
public boolean add(E element) 

Document

Adds the given element to this queue.
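
Before the full-sized examples below, here is a minimal, self-contained sketch of add in isolation. It assumes only Guava on the classpath; the element values are arbitrary.

import com.google.common.collect.MinMaxPriorityQueue;

public class MinMaxAddExample {
    public static void main(String[] args) {
        // Natural ordering, unbounded size.
        MinMaxPriorityQueue<Integer> queue = MinMaxPriorityQueue.create();

        // add(E) inserts the element; for an unbounded queue it always returns true.
        queue.add(42);
        queue.add(7);
        queue.add(99);

        System.out.println(queue.peekFirst()); // 7  (least element)
        System.out.println(queue.peekLast());  // 99 (greatest element)
    }
}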

Usage

From source file: com.metamx.druid.master.rules.LoadRule.java

private MasterStats drop(int expectedReplicants, int clusterReplicants, final DataSegment segment,
        final DruidMasterRuntimeParams params) {
    MasterStats stats = new MasterStats();
    final ReplicationThrottler replicationManager = params.getReplicationManager();

    if (!params.hasDeletionWaitTimeElapsed()) {
        return stats;
    }

    // Make sure we have enough actual replicants in the cluster before doing anything
    if (clusterReplicants < expectedReplicants) {
        return stats;
    }

    Map<String, Integer> replicantsByType = params.getSegmentReplicantLookup()
            .getClusterTiers(segment.getIdentifier());

    for (Map.Entry<String, Integer> entry : replicantsByType.entrySet()) {
        String tier = entry.getKey();
        int actualNumReplicantsForType = entry.getValue();
        int expectedNumReplicantsForType = getReplicants(tier);

        MinMaxPriorityQueue<ServerHolder> serverQueue = params.getDruidCluster().get(tier);
        if (serverQueue == null) {
            log.makeAlert("No holders found for tier[%s]", entry.getKey()).emit();
            return stats;
        }

        List<ServerHolder> droppedServers = Lists.newArrayList();
        while (actualNumReplicantsForType > expectedNumReplicantsForType) {
            final ServerHolder holder = serverQueue.pollLast();
            if (holder == null) {
                log.warn("Wtf, holder was null?  I have no servers serving [%s]?", segment.getIdentifier());
                break;
            }

            if (holder.isServingSegment(segment)) {
                if (expectedNumReplicantsForType > 0) { // don't throttle unless we are removing extra replicants
                    if (!replicationManager.canDestroyReplicant(getTier())) {
                        serverQueue.add(holder);
                        break;
                    }

                    replicationManager.registerReplicantTermination(getTier(), segment.getIdentifier(),
                            holder.getServer().getHost());
                }

                holder.getPeon().dropSegment(segment, new LoadPeonCallback() {
                    @Override
                    protected void execute() {
                        replicationManager.unregisterReplicantTermination(getTier(), segment.getIdentifier(),
                                holder.getServer().getHost());
                    }
                });
                --actualNumReplicantsForType;
                stats.addToTieredStat("droppedCount", tier, 1);
            }
            droppedServers.add(holder);
        }
        serverQueue.addAll(droppedServers);
    }

    return stats;
}
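
The loop above leans on a MinMaxPriorityQueue idiom: pollLast() removes the lowest-priority holder, and add() puts it back when the throttler refuses the drop, leaving the queue's contents as they were. A minimal sketch of that round trip, with a hypothetical mayDrop check standing in for ReplicationThrottler.canDestroyReplicant():

import com.google.common.collect.MinMaxPriorityQueue;

public class PollAndReAdd {
    // Hypothetical stand-in for ReplicationThrottler.canDestroyReplicant().
    static boolean mayDrop(String server) {
        return false; // pretend the throttler says no
    }

    public static void main(String[] args) {
        MinMaxPriorityQueue<String> servers = MinMaxPriorityQueue.create();
        servers.add("server-a");
        servers.add("server-c");
        servers.add("server-b");

        String last = servers.pollLast(); // removes "server-c", the greatest element
        if (!mayDrop(last)) {
            servers.add(last); // throttled: put the holder back, as LoadRule does
        }
        System.out.println(servers.peekLast()); // "server-c" again
    }
}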

From source file: io.druid.server.coordinator.rules.LoadRule.java

private CoordinatorStats drop(final Map<String, Integer> loadStatus, final DataSegment segment,
        final DruidCoordinatorRuntimeParams params) {
    CoordinatorStats stats = new CoordinatorStats();

    // Make sure we have enough loaded replicants in the correct tiers in the cluster before doing anything
    for (Integer leftToLoad : loadStatus.values()) {
        if (leftToLoad > 0) {
            return stats;
        }
    }

    final ReplicationThrottler replicationManager = params.getReplicationManager();

    // Find all instances of this segment across tiers
    Map<String, Integer> replicantsByTier = params.getSegmentReplicantLookup()
            .getClusterTiers(segment.getIdentifier());

    for (Map.Entry<String, Integer> entry : replicantsByTier.entrySet()) {
        final String tier = entry.getKey();
        int loadedNumReplicantsForTier = entry.getValue();
        int expectedNumReplicantsForTier = getNumReplicants(tier);

        stats.addToTieredStat(droppedCount, tier, 0);

        MinMaxPriorityQueue<ServerHolder> serverQueue = params.getDruidCluster().get(tier);
        if (serverQueue == null) {
            log.makeAlert("No holders found for tier[%s]", entry.getKey()).emit();
            return stats;
        }

        List<ServerHolder> droppedServers = Lists.newArrayList();
        while (loadedNumReplicantsForTier > expectedNumReplicantsForTier) {
            final ServerHolder holder = serverQueue.pollLast();
            if (holder == null) {
                log.warn("Wtf, holder was null?  I have no servers serving [%s]?", segment.getIdentifier());
                break;
            }

            if (holder.isServingSegment(segment)) {
                if (expectedNumReplicantsForTier > 0) { // don't throttle unless we are removing extra replicants
                    if (!replicationManager.canDestroyReplicant(tier)) {
                        serverQueue.add(holder);
                        break;
                    }

                    replicationManager.registerReplicantTermination(tier, segment.getIdentifier(),
                            holder.getServer().getHost());
                }

                holder.getPeon().dropSegment(segment, new LoadPeonCallback() {
                    @Override
                    public void execute() {
                        replicationManager.unregisterReplicantTermination(tier, segment.getIdentifier(),
                                holder.getServer().getHost());
                    }
                });
                --loadedNumReplicantsForTier;
                stats.addToTieredStat(droppedCount, tier, 1);
            }
            droppedServers.add(holder);
        }
        serverQueue.addAll(droppedServers);
    }

    return stats;
}

From source file: org.dkpro.tc.features.pair.core.ngram.LuceneNGramCPFE.java

private FrequencyDistribution<String> getTopNgramsCombo(int topNgramThreshold, String fieldName)
        throws ResourceInitializationException {

    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();

    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(topNgramThreshold).create();
    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(fieldName);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    //add conditions here, like ngram1 is in most freq ngrams1...
                    String combo1 = term.split(ComboUtils.JOINT)[0];
                    String combo2 = term.split(ComboUtils.JOINT)[1];
                    int combinedSize = combo1.split("_").length + combo2.split("_").length;
                    if (topKSetView1.contains(combo1) && topKSet.contains(combo1)
                            && topKSetView2.contains(combo2) && topKSet.contains(combo2)
                            && combinedSize <= ngramMaxNCombo && combinedSize >= ngramMinNCombo) {
                        //print out here for testing
                        topN.add(new TermFreqTuple(term, freq));
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        // System.out.println(tuple.getTerm() + " - " + tuple.getFreq());
        topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
    }

    return topNGrams;
}
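
The call that makes this a top-N selection is MinMaxPriorityQueue.maximumSize(topNgramThreshold): once the queue is full, each add() evicts the queue's greatest element under its ordering. A minimal sketch with integer scores ordered descending, so the eviction victim is always the lowest score (the descending ordering here is an assumption for illustration, not TermFreqTuple's actual comparator):

import com.google.common.collect.MinMaxPriorityQueue;
import com.google.common.collect.Ordering;

public class TopNExample {
    public static void main(String[] args) {
        // Keep the 3 highest scores: order descending and cap the size at 3.
        // When full, add() evicts the queue's greatest element under this
        // ordering, which is the lowest score.
        MinMaxPriorityQueue<Integer> topN = MinMaxPriorityQueue
                .orderedBy(Ordering.<Integer>natural().reverse())
                .maximumSize(3)
                .create();

        for (int score : new int[] { 5, 1, 9, 3, 7 }) {
            topN.add(score);
        }

        // poll() drains from the least element, i.e. descending scores: 9 7 5
        while (!topN.isEmpty()) {
            System.out.println(topN.poll());
        }
    }
}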

From source file: com.metamx.druid.master.BalancerCostAnalyzer.java

private MinMaxPriorityQueue<Pair<Double, ServerHolder>> computeCosts(final DataSegment proposalSegment,
        final Iterable<ServerHolder> serverHolders) {
    MinMaxPriorityQueue<Pair<Double, ServerHolder>> costsAndServers = MinMaxPriorityQueue
            .orderedBy(new Comparator<Pair<Double, ServerHolder>>() {
                @Override
                public int compare(Pair<Double, ServerHolder> o, Pair<Double, ServerHolder> o1) {
                    return Double.compare(o.lhs, o1.lhs);
                }
            }).create();

    final long proposalSegmentSize = proposalSegment.getSize();

    for (ServerHolder server : serverHolders) {
        /** Don't calculate cost if the server doesn't have enough space or is loading the segment */
        if (proposalSegmentSize > server.getAvailableSize() || server.isLoadingSegment(proposalSegment)) {
            continue;
        }

        /** The contribution to the total cost of a given server by proposing to move the segment to that server is... */
        double cost = 0f;
        /**  the sum of the costs of other (exclusive of the proposalSegment) segments on the server */
        for (DataSegment segment : server.getServer().getSegments().values()) {
            if (!proposalSegment.equals(segment)) {
                cost += computeJointSegmentCosts(proposalSegment, segment);
            }
        }
        /**  plus the costs of segments that will be loaded */
        for (DataSegment segment : server.getPeon().getSegmentsToLoad()) {
            cost += computeJointSegmentCosts(proposalSegment, segment);
        }

        costsAndServers.add(Pair.of(cost, server));
    }

    return costsAndServers;
}
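
Because the queue is built with orderedBy(...) on the cost, the caller can read the cheapest placement from the head and the most expensive from the tail without sorting a list. A minimal sketch, with a hypothetical Costed class standing in for Pair<Double, ServerHolder>:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Comparator;

public class CostQueueExample {
    // Hypothetical stand-in for Pair<Double, ServerHolder>.
    static final class Costed {
        final double cost;
        final String server;
        Costed(double cost, String server) { this.cost = cost; this.server = server; }
    }

    public static void main(String[] args) {
        MinMaxPriorityQueue<Costed> costs = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingDouble((Costed c) -> c.cost))
                .create();

        costs.add(new Costed(3.5, "s1"));
        costs.add(new Costed(0.2, "s2"));
        costs.add(new Costed(7.9, "s3"));

        System.out.println(costs.peekFirst().server); // "s2" - cheapest placement
        System.out.println(costs.peekLast().server);  // "s3" - most expensive
    }
}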

From source file: it.uniroma3.mat.extendedset.intset.ImmutableConciseSet.java

private static ImmutableConciseSet doUnion(Iterator<ImmutableConciseSet> sets) {
    IntList retVal = new IntList();

    // lhs = current word position, rhs = the iterator
    // Comparison is first by index, then one fills > literals > zero fills
    // one fills are sorted by length (longer one fills have priority)
    // similarly, shorter zero fills have priority
    MinMaxPriorityQueue<WordHolder> theQ = MinMaxPriorityQueue.orderedBy(new Comparator<WordHolder>() {
        @Override
        public int compare(WordHolder h1, WordHolder h2) {
            int w1 = h1.getWord();
            int w2 = h2.getWord();
            int s1 = h1.getIterator().startIndex;
            int s2 = h2.getIterator().startIndex;

            if (s1 != s2) {
                return compareInts(s1, s2);
            }

            if (ConciseSetUtils.isOneSequence(w1)) {
                if (ConciseSetUtils.isOneSequence(w2)) {
                    return -compareInts(ConciseSetUtils.getSequenceNumWords(w1),
                            ConciseSetUtils.getSequenceNumWords(w2));
                }
                return -1;
            } else if (ConciseSetUtils.isLiteral(w1)) {
                if (ConciseSetUtils.isOneSequence(w2)) {
                    return 1;
                } else if (ConciseSetUtils.isLiteral(w2)) {
                    return 0;
                }
                return -1;
            } else {
                if (!ConciseSetUtils.isZeroSequence(w2)) {
                    return 1;
                }
                return compareInts(ConciseSetUtils.getSequenceNumWords(w1),
                        ConciseSetUtils.getSequenceNumWords(w2));
            }
        }
    }).create();

    // populate priority queue
    while (sets.hasNext()) {
        ImmutableConciseSet set = sets.next();

        if (set != null && !set.isEmpty()) {
            WordIterator itr = set.newWordIterator();
            theQ.add(new WordHolder(itr.next(), itr));
        }
    }

    int currIndex = 0;

    while (!theQ.isEmpty()) {
        // create a temp list to hold everything that will get pushed back into the priority queue after each run
        List<WordHolder> wordsToAdd = Lists.newArrayList();

        // grab the top element from the priority queue
        WordHolder curr = theQ.poll();
        int word = curr.getWord();
        WordIterator itr = curr.getIterator();

        // if the next word in the queue starts at a different point than where we ended off we need to create a zero gap
        // to fill the space
        if (currIndex < itr.startIndex) {
            addAndCompact(retVal, itr.startIndex - currIndex - 1);
            currIndex = itr.startIndex;
        }

        if (ConciseSetUtils.isOneSequence(word)) {
            // extract a literal from the flip bits of the one sequence
            int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);

            // advance everything past the longest ones sequence
            WordHolder nextVal = theQ.peek();
            while (nextVal != null && nextVal.getIterator().startIndex < itr.wordsWalked) {
                WordHolder entry = theQ.poll();
                int w = entry.getWord();
                WordIterator i = entry.getIterator();

                if (i.startIndex == itr.startIndex) {
                    // if a literal was created from a flip bit, OR it with other literals or literals from flip bits in the same
                    // position
                    if (ConciseSetUtils.isOneSequence(w)) {
                        flipBitLiteral |= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
                    } else if (ConciseSetUtils.isLiteral(w)) {
                        flipBitLiteral |= w;
                    } else {
                        flipBitLiteral |= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                    }
                }

                i.advanceTo(itr.wordsWalked);
                if (i.hasNext()) {
                    wordsToAdd.add(new WordHolder(i.next(), i));
                }
                nextVal = theQ.peek();
            }

            // advance longest one literal forward and push result back to priority queue
            // if a flip bit is still needed, put it in the correct position
            int newWord = word & 0xC1FFFFFF;
            if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
                flipBitLiteral ^= ConciseSetUtils.ALL_ONES_LITERAL;
                int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
                newWord |= (position << 25);
            }
            addAndCompact(retVal, newWord);
            currIndex = itr.wordsWalked;

            if (itr.hasNext()) {
                wordsToAdd.add(new WordHolder(itr.next(), itr));
            }
        } else if (ConciseSetUtils.isLiteral(word)) {
            // advance all other literals
            WordHolder nextVal = theQ.peek();
            while (nextVal != null && nextVal.getIterator().startIndex == itr.startIndex) {

                WordHolder entry = theQ.poll();
                int w = entry.getWord();
                WordIterator i = entry.getIterator();

                // if we still have zero fills with flipped bits, OR them here
                if (ConciseSetUtils.isLiteral(w)) {
                    word |= w;
                } else {
                    int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                    if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                        word |= flipBitLiteral;
                        i.advanceTo(itr.wordsWalked);
                    }
                }

                if (i.hasNext()) {
                    wordsToAdd.add(new WordHolder(i.next(), i));
                }

                nextVal = theQ.peek();
            }

            // advance the set with the current literal forward and push result back to priority queue
            addAndCompact(retVal, word);
            currIndex++;

            if (itr.hasNext()) {
                wordsToAdd.add(new WordHolder(itr.next(), itr));
            }
        } else { // zero fills
            int flipBitLiteral;
            WordHolder nextVal = theQ.peek();

            while (nextVal != null && nextVal.getIterator().startIndex == itr.startIndex) {
                // check if a literal can be created from the flip bits of other zero sequences
                WordHolder entry = theQ.poll();
                int w = entry.getWord();
                WordIterator i = entry.getIterator();

                flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                    wordsToAdd.add(new WordHolder(flipBitLiteral, i));
                } else if (i.hasNext()) {
                    wordsToAdd.add(new WordHolder(i.next(), i));
                }
                nextVal = theQ.peek();
            }

            // check if a literal needs to be created from the flipped bits of this sequence
            flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);
            if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                wordsToAdd.add(new WordHolder(flipBitLiteral, itr));
            } else if (itr.hasNext()) {
                wordsToAdd.add(new WordHolder(itr.next(), itr));
            }
        }

        theQ.addAll(wordsToAdd);
    }

    if (retVal.isEmpty()) {
        return new ImmutableConciseSet();
    }
    return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
}
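
Structurally, doUnion is a k-way merge: each WordHolder pairs a current word with its iterator, the queue surfaces the holder with the smallest start index, and advanced holders are pushed back with add() or addAll(). A minimal sketch of the same poll-advance-re-add loop over plain sorted lists, where Holder is a hypothetical analogue of WordHolder:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

public class KWayMerge {
    // Hypothetical analogue of WordHolder: current value plus its iterator.
    static final class Holder {
        final int value;
        final Iterator<Integer> rest;
        Holder(int value, Iterator<Integer> rest) { this.value = value; this.rest = rest; }
    }

    public static void main(String[] args) {
        List<List<Integer>> sortedLists = Arrays.asList(
                Arrays.asList(1, 4, 9), Arrays.asList(2, 3, 8), Arrays.asList(5, 6, 7));

        MinMaxPriorityQueue<Holder> queue = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingInt((Holder h) -> h.value))
                .create();

        // Populate the queue, one holder per non-empty input, as doUnion does.
        for (List<Integer> list : sortedLists) {
            Iterator<Integer> it = list.iterator();
            if (it.hasNext()) {
                queue.add(new Holder(it.next(), it));
            }
        }

        // Poll the smallest current value, then push the advanced holder back.
        while (!queue.isEmpty()) {
            Holder curr = queue.poll();
            System.out.print(curr.value + " "); // 1 2 3 4 5 6 7 8 9
            if (curr.rest.hasNext()) {
                queue.add(new Holder(curr.rest.next(), curr.rest));
            }
        }
    }
}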

From source file: it.uniroma3.mat.extendedset.intset.ImmutableConciseSet.java

public static ImmutableConciseSet doIntersection(Iterator<ImmutableConciseSet> sets) {
    IntList retVal = new IntList();

    // lhs = current word position, rhs = the iterator
    // Comparison is first by index, then zero fills > literals > one fills
    // zero fills are sorted by length (longer zero fills have priority)
    // similarly, shorter one fills have priority
    MinMaxPriorityQueue<WordHolder> theQ = MinMaxPriorityQueue.orderedBy(new Comparator<WordHolder>() {
        @Override
        public int compare(WordHolder h1, WordHolder h2) {
            int w1 = h1.getWord();
            int w2 = h2.getWord();
            int s1 = h1.getIterator().startIndex;
            int s2 = h2.getIterator().startIndex;

            if (s1 != s2) {
                return compareInts(s1, s2);
            }

            if (ConciseSetUtils.isZeroSequence(w1)) {
                if (ConciseSetUtils.isZeroSequence(w2)) {
                    return -compareInts(ConciseSetUtils.getSequenceNumWords(w1),
                            ConciseSetUtils.getSequenceNumWords(w2));
                }
                return -1;
            } else if (ConciseSetUtils.isLiteral(w1)) {
                if (ConciseSetUtils.isZeroSequence(w2)) {
                    return 1;
                } else if (ConciseSetUtils.isLiteral(w2)) {
                    return 0;
                }
                return -1;
            } else {
                if (!ConciseSetUtils.isOneSequence(w2)) {
                    return 1;
                }
                return compareInts(ConciseSetUtils.getSequenceNumWords(w1),
                        ConciseSetUtils.getSequenceNumWords(w2));
            }
        }
    }).create();

    // populate priority queue
    while (sets.hasNext()) {
        ImmutableConciseSet set = sets.next();

        if (set == null || set.isEmpty()) {
            return new ImmutableConciseSet();
        }

        WordIterator itr = set.newWordIterator();
        theQ.add(new WordHolder(itr.next(), itr));
    }

    int currIndex = 0;
    int wordsWalkedAtSequenceEnd = Integer.MAX_VALUE;

    while (!theQ.isEmpty()) {
        // create a temp list to hold everything that will get pushed back into the priority queue after each run
        List<WordHolder> wordsToAdd = Lists.newArrayList();

        // grab the top element from the priority queue
        WordHolder curr = theQ.poll();
        int word = curr.getWord();
        WordIterator itr = curr.getIterator();

        // if a sequence has ended, we can break out because of Boolean logic
        if (itr.startIndex >= wordsWalkedAtSequenceEnd) {
            break;
        }

        // if the next word in the queue starts at a different point than where we ended off we need to create a one gap
        // to fill the space
        if (currIndex < itr.startIndex) {
            // number of 31 bit blocks that comprise the fill minus one
            addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (itr.startIndex - currIndex - 1)));
            currIndex = itr.startIndex;
        }

        if (ConciseSetUtils.isZeroSequence(word)) {
            // extract a literal from the flip bits of the zero sequence
            int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);

            // advance everything past the longest zero sequence
            WordHolder nextVal = theQ.peek();
            while (nextVal != null && nextVal.getIterator().startIndex < itr.wordsWalked) {
                WordHolder entry = theQ.poll();
                int w = entry.getWord();
                WordIterator i = entry.getIterator();

                if (i.startIndex == itr.startIndex) {
                    // if a literal was created from a flip bit, AND it with other literals or literals from flip bits in the same
                    // position
                    if (ConciseSetUtils.isZeroSequence(w)) {
                        flipBitLiteral &= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                    } else if (ConciseSetUtils.isLiteral(w)) {
                        flipBitLiteral &= w;
                    } else {
                        flipBitLiteral &= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
                    }
                }

                i.advanceTo(itr.wordsWalked);
                if (i.hasNext()) {
                    wordsToAdd.add(new WordHolder(i.next(), i));
                } else {
                    wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
                }
                nextVal = theQ.peek();
            }

            // advance longest zero literal forward and push result back to priority queue
            // if a flip bit is still needed, put it in the correct position
            int newWord = word & 0xC1FFFFFF;
            if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
                newWord = (word & 0xC1FFFFFF) | (position << 25);
            }
            addAndCompact(retVal, newWord);
            currIndex = itr.wordsWalked;

            if (itr.hasNext()) {
                wordsToAdd.add(new WordHolder(itr.next(), itr));
            } else {
                wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
            }
        } else if (ConciseSetUtils.isLiteral(word)) {
            // advance all other literals
            WordHolder nextVal = theQ.peek();
            while (nextVal != null && nextVal.getIterator().startIndex == itr.startIndex) {

                WordHolder entry = theQ.poll();
                int w = entry.getWord();
                WordIterator i = entry.getIterator();

                // if we still have one fills with flipped bits, AND them here
                if (ConciseSetUtils.isLiteral(w)) {
                    word &= w;
                } else {
                    int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
                    if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
                        word &= flipBitLiteral;
                        i.advanceTo(itr.wordsWalked);
                    }
                }

                if (i.hasNext()) {
                    wordsToAdd.add(new WordHolder(i.next(), i));
                } else {
                    wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
                }

                nextVal = theQ.peek();
            }

            // advance the set with the current literal forward and push result back to priority queue
            addAndCompact(retVal, word);
            currIndex++;

            if (itr.hasNext()) {
                wordsToAdd.add(new WordHolder(itr.next(), itr));
            } else {
                wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
            }
        } else { // one fills
            int flipBitLiteral;
            WordHolder nextVal = theQ.peek();

            while (nextVal != null && nextVal.getIterator().startIndex == itr.startIndex) {
                // check if a literal can be created from the flip bits of other one sequences
                WordHolder entry = theQ.poll();
                int w = entry.getWord();
                WordIterator i = entry.getIterator();

                flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
                if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
                    wordsToAdd.add(new WordHolder(flipBitLiteral, i));
                } else if (i.hasNext()) {
                    wordsToAdd.add(new WordHolder(i.next(), i));
                } else {
                    wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
                }

                nextVal = theQ.peek();
            }

            // check if a literal needs to be created from the flipped bits of this sequence
            flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);
            if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
                wordsToAdd.add(new WordHolder(flipBitLiteral, itr));
            } else if (itr.hasNext()) {
                wordsToAdd.add(new WordHolder(itr.next(), itr));
            } else {
                wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
            }
        }

        theQ.addAll(wordsToAdd);
    }

    // fill in any missing one sequences
    if (currIndex < wordsWalkedAtSequenceEnd) {
        addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (wordsWalkedAtSequenceEnd - currIndex - 1)));
    }

    if (retVal.isEmpty()) {
        return new ImmutableConciseSet();
    }
    return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
}

From source file: org.apache.hadoop.hbase.master.balancer.DefaultLoadBalancer.java

/**
 * Generate a global load balancing plan according to the specified map of
 * server information to the most loaded regions of each server.
 *
 * The load balancing invariant is that all servers are within 1 region of the
 * average number of regions per server.  If the average is an integer number,
 * all servers will be balanced to the average.  Otherwise, all servers will
 * have either floor(average) or ceiling(average) regions.
 *
 * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that
 *   we can fetch from both ends of the queue. 
 * At the beginning, we check whether there was empty region server 
 *   just discovered by Master. If so, we alternately choose new / old
 *   regions from head / tail of regionsToMove, respectively. This alternation
 *   avoids clustering young regions on the newly discovered region server.
 *   Otherwise, we choose new regions from head of regionsToMove.
 *
 * Another improvement from HBASE-3609 is that we assign regions from
 *   regionsToMove to underloaded servers in round-robin fashion.
 *   Previously one underloaded server would be filled before we move onto
 *   the next underloaded server, leading to clustering of young regions.
 *   
 * Finally, we randomly shuffle underloaded servers so that they receive
 *   offloaded regions relatively evenly across calls to balanceCluster().
 *         
 * The algorithm is currently implemented as such:
 *
 * <ol>
 * <li>Determine the two valid numbers of regions each server should have,
 *     <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
 *
 * <li>Iterate down the most loaded servers, shedding regions from each so
 *     each server hosts exactly <b>MAX</b> regions.  Stop once you reach a
 *     server that already has &lt;= <b>MAX</b> regions.
 *     <p>
 *     Order the regions to move from most recent to least.
 *
 * <li>Iterate down the least loaded servers, assigning regions so each server
 *     has exactly <b>MIN</b> regions.  Stop once you reach a server that
 *     already has &gt;= <b>MIN</b> regions.
 *
 *     Regions being assigned to underloaded servers are those that were shed
 *     in the previous step.  It is possible that there were not enough
 *     regions shed to fill each underloaded server to <b>MIN</b>.  If so we
 *     end up with a number of regions required to do so, <b>neededRegions</b>.
 *
 *     It is also possible that we were able to fill each underloaded server but ended
 *     up with regions that were unassigned from overloaded servers but that
 *     still do not have assignment.
 *
 *     If neither of these conditions hold (no regions needed to fill the
 *     underloaded servers, no regions leftover from overloaded servers),
 *     we are done and return.  Otherwise we handle these cases below.
 *
 * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers),
 *     we iterate the most loaded servers again, shedding a single server from
 *     each (this brings them from having <b>MAX</b> regions to having
 *     <b>MIN</b> regions).
 *
 * <li>We now definitely have more regions that need assignment, either from
 *     the previous step or from the original shedding from overloaded servers.
 *     Iterate the least loaded servers filling each to <b>MIN</b>.
 *
 * <li>If we still have more regions that need assignment, again iterate the
 *     least loaded servers, this time giving each one (filling them to
 *     <b>MAX</b>) until we run out.
 *
 * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
 *
 *     In addition, any server hosting &gt;= <b>MAX</b> regions is guaranteed
 *     to end up with <b>MAX</b> regions at the end of the balancing.  This
 *     ensures the minimal number of regions possible are moved.
 * </ol>
 *
 * TODO: We can at-most reassign the number of regions away from a particular
 *       server to be how many they report as most loaded.
 *       Should we just keep all assignment in memory?  Any objections?
 *       Does this mean we need HeapSize on HMaster?  Or just careful monitor?
 *       (current thinking is we will hold all assignments in memory)
 *
 * @param clusterMap Map of regionservers and their load/region information to
 *                   a list of their most loaded regions
 * @return a list of regions to be moved, including source and destination,
 *         or null if cluster is already balanced
 */
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) {
    boolean emptyRegionServerPresent = false;
    long startTime = System.currentTimeMillis();

    ClusterLoadState cs = new ClusterLoadState(clusterMap);

    if (!this.needsBalance(cs))
        return null;

    int numServers = cs.getNumServers();
    NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
    int numRegions = cs.getNumRegions();
    int min = numRegions / numServers;
    int max = numRegions % numServers == 0 ? min : min + 1;

    // Using to check balance result.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=")
            .append(numServers).append(", max=").append(max).append(", min=").append(min);
    LOG.debug(strBalanceParam.toString());

    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create();
    List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>();

    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch regions from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
        ServerAndLoad sal = server.getKey();
        int regionCount = sal.getLoad();
        if (regionCount <= max) {
            serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
            break;
        }
        serversOverloaded++;
        List<HRegionInfo> regions = server.getValue();
        int numToOffload = Math.min(regionCount - max, regions.size());
        // account for the out-of-band regions which were assigned to this server
        // after some other region server crashed 
        Collections.sort(regions, riComparator);
        int numTaken = 0;
        for (int i = 0; i <= numToOffload;) {
            HRegionInfo hri = regions.get(i); // fetch from head
            if (fetchFromTail) {
                hri = regions.get(regions.size() - 1 - i);
            }
            i++;
            // Don't rebalance meta regions.
            if (hri.isMetaRegion())
                continue;
            regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
            numTaken++;
            if (numTaken >= numToOffload)
                break;
            // fetch in alternate order if there is new region server
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
    }
    int totalNumMoved = regionsToMove.size();

    // Walk down least loaded, filling each to the min
    int neededRegions = 0; // number of regions needed to bring all up to min
    fetchFromTail = false;

    Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
    float average = (float) numRegions / numServers; // for logging
    int maxToTake = numRegions - (int) average;
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        if (maxToTake == 0)
            break; // no more to take
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min && regionCount > 0) {
            continue; // look for other servers which haven't reached min
        }
        int regionsToPut = min - regionCount;
        if (regionsToPut == 0) {
            regionsToPut = 1;
            maxToTake--;
        }
        underloadedServers.put(server.getKey().getServerName(), regionsToPut);
    }
    // number of servers that get new regions
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns = Arrays
            .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    while (regionsToMove.size() > 0) {
        int cnt = 0;
        int i = incr > 0 ? 0 : underloadedServers.size() - 1;
        for (; i >= 0 && i < underloadedServers.size(); i += incr) {
            if (regionsToMove.isEmpty())
                break;
            ServerName si = sns.get(i);
            int numToTake = underloadedServers.get(si);
            if (numToTake == 0)
                continue;

            addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }

            underloadedServers.put(si, numToTake - 1);
            cnt++;
            BalanceInfo bi = serverBalanceInfo.get(si);
            if (bi == null) {
                bi = new BalanceInfo(0, 0);
                serverBalanceInfo.put(si, bi);
            }
            bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
        }
        if (cnt == 0)
            break;
        // iterates underloadedServers in the other direction
        incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
        // If we still want to take some, increment needed
        neededRegions += i;
    }

    // If none needed to fill all to min and none left to drain all to max,
    // we are done
    if (neededRegions == 0 && regionsToMove.isEmpty()) {
        long endTime = System.currentTimeMillis();
        LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
                + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
                + " less loaded servers");
        return regionsToReturn;
    }

    // Need to do a second pass.
    // Either more regions to assign out or servers that are still underloaded

    // If we need more to fill min, grab one from each most loaded until enough
    if (neededRegions != 0) {
        // Walk down most loaded, grabbing one from each until we get enough
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
            if (idx >= server.getValue().size())
                break;
            HRegionInfo region = server.getValue().get(idx);
            if (region.isMetaRegion())
                continue; // Don't move meta regions.
            regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
            totalNumMoved++;
            if (--neededRegions == 0) {
                // No more regions needed, done shedding
                break;
            }
        }
    }

    // Now we have a set of regions that must be all assigned out
    // Assign each underloaded up to the min, then if leftovers, assign to max

    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min)
            break;
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
            regionCount += balanceInfo.getNumRegionsAdded();
        }
        if (regionCount >= min) {
            continue;
        }
        int numToTake = min - regionCount;
        int numTaken = 0;
        while (numTaken < numToTake && 0 < regionsToMove.size()) {
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            numTaken++;
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
    }

    // If we still have regions to dish out, assign underloaded to max
    if (0 < regionsToMove.size()) {
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
            int regionCount = server.getKey().getLoad();
            if (regionCount >= max) {
                break;
            }
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
            if (regionsToMove.isEmpty()) {
                break;
            }
        }
    }

    long endTime = System.currentTimeMillis();

    if (!regionsToMove.isEmpty() || neededRegions != 0) {
        // Emit data so can diagnose how balancer went astray.
        LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded="
                + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) {
            if (sb.length() > 0)
                sb.append(", ");
            sb.append(e.getKey().toString());
            sb.append(" ");
            sb.append(e.getValue().size());
        }
        LOG.warn("Input " + sb.toString());
    }

    // All done!
    LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
            + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
            + " less loaded servers");

    return regionsToReturn;
}
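
HBASE-3609 chose MinMaxPriorityQueue for regionsToMove precisely so plans can be taken from either end: pollFirst() at the head, pollLast() at the tail, alternating whenever an empty region server is present. A minimal sketch of that alternation, with integers standing in for RegionPlan:

import com.google.common.collect.MinMaxPriorityQueue;

public class AlternatingFetch {
    public static void main(String[] args) {
        MinMaxPriorityQueue<Integer> plans = MinMaxPriorityQueue.create();
        for (int i = 1; i <= 6; i++) {
            plans.add(i);
        }

        // Alternate between the head and the tail of the queue, the way
        // addRegionPlan does when an empty region server is present.
        boolean fetchFromTail = false;
        while (!plans.isEmpty()) {
            int plan = fetchFromTail ? plans.pollLast() : plans.pollFirst();
            System.out.print(plan + " "); // 1 6 2 5 3 4
            fetchFromTail = !fetchFromTail;
        }
    }
}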

From source file: com.alibaba.wasp.master.balancer.DefaultLoadBalancer.java

/**
 * Generate a global load balancing plan according to the specified map of
 * server information to the most loaded entityGroups of each server.
 *
 * The load balancing invariant is that all servers are within 1 entityGroup of the
 * average number of entityGroups per server. If the average is an integer number,
 * all servers will be balanced to the average. Otherwise, all servers will
 * have either floor(average) or ceiling(average) entityGroups.
 * 
 * HBASE-3609 Modeled entityGroupsToMove using Guava's MinMaxPriorityQueue so that
 * we can fetch from both ends of the queue. At the beginning, we check
 * whether there was empty entityGroup server just discovered by Master. If so, we
 * alternately choose new / old entityGroups from head / tail of entityGroupsToMove,
 * respectively. This alternation avoids clustering young entityGroups on the newly
 * discovered entityGroup server. Otherwise, we choose new entityGroups from head of
 * entityGroupsToMove.
 * 
 * Another improvement from HBASE-3609 is that we assign entityGroups from
 * entityGroupsToMove to underloaded servers in round-robin fashion. Previously one
 * underloaded server would be filled before we move onto the next underloaded
 * server, leading to clustering of young entityGroups.
 * 
 * Finally, we randomly shuffle underloaded servers so that they receive
 * offloaded entityGroups relatively evenly across calls to balanceCluster().
 * 
 * The algorithm is currently implemented as such:
 * 
 * <ol>
 * <li>Determine the two valid numbers of entityGroups each server should have,
 * <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
 * 
 * <li>Iterate down the most loaded servers, shedding entityGroups from each so
 * each server hosts exactly <b>MAX</b> entityGroups. Stop once you reach a server
 * that already has &lt;= <b>MAX</b> entityGroups.
 * <p>
 * Order the entityGroups to move from most recent to least.
 * 
 * <li>Iterate down the least loaded servers, assigning entityGroups so each server
 * has exactly <b>MIN</b> entityGroups. Stop once you reach a server that already
 * has &gt;= <b>MIN</b> entityGroups.
 * 
 * EntityGroups being assigned to underloaded servers are those that were shed in
 * the previous step. It is possible that there were not enough entityGroups shed
 * to fill each underloaded server to <b>MIN</b>. If so we end up with a
 * number of entityGroups required to do so, <b>neededEntityGroups</b>.
 * 
 * It is also possible that we were able to fill each underloaded server but ended up
 * with entityGroups that were unassigned from overloaded servers but that still do
 * not have assignment.
 * 
 * If neither of these conditions hold (no entityGroups needed to fill the
 * underloaded servers, no entityGroups leftover from overloaded servers), we are
 * done and return. Otherwise we handle these cases below.
 * 
 * <li>If <b>neededEntityGroups</b> is non-zero (still have underloaded servers),
 * we iterate the most loaded servers again, shedding a single server from
 * each (this brings them from having <b>MAX</b> entityGroups to having <b>MIN</b>
 * entityGroups).
 * 
 * <li>We now definitely have more entityGroups that need assignment, either from
 * the previous step or from the original shedding from overloaded servers.
 * Iterate the least loaded servers filling each to <b>MIN</b>.
 * 
 * <li>If we still have more entityGroups that need assignment, again iterate the
 * least loaded servers, this time giving each one (filling them to
 * <b>MAX</b>) until we run out.
 * 
 * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> entityGroups.
 * 
 * In addition, any server hosting &gt;= <b>MAX</b> entityGroups is guaranteed to
 * end up with <b>MAX</b> entityGroups at the end of the balancing. This ensures
 * the minimal number of entityGroups possible are moved.
 * </ol>
 * 
 * TODO: We can at-most reassign the number of entityGroups away from a particular
 * server to be how many they report as most loaded. Should we just keep all
 * assignment in memory? Any objections? Does this mean we need HeapSize on
 * HMaster? Or just careful monitor? (current thinking is we will hold all
 * assignments in memory)
 * 
 * @param clusterState Map of entityGroupservers and their load/entityGroup information
 *          to a list of their most loaded entityGroups
 * @return a list of entityGroups to be moved, including source and destination, or
 *         null if cluster is already balanced
 */
public List<EntityGroupPlan> balanceCluster(Map<ServerName, List<EntityGroupInfo>> clusterMap) {
    boolean emptyFServerPresent = false;
    long startTime = System.currentTimeMillis();

    ClusterLoadState cs = new ClusterLoadState(clusterMap);

    int numServers = cs.getNumServers();
    if (numServers == 0) {
        LOG.debug("numServers=0 so skipping load balancing");
        return null;
    }
    NavigableMap<ServerAndLoad, List<EntityGroupInfo>> serversByLoad = cs.getServersByLoad();

    int numEntityGroups = cs.getNumEntityGroups();

    if (!this.needsBalance(cs)) {
        // Skipped because no server outside (min,max) range
        float average = cs.getLoadAverage(); // for logging
        LOG.info("Skipping load balancing because balanced cluster; " + "servers=" + numServers + " "
                + "entityGroups=" + numEntityGroups + " average=" + average + " " + "mostloaded="
                + serversByLoad.lastKey().getLoad() + " leastloaded=" + serversByLoad.firstKey().getLoad());
        return null;
    }

    int min = numEntityGroups / numServers;
    int max = numEntityGroups % numServers == 0 ? min : min + 1;

    // Using to check balance result.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numEntityGroups=").append(numEntityGroups)
            .append(", numServers=").append(numServers).append(", max=").append(max).append(", min=")
            .append(min);
    LOG.debug(strBalanceParam.toString());

    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<EntityGroupPlan> entityGroupsToMove = MinMaxPriorityQueue.orderedBy(rpComparator)
            .create();
    List<EntityGroupPlan> entityGroupsToReturn = new ArrayList<EntityGroupPlan>();

    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch entityGroups from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
    for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.descendingMap().entrySet()) {
        ServerAndLoad sal = server.getKey();
        int entityGroupCount = sal.getLoad();
        if (entityGroupCount <= max) {
            serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
            break;
        }
        serversOverloaded++;
        List<EntityGroupInfo> entityGroups = server.getValue();
        int numToOffload = Math.min(entityGroupCount - max, entityGroups.size());
        // account for the out-of-band entityGroups which were assigned to this server
        // after some other entityGroup server crashed
        Collections.sort(entityGroups, riComparator);
        int numTaken = 0;
        for (int i = 0; i <= numToOffload;) {
            EntityGroupInfo egInfo = entityGroups.get(i); // fetch from head
            if (fetchFromTail) {
                egInfo = entityGroups.get(entityGroups.size() - 1 - i);
            }
            i++;
            entityGroupsToMove.add(new EntityGroupPlan(egInfo, sal.getServerName(), null));
            numTaken++;
            if (numTaken >= numToOffload)
                break;
            // fetch in alternate order if there is new entityGroup server
            if (emptyFServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
    }
    int totalNumMoved = entityGroupsToMove.size();

    // Walk down least loaded, filling each to the min
    int neededEntityGroups = 0; // number of entityGroups needed to bring all up to min
    fetchFromTail = false;

    Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
    for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) {
        int entityGroupCount = server.getKey().getLoad();
        if (entityGroupCount >= min) {
            break;
        }
        underloadedServers.put(server.getKey().getServerName(), min - entityGroupCount);
    }
    // number of servers that get new entityGroups
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns = Arrays
            .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    while (entityGroupsToMove.size() > 0) {
        int cnt = 0;
        int i = incr > 0 ? 0 : underloadedServers.size() - 1;
        for (; i >= 0 && i < underloadedServers.size(); i += incr) {
            if (entityGroupsToMove.isEmpty())
                break;
            ServerName si = sns.get(i);
            int numToTake = underloadedServers.get(si);
            if (numToTake == 0)
                continue;

            addEntityGroupPlan(entityGroupsToMove, fetchFromTail, si, entityGroupsToReturn);
            if (emptyFServerPresent) {
                fetchFromTail = !fetchFromTail;
            }

            underloadedServers.put(si, numToTake - 1);
            cnt++;
            BalanceInfo bi = serverBalanceInfo.get(si);
            if (bi == null) {
                bi = new BalanceInfo(0, 0);
                serverBalanceInfo.put(si, bi);
            }
            bi.setNumEntityGroupsAdded(bi.getNumEntityGroupsAdded() + 1);
        }
        if (cnt == 0)
            break;
        // iterates underloadedServers in the other direction
        incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
        // If we still want to take some, increment needed
        neededEntityGroups += i;
    }

    // If none needed to fill all to min and none left to drain all to max,
    // we are done
    if (neededEntityGroups == 0 && entityGroupsToMove.isEmpty()) {
        long endTime = System.currentTimeMillis();
        LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
                + " entityGroups off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
                + " less loaded servers");
        return entityGroupsToReturn;
    }

    // Need to do a second pass.
    // Either more entityGroups to assign out or servers that are still underloaded

    // If we need more to fill min, grab one from each most loaded until enough
    if (neededEntityGroups != 0) {
        // Walk down most loaded, grabbing one from each until we get enough
        for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.descendingMap()
                .entrySet()) {
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            int idx = balanceInfo == null ? 0 : balanceInfo.getNextEntityGroupForUnload();
            if (idx >= server.getValue().size())
                break;
            EntityGroupInfo entityGroup = server.getValue().get(idx);
            entityGroupsToMove.add(new EntityGroupPlan(entityGroup, server.getKey().getServerName(), null));
            totalNumMoved++;
            if (--neededEntityGroups == 0) {
                // No more entityGroups needed, done shedding
                break;
            }
        }
    }

    // Now we have a set of entityGroups that must be all assigned out
    // Assign each underloaded up to the min, then if leftovers, assign to max

    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) {
        int entityGroupCount = server.getKey().getLoad();
        if (entityGroupCount >= min)
            break;
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
            entityGroupCount += balanceInfo.getNumEntityGroupsAdded();
        }
        if (entityGroupCount >= min) {
            continue;
        }
        int numToTake = min - entityGroupCount;
        int numTaken = 0;
        while (numTaken < numToTake && 0 < entityGroupsToMove.size()) {
            addEntityGroupPlan(entityGroupsToMove, fetchFromTail, server.getKey().getServerName(),
                    entityGroupsToReturn);
            numTaken++;
            if (emptyFServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
    }

    // If we still have entityGroups to dish out, assign underloaded to max
    if (0 < entityGroupsToMove.size()) {
        for (Map.Entry<ServerAndLoad, List<EntityGroupInfo>> server : serversByLoad.entrySet()) {
            int entityGroupCount = server.getKey().getLoad();
            if (entityGroupCount >= max) {
                break;
            }
            addEntityGroupPlan(entityGroupsToMove, fetchFromTail, server.getKey().getServerName(),
                    entityGroupsToReturn);
            if (emptyFServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
            if (entityGroupsToMove.isEmpty()) {
                break;
            }
        }
    }

    long endTime = System.currentTimeMillis();

    if (!entityGroupsToMove.isEmpty() || neededEntityGroups != 0) {
        // Emit data so can diagnose how balancer went astray.
        LOG.warn("entityGroupsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded="
                + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ServerName, List<EntityGroupInfo>> e : clusterMap.entrySet()) {
            if (sb.length() > 0)
                sb.append(", ");
            sb.append(e.getKey().toString());
            sb.append(" ");
            sb.append(e.getValue().size());
        }
        LOG.warn("Input " + sb.toString());
    }

    // All done!
    LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
            + " entityGroups off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
            + " less loaded servers");

    return entityGroupsToReturn;
}

From source file: org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer.java

/**
 * Generate a global load balancing plan according to the specified map of
 * server information to the most loaded regions of each server.
 *
 * The load balancing invariant is that all servers are within 1 region of the
 * average number of regions per server.  If the average is an integer number,
 * all servers will be balanced to the average.  Otherwise, all servers will
 * have either floor(average) or ceiling(average) regions.
 *
 * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that
 *   we can fetch from both ends of the queue. 
 * At the beginning, we check whether there was empty region server 
 *   just discovered by Master. If so, we alternately choose new / old
 *   regions from head / tail of regionsToMove, respectively. This alternation
 *   avoids clustering young regions on the newly discovered region server.
 *   Otherwise, we choose new regions from head of regionsToMove.
 *
 * Another improvement from HBASE-3609 is that we assign regions from
 *   regionsToMove to underloaded servers in round-robin fashion.
 *   Previously one underloaded server would be filled before we move onto
 *   the next underloaded server, leading to clustering of young regions.
 *   
 * Finally, we randomly shuffle underloaded servers so that they receive
 *   offloaded regions relatively evenly across calls to balanceCluster().
 *         
 * The algorithm is currently implemented as such:
 *
 * <ol>
 * <li>Determine the two valid numbers of regions each server should have,
 *     <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
 *
 * <li>Iterate down the most loaded servers, shedding regions from each so
 *     each server hosts exactly <b>MAX</b> regions.  Stop once you reach a
 *     server that already has &lt;= <b>MAX</b> regions.
 *     <p>
 *     Order the regions to move from most recent to least.
 *
 * <li>Iterate down the least loaded servers, assigning regions so each server
 *     has exactly <b>MIN</b> regions.  Stop once you reach a server that
 *     already has &gt;= <b>MIN</b> regions.
 *
 *     Regions being assigned to underloaded servers are those that were shed
 *     in the previous step.  It is possible that there were not enough
 *     regions shed to fill each underloaded server to <b>MIN</b>.  If so we
 *     end up with a number of regions required to do so, <b>neededRegions</b>.
 *
 *     It is also possible that we filled each underloaded server but are
 *     still left with regions that were shed from overloaded servers and
 *     remain unassigned.
 *
 *     If neither of these conditions hold (no regions needed to fill the
 *     underloaded servers, no regions leftover from overloaded servers),
 *     we are done and return.  Otherwise we handle these cases below.
 *
 * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers),
 *     we iterate the most loaded servers again, shedding a single region from
 *     each (this brings them from having <b>MAX</b> regions to having
 *     <b>MIN</b> regions).
 *
 * <li>We now definitely have more regions that need assignment, either from
 *     the previous step or from the original shedding from overloaded servers.
 *     Iterate the least loaded servers filling each to <b>MIN</b>.
 *
 * <li>If we still have more regions that need assignment, again iterate the
 *     least loaded servers, this time giving each one (filling them to
 *     <b>MAX</b>) until we run out.
 *
 * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
 *
 *     In addition, any server hosting &gt;= <b>MAX</b> regions is guaranteed
 *     to end up with <b>MAX</b> regions at the end of the balancing.  This
 *     ensures the minimal number of regions possible are moved.
 * </ol>
 *
 * TODO: We can reassign at most as many regions away from a particular
 *       server as it reports among its most loaded.
 *       Should we just keep all assignment in memory?  Any objections?
 *       Does this mean we need HeapSize on HMaster?  Or just careful monitor?
 *       (current thinking is we will hold all assignments in memory)
 *
 * @param clusterMap Map of regionservers and their load/region information to
 *                   a list of their most loaded regions
 * @return a list of regions to be moved, including source and destination,
 *         or null if cluster is already balanced
 */
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) {
    List<RegionPlan> regionsToReturn = balanceMasterRegions(clusterMap);
    if (regionsToReturn != null) {
        return regionsToReturn;
    }
    filterExcludedServers(clusterMap);
    boolean emptyRegionServerPresent = false;
    long startTime = System.currentTimeMillis();

    Collection<ServerName> backupMasters = getBackupMasters();
    ClusterLoadState cs = new ClusterLoadState(masterServerName, backupMasters, backupMasterWeight, clusterMap);

    if (!this.needsBalance(cs))
        return null;

    int numServers = cs.getNumServers();
    NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
    int numRegions = cs.getNumRegions();
    float average = cs.getLoadAverage();
    int max = (int) Math.ceil(average);
    int min = (int) average;

    // Log the balance parameters so the result can be checked.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=")
            .append(numServers).append(", numBackupMasters=").append(cs.getNumBackupMasters())
            .append(", backupMasterWeight=").append(backupMasterWeight).append(", max=").append(max)
            .append(", min=").append(min);
    LOG.debug(strBalanceParam.toString());

    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create();
    regionsToReturn = new ArrayList<RegionPlan>();

    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch regions from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
        ServerAndLoad sal = server.getKey();
        int load = sal.getLoad();
        if (load <= max) {
            serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
            break;
        }
        serversOverloaded++;
        List<HRegionInfo> regions = server.getValue();
        int w = 1; // Normal region server has weight 1
        if (backupMasters != null && backupMasters.contains(sal.getServerName())) {
            w = backupMasterWeight; // Backup master has heavier weight
        }
        int numToOffload = Math.min((load - max) / w, regions.size());
        // account for the out-of-band regions which were assigned to this server
        // after some other region server crashed 
        Collections.sort(regions, riComparator);
        int numTaken = 0;
        for (int i = 0; i <= numToOffload;) {
            HRegionInfo hri = regions.get(i); // fetch from head
            if (fetchFromTail) {
                hri = regions.get(regions.size() - 1 - i);
            }
            i++;
            // Don't rebalance special regions.
            if (shouldBeOnMaster(hri) && masterServerName.equals(sal.getServerName()))
                continue;
            regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
            numTaken++;
            if (numTaken >= numToOffload)
                break;
            // fetch in alternate order if there is new region server
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
    }
    int totalNumMoved = regionsToMove.size();

    // Walk down least loaded, filling each to the min
    int neededRegions = 0; // number of regions needed to bring all up to min
    fetchFromTail = false;

    Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
    int maxToTake = numRegions - min;
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        if (maxToTake == 0)
            break; // no more to take
        int load = server.getKey().getLoad();
        if (load >= min && load > 0) {
            continue; // look for other servers which haven't reached min
        }
        int w = 1; // Normal region server has weight 1
        if (backupMasters != null && backupMasters.contains(server.getKey().getServerName())) {
            w = backupMasterWeight; // Backup master has heavier weight
        }
        int regionsToPut = (min - load) / w;
        if (regionsToPut == 0) {
            regionsToPut = 1;
        }
        maxToTake -= regionsToPut;
        underloadedServers.put(server.getKey().getServerName(), regionsToPut);
    }
    // number of servers that get new regions
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns = Arrays
            .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    while (regionsToMove.size() > 0) {
        int cnt = 0;
        int i = incr > 0 ? 0 : underloadedServers.size() - 1;
        for (; i >= 0 && i < underloadedServers.size(); i += incr) {
            if (regionsToMove.isEmpty())
                break;
            ServerName si = sns.get(i);
            int numToTake = underloadedServers.get(si);
            if (numToTake == 0)
                continue;

            addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }

            underloadedServers.put(si, numToTake - 1);
            cnt++;
            BalanceInfo bi = serverBalanceInfo.get(si);
            if (bi == null) {
                bi = new BalanceInfo(0, 0);
                serverBalanceInfo.put(si, bi);
            }
            bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
        }
        if (cnt == 0)
            break;
        // iterates underloadedServers in the other direction
        incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
        // If we still want to take some, increment needed
        neededRegions += i;
    }

    // If none needed to fill all to min and none left to drain all to max,
    // we are done
    if (neededRegions == 0 && regionsToMove.isEmpty()) {
        long endTime = System.currentTimeMillis();
        LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
                + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
                + " less loaded servers");
        return regionsToReturn;
    }

    // Need to do a second pass.
    // Either more regions to assign out or servers that are still underloaded

    // If we need more to fill min, grab one from each most loaded until enough
    if (neededRegions != 0) {
        // Walk down most loaded, grabbing one from each until we get enough
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
            if (idx >= server.getValue().size())
                break;
            HRegionInfo region = server.getValue().get(idx);
            if (region.isMetaRegion())
                continue; // Don't move meta regions.
            regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
            totalNumMoved++;
            if (--neededRegions == 0) {
                // No more regions needed, done shedding
                break;
            }
        }
    }

    // Now we have a set of regions that must be all assigned out
    // Assign each underloaded up to the min, then if leftovers, assign to max

    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min)
            break;
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
            regionCount += balanceInfo.getNumRegionsAdded();
        }
        if (regionCount >= min) {
            continue;
        }
        int numToTake = min - regionCount;
        int numTaken = 0;
        while (numTaken < numToTake && 0 < regionsToMove.size()) {
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            numTaken++;
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
    }

    // If we still have regions to dish out, assign underloaded to max
    if (0 < regionsToMove.size()) {
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
            int regionCount = server.getKey().getLoad();
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            if (balanceInfo != null) {
                regionCount += balanceInfo.getNumRegionsAdded();
            }
            if (regionCount >= max) {
                break;
            }
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
            if (regionsToMove.isEmpty()) {
                break;
            }
        }
    }

    long endTime = System.currentTimeMillis();

    if (!regionsToMove.isEmpty() || neededRegions != 0) {
        // Emit data so we can diagnose how the balancer went astray.
        LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded="
                + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) {
            if (sb.length() > 0)
                sb.append(", ");
            sb.append(e.getKey().toString());
            sb.append(" ");
            sb.append(e.getValue().size());
        }
        LOG.warn("Input " + sb.toString());
    }

    // All done!
    LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
            + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
            + " less loaded servers");

    return regionsToReturn;
}
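The MIN/MAX invariant described in the javadoc above reduces to simple integer arithmetic. A self-contained sketch using only that arithmetic (the region and server counts are illustrative, not from the source):

public class MinMaxInvariantSketch {
    public static void main(String[] args) {
        int numRegions = 13;
        int numServers = 4;
        // MIN = floor(average), MAX = ceiling(average)
        int min = numRegions / numServers;                      // 3
        int max = numRegions % numServers == 0 ? min : min + 1; // 4
        // After balancing, every server holds either min or max regions,
        // so all servers sit within one region of the 3.25 average.
        System.out.println("min=" + min + ", max=" + max);
    }
}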

From source file:org.apache.hadoop.hbase.master.DefaultLoadBalancer.java

/**
 * Generate a global load balancing plan according to the specified map of
 * server information to the most loaded regions of each server.
 *
 * The load balancing invariant is that all servers are within 1 region of the
 * average number of regions per server.  If the average is an integer number,
 * all servers will be balanced to the average.  Otherwise, all servers will
 * have either floor(average) or ceiling(average) regions.
 *
 * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that
 *   we can fetch from both ends of the queue. 
 * At the beginning, we check whether an empty region server was just
 *   discovered by the Master. If so, we alternately choose new / old
 *   regions from the head / tail of regionsToMove, respectively. This
 *   alternation avoids clustering young regions on the newly discovered
 *   region server. Otherwise, we choose new regions from the head of
 *   regionsToMove.
 *
 * Another improvement from HBASE-3609 is that we assign regions from
 *   regionsToMove to underloaded servers in round-robin fashion.
 *   Previously one underloaded server would be filled before we move onto
 *   the next underloaded server, leading to clustering of young regions.
 *   
 * Finally, we randomly shuffle underloaded servers so that they receive
 *   offloaded regions relatively evenly across calls to balanceCluster().
 *         
 * The algorithm is currently implemented as such:
 *
 * <ol>
 * <li>Determine the two valid numbers of regions each server should have,
 *     <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
 *
 * <li>Iterate down the most loaded servers, shedding regions from each so
 *     each server hosts exactly <b>MAX</b> regions.  Stop once you reach a
 *     server that already has &lt;= <b>MAX</b> regions.
 *     <p>
 *     Order the regions to move from most recent to least.
 *
 * <li>Iterate down the least loaded servers, assigning regions so each server
 *     has exactly <b>MIN</b> regions.  Stop once you reach a server that
 *     already has &gt;= <b>MIN</b> regions.
 *
 *     Regions being assigned to underloaded servers are those that were shed
 *     in the previous step.  It is possible that there were not enough
 *     regions shed to fill each underloaded server to <b>MIN</b>.  If so we
 *     end up with a number of regions required to do so, <b>neededRegions</b>.
 *
 *     It is also possible that we filled each underloaded server but are
 *     still left with regions that were shed from overloaded servers and
 *     remain unassigned.
 *
 *     If neither of these conditions hold (no regions needed to fill the
 *     underloaded servers, no regions leftover from overloaded servers),
 *     we are done and return.  Otherwise we handle these cases below.
 *
 * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers),
 *     we iterate the most loaded servers again, shedding a single region from
 *     each (this brings them from having <b>MAX</b> regions to having
 *     <b>MIN</b> regions).
 *
 * <li>We now definitely have more regions that need assignment, either from
 *     the previous step or from the original shedding from overloaded servers.
 *     Iterate the least loaded servers filling each to <b>MIN</b>.
 *
 * <li>If we still have more regions that need assignment, again iterate the
 *     least loaded servers, this time giving each one (filling them to
 *     <b>MAX</b>) until we run out.
 *
 * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
 *
 *     In addition, any server hosting &gt;= <b>MAX</b> regions is guaranteed
 *     to end up with <b>MAX</b> regions at the end of the balancing.  This
 *     ensures the minimal number of regions possible are moved.
 * </ol>
 *
 * TODO: We can reassign at most as many regions away from a particular
 *       server as it reports among its most loaded.
 *       Should we just keep all assignment in memory?  Any objections?
 *       Does this mean we need HeapSize on HMaster?  Or just careful monitor?
 *       (current thinking is we will hold all assignments in memory)
 *
 * @param clusterState Map of regionservers and their load/region information to
 *                   a list of their most loaded regions
 * @return a list of regions to be moved, including source and destination,
 *         or null if cluster is already balanced
 */
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
    boolean emptyRegionServerPresent = false;
    long startTime = System.currentTimeMillis();

    int numServers = clusterState.size();
    if (numServers == 0) {
        LOG.debug("numServers=0 so skipping load balancing");
        return null;
    }
    NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = new TreeMap<ServerAndLoad, List<HRegionInfo>>();
    int numRegions = 0;
    // Iterate so we can count regions as we build the map
    for (Map.Entry<ServerName, List<HRegionInfo>> server : clusterState.entrySet()) {
        List<HRegionInfo> regions = server.getValue();
        int sz = regions.size();
        if (sz == 0)
            emptyRegionServerPresent = true;
        numRegions += sz;
        serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions);
    }
    // Check if we even need to do any load balancing
    float average = (float) numRegions / numServers; // also drives the slop bounds below
    // HBASE-3681 check sloppiness first
    int floor = (int) Math.floor(average * (1 - slop));
    int ceiling = (int) Math.ceil(average * (1 + slop));
    if (serversByLoad.lastKey().getLoad() <= ceiling && serversByLoad.firstKey().getLoad() >= floor) {
        // Skipped because no server outside (min,max) range
        LOG.info("Skipping load balancing because balanced cluster; " + "servers=" + numServers + " "
                + "regions=" + numRegions + " average=" + average + " " + "mostloaded="
                + serversByLoad.lastKey().getLoad() + " leastloaded=" + serversByLoad.firstKey().getLoad());
        return null;
    }
    int min = numRegions / numServers;
    int max = numRegions % numServers == 0 ? min : min + 1;

    // Log the balance parameters so the result can be checked.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=")
            .append(numServers).append(", max=").append(max).append(", min=").append(min);
    LOG.debug(strBalanceParam.toString());

    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create();
    List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>();

    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch regions from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<ServerName, BalanceInfo>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
        ServerAndLoad sal = server.getKey();
        int regionCount = sal.getLoad();
        if (regionCount <= max) {
            serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
            break;
        }
        serversOverloaded++;
        List<HRegionInfo> regions = server.getValue();
        int numToOffload = Math.min(regionCount - max, regions.size());
        // account for the out-of-band regions which were assigned to this server
        // after some other region server crashed 
        Collections.sort(regions, riComparator);
        int numTaken = 0;
        for (int i = 0; i <= numToOffload;) {
            HRegionInfo hri = regions.get(i); // fetch from head
            if (fetchFromTail) {
                hri = regions.get(regions.size() - 1 - i);
            }
            i++;
            // Don't rebalance meta regions.
            if (hri.isMetaRegion())
                continue;
            regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
            numTaken++;
            if (numTaken >= numToOffload)
                break;
            // fetch in alternate order if there is new region server
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken));
    }
    int totalNumMoved = regionsToMove.size();

    // Walk down least loaded, filling each to the min
    int neededRegions = 0; // number of regions needed to bring all up to min
    fetchFromTail = false;

    Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min) {
            break;
        }
        underloadedServers.put(server.getKey().getServerName(), min - regionCount);
    }
    // number of servers that get new regions
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns = Arrays
            .asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    while (regionsToMove.size() > 0) {
        int cnt = 0;
        int i = incr > 0 ? 0 : underloadedServers.size() - 1;
        for (; i >= 0 && i < underloadedServers.size(); i += incr) {
            if (regionsToMove.isEmpty())
                break;
            ServerName si = sns.get(i);
            int numToTake = underloadedServers.get(si);
            if (numToTake == 0)
                continue;

            addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }

            underloadedServers.put(si, numToTake - 1);
            cnt++;
            BalanceInfo bi = serverBalanceInfo.get(si);
            if (bi == null) {
                bi = new BalanceInfo(0, 0);
                serverBalanceInfo.put(si, bi);
            }
            bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
        }
        if (cnt == 0)
            break;
        // iterates underloadedServers in the other direction
        incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
        // If we still want to take some, increment needed
        neededRegions += i;
    }

    // If none needed to fill all to min and none left to drain all to max,
    // we are done
    if (neededRegions == 0 && regionsToMove.isEmpty()) {
        long endTime = System.currentTimeMillis();
        LOG.info("Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
                + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
                + " less loaded servers");
        return regionsToReturn;
    }

    // Need to do a second pass.
    // Either more regions to assign out or servers that are still underloaded

    // If we need more to fill min, grab one from each most loaded until enough
    if (neededRegions != 0) {
        // Walk down most loaded, grabbing one from each until we get enough
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
            if (idx >= server.getValue().size())
                break;
            HRegionInfo region = server.getValue().get(idx);
            if (region.isMetaRegion())
                continue; // Don't move meta regions.
            regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
            totalNumMoved++;
            if (--neededRegions == 0) {
                // No more regions needed, done shedding
                break;
            }
        }
    }

    // Now we have a set of regions that must be all assigned out
    // Assign each underloaded up to the min, then if leftovers, assign to max

    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min)
            break;
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
            regionCount += balanceInfo.getNumRegionsAdded();
        }
        if (regionCount >= min) {
            continue;
        }
        int numToTake = min - regionCount;
        int numTaken = 0;
        while (numTaken < numToTake && 0 < regionsToMove.size()) {
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            numTaken++;
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
        }
    }

    // If we still have regions to dish out, assign underloaded to max
    if (0 < regionsToMove.size()) {
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
            int regionCount = server.getKey().getLoad();
            if (regionCount >= max) {
                break;
            }
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            if (emptyRegionServerPresent) {
                fetchFromTail = !fetchFromTail;
            }
            if (regionsToMove.isEmpty()) {
                break;
            }
        }
    }

    long endTime = System.currentTimeMillis();

    if (!regionsToMove.isEmpty() || neededRegions != 0) {
        // Emit data so we can diagnose how the balancer went astray.
        LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded="
                + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterState.entrySet()) {
            if (sb.length() > 0)
                sb.append(", ");
            sb.append(e.getKey().toString());
            sb.append(" ");
            sb.append(e.getValue().size());
        }
        LOG.warn("Input " + sb.toString());
    }

    // All done!
    LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved
            + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded
            + " less loaded servers");

    return regionsToReturn;
}
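The HBASE-3681 sloppiness check near the top of this method can be exercised on its own. A minimal sketch with illustrative inputs (the slop value and the two loads are assumptions, not values from the source):

public class SlopCheckSketch {
    public static void main(String[] args) {
        float slop = 0.2f; // hypothetical sloppiness factor
        int numRegions = 100;
        int numServers = 10;
        float average = (float) numRegions / numServers;     // 10.0
        int floor = (int) Math.floor(average * (1 - slop));  // 8
        int ceiling = (int) Math.ceil(average * (1 + slop)); // 12
        int mostLoaded = 11;  // hypothetical heaviest server's load
        int leastLoaded = 9;  // hypothetical lightest server's load
        // Balancing is skipped when every server already sits inside the band.
        boolean skip = mostLoaded <= ceiling && leastLoaded >= floor;
        System.out.println("skip balancing = " + skip); // true
    }
}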