org.openspaces.grid.gsm.rebalancing.RebalancingUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.openspaces.grid.gsm.rebalancing.RebalancingUtils.java

Source

/*******************************************************************************
 * 
 * Copyright (c) 2012 GigaSpaces Technologies Ltd. All rights reserved
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *       http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *  
 ******************************************************************************/
package org.openspaces.grid.gsm.rebalancing;

import com.gigaspaces.cluster.activeelection.SpaceMode;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.openspaces.admin.Admin;
import org.openspaces.admin.AdminException;
import org.openspaces.admin.GridComponent;
import org.openspaces.admin.gsa.GridServiceAgent;
import org.openspaces.admin.gsa.GridServiceAgents;
import org.openspaces.admin.gsc.GridServiceContainer;
import org.openspaces.admin.internal.admin.InternalAdmin;
import org.openspaces.admin.internal.pu.InternalProcessingUnit;
import org.openspaces.admin.machine.Machine;
import org.openspaces.admin.pu.DeploymentStatus;
import org.openspaces.admin.pu.ProcessingUnit;
import org.openspaces.admin.pu.ProcessingUnitInstance;
import org.openspaces.admin.space.SpaceInstance;
import org.openspaces.core.internal.commons.math.fraction.Fraction;
import org.openspaces.grid.gsm.capacity.CapacityRequirements;
import org.openspaces.grid.gsm.capacity.CapacityRequirementsPerAgent;
import org.openspaces.grid.gsm.capacity.CpuCapacityRequirement;
import org.openspaces.grid.gsm.containers.ContainersSlaUtils;
import org.openspaces.grid.gsm.rebalancing.exceptions.RemovedContainerProcessingUnitDeploymentException;
import org.openspaces.grid.gsm.rebalancing.exceptions.SpaceRecoveryAfterRelocationException;
import org.openspaces.grid.gsm.rebalancing.exceptions.WrongContainerProcessingUnitRelocationException;

import java.io.File;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Logger;

public class RebalancingUtils {

    // Class-level commons-logging logger for RebalancingUtils.
    // Both async factory methods in this class declare their own `logger` parameter, which
    // shadows this field inside those methods.
    private static final Log logger = LogFactory.getLog(RebalancingUtils.class);

    /**
     * Prepares one polling future for every container in {@code containers} that does not yet
     * host an instance of {@code pu}.
     * <p>
     * Nothing is scheduled when the futures are created. Each future advances only when it is
     * polled ({@code isDone()}, {@code get()}, {@code getException()} and, through those,
     * {@code getFailureMessage()}): a poll may schedule {@code pu.incrementInstance()} as an admin
     * operation and then checks whether an instance of {@code pu} has appeared on the future's
     * target container.
     * <p>
     * All futures created by one call share a single deadline and a single
     * {@code targetNumberOfInstances} counter, so that a second increment is not planned while a
     * previously planned one is not yet reflected in {@code pu.getNumberOfInstances()}.
     *
     * @param pu the processing unit to scale out; must allow exactly one instance per VM
     * @param containers candidate containers; only those without an instance of {@code pu} get a future
     * @param logger logger used by the returned futures (shadows the class-level logger)
     * @param duration timeout length, measured from the moment this method is called
     * @param timeUnit unit of {@code duration}
     * @return the futures, as the values view of a {@code HashMap} (iteration order is unspecified)
     * @throws IllegalArgumentException if {@code pu.getMaxInstancesPerVM() != 1}
     */
    static Collection<FutureStatelessProcessingUnitInstance> incrementNumberOfStatelessInstancesAsync(
            final ProcessingUnit pu, final GridServiceContainer[] containers, final Log logger, final long duration,
            final TimeUnit timeUnit) {

        if (pu.getMaxInstancesPerVM() != 1) {
            throw new IllegalArgumentException("Only one instance per VM is allowed");
        }

        List<GridServiceContainer> unusedContainers = getUnusedContainers(pu, containers);

        final Admin admin = pu.getAdmin();
        final Map<GridServiceContainer, FutureStatelessProcessingUnitInstance> futureInstances = new HashMap<GridServiceContainer, FutureStatelessProcessingUnitInstance>();

        // Shared by all futures created below: remembers the instance count that the most recently
        // planned increment is expected to produce.
        final AtomicInteger targetNumberOfInstances = new AtomicInteger(pu.getNumberOfInstances());

        // Common start time and deadline for every future created by this call.
        final long start = System.currentTimeMillis();
        final long end = start + timeUnit.toMillis(duration);

        for (GridServiceContainer container : unusedContainers) {
            final GridServiceContainer targetContainer = container;
            futureInstances.put(container, new FutureStatelessProcessingUnitInstance() {

                // First failure observed; also written from the scheduled admin operation.
                AtomicReference<Throwable> throwable = new AtomicReference<Throwable>();
                // The instance found on targetContainer, once one has been observed there.
                ProcessingUnitInstance newInstance;

                public boolean isTimedOut() {
                    return System.currentTimeMillis() > end;
                }

                // Polls once, then reports done on timeout, failure or success.
                public boolean isDone() {

                    end();

                    return isTimedOut() || throwable.get() != null || newInstance != null;
                }

                // Polls once; a recorded failure takes precedence over a discovered instance,
                // and a timeout is reported only while no instance has been found.
                public ProcessingUnitInstance get()
                        throws ExecutionException, IllegalStateException, TimeoutException {

                    end();

                    if (getException() != null) {
                        throw getException();
                    }

                    if (newInstance == null) {
                        if (isTimedOut()) {
                            throw new TimeoutException("Relocation timeout");
                        }
                        throw new IllegalStateException("Async operation is not done yet.");
                    }

                    return newInstance;
                }

                // Creation time of the future (shared start time of this call).
                public Date getTimestamp() {
                    return new Date(start);
                }

                // Polls once and wraps the recorded failure, if any, in a new ExecutionException
                // on every call; returns null when no failure has been recorded.
                public ExecutionException getException() {

                    end();
                    Throwable t = throwable.get();
                    if (t != null) {
                        return new ExecutionException(t.getMessage(), t);
                    }
                    return null;
                }

                public GridServiceContainer getTargetContainer() {
                    return targetContainer;
                }

                public ProcessingUnit getProcessingUnit() {
                    return pu;
                }

                // Timeout is reported before a recorded exception; throws if neither applies.
                public String getFailureMessage() throws IllegalStateException {
                    if (isTimedOut()) {
                        return "deployment timeout of processing unit " + pu.getName() + " on "
                                + gscToString(targetContainer);
                    }

                    if (getException() != null) {
                        return getException().getMessage();
                    }

                    throw new IllegalStateException("Relocation has not encountered any failure.");
                }

                /**
                 * Single polling step. The container-discovery check comes first, so a target
                 * container that is no longer discovered records a failure even if an instance
                 * had already been found on it.
                 */
                private void end() {

                    if (!targetContainer.isDiscovered()) {
                        throwable.set(new RemovedContainerProcessingUnitDeploymentException(pu, targetContainer));
                    }

                    else if (throwable.get() != null || newInstance != null) {
                        // already resolved (failure or success); nothing more to do on this poll
                    }

                    else {
                        incrementInstance();

                        ProcessingUnitInstance[] instances = targetContainer
                                .getProcessingUnitInstances(pu.getName());

                        // any instance of pu on the target container counts as success
                        if (instances.length > 0) {
                            newInstance = instances[0];
                        }
                    }
                }

                /**
                 * Plans at most one outstanding increment of pu.numberOfInstances. Does nothing
                 * when the current instance count already reaches the number of containers
                 * returned by getContainersOnMachines(pu).
                 */
                private void incrementInstance() {
                    // random id used to correlate the log lines of one planned increment
                    final String uuid = "[incrementUid:" + UUID.randomUUID().toString() + "] ";
                    int numberOfInstances = pu.getNumberOfInstances();
                    int maxNumberOfInstances = getContainersOnMachines(pu).length;
                    if (numberOfInstances < maxNumberOfInstances) {
                        if (targetNumberOfInstances.get() == numberOfInstances + 1) {
                            // an increment was already planned and is not yet visible in
                            // pu.getNumberOfInstances(); wait instead of planning another one
                            if (logger.isInfoEnabled()) {
                                logger.info("Waiting for pu.numberOfInstances to increment from "
                                        + numberOfInstances + " to " + targetNumberOfInstances.get() + ". "
                                        + "Number of relevant containers " + maxNumberOfInstances);
                            }
                        } else if (admin.getGridServiceManagers().getSize() > 1
                                && !((InternalProcessingUnit) pu).isBackupGsmInSync()) {
                            // more than one GSM and the backup GSM is not in sync: wait
                            if (logger.isInfoEnabled()) {
                                logger.info("Waiting for backup gsm to sync with active gsm");
                            }
                        } else {
                            // record the planned target before scheduling, so later polls wait
                            targetNumberOfInstances.set(numberOfInstances + 1);
                            if (logger.isInfoEnabled()) {
                                logger.info(uuid + " Planning to increment pu.numberOfInstances from "
                                        + numberOfInstances + " to " + targetNumberOfInstances.get() + ". "
                                        + "Number of relevant containers " + maxNumberOfInstances);
                            }
                            ((InternalAdmin) admin).scheduleAdminOperation(new Runnable() {
                                public void run() {
                                    try {
                                        // this is an async operation 
                                        // pu.getNumberOfInstances() still shows the old value.
                                        pu.incrementInstance();
                                        if (logger.isInfoEnabled()) {
                                            logger.info(uuid + " pu.incrementInstance() called");
                                        }
                                    } catch (AdminException e) {
                                        // recorded for the poller; not logged here
                                        throwable.set(e);
                                    } catch (Throwable e) {
                                        logger.error(uuid + " Unexpected Exception: " + e.getMessage(), e);
                                        throwable.set(e);
                                    }
                                }
                            });
                        }
                    }
                }
            });

        }

        return futureInstances.values();

    }

    /**
     * Filters the given containers down to those that host no instance of {@code pu}.
     *
     * @param pu the processing unit whose name is used for the per-container lookup
     * @param containers the containers to examine
     * @return a new list, in the order of {@code containers}, of the containers that report
     *         zero instances of {@code pu}
     */
    private static List<GridServiceContainer> getUnusedContainers(final ProcessingUnit pu,
            final GridServiceContainer[] containers) {
        final List<GridServiceContainer> free = new ArrayList<GridServiceContainer>();
        for (int i = 0; i < containers.length; i++) {
            final GridServiceContainer candidate = containers[i];
            final ProcessingUnitInstance[] hosted = candidate.getProcessingUnitInstances(pu.getName());
            if (hosted.length != 0) {
                // already runs an instance of this pu
                continue;
            }
            free.add(candidate);
        }
        return free;
    }

    /**
     * Renders a collection of processing unit instances as a single string.
     * <p>
     * Each instance is rendered with {@code puInstanceToString} and followed by
     * {@code File.separator}, so the result ends with a separator unless the collection is empty.
     * NOTE(review): the delimiter is the platform file separator, so the text differs between
     * operating systems - confirm this is intended.
     *
     * @param instances the instances to render, in iteration order
     * @return the concatenated description; empty when {@code instances} is empty
     */
    public static String puInstancesToString(Collection<ProcessingUnitInstance> instances) {
        final StringBuilder text = new StringBuilder();
        final Iterator<ProcessingUnitInstance> cursor = instances.iterator();
        while (cursor.hasNext()) {
            final ProcessingUnitInstance current = cursor.next();
            text.append(RebalancingUtils.puInstanceToString(current));
            text.append(File.separator);
        }
        return text.toString();
    }

    /**
     * Schedules the relocation of {@code puInstance} to {@code targetContainer} as an admin
     * operation and returns a polling future that tracks its outcome.
     * <p>
     * Before scheduling, the method snapshots the replication source containers, the instance id,
     * the running number, the name, the source container and the other instances of the same
     * partition. The future advances only when polled ({@code isDone()}, {@code get()},
     * {@code getException()}): each poll looks for an instance with the same name and running
     * number that is neither the original instance nor one of the snapshotted partition members.
     * <p>
     * NOTE(review): exceptions thrown by {@code puInstance.relocate()} are logged and stored in
     * {@code relocateThrowable}, but that reference is never read in this method, so a failed
     * relocate() call appears to surface through the future only as a timeout - confirm intended.
     *
     * @param targetContainer the container the instance should be relocated to
     * @param puInstance the instance to relocate
     * @param logger logger used by the scheduled operation and the future (shadows the class-level logger)
     * @param duration timeout length, measured from the moment this method is called
     * @param timeUnit unit of {@code duration}
     * @return a future describing the relocation
     * @throws IllegalStateException if the number of other instances found in the partition differs
     *         from {@code pu.getNumberOfBackups()}, or if {@code getReplicationSourceContainers}
     *         rejects the instance because its partition is not intact
     */
    static FutureStatefulProcessingUnitInstance relocateProcessingUnitInstanceAsync(
            final GridServiceContainer targetContainer, final ProcessingUnitInstance puInstance, final Log logger,
            final long duration, final TimeUnit timeUnit) {

        final ProcessingUnit pu = puInstance.getProcessingUnit();
        final GridServiceContainer[] replicationSourceContainers = getReplicationSourceContainers(puInstance);
        final int instanceId = puInstance.getInstanceId();

        // Written by the scheduled relocate operation below; see the NOTE(review) in the Javadoc.
        final AtomicReference<Throwable> relocateThrowable = new AtomicReference<Throwable>();

        final Admin admin = puInstance.getAdmin();
        final int runningNumber = puInstance.getClusterInfo().getRunningNumber();
        final String puName = puInstance.getName();

        final GridServiceContainer sourceContainer = puInstance.getGridServiceContainer();
        // Snapshot of the partition's other members, used later to tell the relocated instance
        // apart from instances that existed before the relocation.
        final Set<ProcessingUnitInstance> puInstancesFromSamePartition = getOtherInstancesFromSamePartition(
                puInstance);
        if (logger.isDebugEnabled()) {
            logger.debug(
                    "Found instances from the same partition as " + RebalancingUtils.puInstanceToString(puInstance)
                            + " : " + RebalancingUtils.puInstancesToString(puInstancesFromSamePartition));
        }

        if (puInstancesFromSamePartition.size() != pu.getNumberOfBackups()) {
            // total number of instances per partition = numberOfBackups + 1
            throw new IllegalStateException("puInstancesFromSamePartition has "
                    + puInstancesFromSamePartition.size() + " instances instead of " + pu.getNumberOfBackups());
        }

        final long start = System.currentTimeMillis();
        final long end = start + timeUnit.toMillis(duration);

        // The relocation itself is handed to the admin's operation scheduler.
        ((InternalAdmin) admin).scheduleAdminOperation(new Runnable() {
            public void run() {
                try {
                    logger.debug("Relocation of " + RebalancingUtils.puInstanceToString(puInstance) + " to "
                            + ContainersSlaUtils.gscToString(targetContainer) + " has started.");
                    puInstance.relocate(targetContainer);
                } catch (AdminException e) {
                    logger.error("Admin exception " + e.getMessage(), e);
                    relocateThrowable.set(e);
                } catch (Throwable e) {
                    logger.error("Unexpected exception " + e.getMessage(), e);
                    relocateThrowable.set(e);
                }
            }
        });

        return new FutureStatefulProcessingUnitInstance() {

            // Failure detected while polling (only assigned in tryStateChange()).
            Throwable throwable;
            // The relocated instance once it has been observed on the target container with a
            // space instance whose mode is not NONE (only assigned in tryStateChange()).
            ProcessingUnitInstance newInstance;

            public boolean isTimedOut() {
                return System.currentTimeMillis() > end;
            }

            // Polls once, then reports done on timeout, failure or success.
            public boolean isDone() {

                endRelocation();

                return isTimedOut() || throwable != null || newInstance != null;
            }

            // Polls once; a detected failure takes precedence over a discovered instance, and
            // a timeout is reported only while no relocated instance has been accepted.
            public ProcessingUnitInstance get() throws ExecutionException, IllegalStateException, TimeoutException {

                endRelocation();

                ExecutionException exception = getException();
                if (exception != null) {
                    throw exception;
                }
                if (newInstance == null) {
                    if (isTimedOut()) {
                        throw new TimeoutException("Relocation timeout");
                    }
                    throw new IllegalStateException("Async operation is not done yet.");
                }

                return newInstance;
            }

            // Time at which the relocation was requested.
            public Date getTimestamp() {
                return new Date(start);
            }

            // Polls once and wraps the detected failure, if any, in a new ExecutionException on
            // every call; returns null when no failure has been detected.
            public ExecutionException getException() {

                endRelocation();
                if (throwable != null) {
                    return new ExecutionException(throwable.getMessage(), throwable);
                }
                return null;
            }

            /**
             * Polling step: populates this.throwable or this.newInstance if the relocation is
             * complete, otherwise only logs (at debug level) that it is still in progress.
             */
            private void endRelocation() {
                boolean inProgress = true;

                tryStateChange(); // this makes relocation synchronous
                if (newInstance != null || throwable != null) {
                    inProgress = false;
                }

                if (inProgress) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Relocation from " + ContainersSlaUtils.gscToString(getSourceContainer())
                                + " to " + ContainersSlaUtils.gscToString(getTargetContainer())
                                + " is in progress.");
                    }
                    // do nothing. relocate() method running on another thread has not returned yet.
                }
            }

            /**
             * Looks for the relocated instance and classifies the outcome:
             * on the target container with a space instance whose mode is not NONE -> success;
             * on the target container but without such a space instance -> still in progress;
             * on any other container -> WrongContainerProcessingUnitRelocationException.
             */
            private void tryStateChange() {
                ProcessingUnitInstance relocatedInstance = getRelocatedProcessingUnitInstance();
                if (relocatedInstance != null) {

                    if (relocatedInstance.getGridServiceContainer().equals(targetContainer)) {
                        if (relocatedInstance.getSpaceInstance() != null
                                && relocatedInstance.getSpaceInstance().getMode() != SpaceMode.NONE) {
                            if (logger.isDebugEnabled()) {
                                logger.debug(
                                        "Relocation from " + ContainersSlaUtils.gscToString(getSourceContainer())
                                                + " to " + ContainersSlaUtils.gscToString(getTargetContainer())
                                                + " had ended successfully.");
                            }
                            newInstance = relocatedInstance;
                        }
                    } else {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Relocation from " + ContainersSlaUtils.gscToString(getSourceContainer())
                                    + " to " + ContainersSlaUtils.gscToString(getTargetContainer())
                                    + " has ended with an error.");
                        }
                        throwable = new WrongContainerProcessingUnitRelocationException(puInstance,
                                targetContainer);

                    }
                }
            }

            /**
             * Scans every container known to the admin for an instance named puName with the
             * snapshotted running number that is neither the original instance nor one of the
             * partition members snapshotted before the relocation started.
             *
             * @return the first such instance, or null if none is found
             */
            private ProcessingUnitInstance getRelocatedProcessingUnitInstance() {
                for (GridServiceContainer container : admin.getGridServiceContainers()) {
                    for (ProcessingUnitInstance instance : container.getProcessingUnitInstances(puName)) {
                        if (!instance.equals(puInstance)
                                && instance.getClusterInfo().getRunningNumber() == runningNumber
                                && !puInstancesFromSamePartition.contains(instance)) {
                            return instance;
                        }
                    }
                }
                return null;
            }

            // Returns true if at least one of the given instances is discovered and sits on a
            // discovered container.
            // NOTE(review): not called anywhere in this anonymous class - possibly dead code.
            private boolean isAtLeastOneInstanceValid(Set<ProcessingUnitInstance> instances) {
                boolean isValidState = false;
                for (ProcessingUnitInstance instance : instances) {
                    if (instance.isDiscovered() && instance.getGridServiceContainer().isDiscovered()) {
                        isValidState = true;
                        break;
                    }
                }
                return isValidState;
            }

            // Timeout is reported before a detected exception; throws IllegalStateException
            // if neither applies.
            public String getFailureMessage() {
                if (isTimedOut()) {
                    return "relocation timeout of processing unit instance " + instanceId + " from "
                            + gscToString(sourceContainer) + " to " + gscToString(targetContainer);
                }

                if (getException() != null) {
                    return getException().getMessage();
                }

                throw new IllegalStateException("Relocation has not encountered any failure.");

            }

            public GridServiceContainer getTargetContainer() {
                return targetContainer;
            }

            public ProcessingUnit getProcessingUnit() {
                return pu;
            }

            public int getInstanceId() {
                return instanceId;
            }

            public GridServiceContainer getSourceContainer() {
                return sourceContainer;
            }

            // Containers snapshotted by getReplicationSourceContainers(puInstance) before the
            // relocation was scheduled. The misspelled method name is presumably dictated by
            // FutureStatefulProcessingUnitInstance - verify before renaming.
            public GridServiceContainer[] getReplicaitonSourceContainers() {
                return replicationSourceContainers;
            }

        };
    }

    /**
     * Finds the containers hosting the other members of the partition that {@code instance}
     * belongs to, i.e. every backup id from 0 to the number of backups except the instance's own.
     *
     * @param instance the processing unit instance that is about to be relocated
     * @return the distinct containers used by the relocated instance to synchronize its data;
     *         an empty array when the processing unit has no backups
     * @throws IllegalStateException if the processing unit has backups and the instance's
     *         partition is not intact
     */
    public static GridServiceContainer[] getReplicationSourceContainers(ProcessingUnitInstance instance) {
        final Set<GridServiceContainer> sources = new HashSet<GridServiceContainer>();

        final GridServiceContainer[] allContainers = instance.getAdmin().getGridServiceContainers().getContainers();

        final int backups = instance.getProcessingUnit().getNumberOfBackups();
        if (backups == 0) {
            // nothing to replicate from
            return new GridServiceContainer[] {};
        }

        final boolean partitionIntact = isProcessingUnitPartitionIntact(instance.getProcessingUnit(),
                instance.getInstanceId(), allContainers);
        if (!partitionIntact) {
            throw new IllegalStateException("Cannot relocate pu instance " + puInstanceToString(instance)
                    + " since partition is not intact.");
        }

        for (int backupId = 0; backupId <= backups; backupId++) {
            if (backupId == instance.getBackupId()) {
                // skip the instance being relocated
                continue;
            }
            final ProcessingUnitInstance sibling = findProcessingUnitInstance(instance.getProcessingUnit(),
                    instance.getInstanceId(), backupId, allContainers);
            sources.add(sibling.getGridServiceContainer());
        }

        return sources.toArray(new GridServiceContainer[sources.size()]);
    }

    /**
     * Checks whether the processing unit is fully deployed on the given containers.
     * <p>
     * The deployment status must be {@code INTACT}. With backups, every partition
     * (instance ids 1..numberOfInstances) must additionally be intact on {@code containers}.
     * Without backups, the pu must report at least {@code getNumberOfInstances()} instances and
     * each of them must be found on {@code containers} with backup id 0.
     *
     * @param pu the processing unit to check
     * @param containers the containers on which the instances are looked up
     * @return {@code true} if all of the above holds, {@code false} otherwise
     */
    public static boolean isProcessingUnitIntact(ProcessingUnit pu, GridServiceContainer[] containers) {
        if (pu.getStatus() != DeploymentStatus.INTACT) {
            return false;
        }

        if (pu.getNumberOfBackups() > 0) {
            // partitioned with backups: verify partition by partition
            int partitionId = 1;
            while (partitionId <= pu.getNumberOfInstances()) {
                if (!isProcessingUnitPartitionIntact(pu, partitionId, containers)) {
                    return false;
                }
                partitionId++;
            }
            return true;
        }

        // no backups: every known instance must be located on the given containers
        final ProcessingUnitInstance[] deployed = pu.getInstances();
        if (deployed.length < pu.getNumberOfInstances()) {
            return false;
        }
        for (final ProcessingUnitInstance candidate : deployed) {
            if (findProcessingUnitInstance(pu, candidate.getInstanceId(), 0, containers) == null) {
                return false;
            }
        }
        return true;
    }

    /**
     * Convenience overload that runs the minimum-instances-per-partition check against all
     * containers currently known to the processing unit's admin.
     *
     * @param pu the processing unit to check
     * @param minimumNumberOfInstancesPerPartition required number of instances in each partition
     * @return the result of the container-based check
     */
    public static boolean isProcessingUnitHasMinimumNumberOfInstancesPerPartition(ProcessingUnit pu,
            int minimumNumberOfInstancesPerPartition) {
        final GridServiceContainer[] allContainers = pu.getAdmin().getGridServiceContainers().getContainers();
        return isProcessingUnitHasMinimumNumberOfInstancesPerPartition(pu, allContainers,
                minimumNumberOfInstancesPerPartition);
    }

    /**
     * Checks that every partition of the processing unit has at least the requested number of
     * instances on the given containers.
     * <p>
     * When the pu has no backups the minimum is ignored and the check falls back to
     * {@code isProcessingUnitIntact(pu, containers)}.
     *
     * @param pu the processing unit to check
     * @param containers the containers on which the instances are looked up
     * @param minimumNumberOfInstancesPerPartition required number of instances in each partition
     * @return {@code true} if every partition satisfies the requirement
     */
    private static boolean isProcessingUnitHasMinimumNumberOfInstancesPerPartition(ProcessingUnit pu,
            GridServiceContainer[] containers, int minimumNumberOfInstancesPerPartition) {

        if (pu.getNumberOfBackups() <= 0) {
            return isProcessingUnitIntact(pu, containers);
        }

        for (int partitionId = 1; partitionId <= pu.getNumberOfInstances(); partitionId++) {
            final boolean partitionOk = isProcessingUnitPartitionHasMinimumNumberOfInstances(pu, partitionId,
                    containers, minimumNumberOfInstancesPerPartition);
            if (!partitionOk) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether the partition of the given instance is intact, looking at all containers
     * currently known to the instance's admin.
     *
     * @param instance any instance of the partition to check
     * @return {@code true} if the partition is intact
     */
    public static boolean isProcessingUnitPartitionIntact(ProcessingUnitInstance instance) {
        final GridServiceContainer[] allContainers = instance.getAdmin().getGridServiceContainers().getContainers();
        final ProcessingUnit owner = instance.getProcessingUnit();
        final int partitionId = instance.getInstanceId();
        return isProcessingUnitPartitionIntact(owner, partitionId, allContainers);
    }

    /**
     * Checks whether the processing unit is intact, looking at all containers currently known
     * to its admin.
     *
     * @param pu the processing unit to check
     * @return {@code true} if the processing unit is intact on those containers
     */
    public static boolean isProcessingUnitIntact(ProcessingUnit pu) {
        final GridServiceContainer[] allContainers = pu.getAdmin().getGridServiceContainers().getContainers();
        return isProcessingUnitIntact(pu, allContainers);
    }

    /**
     * Searches the given containers for the instance of {@code pu} with the given instance id
     * and backup id.
     *
     * @param pu the processing unit whose name is used for the per-container lookup
     * @param instanceId the instance (partition) id to match
     * @param backupId the backup id to match
     * @param containers the containers to search, in order
     * @return the first matching instance, or {@code null} if none is found
     */
    private static ProcessingUnitInstance findProcessingUnitInstance(ProcessingUnit pu, int instanceId,
            int backupId, GridServiceContainer[] containers) {
        for (int c = 0; c < containers.length; c++) {
            final ProcessingUnitInstance[] hosted = containers[c].getProcessingUnitInstances(pu.getName());
            for (int i = 0; i < hosted.length; i++) {
                final ProcessingUnitInstance candidate = hosted[i];
                if (candidate.getInstanceId() != instanceId) {
                    continue;
                }
                if (candidate.getBackupId() == backupId) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Checks whether a partition is intact, i.e. has its full complement of one primary plus
     * {@code pu.getNumberOfBackups()} backups on the given containers.
     *
     * @param pu the processing unit owning the partition
     * @param instanceId the instance (partition) id
     * @param containers the containers on which the instances are looked up
     * @return {@code true} if the partition has all of its instances
     */
    public static boolean isProcessingUnitPartitionIntact(ProcessingUnit pu, int instanceId,
            GridServiceContainer[] containers) {

        final int fullPartitionSize = 1 + pu.getNumberOfBackups();
        return isProcessingUnitPartitionHasMinimumNumberOfInstances(pu, instanceId, containers,
                fullPartitionSize);
    }

    /**
     * Checks that a partition has exactly one primary and, counting that primary, at least the
     * requested number of instances on the given containers.
     * <p>
     * Only instances that have a space instance in {@code PRIMARY} or {@code BACKUP} mode are
     * counted. A minimum below 1 is always satisfied.
     *
     * @param pu the processing unit owning the partition
     * @param instanceId the instance (partition) id
     * @param containers the containers on which the instances are looked up
     * @param minimumNumberOfInstancesPerPartition required number of instances in the partition
     * @return {@code true} if the requirement is met
     */
    private static boolean isProcessingUnitPartitionHasMinimumNumberOfInstances(ProcessingUnit pu, int instanceId,
            GridServiceContainer[] containers, int minimumNumberOfInstancesPerPartition) {

        if (minimumNumberOfInstancesPerPartition < 1) {
            return true;
        }

        int primaries = 0;
        int backups = 0;

        int backupId = 0;
        while (backupId <= pu.getNumberOfBackups()) {
            final ProcessingUnitInstance member = findProcessingUnitInstance(pu, instanceId, backupId, containers);
            backupId++;
            if (member == null || member.getSpaceInstance() == null) {
                // not deployed, or its space is not available yet
                continue;
            }
            if (member.getSpaceInstance().getMode() == SpaceMode.BACKUP) {
                backups++;
            } else if (member.getSpaceInstance().getMode() == SpaceMode.PRIMARY) {
                primaries++;
            }
        }

        final boolean singlePrimary = primaries == 1;
        return singlePrimary && 1 + backups >= minimumNumberOfInstancesPerPartition;
    }

    /**
     * Collects, across all containers known to the instance's admin, the instances that share
     * the given instance's partition.
     *
     * @param instance the reference instance
     * @return all instances from the same partition that are not the specified instance
     */
    public static Set<ProcessingUnitInstance> getOtherInstancesFromSamePartition(ProcessingUnitInstance instance) {

        final Set<ProcessingUnitInstance> siblings = new HashSet<ProcessingUnitInstance>();
        for (final GridServiceContainer gsc : instance.getAdmin().getGridServiceContainers()) {
            final Set<ProcessingUnitInstance> siblingsOnContainer = getOtherInstancesFromSamePartitionInContainer(
                    gsc, instance);
            siblings.addAll(siblingsOnContainer);
        }
        return siblings;
    }

    /**
     * Collects the instances on one container that share the given instance's partition.
     * <p>
     * Candidates are looked up on the container by the instance's name; a candidate qualifies
     * when it has the same instance id and is not equal to {@code instance}.
     *
     * @param container the container to inspect
     * @param instance the reference instance
     * @return a new set with the qualifying instances; empty if there are none
     */
    public static Set<ProcessingUnitInstance> getOtherInstancesFromSamePartitionInContainer(
            GridServiceContainer container, ProcessingUnitInstance instance) {
        final Set<ProcessingUnitInstance> siblings = new HashSet<ProcessingUnitInstance>();
        final ProcessingUnitInstance[] hosted = container.getProcessingUnitInstances(instance.getName());
        for (int i = 0; i < hosted.length; i++) {
            final ProcessingUnitInstance candidate = hosted[i];
            if (candidate.getInstanceId() != instance.getInstanceId()) {
                continue;
            }
            if (candidate.equals(instance)) {
                continue;
            }
            siblings.add(candidate);
        }
        return siblings;
    }

    /**
     * Collects, across all containers of one machine, the instances that share the given
     * instance's partition (excluding the instance itself).
     *
     * @param machine the machine whose containers are inspected
     * @param puInstance the reference instance
     * @return a new set with the qualifying instances; empty if there are none
     */
    public static Set<ProcessingUnitInstance> getOtherInstancesFromSamePartitionInMachine(Machine machine,
            ProcessingUnitInstance puInstance) {
        final Set<ProcessingUnitInstance> siblings = new HashSet<ProcessingUnitInstance>();
        for (final GridServiceContainer gsc : machine.getGridServiceContainers()) {
            final Set<ProcessingUnitInstance> siblingsOnContainer = getOtherInstancesFromSamePartitionInContainer(
                    gsc, puInstance);
            siblings.addAll(siblingsOnContainer);
        }
        return siblings;
    }

    /**
     * Decides whether the primary instances of {@code pu} are evenly distributed across the
     * machines of the agents listed in {@code aggregatedAllocatedCapacity}.
     * <p>
     * The distribution is considered uneven when the processing unit is not intact on those
     * machines, or when {@code isRestartRecommended} reports a beneficial restart for at least
     * one ordered pair of distinct machines.
     *
     * @param pu the processing unit to inspect
     * @param aggregatedAllocatedCapacity allocated capacity per agent; its agent uids select the machines
     * @return {@code true} if the pu is intact and no restart between any two machines is recommended
     * @throws IllegalStateException if one of the agents is not discovered
     *         (raised by {@code getMachinesFromAgentUids})
     */
    public static boolean isEvenlyDistributedAcrossMachines(ProcessingUnit pu,
            CapacityRequirementsPerAgent aggregatedAllocatedCapacity) {

        final Machine[] machines = getMachinesFromAgentUids(pu, aggregatedAllocatedCapacity.getAgentUids());

        if (!isProcessingUnitIntact(pu, machines)) {
            return false;
        }

        final Fraction averageCpuCoresPerPrimaryInstance = getAverageCpuCoresPerPrimary(pu,
                aggregatedAllocatedCapacity);

        for (final Machine source : machines) {
            for (final Machine target : machines) {

                if (target.equals(source)) {
                    continue;
                }

                if (isRestartRecommended(pu, source, target, averageCpuCoresPerPrimaryInstance,
                        aggregatedAllocatedCapacity)) {
                    // One recommended restart already proves the layout is uneven. Return at
                    // once: an unlabeled break would leave only the inner loop and the outer
                    // loop would keep evaluating the remaining machine pairs for nothing.
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Resolves agent uids to the machines their agents run on.
     *
     * @param pu the processing unit whose admin is used to look up the agents
     * @param agentUids the uids of the agents to resolve
     * @return one machine per uid, in the iteration order of {@code agentUids}
     * @throws IllegalStateException if an agent with one of the uids is not found
     */
    private static Machine[] getMachinesFromAgentUids(ProcessingUnit pu, Collection<String> agentUids) {
        final List<Machine> resolved = new ArrayList<Machine>();
        final GridServiceAgents knownAgents = pu.getAdmin().getGridServiceAgents();
        final Iterator<String> uids = agentUids.iterator();
        while (uids.hasNext()) {
            final String agentUid = uids.next();
            final GridServiceAgent agent = knownAgents.getAgentByUID(agentUid);
            if (agent != null) {
                resolved.add(agent.getMachine());
                continue;
            }
            throw new IllegalStateException("At this point agent " + agentUid + " must be discovered.");
        }
        return resolved.toArray(new Machine[resolved.size()]);
    }

    /**
     * Decides whether moving one primary instance from {@code source} to {@code target} would
     * reduce the total shortage of cpu cores on the two machines.
     * <p>
     * The shortage of a machine is {@code max(0, optimalCpuCoresPerPrimary * primaries - cpuCores)}.
     * The combined shortage is computed for the current primary counts and again with one
     * primary fewer on the source and one more on the target; a restart is recommended only if
     * the second value is strictly smaller. Never recommended when the source has no primaries.
     *
     * @param pu the processing unit whose primaries are counted
     * @param source the machine that would give up a primary
     * @param target the machine that would receive a primary
     * @param optimalCpuCoresPerPrimary the number of cpu cores each primary should ideally have
     * @param allocatedCapacity allocated capacity used to determine each machine's cpu cores
     * @return {@code true} if the restart lowers the combined cpu-core shortage
     */
    public static boolean isRestartRecommended(ProcessingUnit pu, Machine source, Machine target,
            Fraction optimalCpuCoresPerPrimary, CapacityRequirementsPerAgent allocatedCapacity) {

        final int primariesOnSource = getNumberOfPrimaryInstancesOnMachine(pu, source);
        if (primariesOnSource <= 0) {
            // nothing to move away from the source
            return false;
        }

        final int primariesOnTarget = getNumberOfPrimaryInstancesOnMachine(pu, target);
        final Fraction sourceCores = getNumberOfCpuCores(source, allocatedCapacity);
        final Fraction targetCores = getNumberOfCpuCores(target, allocatedCapacity);

        // shortage with the current placement
        final Fraction sourceShortageBefore = max(Fraction.ZERO,
                optimalCpuCoresPerPrimary.multiply(primariesOnSource).subtract(sourceCores));
        final Fraction targetShortageBefore = max(Fraction.ZERO,
                optimalCpuCoresPerPrimary.multiply(primariesOnTarget).subtract(targetCores));
        final Fraction shortageBefore = sourceShortageBefore.add(targetShortageBefore);

        // shortage after moving one primary from source to target
        final Fraction sourceShortageAfter = max(Fraction.ZERO,
                optimalCpuCoresPerPrimary.multiply(primariesOnSource - 1).subtract(sourceCores));
        final Fraction targetShortageAfter = max(Fraction.ZERO,
                optimalCpuCoresPerPrimary.multiply(primariesOnTarget + 1).subtract(targetCores));
        final Fraction shortageAfter = sourceShortageAfter.add(targetShortageAfter);

        return shortageAfter.compareTo(shortageBefore) < 0;
    }

    /**
     * Returns the larger of two fractions according to {@code compareTo}.
     *
     * @param a first operand; returned when the operands compare as equal
     * @param b second operand; returned only when it compares strictly greater than {@code a}
     * @return the larger operand
     */
    private static Fraction max(Fraction a, Fraction b) {
        return b.compareTo(a) > 0 ? b : a;
    }

    /**
     * @return true if number of primary instances are evenly distributed across the specified machines
     *
    public static boolean isEvenlyDistributedAcrossMachines(ProcessingUnit pu, Machine[] machines) {
        
    if (!isProcessingUnitIntact(pu,machines)) {
        return false;
    }
        
    double averagePrimariesPerCpuCore = 
        getAverageNumberOfPrimaryInstancesPerCpuCore(pu,machines);
        
    boolean foundMachineWithSurplusPrimaries = false;
    boolean foundMachineWithDeficitPrimaries = false;
        
    for (Machine machine : machines) {
            
        if (RebalancingUtils.getAverageNumberOfPrimaryInstancesMinusOnePerCpuCore(pu,machine) 
                >= averagePrimariesPerCpuCore) {
            foundMachineWithSurplusPrimaries = true;
        }
        else if (RebalancingUtils.getAverageNumberOfPrimaryInstancesPlusOnePerCpuCore(pu,machine) 
                <= averagePrimariesPerCpuCore) {
            foundMachineWithDeficitPrimaries = true;
        }            
    }
    return 
    // everything is balanced
    (!foundMachineWithSurplusPrimaries && !foundMachineWithDeficitPrimaries) ||
        
    //not exactly balanced, but there is nothing we can do about it
    (!foundMachineWithSurplusPrimaries &&  foundMachineWithDeficitPrimaries) ||
    ( foundMachineWithSurplusPrimaries && !foundMachineWithDeficitPrimaries);
    }
    */
    /**
     * Machine-based overload: resolves the pu's containers on the given machines
     * and delegates to the container-based intactness check.
     *
     * @param pu - the processing unit
     * @param machines - the machines whose containers are inspected
     * @return the result of the container-based check for the resolved containers
     */
    private static boolean isProcessingUnitIntact(ProcessingUnit pu, Machine[] machines) {
        final GridServiceContainer[] containersOnMachines = getContainersOnMachines(pu, machines);
        return isProcessingUnitIntact(pu, containersOnMachines);
    }

    /**
     * Looks up the matching containers across every machine known to the pu's admin.
     *
     * @param pu - the processing unit
     * @return all containers that the gsm can deploy the specified pu on
     */
    public static GridServiceContainer[] getContainersOnMachines(ProcessingUnit pu) {
        final Machine[] allMachines = pu.getAdmin().getMachines().getMachines();
        return getContainersOnMachines(pu, allMachines);
    }

    /**
     * Filters the containers running on the given machines down to those that
     * have exactly one zone, where that zone is the pu's single required zone.
     *
     * @param pu - the processing unit, which must define exactly one required zone
     * @param machines - the machines whose containers are inspected
     * @return the matching containers, in machine/container iteration order
     * @throws IllegalStateException if the pu does not define exactly one required zone
     */
    private static GridServiceContainer[] getContainersOnMachines(ProcessingUnit pu, Machine[] machines) {
        final boolean hasSingleRequiredZone = pu.getRequiredZones().length == 1;
        if (!hasSingleRequiredZone) {
            throw new IllegalStateException("Processing Unit must have exactly one container zone defined.");
        }
        final List<GridServiceContainer> matching = new ArrayList<GridServiceContainer>();
        for (final Machine machine : machines) {
            for (final GridServiceContainer candidate : machine.getGridServiceContainers()) {
                // containers spanning several zones (or none) never qualify
                if (candidate.getZones().size() != 1) {
                    continue;
                }
                if (!candidate.getZones().containsKey(pu.getRequiredZones()[0])) {
                    continue;
                }
                matching.add(candidate);
            }
        }
        return matching.toArray(new GridServiceContainer[matching.size()]);
    }

    /**
     * Checks whether the pu instances are spread evenly over the given containers.
     * <p>
     * The answer is false when the pu is not intact on the containers, when there are
     * fewer instances than containers, or when any container hosts fewer than
     * floor(average) or more than ceil(average) instances of the pu, where average is
     * the total number of instances divided by the number of containers.
     *
     * @param pu - the processing unit
     * @param containers - the containers the instances are expected to be spread over
     * @return true if number of instances are evenly distributed across the specified containers
     */
    public static boolean isEvenlyDistributedAcrossContainers(ProcessingUnit pu,
            GridServiceContainer[] containers) {

        if (!isProcessingUnitIntact(pu, containers)) {
            return false;
        }

        final int totalInstances = pu.getTotalNumberOfInstances();
        final int containerCount = containers.length;
        if (totalInstances < containerCount) {
            // at least one container would necessarily stay empty
            return false;
        }

        final double averagePerContainer = 1.0 * totalInstances / containerCount;
        final int upperBound = (int) Math.ceil(averagePerContainer);
        final int lowerBound = (int) Math.floor(averagePerContainer);

        for (final GridServiceContainer container : containers) {
            final int hosted = container.getProcessingUnitInstances(pu.getName()).length;
            final boolean withinBounds = lowerBound <= hosted && hosted <= upperBound;
            if (!withinBounds) {
                return false;
            }
        }
        return true;
    }

    /**
     * Collects the distinct machines that host the given containers.
     *
     * @param containers - the containers whose machines are requested
     * @return the machines without duplicates; the order is whatever the backing hash set yields
     */
    public static Machine[] getMachinesHostingContainers(GridServiceContainer[] containers) {
        final Set<Machine> hosts = new HashSet<Machine>();
        for (int i = 0; i < containers.length; i++) {
            hosts.add(containers[i].getMachine());
        }
        return hosts.toArray(new Machine[hosts.size()]);
    }

    /**
     * Computes the lower bound of instances planned for a container: the average number
     * of instances per approved container, rounded down. A container that is not in the
     * approved list gets a planned minimum of zero.
     *
     * @param container - the container for which planned min number of instances is requested
     * @param approvedContainers - the containers approved for deployment for the specified pu
     * @param pu - the processing unit
     * @return the planned minimum number of instances for the specified container
     */
    public static int getPlannedMinimumNumberOfInstancesForContainer(GridServiceContainer container,
            GridServiceContainer[] approvedContainers, ProcessingUnit pu) {

        final boolean approved = Arrays.asList(approvedContainers).contains(container);
        if (!approved) {
            return 0;
        }
        final double average = getAverageNumberOfInstancesPerContainer(approvedContainers, pu);
        return (int) Math.floor(average);
    }

    /**
     * Computes the upper bound of instances planned for a container: the average number
     * of instances per approved container, rounded up. A container that is not in the
     * approved list gets a planned maximum of zero.
     *
     * @param container - the container for which planned max number of instances is requested
     * @param approvedContainers - the containers approved for deployment for the specified pu
     * @param pu - the processing unit
     * @return the planned maximum number of instances for the specified container
     */
    public static int getPlannedMaximumNumberOfInstancesForContainer(GridServiceContainer container,
            GridServiceContainer[] approvedContainers, ProcessingUnit pu) {

        final boolean approved = Arrays.asList(approvedContainers).contains(container);
        if (!approved) {
            return 0;
        }
        final double average = getAverageNumberOfInstancesPerContainer(approvedContainers, pu);
        return (int) Math.ceil(average);
    }

    /**
     * Divides the pu's total number of instances by the number of approved containers,
     * using floating point division, and traces the calculation when trace logging is on.
     *
     * @param approvedContainers - the containers approved for deployment for the specified pu
     * @param pu - the processing unit
     * @return the (possibly fractional) average number of instances per approved container
     */
    private static double getAverageNumberOfInstancesPerContainer(GridServiceContainer[] approvedContainers,
            ProcessingUnit pu) {
        final double average = ((double) pu.getTotalNumberOfInstances()) / approvedContainers.length;
        if (!logger.isTraceEnabled()) {
            return average;
        }
        final StringBuilder message = new StringBuilder(
                "averageInstancesPerContainer = ((double) pu.getTotalNumberOfInstances()) / approvedContainers.length = ");
        message.append((double) pu.getTotalNumberOfInstances())
                .append("/")
                .append(approvedContainers.length)
                .append(" = ")
                .append(average);
        logger.trace(message.toString());
        return average;
    }

    /**
     * Returns every container known to the pu's admin, ordered ascending by
     * (number of instances of the specified pu hosted on the container - planned
     * minimum number of instances for that container).
     * A container that is not in the approved list has a planned minimum of 0,
     * so all of its instances count as being above the minimum.
     *
     * @param pu - the processing unit whose instances are counted
     * @param approvedContainers - the containers approved for deployment for the specified pu
     * @return the list of sorted containers
     * @see RebalancingUtils#getPlannedMinimumNumberOfInstancesForContainer(GridServiceContainer, GridServiceContainer[], ProcessingUnit)
     */
    public static List<GridServiceContainer> sortAllContainersByNumberOfInstancesAboveMinimum(
            final ProcessingUnit pu, final GridServiceContainer[] approvedContainers) {

        final GridServiceContainer[] allContainers = pu.getAdmin().getGridServiceContainers().getContainers();
        final List<GridServiceContainer> result = new ArrayList<GridServiceContainer>(
                Arrays.asList(allContainers));

        final Comparator<GridServiceContainer> byInstancesAboveMinimum = new Comparator<GridServiceContainer>() {

            public int compare(final GridServiceContainer left, final GridServiceContainer right) {
                return instancesAboveMinimum(left) - instancesAboveMinimum(right);
            }

            // hosted instances of the pu minus the planned minimum for this container
            private int instancesAboveMinimum(final GridServiceContainer container) {
                final int hosted = container.getProcessingUnitInstances(pu.getName()).length;
                final int plannedMinimum = RebalancingUtils
                        .getPlannedMinimumNumberOfInstancesForContainer(container, approvedContainers, pu);
                return hosted - plannedMinimum;
            }
        };

        Collections.sort(result, byInstancesAboveMinimum);
        return result;
    }

    /**
     * Returns a sorted copy of the given machines, ordered ascending by the number of
     * primary instances of the pu per allocated CPU core. The input array is not modified.
     *
     * @param pu - the processing unit whose primary instances are counted
     * @param machines - the machines to sort
     * @param allocatedCapacity - the capacity allocated per agent, used to look up CPU cores
     * @return a new list holding the machines in sorted order
     * @throws IllegalStateException if, while comparing, a machine turns out to have zero CPU cores
     */
    public static List<Machine> sortMachinesByNumberOfPrimaryInstancesPerCpuCore(final ProcessingUnit pu,
            final Machine[] machines, final CapacityRequirementsPerAgent allocatedCapacity) {

        final Comparator<Machine> byPrimariesPerCpuCore = new Comparator<Machine>() {

            public int compare(final Machine m1, final Machine m2) {

                final boolean bothHaveCpuCores = !getNumberOfCpuCores(m1, allocatedCapacity).equals(Fraction.ZERO)
                        && !getNumberOfCpuCores(m2, allocatedCapacity).equals(Fraction.ZERO);
                if (!bothHaveCpuCores) {
                    throw new IllegalStateException("Rebalancing assumes positive number of CPU cores per machine");
                }

                final Fraction ratio1 = getNumberOfPrimaryInstancesPerCpuCore(pu, m1, allocatedCapacity);
                final Fraction ratio2 = getNumberOfPrimaryInstancesPerCpuCore(pu, m2, allocatedCapacity);
                return ratio1.compareTo(ratio2);
            }
        };

        final List<Machine> result = new ArrayList<Machine>(Arrays.asList(machines));
        Collections.sort(result, byPrimariesPerCpuCore);
        return result;
    }

    /**
     * Computes the ratio between the number of primary instances of the pu on the machine
     * and the number of CPU cores allocated on that machine.
     *
     * @param pu - the processing unit whose primary instances are counted
     * @param machine - the machine being measured
     * @param allocatedCapacity - the capacity allocated per agent, used to look up CPU cores
     * @return primary instances divided by CPU cores, as a fraction
     */
    public static Fraction getNumberOfPrimaryInstancesPerCpuCore(ProcessingUnit pu, Machine machine,
            CapacityRequirementsPerAgent allocatedCapacity) {
        final Fraction primaries = new Fraction(getNumberOfPrimaryInstancesOnMachine(pu, machine));
        final Fraction cpuCores = getNumberOfCpuCores(machine, allocatedCapacity);
        return primaries.divide(cpuCores);
    }

    /**
     * Counts the instances of the pu on the machine whose space instance is in
     * {@link SpaceMode#PRIMARY} mode. Instances without a space instance are not counted.
     *
     * @param pu - the processing unit whose instances are inspected
     * @param machine - the machine whose containers are scanned
     * @return the number of primary instances of the pu found on the machine's containers
     */
    public static int getNumberOfPrimaryInstancesOnMachine(ProcessingUnit pu, Machine machine) {
        int numberOfPrimaryInstances = 0;
        for (GridServiceContainer container : machine.getGridServiceContainers()) {
            for (ProcessingUnitInstance instance : container.getProcessingUnitInstances(pu.getName())) {
                // Read the space instance once so the null check and the mode check look at
                // the same reference (same pattern as puInstanceToString); a second lookup
                // could presumably return null if the admin view changes in between.
                final SpaceInstance spaceInstance = instance.getSpaceInstance();
                if (spaceInstance != null && spaceInstance.getMode() == SpaceMode.PRIMARY) {
                    numberOfPrimaryInstances++;
                }
            }
        }
        return numberOfPrimaryInstances;
    }

    /**
     * Restarts an instance by relocating it onto the very container that currently hosts it.
     *
     * @param candidateInstance - the instance to restart
     * @param logger - passed through to the relocation
     * @param timeout - passed through to the relocation
     * @param timeUnit - the unit of {@code timeout}
     * @return the future returned by the relocation
     */
    public static FutureStatefulProcessingUnitInstance restartProcessingUnitInstanceAsync(
            ProcessingUnitInstance candidateInstance, Log logger, long timeout, TimeUnit timeUnit) {

        final GridServiceContainer currentContainer = candidateInstance.getGridServiceContainer();
        return relocateProcessingUnitInstanceAsync(currentContainer, candidateInstance, logger, timeout,
                timeUnit);
    }

    /**
     * Divides the total allocated CPU cores by the pu's number of instances
     * (as reported by {@code pu.getNumberOfInstances()}).
     *
     * @param pu - the processing unit
     * @param aggregatedAllocatedCapacity - the capacity allocated per agent
     * @return the total CPU cores divided by the number of instances
     * @throws IllegalStateException if the total allocated capacity is zero
     */
    public static Fraction getAverageCpuCoresPerPrimary(ProcessingUnit pu,
            CapacityRequirementsPerAgent aggregatedAllocatedCapacity) {

        final CapacityRequirements total = aggregatedAllocatedCapacity.getTotalAllocatedCapacity();

        if (!total.equalsZero()) {
            final Fraction totalCpuCores = getCpuCores(total);
            return totalCpuCores.divide(pu.getNumberOfInstances());
        }

        throw new IllegalStateException("allocated capacity cannot be empty.");
    }

    /**
     * Extracts the CPU part of the given capacity requirements.
     *
     * @param totalAllocatedCapacity - the capacity to read the CPU requirement from
     * @return the CPU value of the capacity's CPU requirement
     */
    private static Fraction getCpuCores(CapacityRequirements totalAllocatedCapacity) {
        final CpuCapacityRequirement cpuRequirement = totalAllocatedCapacity
                .getRequirement(new CpuCapacityRequirement().getType());
        return cpuRequirement.getCpu();
    }

    /**
     * Looks up the CPU cores allocated to the single agent running on the machine.
     *
     * @param machine - the machine, which must run exactly one grid service agent
     * @param allocatedCapacity - the capacity allocated per agent, keyed by agent uid
     * @return the CPU cores allocated to the machine's agent
     * @throws IllegalStateException if the machine does not have exactly one agent
     */
    public static Fraction getNumberOfCpuCores(Machine machine, CapacityRequirementsPerAgent allocatedCapacity) {
        final int numberOfAgents = machine.getGridServiceAgents().getSize();
        if (numberOfAgents != 1) {
            // the check rejects zero agents as well as more than one, so say "exactly one"
            throw new IllegalStateException(
                    "Machine must have exactly one agent, but found " + numberOfAgents);
        }
        return getCpuCores(allocatedCapacity.getAgentCapacity(machine.getGridServiceAgent().getUid()));
    }

    /**
     * Formats an instance as {@code [instanceId,backupId+1]}, or as
     * {@code [instanceId,backupId+1,mode]} when the instance has a space instance.
     *
     * @param instance - the instance to describe
     * @return the short textual form of the instance
     */
    public static String puInstanceToString(ProcessingUnitInstance instance) {
        final String ids = instance.getInstanceId() + "," + (instance.getBackupId() + 1);
        final SpaceInstance space = instance.getSpaceInstance();
        final String modeSuffix = (space == null) ? "" : "," + space.getMode();
        return "[" + ids + modeSuffix + "]";
    }

    /**
     * Formats a machine as {@code hostName/hostAddress}.
     *
     * @param machine - the machine to describe
     * @return the host name and host address separated by a slash
     */
    public static String machineToString(Machine machine) {
        final StringBuilder text = new StringBuilder();
        text.append(machine.getHostName());
        text.append("/");
        text.append(machine.getHostAddress());
        return text.toString();
    }

    /**
     * Describes a container by delegating to {@code ContainersSlaUtils.gscToString}.
     *
     * @param container - the grid component to describe
     * @return the textual form produced by {@code ContainersSlaUtils}
     */
    public static String gscToString(GridComponent container) {
        final String description = ContainersSlaUtils.gscToString(container);
        return description;
    }

    /**
     * Describes a list of containers by delegating to {@code ContainersSlaUtils.gscsToString}.
     *
     * @param containers - the containers to describe
     * @return the textual form produced by {@code ContainersSlaUtils}
     */
    public static String gscsToString(List<GridServiceContainer> containers) {
        final String description = ContainersSlaUtils.gscsToString(containers);
        return description;
    }

    /**
     * Renders, for every container known to the pu's admin, the container followed by
     * the pu instances it hosts, in the form {@code gsc { [..] [..]  } }.
     *
     * @param pu - the processing unit whose deployment is described
     * @return one line of text listing each container with its instances of the pu
     */
    public static String processingUnitDeploymentToString(ProcessingUnit pu) {
        final StringBuilder text = new StringBuilder();
        for (final GridServiceContainer gsc : pu.getAdmin().getGridServiceContainers()) {
            text.append(gscToString(gsc)).append(" { ");
            for (final ProcessingUnitInstance hosted : gsc.getProcessingUnitInstances(pu.getName())) {
                text.append(puInstanceToString(hosted)).append(" ");
            }
            text.append(" } ");
        }
        return text.toString();
    }

}