Java tutorial
/************************************************************************* * Copyright 2009-2012 Eucalyptus Systems, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see http://www.gnu.org/licenses/. * * Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta * CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need * additional information or have any questions. * * This file may incorporate work covered under the following copyright * and permission notice: * * Software License Agreement (BSD License) * * Copyright (c) 2008, Regents of the University of California * All rights reserved. * * Redistribution and use of this software in source and binary forms, * with or without modification, are permitted provided that the * following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. USERS OF THIS SOFTWARE ACKNOWLEDGE * THE POSSIBLE PRESENCE OF OTHER OPEN SOURCE LICENSED MATERIAL, * COPYRIGHTED MATERIAL OR PATENTED MATERIAL IN THIS SOFTWARE, * AND IF ANY SUCH MATERIAL IS DISCOVERED THE PARTY DISCOVERING * IT MAY INFORM DR. RICH WOLSKI AT THE UNIVERSITY OF CALIFORNIA, * SANTA BARBARA WHO WILL THEN ASCERTAIN THE MOST APPROPRIATE REMEDY, * WHICH IN THE REGENTS' DISCRETION MAY INCLUDE, WITHOUT LIMITATION, * REPLACEMENT OF THE CODE SO IDENTIFIED, LICENSING OF THE CODE SO * IDENTIFIED, OR WITHDRAWAL OF THE CODE CAPABILITY TO THE EXTENT * NEEDED TO COMPLY WITH ANY SUCH LICENSES OR RIGHTS. 
************************************************************************/ package com.eucalyptus.cluster; import java.io.IOException; import java.lang.reflect.UndeclaredThrowableException; import java.net.InetAddress; import java.net.URI; import java.net.URISyntaxException; import java.net.UnknownHostException; import java.security.cert.X509Certificate; import java.util.List; import java.util.Map; import java.util.NavigableSet; import java.util.NoSuchElementException; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.CancellationException; import java.util.concurrent.ConcurrentNavigableMap; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import javax.annotation.Nullable; import com.eucalyptus.component.*; import com.eucalyptus.node.NodeController; import org.apache.log4j.Logger; import com.eucalyptus.auth.principal.Principals; import com.eucalyptus.bootstrap.Bootstrap; import com.eucalyptus.bootstrap.Hosts; import com.eucalyptus.compute.common.CloudMetadata.AvailabilityZoneMetadata; import com.eucalyptus.compute.common.CloudMetadatas; import com.eucalyptus.cluster.ResourceState.VmTypeAvailability; import com.eucalyptus.cluster.callback.ClusterCertsCallback; import com.eucalyptus.cluster.callback.LogDataCallback; import com.eucalyptus.cluster.callback.NetworkStateCallback; import com.eucalyptus.cluster.callback.PublicAddressStateCallback; import com.eucalyptus.cluster.callback.ResourceStateCallback; import com.eucalyptus.cluster.callback.VmStateCallback; import com.eucalyptus.component.Faults.CheckException; import com.eucalyptus.component.id.ClusterController; import 
com.eucalyptus.component.id.ClusterController.GatherLogService; import com.eucalyptus.context.Contexts; import com.eucalyptus.context.ServiceStateException; import com.eucalyptus.crypto.util.B64; import com.eucalyptus.crypto.util.PEMFiles; import com.eucalyptus.empyrean.DescribeServicesResponseType; import com.eucalyptus.empyrean.DescribeServicesType; import com.eucalyptus.empyrean.DisableServiceType; import com.eucalyptus.empyrean.EnableServiceType; import com.eucalyptus.empyrean.ServiceId; import com.eucalyptus.empyrean.ServiceStatusType; import com.eucalyptus.empyrean.ServiceTransitionType; import com.eucalyptus.empyrean.StartServiceType; import com.eucalyptus.entities.Entities; import com.eucalyptus.event.ClockTick; import com.eucalyptus.event.Event; import com.eucalyptus.event.EventListener; import com.eucalyptus.event.Hertz; import com.eucalyptus.event.ListenerRegistry; import com.eucalyptus.event.Listeners; import com.eucalyptus.records.EventRecord; import com.eucalyptus.records.EventType; import com.eucalyptus.records.Logs; import com.eucalyptus.system.Threads; import com.eucalyptus.util.Callback; import com.eucalyptus.util.Classes; import com.eucalyptus.util.EucalyptusCloudException; import com.eucalyptus.util.Exceptions; import com.eucalyptus.util.FullName; import com.eucalyptus.util.HasFullName; import com.eucalyptus.util.LogUtil; import com.eucalyptus.util.OwnerFullName; import com.eucalyptus.util.TypeMappers; import com.eucalyptus.util.async.AsyncRequests; import com.eucalyptus.util.async.CheckedListenableFuture; import com.eucalyptus.util.async.ConnectionException; import com.eucalyptus.util.async.FailedRequestException; import com.eucalyptus.util.async.RemoteCallback; import com.eucalyptus.util.async.SubjectMessageCallback; import com.eucalyptus.util.async.SubjectRemoteCallbackFactory; import com.eucalyptus.util.fsm.AbstractTransitionAction; import com.eucalyptus.util.fsm.Automata; import com.eucalyptus.util.fsm.HasStateMachine; import 
com.eucalyptus.util.fsm.StateMachine; import com.eucalyptus.util.fsm.StateMachineBuilder; import com.eucalyptus.util.fsm.TransitionAction; import com.eucalyptus.util.fsm.Transitions; import com.eucalyptus.vm.MigrationState; import com.eucalyptus.vm.VmInstance; import com.eucalyptus.vm.VmInstances; import com.eucalyptus.vmtypes.VmType; import com.eucalyptus.vmtypes.VmTypes; import com.eucalyptus.ws.WebServicesException; import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.base.Strings; import com.google.common.collect.ForwardingMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.ObjectArrays; import edu.ucsb.eucalyptus.cloud.NodeInfo; import edu.ucsb.eucalyptus.msgs.BaseMessage; import edu.ucsb.eucalyptus.msgs.MigrateInstancesType; import edu.ucsb.eucalyptus.msgs.NodeCertInfo; import edu.ucsb.eucalyptus.msgs.NodeLogInfo; public class Cluster implements AvailabilityZoneMetadata, HasFullName<Cluster>, EventListener, HasStateMachine<Cluster, Cluster.State, Cluster.Transition> { private static Logger LOG = Logger.getLogger(Cluster.class); private final StateMachine<Cluster, State, Transition> stateMachine; private final ClusterConfiguration configuration; //TODO:GRZE: sigh. This stuff needs to be addressed by (1) move to Nodes.java for nodeMap, (2) handling it like any other registered service. 
private final ConcurrentNavigableMap<String, NodeInfo> nodeMap;

/**
 * View over {@link #nodeMap} whose {@code containsKey}/{@code get} first translate the supplied key
 * through {@link #findRealKey(Object)}, so that a node can be looked up by the host name or host
 * address embedded in its service tag as well as by the service tag itself.
 */
private final Map<String, NodeInfo> nodeHostAddrMap = new ForwardingMap<String, NodeInfo>() {
    @Override
    protected Map<String, NodeInfo> delegate() {
        return Cluster.this.nodeMap;
    }

    @Override
    public boolean containsKey(Object keyObject) {
        return delegate().containsKey(findRealKey(keyObject));
    }

    @Override
    public NodeInfo get(Object key) {
        return delegate().get(findRealKey(key));
    }

    /**
     * Maps a host name or host address onto the service tag (map key) that refers to it.
     *
     * @param keyObject the key supplied by the caller
     * @return the matching service tag; the key itself when no tag matches; or the string form of
     *         {@code keyObject} when it is not a {@code String}
     */
    public String findRealKey(Object keyObject) {
        if (!(keyObject instanceof String)) {
            return "" + keyObject;
        }
        final String key = (String) keyObject;
        for (String serviceTag : delegate().keySet()) {
            try {
                final String host = new URI(serviceTag).getHost();
                if (host == null) {
                    // InetAddress.getByName(null) resolves to the loopback address, which would make
                    // a host-less tag falsely match a loopback key; such tags can never match.
                    continue;
                }
                if (host.equals(key)) {
                    return serviceTag;
                }
                final String hostAddr = InetAddress.getByName(host).getHostAddress();
                if (hostAddr != null && hostAddr.equals(key)) {
                    return serviceTag;
                }
            } catch (UnknownHostException | URISyntaxException ex) {
                LOG.debug(ex);
            }
        }
        return key;
    }
};

private final BlockingQueue<Throwable> pendingErrors = new LinkedBlockingDeque<Throwable>();
private final ClusterState state;
private final ResourceState nodeState;
private NodeLogInfo lastLog = new NodeLogInfo();
private boolean hasClusterCert = false;
private boolean hasNodeCert = false;
private final ReadWriteLock gateLock = new ReentrantReadWriteLock();

/** Predicates which register the cluster with {@code Clusters} as enabled or as disabled. */
enum ZoneRegistration implements Predicate<Cluster> {
    REGISTER {
        @Override
        public boolean apply(final Cluster input) {
            Clusters.getInstance().register(input);
            return true;
        }
    },
    DEREGISTER {
        @Override
        public boolean apply(final Cluster input) {
            Clusters.getInstance().registerDisabled(input);
            return true;
        }
    };
}

/**
 * Sends a service state transition message (start/enable/disable) to the cluster. Each constant
 * is both the predicate that triggers the request and the callback that handles the reply.
 */
private enum ServiceStateDispatch
        implements Predicate<Cluster>, RemoteCallback<ServiceTransitionType, ServiceTransitionType> {
    STARTED(StartServiceType.class),
    ENABLED(EnableServiceType.class) {
        @Override
        public boolean apply(final Cluster input) {
            try {
                if (Bootstrap.isOperational()) {
                    super.apply(input);
                }
                ZoneRegistration.REGISTER.apply(input);
                return true;
            } catch (final Exception t) {
                return input.swallowException(t);
            }
        }
    },
    DISABLED(DisableServiceType.class) {
        @Override
        public boolean apply(final Cluster input) {
            try {
                if (Bootstrap.isOperational()) {
                    super.apply(input);
                }
                ZoneRegistration.DEREGISTER.apply(input);
                return true;
            } catch (Exception ex) {
                // Still reported as a failed transition, but no longer dropped without a trace.
                Logs.extreme().debug(ex, ex);
                return false;
            }
        }
    };

    final Class<? extends ServiceTransitionType> msgClass;

    private ServiceStateDispatch(Class<? extends ServiceTransitionType> msgClass) {
        this.msgClass = msgClass;
    }

    @Override
    public ServiceTransitionType getRequest() {
        return Classes.newInstance(this.msgClass);
    }

    @Override
    public void fire(ServiceTransitionType msg) {
        LOG.debug(this.name() + " service: " + msg);
    }

    /**
     * Sends the request synchronously when this host is the coordinator; otherwise does nothing.
     *
     * @return {@code true} unless sending failed and the cluster did not swallow the exception
     */
    @Override
    public boolean apply(final Cluster input) {
        if (!Hosts.isCoordinator()) {
            return true;
        }
        try {
            AsyncRequests.newRequest(this).sendSync(input.configuration);
            return true;
        } catch (final Exception t) {
            return input.swallowException(t);
        }
    }

    @Override
    public void initialize(ServiceTransitionType request) throws Exception {
    }

    @Override
    public void fireException(Throwable t) {
        Logs.extreme().error(t, t);
    }
}

/** Transition actions which fire a log/certificate callback at the cluster's log service. */
enum LogRefresh implements Function<Cluster, TransitionAction<Cluster>> {
    LOGS(LogDataCallback.class), CERTS(ClusterCertsCallback.class);

    Class refresh;

    private LogRefresh(final Class refresh) {
        this.refresh = refresh;
    }

    @Override
    public TransitionAction<Cluster> apply(final Cluster cluster) {
        final SubjectRemoteCallbackFactory<RemoteCallback, Cluster> factory =
                newSubjectMessageFactory(this.refresh, cluster);
        return new AbstractTransitionAction<Cluster>() {
            @Override
            public final void leave(final Cluster parent, final Callback.Completion transitionCallback) {
                // 'false': no coordinator check is performed for log service callbacks.
                Cluster.fireCallback(parent, parent.getLogServiceConfiguration(), false, factory,
                        transitionCallback);
            }
        };
    }
}

private static class ServiceStateCallback extends SubjectMessageCallback<Cluster,
DescribeServicesType, DescribeServicesResponseType> { public ServiceStateCallback() { this.setRequest(new DescribeServicesType()); } @Override public void fire(DescribeServicesResponseType msg) { List<ServiceStatusType> serviceStatuses = msg.getServiceStatuses(); Cluster parent = this.getSubject(); LOG.debug("DescribeServices for " + parent.getFullName()); if (serviceStatuses.isEmpty()) { throw new NoSuchElementException( "Failed to find service info for cluster: " + parent.getFullName()); } else if (!Bootstrap.isOperational()) { return; } else { ServiceConfiguration config = parent.getConfiguration(); for (ServiceStatusType status : serviceStatuses) { if ("self".equals(status.getServiceId().getName())) { status.setServiceId(TypeMappers.transform(parent.getConfiguration(), ServiceId.class)); } if (status.getServiceId() == null || status.getServiceId().getName() == null || status.getServiceId().getType() == null) { LOG.error("Received invalid service id: " + status); } else if (config.getName().equals(status.getServiceId().getName()) && Components .lookup(ClusterController.class).getName().equals(status.getServiceId().getType())) { LOG.debug("Found service info: " + status); Component.State serviceState = Component.State.valueOf(status.getLocalState()); Component.State localState = parent.getConfiguration().lookupState(); Component.State proxyState = parent.getStateMachine().getState().proxyState(); CheckException ex = Faults.transformToExceptions().apply(status); if (Component.State.NOTREADY.equals(serviceState)) { throw new IllegalStateException(ex); } else if (Component.State.ENABLED.equals(serviceState) && Component.State.DISABLED.ordinal() >= localState.ordinal()) { Cluster.ServiceStateDispatch.DISABLED.apply(parent); } else if (Component.State.DISABLED.equals(serviceState) && Component.State.ENABLED.equals(localState)) { Cluster.ServiceStateDispatch.ENABLED.apply(parent); } else if (Component.State.LOADED.equals(serviceState) && 
Component.State.NOTREADY.ordinal() <= localState.ordinal()) { Cluster.ServiceStateDispatch.STARTED.apply(parent); } else if (Component.State.NOTREADY.ordinal() < serviceState.ordinal()) { parent.clearExceptions(); } return; } } } LOG.error("Failed to find service info for cluster: " + parent.getFullName() + " instead found service status for: " + serviceStatuses); throw new NoSuchElementException("Failed to find service info for cluster: " + parent.getFullName()); } @Override public void setSubject(Cluster subject) { this.getRequest().getServices().add(TypeMappers.transform(subject.getConfiguration(), ServiceId.class)); super.setSubject(subject); } } enum Refresh implements Function<Cluster, TransitionAction<Cluster>> { RESOURCES(ResourceStateCallback.class), NETWORKS(NetworkStateCallback.class), INSTANCES( VmStateCallback.class), VOLATILEINSTANCES(VmStateCallback.VmPendingCallback.class), ADDRESSES( PublicAddressStateCallback.class), SERVICEREADY(ServiceStateCallback.class); Class refresh; private Refresh(final Class refresh) { this.refresh = refresh; } @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public TransitionAction<Cluster> apply(final Cluster cluster) { final SubjectRemoteCallbackFactory<RemoteCallback, Cluster> factory = newSubjectMessageFactory( this.refresh, cluster); return new AbstractTransitionAction<Cluster>() { @SuppressWarnings("rawtypes") @Override public final void leave(final Cluster parent, final Callback.Completion transitionCallback) { Cluster.fireCallback(parent, factory, transitionCallback); } }; } public void fire(Cluster input) { final SubjectRemoteCallbackFactory<RemoteCallback, Cluster> factory = newSubjectMessageFactory( this.refresh, input); try { RemoteCallback messageCallback = factory.newInstance(); BaseMessage baseMessage = AsyncRequests.newRequest(messageCallback) .sendSync(input.getConfiguration()); Logs.extreme().debug("Response to " + messageCallback + ": " + baseMessage); } catch (CancellationException ex) { //do 
nothing } catch (Exception ex) { LOG.error(ex); Logs.extreme().error(ex); throw Exceptions.toUndeclared(ex); } } @Override public String toString() { return this.name() + ":" + this.refresh.getSimpleName(); } } private static void fireCallback(final Cluster parent, final SubjectRemoteCallbackFactory<RemoteCallback, Cluster> factory, final Callback.Completion transitionCallback) { fireCallback(parent, parent.getConfiguration(), true, factory, transitionCallback); } private static void fireCallback(final Cluster parent, final ServiceConfiguration config, final boolean doCoordinatorCheck, final SubjectRemoteCallbackFactory<RemoteCallback, Cluster> factory, final Callback.Completion transitionCallback) { RemoteCallback messageCallback = null; try { if (!doCoordinatorCheck || checkCoordinator(transitionCallback)) { try { messageCallback = factory.newInstance(); try { BaseMessage baseMessage = AsyncRequests.newRequest(messageCallback).sendSync(config); transitionCallback.fire(); if (Logs.isExtrrreeeme()) { Logs.extreme().debug(baseMessage); } } catch (final Exception t) { if (!parent.swallowException(t)) { transitionCallback.fireException(Exceptions.unwrapCause(t)); } else { transitionCallback.fire(); } } } catch (CancellationException ex) { transitionCallback.fire(); } catch (Exception ex) { transitionCallback.fireException(ex); } } else { transitionCallback.fire(); } } finally { if (!transitionCallback.isDone()) { LOG.debug(parent.getFullName() + " transition fell through w/o completing: " + messageCallback); Logs.extreme().debug(Exceptions.toUndeclared( parent.getFullName() + " transition fell through w/o completing: " + messageCallback)); transitionCallback.fire(); } } } private static boolean checkCoordinator(final Callback.Completion transitionCallback) { boolean coordinator = false; try { coordinator = Hosts.isCoordinator(); if (!coordinator) { transitionCallback.fire(); return false; } } catch (Exception ex) { transitionCallback.fire(); return false; } return 
coordinator; } private static final List<Class<? extends Exception>> communicationErrors = Lists .newArrayList(ConnectionException.class, IOException.class, WebServicesException.class); private static final List<Class<? extends Exception>> executionErrors = Lists .newArrayList(UndeclaredThrowableException.class, ExecutionException.class); public enum State implements Automata.State<State> { BROKEN, /** cannot establish initial contact with cluster because of CLC side errors **/ STOPPED, /** Component.State.NOTREADY: cluster unreachable **/ PENDING, /** Component.State.NOTREADY: cluster unreachable **/ AUTHENTICATING, STARTING, STARTING_NOTREADY, /** Component.State.NOTREADY:enter() **/ NOTREADY, /** Component.State.NOTREADY -> Component.State.DISABLED **/ DISABLED, /** Component.State.DISABLED -> DISABLED: service ready, not current primary **/ /** Component.State.DISABLED -> Component.State.ENABLED **/ ENABLING, ENABLING_RESOURCES, ENABLING_NET, ENABLING_VMS, ENABLING_ADDRS, ENABLING_VMS_PASS_TWO, ENABLING_ADDRS_PASS_TWO, /** Component.State.ENABLED -> Component.State.ENABLED **/ ENABLED, ENABLED_SERVICE_CHECK, ENABLED_ADDRS, ENABLED_RSC, ENABLED_NET, ENABLED_VMS; public Component.State proxyState() { try { return Component.State.valueOf(this.name()); } catch (final Exception ex) { if (this.equals(DISABLED)) { return Component.State.DISABLED; } else if (this.ordinal() < DISABLED.ordinal()) { return Component.State.NOTREADY; } else if (this.ordinal() >= ENABLING.ordinal()) { return Component.State.ENABLED; } else { return Component.State.INITIALIZED; } } } } public enum Transition implements Automata.Transition<Transition> { RESTART_BROKEN, PRESTART, /** pending setup **/ AUTHENTICATE, START, START_CHECK, STARTING_SERVICES, NOTREADYCHECK, ENABLE, ENABLING_RESOURCES, ENABLING_NET, ENABLING_VMS, ENABLING_ADDRS, ENABLING_VMS_PASS_TWO, ENABLING_ADDRS_PASS_TWO, ENABLED, ENABLED_ADDRS, ENABLED_VMS, ENABLED_NET, ENABLED_SERVICES, ENABLED_RSC, DISABLE, DISABLEDCHECK, STOP, 
}

/** State machine listeners which log and then drop the cluster's pending errors. */
enum ErrorStateListeners implements Callback<Cluster> {
    /** Clears the pending errors. */
    FLUSHPENDING {
        @Override
        public void fire(final Cluster t) {
            LOG.debug("Clearing error logs for: " + t);
            t.clearExceptions();
        }
    },
    /** Logs the pending error queue at the extreme log level when it is non-empty, then clears it. */
    CHECKPENDING {
        @Override
        public void fire(final Cluster t) {
            if (!t.pendingErrors.isEmpty()) {
                Logs.extreme().error(t.pendingErrors);
            }
            LOG.debug("Clearing error logs for: " + t);
            t.clearExceptions();
        }
    }
}

/**
 * Constructor for test use.
 *
 * Leaves {@code state}, {@code nodeState} and {@code stateMachine} null; only the configuration
 * and an empty node map are set up.
 */
protected Cluster(final ClusterConfiguration configuration, final Void nothing) {
    this.configuration = configuration;
    this.state = null;
    this.nodeState = null;
    this.nodeMap = new ConcurrentSkipListMap<>();
    this.stateMachine = null;
}

/**
 * Creates the cluster and wires up its state machine. Every rule below has the form
 * from(state).to(state).error(state).on(transition).run(action).
 */
public Cluster(final ClusterConfiguration configuration) {
    this.configuration = configuration;
    this.state = new ClusterState(configuration.getName());
    this.nodeState = new ResourceState(configuration.getName());
    this.nodeMap = new ConcurrentSkipListMap<String, NodeInfo>();
    // State.PENDING is the builder's second argument -- presumably the initial state; confirm
    // against StateMachineBuilder.
    this.stateMachine = new StateMachineBuilder<Cluster, State, Transition>(this, State.PENDING) {
        {
            final TransitionAction<Cluster> noop = Transitions.noop();
            // Actions attached to being in a state (as opposed to a transition between states).
            this.in(Cluster.State.DISABLED).run(Cluster.ZoneRegistration.DEREGISTER);
            this.in(Cluster.State.NOTREADY).run(Cluster.ServiceStateDispatch.DISABLED);
            this.in(Cluster.State.ENABLED).run(Cluster.ZoneRegistration.REGISTER);
            // Start-up path: BROKEN/STOPPED -> PENDING -> AUTHENTICATING -> STARTING
            // -> STARTING_NOTREADY -> NOTREADY -> DISABLED.
            this.from(State.BROKEN).to(State.PENDING).error(State.BROKEN).on(Transition.RESTART_BROKEN)
                    .run(noop);
            this.from(State.STOPPED).to(State.PENDING).error(State.PENDING).on(Transition.PRESTART).run(noop);
            this.from(State.PENDING).to(State.AUTHENTICATING).error(State.PENDING).on(Transition.AUTHENTICATE)
                    .run(LogRefresh.CERTS);
            this.from(State.AUTHENTICATING).to(State.STARTING).error(State.PENDING).on(Transition.START)
                    .run(noop);
            this.from(State.STARTING).to(State.STARTING_NOTREADY).error(State.PENDING)
                    .on(Transition.START_CHECK).run(Refresh.SERVICEREADY);
            this.from(State.STARTING_NOTREADY).to(State.NOTREADY).error(State.PENDING)
                    .on(Transition.STARTING_SERVICES).run(Refresh.SERVICEREADY);
            this.from(State.NOTREADY).to(State.DISABLED).error(State.NOTREADY).on(Transition.NOTREADYCHECK)
                    .run(Refresh.SERVICEREADY);
            // Periodic check while DISABLED: a self-transition that re-queries the service state.
            this.from(State.DISABLED).to(State.DISABLED).error(State.NOTREADY).on(Transition.DISABLEDCHECK)
                    .addListener(ErrorStateListeners.FLUSHPENDING).run(Refresh.SERVICEREADY);
            this.from(State.DISABLED).to(State.ENABLING).error(State.DISABLED).on(Transition.ENABLE)
                    .run(Cluster.ServiceStateDispatch.ENABLED);
            this.from(State.DISABLED).to(State.STOPPED).error(State.PENDING).on(Transition.STOP).run(noop);
            this.from(State.ENABLED).to(State.DISABLED).error(State.NOTREADY).on(Transition.DISABLE)
                    .run(Cluster.ServiceStateDispatch.DISABLED);
            // Enabling path: resources, networks, instances and addresses are refreshed in turn,
            // then instances and addresses a second time; any error falls back to NOTREADY.
            this.from(State.ENABLING).to(State.ENABLING_RESOURCES).error(State.NOTREADY)
                    .on(Transition.ENABLING_RESOURCES).run(Refresh.RESOURCES);
            this.from(State.ENABLING_RESOURCES).to(State.ENABLING_NET).error(State.NOTREADY)
                    .on(Transition.ENABLING_NET).run(Refresh.NETWORKS);
            this.from(State.ENABLING_NET).to(State.ENABLING_VMS).error(State.NOTREADY)
                    .on(Transition.ENABLING_VMS).run(Refresh.INSTANCES);
            this.from(State.ENABLING_VMS).to(State.ENABLING_ADDRS).error(State.NOTREADY)
                    .on(Transition.ENABLING_ADDRS).run(Refresh.ADDRESSES);
            this.from(State.ENABLING_ADDRS).to(State.ENABLING_VMS_PASS_TWO).error(State.NOTREADY)
                    .on(Transition.ENABLING_VMS_PASS_TWO).run(Refresh.INSTANCES);
            this.from(State.ENABLING_VMS_PASS_TWO).to(State.ENABLING_ADDRS_PASS_TWO).error(State.NOTREADY)
                    .on(Transition.ENABLING_ADDRS_PASS_TWO).run(Refresh.ADDRESSES);
            // NOTE(review): this rule reuses Transition.ENABLING_ADDRS_PASS_TWO, the same transition
            // name as the rule directly above -- confirm that sharing it is intended.
            this.from(State.ENABLING_ADDRS_PASS_TWO).to(State.ENABLED).error(State.NOTREADY)
                    .on(Transition.ENABLING_ADDRS_PASS_TWO).run(Refresh.ADDRESSES);
            // Check cycle while ENABLED: service state first, then addresses (continued below).
            this.from(State.ENABLED).to(State.ENABLED_SERVICE_CHECK).error(State.NOTREADY)
                    .on(Transition.ENABLED_SERVICES).run(Refresh.SERVICEREADY);
            this.from(State.ENABLED_SERVICE_CHECK).to(State.ENABLED_ADDRS).error(State.NOTREADY)
                    .on(Transition.ENABLED_ADDRS).run(Refresh.ADDRESSES);
this.from(State.ENABLED_ADDRS).to(State.ENABLED_RSC).error(State.NOTREADY) .on(Transition.ENABLED_RSC).run(Refresh.RESOURCES); this.from(State.ENABLED_RSC).to(State.ENABLED_NET).error(State.NOTREADY).on(Transition.ENABLED_NET) .run(Refresh.NETWORKS); this.from(State.ENABLED_NET).to(State.ENABLED_VMS).error(State.NOTREADY).on(Transition.ENABLED_VMS) .run(Refresh.INSTANCES); this.from(State.ENABLED_VMS).to(State.ENABLED).error(State.NOTREADY).on(Transition.ENABLED) .run(ErrorStateListeners.FLUSHPENDING); } }.newAtomicMarkedState(); } public void clearExceptions() { if (!this.pendingErrors.isEmpty()) { final List<Throwable> currentErrors = Lists.newArrayList(); this.pendingErrors.drainTo(currentErrors); for (final Throwable t : currentErrors) { final Throwable filtered = Exceptions.filterStackTrace(t); LOG.debug(this.configuration + ": Clearing error: " + filtered.getMessage(), filtered); } } else { LOG.debug(this.configuration + ": no pending errors to clear."); } } private void fireClockTick(final Hertz tick) { try { Component.State systemState; try { systemState = this.configuration.lookupState(); } catch (final NoSuchElementException ex1) { this.stop(); return; } final boolean initialized = systemState.ordinal() > Component.State.LOADED.ordinal(); if (!this.stateMachine.isBusy()) { Callable<CheckedListenableFuture<Cluster>> transition = null; switch (this.stateMachine.getState()) { case PENDING: case AUTHENTICATING: case STARTING: if (tick.isAsserted(Clusters.getConfiguration().getPendingInterval())) { transition = startingTransition(); } break; case NOTREADY: if (initialized && tick.isAsserted(Clusters.getConfiguration().getNotreadyInterval())) { transition = notreadyTransition(); } break; case DISABLED: if (initialized && tick.isAsserted(Clusters.getConfiguration().getDisabledInterval()) && (Component.State.DISABLED.equals(systemState) || Component.State.NOTREADY.equals(systemState))) { transition = disabledTransition(); } else if (initialized && 
tick.isAsserted(Clusters.getConfiguration().getDisabledInterval()) && Component.State.ENABLED.equals(systemState)) { transition = enablingTransition(); } break; case ENABLING: case ENABLING_RESOURCES: case ENABLING_NET: case ENABLING_VMS: case ENABLING_ADDRS: case ENABLING_VMS_PASS_TWO: case ENABLING_ADDRS_PASS_TWO: break; case ENABLED: case ENABLED_ADDRS: case ENABLED_RSC: case ENABLED_NET: case ENABLED_VMS: case ENABLED_SERVICE_CHECK: if (initialized && tick.isAsserted(VmInstances.VOLATILE_STATE_INTERVAL_SEC) && Component.State.ENABLED.equals(this.configuration.lookupState())) { Refresh.VOLATILEINSTANCES.fire(this); } break; default: break; } // if ( transition != null ) { // try { // transition.call( ); // Cluster.this.clearExceptions( ); // } catch ( final Exception ex ) { // LOG.error( ex ); // Logs.extreme( ).error( ex, ex ); // } // } } } catch (final Exception ex) { LOG.error(ex, ex); } } private static final State[] PATH_NOTREADY = new State[] { State.PENDING, State.AUTHENTICATING, State.STARTING, State.STARTING_NOTREADY, State.NOTREADY }; private static final State[] PATH_DISABLED = ObjectArrays.concat(PATH_NOTREADY, State.DISABLED); private static final State[] PATH_ENABLED = new State[] { State.PENDING, State.AUTHENTICATING, State.STARTING, State.STARTING_NOTREADY, State.NOTREADY, State.DISABLED, State.ENABLING, State.ENABLING_RESOURCES, State.ENABLING_NET, State.ENABLING_VMS, State.ENABLING_ADDRS, State.ENABLING_VMS_PASS_TWO, State.ENABLING_ADDRS_PASS_TWO, State.ENABLED }; private static final State[] PATH_ENABLED_CHECK = new State[] { State.ENABLED, State.ENABLED_SERVICE_CHECK, State.ENABLED_ADDRS, State.ENABLED_RSC, State.ENABLED_NET, State.ENABLED_VMS, State.ENABLED }; private Callable<CheckedListenableFuture<Cluster>> disableTransition() { Callable<CheckedListenableFuture<Cluster>> transition; if (this.stateMachine.getState().ordinal() >= State.ENABLED.ordinal()) { return Automata.sequenceTransitions(this, State.ENABLED, State.DISABLED); } else { 
return Automata.sequenceTransitions(this, PATH_DISABLED); } } private Callable<CheckedListenableFuture<Cluster>> enabledTransition() { Callable<CheckedListenableFuture<Cluster>> transition; if (this.stateMachine.getState().ordinal() >= State.ENABLED.ordinal()) { return Automata.sequenceTransitions(this, PATH_ENABLED_CHECK); } else { return Automata.sequenceTransitions(this, PATH_ENABLED); } } private Callable<CheckedListenableFuture<Cluster>> enablingTransition() { return Automata.sequenceTransitions(this, PATH_ENABLED); } private Callable<CheckedListenableFuture<Cluster>> disabledTransition() { if (this.stateMachine.getState().ordinal() >= State.ENABLED.ordinal()) { return Automata.sequenceTransitions(this, ObjectArrays.concat(PATH_ENABLED_CHECK, State.DISABLED)); } else { return Automata.sequenceTransitions(this, ObjectArrays.concat(PATH_DISABLED, State.DISABLED)); } } private Callable<CheckedListenableFuture<Cluster>> notreadyTransition() { if (this.stateMachine.getState().ordinal() >= State.ENABLED.ordinal()) { return Automata.sequenceTransitions(this, ObjectArrays.concat(PATH_ENABLED_CHECK, State.DISABLED)); } else { return Automata.sequenceTransitions(this, PATH_DISABLED); } } private Callable<CheckedListenableFuture<Cluster>> startingTransition() { return Automata.sequenceTransitions(this, PATH_DISABLED); } public Boolean isReady() { return this.hasClusterCert && this.hasNodeCert && Bootstrap.isFinished(); } public X509Certificate getClusterCertificate() { return Partitions.lookup(this.configuration).getCertificate(); } public X509Certificate getNodeCertificate() { return Partitions.lookup(this.configuration).getNodeCertificate(); } @Override public String getName() { return this.configuration.getName(); } public NavigableSet<String> getNodeTags() { return this.nodeMap.navigableKeySet(); } public NodeInfo getNode(final String serviceTag) { if (this.nodeMap.containsKey(serviceTag)) { return this.nodeMap.get(serviceTag); } else { try { URI tag = new 
URI(serviceTag); String host = tag.getHost(); InetAddress addr = InetAddress.getByName(host); String hostAddr = addr.getHostAddress(); String altTag = serviceTag.replace(host, hostAddr); if (this.nodeMap.containsKey(altTag)) { return this.nodeMap.get(altTag); } else { return null;//TODO:GRZE: sigh. } } catch (Exception ex) { return null;//TODO:GRZE: sigh. } } } @Override public int compareTo(final Cluster that) { return this.getName().compareTo(that.getName()); } public ClusterConfiguration getConfiguration() { return this.configuration; } public ClusterState getState() { return this.state; } public ResourceState getNodeState() { return this.nodeState; } public void start() throws ServiceRegistrationException { try { Clusters.getInstance().registerDisabled(this); if (!State.DISABLED.equals(this.stateMachine.getState())) { final Callable<CheckedListenableFuture<Cluster>> trans = startingTransition(); Exception lastEx = null; for (int i = 0; i < Clusters.getConfiguration().getStartupSyncRetries(); i++) { try { trans.call().get(); lastEx = null; break; } catch (final InterruptedException ex) { Thread.currentThread().interrupt(); } catch (final ServiceRegistrationException ex) { lastEx = ex; Logs.extreme().debug(ex, ex); } catch (final Exception ex) { lastEx = ex; Logs.extreme().debug(ex, ex); } } Listeners.register(Hertz.class, this); } } catch (final NoSuchElementException ex) { Logs.extreme().debug(ex, ex); throw ex; } catch (final Exception ex) { Logs.extreme().debug(ex, ex); throw new ServiceRegistrationException( "Failed to call start() on cluster " + this.configuration + " because of: " + ex.getMessage(), ex); } } public void enable() throws ServiceRegistrationException { if (State.ENABLING.ordinal() > this.stateMachine.getState().ordinal()) { try { final Callable<CheckedListenableFuture<Cluster>> trans = enablingTransition(); RuntimeException fail = null; for (int i = 0; i < Clusters.getConfiguration().getStartupSyncRetries(); i++) { try { trans.call().get(); 
fail = null; break; } catch (Exception ex) { try { TimeUnit.SECONDS.sleep(1); } catch (Exception ex1) { LOG.error(ex1, ex1); } fail = Exceptions.toUndeclared(ex); } } if (fail != null) { throw fail; } } catch (final Exception ex) { Logs.extreme().debug(ex, ex); throw new ServiceRegistrationException("Failed to call enable() on cluster " + this.configuration + " because of: " + ex.getMessage(), ex); } } } public void disable() throws ServiceRegistrationException { try { if (State.NOTREADY.equals(this.getStateMachine().getState())) { Automata.sequenceTransitions(this, State.NOTREADY, State.DISABLED).call().get(); } else if (State.ENABLED.equals(this.getStateMachine().getState())) { Automata.sequenceTransitions(this, State.ENABLED, State.DISABLED).call().get(); } } catch (final InterruptedException ex) { Thread.currentThread().interrupt(); } catch (final Exception ex) { Logs.extreme().debug(ex, ex); // throw new ServiceRegistrationException( "Failed to call disable() on cluster " + this.configuration // + " because of: " // + ex.getMessage( ), ex ); } finally { try { Clusters.getInstance().disable(this.getName()); } catch (Exception ex) { } } } public void stop() throws ServiceRegistrationException { try { Automata.sequenceTransitions(this, State.DISABLED, State.STOPPED).call().get(); } catch (final InterruptedException ex) { Thread.currentThread().interrupt(); } catch (final Exception ex) { Logs.extreme().debug(ex, ex); throw new ServiceRegistrationException( "Failed to call stop() on cluster " + this.configuration + " because of: " + ex.getMessage(), ex); } finally { try { ListenerRegistry.getInstance().deregister(Hertz.class, this); ListenerRegistry.getInstance().deregister(ClockTick.class, this); } catch (Exception ex) { } Clusters.getInstance().deregister(this.getName()); } } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((this.configuration == null) ? 
0 : this.configuration.hashCode()); result = prime * result + ((this.state == null) ? 0 : this.state.hashCode()); return result; } @Override public boolean equals(final Object obj) { if (this == obj) { return true; } if (obj == null) { return false; } if (this.getClass() != obj.getClass()) { return false; } final Cluster other = (Cluster) obj; if (this.configuration == null) { if (other.configuration != null) { return false; } } else if (!this.configuration.equals(other.configuration)) { return false; } if (this.state == null) { if (other.state != null) { return false; } } else if (!this.state.equals(other.state)) { return false; } return true; } public URI getUri() { return ServiceUris.remote(this.configuration); } public String getHostName() { return this.configuration.getHostName(); } public String getInsecureServicePath() { return this.configuration.getInsecureServicePath(); } public Integer getPort() { return this.configuration.getPort(); } public String getServicePath() { return this.configuration.getServicePath(); } public ThreadFactory getThreadFactory() { return Threads.lookup(ClusterController.class, Cluster.class, this.getFullName().toString()); } @Override public String toString() { final StringBuilder buf = new StringBuilder(); buf.append("Cluster ").append(this.configuration).append('\n'); buf.append("Cluster ").append(this.configuration.getName());//.append( " mq=" ).append( this.getConfiguration( ).lookupService( ). 
).append( '\n' );//TODO:GRZE:RESTORE ME for (final NodeInfo node : this.nodeMap.values()) { buf.append("Cluster ").append(this.configuration.getName()).append(" node=").append(node).append('\n'); } for (final VmType type : VmTypes.list()) { final VmTypeAvailability avail = this.nodeState.getAvailability(type.getName()); buf.append("Cluster ").append(this.configuration.getName()).append(" node=").append(avail).append('\n'); } return buf.toString(); } private final AtomicBoolean logUpdate = new AtomicBoolean(false); private final Predicate<VmInstance> filterPartition = new Predicate<VmInstance>() { @Override public boolean apply(@Nullable VmInstance input) { return input.getPartition().equals(Cluster.this.getPartition()) && MigrationState.isMigrating(input); } }; public NodeLogInfo getLastLog() { if (this.logUpdate.compareAndSet(false, true)) { final Cluster self = this; try { /** * TODO:ASAP:GRZE: RESTORE * Callbacks.newRequest( new LogDataCallback( this, null ) ) * .execute( this.getServiceEndpoint( ), * com.eucalyptus.component.id.Cluster.getLogClientPipeline( ) ) * .getResponse( ).get( ); * Callbacks.newLogRequest( new LogDataCallback( this, null ) ).dispatch( * this.getServiceEndpoint( ) ); **/ } catch (final Throwable t) { LOG.error(t, t); } finally { this.logUpdate.set(false); } } return this.lastLog; } public void clearLogPending() { this.logUpdate.set(false); } public NodeLogInfo getNodeLog(final String nodeIp) { final NodeInfo nodeInfo = Iterables.find(this.nodeMap.values(), new Predicate<NodeInfo>() { @Override public boolean apply(final NodeInfo arg0) { return nodeIp.equals(arg0.getName()); } }); if (nodeInfo == null) { throw new NoSuchElementException("Error obtaining node log files for: " + nodeIp); } if (this.logUpdate.compareAndSet(false, true)) { final Cluster self = this; try { /** * TODO:ASAP:GRZE: RESTORE * Callbacks.newRequest( new LogDataCallback( this, null ) ) * .execute( this.getServiceEndpoint( ), * 
com.eucalyptus.component.id.Cluster.getLogClientPipeline( ) ) * .getResponse( ).get( ); **/ // Callbacks.newLogRequest( new LogDataCallback( this, nodeInfo ) ).dispatch( this.getServiceEndpoint( ) ); } catch (final Throwable t) { LOG.debug(t, t); } finally { this.logUpdate.set(false); } } return nodeInfo.getLogs(); } public void setLastLog(final NodeLogInfo lastLog) { this.lastLog = lastLog; } public boolean checkCerts(final NodeCertInfo certs) { if ((certs == null) || (certs.getCcCert() == null) || (certs.getNcCert() == null)) { return false; } final X509Certificate clusterx509 = PEMFiles.getCert(B64.standard.dec(certs.getCcCert())); final X509Certificate nodex509 = PEMFiles.getCert(B64.standard.dec(certs.getNcCert())); if ("self".equals(certs.getServiceTag()) || (certs.getServiceTag() == null)) { return (this.hasClusterCert = this.checkCerts(this.getClusterCertificate(), clusterx509)) && (this.hasNodeCert = this.checkCerts(this.getNodeCertificate(), nodex509)); } else if (this.nodeMap.containsKey(certs.getServiceTag())) { final NodeInfo nodeInfo = this.nodeMap.get(certs.getServiceTag()); nodeInfo.setHasClusterCert(this.checkCerts(this.getClusterCertificate(), clusterx509)); nodeInfo.setHasNodeCert(this.checkCerts(this.getNodeCertificate(), nodex509)); return nodeInfo.getHasClusterCert() && nodeInfo.getHasNodeCert(); } else { LOG.error("Cluster " + this.getName() + " failed to find cluster/node info for service tag: " + certs.getServiceTag()); return false; } } private boolean checkCerts(final X509Certificate realx509, final X509Certificate msgx509) { if (realx509 != null) { final Boolean match = realx509.equals(msgx509); EventRecord.here(Cluster.class, EventType.CLUSTER_CERT, this.getName(), realx509.getSubjectX500Principal().getName(), match.toString()).info(); if (!match) { LOG.warn(LogUtil.subheader("EXPECTED CERTIFICATE") + realx509); LOG.warn(LogUtil.subheader("RECEIVED CERTIFICATE") + msgx509); } return match; } else { return false; } } private 
AbstractTransitionAction<Cluster> newLogRefresh(final Class msgClass) {//TODO:GRZE:REMOVE final Cluster cluster = this; final SubjectRemoteCallbackFactory<RemoteCallback, Cluster> factory = newSubjectMessageFactory(msgClass, cluster); return new AbstractTransitionAction<Cluster>() { @Override public final void leave(final Cluster parent, final Callback.Completion transitionCallback) { Cluster.fireCallback(parent, parent.getLogServiceConfiguration(), false, factory, transitionCallback); } }; } protected ServiceConfiguration getLogServiceConfiguration() { final ComponentId glId = ComponentIds.lookup(GatherLogService.class); final ServiceConfiguration conf = this.getConfiguration(); final URI glUri = ServiceUris.remote(GatherLogService.class, conf.getInetAddress()); return ServiceConfigurations.createEphemeral(glId, conf.getPartition(), conf.getName(), glUri); } @Override public void fireEvent(final Event event) { if (!Bootstrap.isFinished()) { LOG.info(this.getFullName() + " skipping clock event because bootstrap isn't finished"); } else if (Hosts.isCoordinator() && event instanceof Hertz) { this.fireClockTick((Hertz) event); } } private static <P, T extends SubjectMessageCallback<P, Q, R>, Q extends BaseMessage, R extends BaseMessage> SubjectRemoteCallbackFactory<T, P> newSubjectMessageFactory( final Class<T> callbackClass, final P subject) throws CancellationException { return new SubjectRemoteCallbackFactory<T, P>() { @Override public T newInstance() { try { if (subject != null) { try { T callback = Classes.builder(callbackClass).arg(subject).newInstance(); return callback; } catch (UndeclaredThrowableException ex) { if (ex.getCause() instanceof CancellationException) { throw (CancellationException) ex.getCause(); } else if (ex.getCause() instanceof NoSuchMethodException) { try { T callback = Classes.builder(callbackClass).newInstance(); callback.setSubject(subject); return callback; } catch (UndeclaredThrowableException ex1) { if (ex1.getCause() instanceof 
CancellationException) { throw (CancellationException) ex.getCause(); } else if (ex1.getCause() instanceof NoSuchMethodException) { throw ex1; } else { throw ex1; } } catch (Exception ex1) { if (ex1.getCause() instanceof CancellationException) { throw (CancellationException) ex.getCause(); } else if (ex1.getCause() instanceof NoSuchMethodException) { throw ex; } else { throw Exceptions.toUndeclared(ex1); } } } else { T callback = callbackClass.newInstance(); LOG.error("Creating uninitialized callback (subject=" + subject + ") for type: " + callbackClass.getCanonicalName()); return callback; } } catch (RuntimeException ex) { LOG.error("Failed to create instance of: " + callbackClass); Logs.extreme().error(ex, ex); throw ex; } } else { T callback = callbackClass.newInstance(); LOG.error("Creating uninitialized callback (subject=" + subject + ") for type: " + callbackClass.getCanonicalName()); return callback; } } catch (final CancellationException ex) { LOG.debug(ex); throw ex; } catch (final Exception ex) { LOG.error(ex); Logs.extreme().error(ex, ex); throw Exceptions.toUndeclared(ex); } } @Override public P getSubject() { return subject; } }; } private <T extends Throwable> boolean swallowException(final T t) { LOG.error(this.getConfiguration().getFullName() + " checking: " + Exceptions.causeString(t)); if (Exceptions.isCausedBy(t, InterruptedException.class)) { Thread.currentThread().interrupt(); return true; } else if (Exceptions.isCausedBy(t, FailedRequestException.class)) { Logs.extreme().debug(t, t); this.pendingErrors.add(t); return false; } else if (Exceptions.isCausedBy(t, ConnectionException.class) || Exceptions.isCausedBy(t, IOException.class)) { LOG.error(this.getName() + ": Error communicating with cluster: " + t.getMessage()); Logs.extreme().debug(t, t); this.pendingErrors.add(t); return false; } else { Logs.extreme().debug(t, t); this.pendingErrors.add(t); return false; } } public void refreshResources() { try { Refresh.RESOURCES.fire(this); } catch 
(Exception ex) { LOG.error(ex); LOG.debug(ex, ex); } } public void check() throws Faults.CheckException, IllegalStateException, InterruptedException, ServiceStateException { if (this.gateLock.readLock().tryLock(60, TimeUnit.SECONDS)) { try { final Cluster.State currentState = this.stateMachine.getState(); final List<Throwable> currentErrors = Lists.newArrayList(this.pendingErrors); this.pendingErrors.clear(); try { Component.State state = this.configuration.lookupState(); if (Component.State.ENABLED.equals(this.configuration.lookupState())) { enabledTransition().call().get(); } else if (Component.State.DISABLED.equals(this.configuration.lookupState())) { disabledTransition().call().get(); } else if (Component.State.NOTREADY.equals(this.configuration.lookupState())) { notreadyTransition().call().get(); } else { Refresh.SERVICEREADY.fire(this); } } catch (Exception ex) { if (ex.getCause() instanceof CancellationException) { //ignore cancellation errors. } else { currentErrors.add(ex); } } final Component.State externalState = this.configuration.lookupState(); if (!currentErrors.isEmpty()) { throw Faults.failure(this.configuration, currentErrors); } else if (Component.State.ENABLED.equals(externalState) && (Cluster.State.ENABLING.ordinal() >= currentState.ordinal())) { final IllegalStateException ex = new IllegalStateException( "Cluster is currently reported as " + externalState + " but is really " + currentState + ": please see logs for additional information."); currentErrors.add(ex); throw Faults.failure(this.configuration, currentErrors); } } finally { //#6 Unmark this cluster as gated. 
this.gateLock.readLock().unlock(); } } else { throw new ServiceStateException("Failed to check state in the zone " + this.getPartition() + ", it is currently locked for maintenance."); } } @Override public String getPartition() { return this.configuration.getPartition(); } public Partition lookupPartition() { return Partitions.lookup(this.getConfiguration()); } @Override public FullName getFullName() { return this.configuration.getFullName(); } @Override public StateMachine<Cluster, State, Transition> getStateMachine() { return this.stateMachine; } @Override public String getDisplayName() { return this.getPartition(); } @Override public OwnerFullName getOwner() { return Principals.systemFullName(); } public ConcurrentNavigableMap<String, NodeInfo> getNodeMap() { return this.nodeMap; } /** * GRZE:WARNING: this is a temporary method to expose the forwarding map of NC info * @return */ public Map<String, NodeInfo> getNodeHostMap() { return this.nodeHostAddrMap; } public ReadWriteLock getGateLock() { return this.gateLock; } /** * <ol> * <li> Mark this cluster as gated. * <li> Update node and resource information; describe resources. * <li> Find all VMs and update their migration state and volumes * <li> Send the MigrateInstances operation. * <li> Update node and resource information; describe resources. * <li> Unmark this cluster as gated. * </ol> * @param sourceHost * @param destHostsWhiteList -- the destination host list is a white list when true and a black list when false * @param destHosts -- list of hosts which are either a white list or black list based on {@code destHostsWhiteList} * @throws EucalyptusCloudException * @throws Exception */ public void migrateInstances(final String sourceHost, final Boolean destHostsWhiteList, final List<String> destHosts) throws Exception { //#1 Mark this cluster as gated. 
if (this.gateLock.writeLock().tryLock(60, TimeUnit.SECONDS)) { try { //#2 Only one migration per cluster for now List<VmInstance> currentMigrations = this.lookupCurrentMigrations(); if (!currentMigrations.isEmpty()) { throw Exceptions.toUndeclared( "Cannot start a new migration because the following are already ongoing: " + Joiner .on(", ") .join(Iterables.transform(currentMigrations, CloudMetadatas.toDisplayName()))); } //#3 Update node and resource information this.retryCheck(); //#4 Find all VMs and update their migration state and volumes this.prepareInstanceEvacuations(sourceHost); //#5 Send the MigrateInstances operation. try { AsyncRequests.sendSync(this.getConfiguration(), new MigrateInstancesType() { { this.setCorrelationId(Contexts.lookup().getCorrelationId()); this.setSourceHost(sourceHost); this.setAllowHosts(destHostsWhiteList); this.getDestinationHosts().addAll(destHosts); } }); } catch (Exception ex) { //#5 On error go back and abort the migration status for every instance this.rollbackInstanceEvacuations(sourceHost); throw ex; } //#6 Update node and resource information; describe resources. this.retryCheck(); } catch (Exception ex) { LOG.error(ex); throw ex; } finally { //#6 Unmark this cluster as gated. this.gateLock.writeLock().unlock(); } } else { throw new ServiceStateException("Failed to request migration in the zone " + this.getPartition() + ", it is currently locked for maintenance."); } } /** * <ol> * <li> Mark this cluster as gated. * <li> Update node and resource information; describe resources. * <li> Find the VM and its volume attachments and authorize every node's IQN. * <li> Send the MigrateInstances operation. * <li> Update node and resource information; describe resources. * <li> Unmark this cluster as gated. 
* </ol> * @param sourceHost * @param destHostsWhiteList -- the destination host list is a white list when true and a black list when false * @param destHosts -- list of hosts which are either a white list or black list based on {@code destHostsWhiteList} * @throws EucalyptusCloudException * @throws Exception */ public void migrateInstance(final String instanceId, final Boolean destHostsWhiteList, final List<String> destHosts) throws Exception { //#1 Mark this cluster as gated. if (this.gateLock.writeLock().tryLock(60, TimeUnit.SECONDS)) { try { //#2 Only one migration per cluster for now List<VmInstance> currentMigrations = this.lookupCurrentMigrations(); if (!currentMigrations.isEmpty()) { throw Exceptions.toUndeclared( "Cannot start a new migration because the following are already ongoing: " + Joiner .on(", ") .join(Iterables.transform(currentMigrations, CloudMetadatas.toDisplayName()))); } //#3 Update node and resource information this.retryCheck(); //#4 Find all VMs and update their migration state and volumes this.prepareInstanceMigrations(instanceId); //#5 Send the MigrateInstances operation. try { AsyncRequests.sendSync(this.getConfiguration(), new MigrateInstancesType() { { this.setCorrelationId(Contexts.lookup().getCorrelationId()); this.setInstanceId(instanceId); this.setAllowHosts(destHostsWhiteList); this.getDestinationHosts().addAll(destHosts); } }); } catch (Exception ex) { //#5 On error go back and abort the migration status for every instance this.rollbackInstanceMigrations(instanceId); throw ex; } //#6 Update node and resource information; describe resources. this.retryCheck(); } catch (Exception ex) { LOG.error(ex); throw ex; } finally { //#6 Unmark this cluster as gated. 
this.gateLock.writeLock().unlock(); } } else { throw new ServiceStateException("Failed to request migration in the zone " + this.getPartition() + ", it is currently locked for maintenance."); } } private void rollbackInstanceEvacuations(final String sourceHost) { Predicate<VmInstance> filterHost = new Predicate<VmInstance>() { @Override public boolean apply(@Nullable VmInstance input) { String vmHost = URI.create(input.getServiceTag()).getHost(); return Strings.nullToEmpty(vmHost).equals(sourceHost); } }; Predicate<VmInstance> rollbackMigration = new Predicate<VmInstance>() { @Override public boolean apply(@Nullable VmInstance input) { input.abortMigration(); return true; } }; Predicate<VmInstance> filterAndAbort = Predicates.and(this.filterPartition, rollbackMigration); Predicate<VmInstance> rollbackMigrationTx = Entities.asTransaction(VmInstance.class, filterAndAbort); VmInstances.list(rollbackMigrationTx); } @SuppressWarnings("unchecked") private void prepareInstanceEvacuations(final String sourceHost) { Predicate<VmInstance> filterHost = new Predicate<VmInstance>() { @Override public boolean apply(@Nullable VmInstance input) { String vmHost = URI.create(input.getServiceTag()).getHost(); return Strings.nullToEmpty(vmHost).equals(sourceHost); } }; Predicate<VmInstance> startMigration = new Predicate<VmInstance>() { @Override public boolean apply(@Nullable VmInstance input) { input.startMigration(); return true; } }; Predicate<VmInstance> filterAndAbort = Predicates.and(this.filterPartition, startMigration); Predicate<VmInstance> startMigrationTx = Entities.asTransaction(VmInstance.class, filterAndAbort); VmInstances.list(startMigrationTx); } private void rollbackInstanceMigrations(final String instanceId) { Predicate<VmInstance> rollbackMigration = new Predicate<VmInstance>() { @Override public boolean apply(@Nullable VmInstance input) { input.abortMigration(); return true; } }; Predicate<VmInstance> rollbackMigrationTx = Entities.asTransaction(VmInstance.class, 
rollbackMigration); rollbackMigrationTx.apply(VmInstances.lookup(instanceId)); } @SuppressWarnings("unchecked") private void prepareInstanceMigrations(final String instanceId) { Predicate<VmInstance> startMigration = new Predicate<VmInstance>() { @Override public boolean apply(@Nullable VmInstance input) { input.startMigration(); return true; } }; Predicate<VmInstance> startMigrationTx = Entities.asTransaction(VmInstance.class, startMigration); startMigrationTx.apply(VmInstances.lookup(instanceId)); } private List<VmInstance> lookupCurrentMigrations() throws Exception { return VmInstances.list(this.filterPartition); } private void retryCheck() throws Exception { Exception lastEx = null; for (int i = 0; i < 5; i++) { try { this.check(); return; } catch (Exception ex) { LOG.debug("Retrying after failed attempt to refresh cluster state in check(): " + ex.getMessage()); lastEx = ex; TimeUnit.SECONDS.sleep(2); } } throw new ServiceStateException("Failed to request migration in the zone " + this.getPartition() + " because updating resources returned an error: " + (lastEx != null ? lastEx.getMessage() : "unknown error")); } }