Java tutorial
/* * Copyright (c) 2014 Spotify AB. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.spotify.helios.master; import static com.google.common.base.Charsets.UTF_8; import static com.google.common.base.Strings.isNullOrEmpty; import static com.spotify.helios.servicescommon.ServiceRegistrars.createServiceRegistrar; import static com.spotify.helios.servicescommon.ZooKeeperAclProviders.digest; import static com.spotify.helios.servicescommon.ZooKeeperAclProviders.heliosAclProvider; import com.spotify.helios.common.HeliosRuntimeException; import com.spotify.helios.master.http.VersionResponseFilter; import com.spotify.helios.master.metrics.HealthCheckGauge; import com.spotify.helios.master.metrics.ReportingResourceMethodDispatchAdapter; import com.spotify.helios.master.reaper.DeadAgentReaper; import com.spotify.helios.master.reaper.ExpiredJobReaper; import com.spotify.helios.master.reaper.JobHistoryReaper; import com.spotify.helios.master.reaper.OldJobReaper; import com.spotify.helios.master.resources.DeploymentGroupResource; import com.spotify.helios.master.resources.HistoryResource; import com.spotify.helios.master.resources.HostsResource; import com.spotify.helios.master.resources.JobsResource; import com.spotify.helios.master.resources.MastersResource; import com.spotify.helios.master.resources.VersionResource; import com.spotify.helios.rollingupdate.RollingUpdateService; import com.spotify.helios.serviceregistration.ServiceRegistrar; import com.spotify.helios.serviceregistration.ServiceRegistration; import com.spotify.helios.servicescommon.FastForwardConfig; import com.spotify.helios.servicescommon.KafkaClientProvider; import com.spotify.helios.servicescommon.KafkaSender; import com.spotify.helios.servicescommon.ManagedStatsdReporter; import com.spotify.helios.servicescommon.ReactorFactory; import com.spotify.helios.servicescommon.RiemannFacade; import com.spotify.helios.servicescommon.RiemannHeartBeat; import com.spotify.helios.servicescommon.RiemannSupport; import com.spotify.helios.servicescommon.ServiceUtil; import com.spotify.helios.servicescommon.ZooKeeperRegistrarService; import com.spotify.helios.servicescommon.coordination.CuratorClientFactory; import com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient; import com.spotify.helios.servicescommon.coordination.Paths; import com.spotify.helios.servicescommon.coordination.ZooKeeperClient; import com.spotify.helios.servicescommon.coordination.ZooKeeperClientProvider; import com.spotify.helios.servicescommon.coordination.ZooKeeperHealthChecker; import com.spotify.helios.servicescommon.coordination.ZooKeeperModelReporter; import com.spotify.helios.servicescommon.statistics.FastForwardReporter; import com.spotify.helios.servicescommon.statistics.Metrics; import com.spotify.helios.servicescommon.statistics.MetricsImpl; import com.spotify.helios.servicescommon.statistics.NoopMetrics; import ch.qos.logback.access.jetty.RequestLogImpl; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.jvm.GarbageCollectorMetricSet; import com.codahale.metrics.jvm.MemoryUsageGaugeSet; import com.google.common.base.Strings; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.io.Resources; import com.google.common.util.concurrent.AbstractIdleService; import io.dropwizard.configuration.ConfigurationException; import io.dropwizard.jetty.GzipFilterFactory; import io.dropwizard.jetty.RequestLogFactory; import io.dropwizard.logging.AppenderFactory; import io.dropwizard.server.DefaultServerFactory; import io.dropwizard.setup.Environment; import org.apache.curator.RetryPolicy; import org.apache.curator.framework.AuthInfo; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.api.ACLProvider; import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.zookeeper.data.ACL; import org.eclipse.jetty.server.Handler; import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.handler.HandlerCollection; import org.eclipse.jetty.server.handler.RequestLogHandler; import org.eclipse.jetty.servlets.CrossOriginFilter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.TimeUnit; import javax.servlet.DispatcherType; import javax.servlet.FilterRegistration; /** * The Helios master service. */ public class MasterService extends AbstractIdleService { private static final Logger log = LoggerFactory.getLogger(MasterService.class); private static final String LOGBACK_ACCESS_CONFIG = "logback-access.xml"; private static final String LOGBACK_ACCESS_RESOURCE = "/" + LOGBACK_ACCESS_CONFIG; private final Server server; private final MasterConfig config; private final ServiceRegistrar registrar; private final ZooKeeperClient zooKeeperClient; private final ExpiredJobReaper expiredJobReaper; private final CuratorClientFactory curatorClientFactory; private final RollingUpdateService rollingUpdateService; private final Map<String, String> environmentVariables; private final Optional<DeadAgentReaper> agentReaper; private final Optional<OldJobReaper> oldJobReaper; private final Optional<JobHistoryReaper> jobHistoryReaper; private ZooKeeperRegistrarService zkRegistrar; /** * Create a new service instance. Initializes the control interface and the worker. * * @param config The service configuration. * @param environment The DropWizard environment. * @param curatorClientFactory The zookeeper curator factory. * @param environmentVariables Env vars * @throws ConfigurationException If there is a problem with the DropWizard configuration. * @throws IOException IOException * @throws InterruptedException InterruptedException */ public MasterService(final MasterConfig config, final Environment environment, final CuratorClientFactory curatorClientFactory, final Map<String, String> environmentVariables) throws ConfigurationException, IOException, InterruptedException { this.config = config; this.curatorClientFactory = curatorClientFactory; this.environmentVariables = environmentVariables; // Configure metrics final MetricRegistry metricsRegistry = environment.metrics(); metricsRegistry.registerAll(new GarbageCollectorMetricSet()); metricsRegistry.registerAll(new MemoryUsageGaugeSet()); final RiemannSupport riemannSupport = new RiemannSupport(metricsRegistry, config.getRiemannHostPort(), config.getName(), "helios-master"); final RiemannFacade riemannFacade = riemannSupport.getFacade(); log.info("Starting metrics"); final Metrics metrics; if (config.isInhibitMetrics()) { metrics = new NoopMetrics(); } else { metrics = new MetricsImpl(metricsRegistry, MetricsImpl.Type.MASTER); metrics.start(); environment.lifecycle().manage(riemannSupport); if (!Strings.isNullOrEmpty(config.getStatsdHostPort())) { environment.lifecycle() .manage(new ManagedStatsdReporter(config.getStatsdHostPort(), metricsRegistry)); } final FastForwardConfig ffwdConfig = config.getFfwdConfig(); if (ffwdConfig != null) { environment.lifecycle().manage(FastForwardReporter.create(metricsRegistry, ffwdConfig.getAddress(), ffwdConfig.getMetricKey(), ffwdConfig.getReportingIntervalSeconds())); } } // Set up the master model this.zooKeeperClient = setupZookeeperClient(config); final ZooKeeperModelReporter modelReporter = new ZooKeeperModelReporter(riemannFacade, metrics.getZooKeeperMetrics()); final ZooKeeperClientProvider zkClientProvider = new ZooKeeperClientProvider(zooKeeperClient, modelReporter); final KafkaClientProvider kafkaClientProvider = new KafkaClientProvider(config.getKafkaBrokers()); // Create state directory, if necessary final Path stateDirectory = config.getStateDirectory().toAbsolutePath().normalize(); if (!Files.exists(stateDirectory)) { try { Files.createDirectories(stateDirectory); } catch (IOException e) { log.error("Failed to create state directory: {}", stateDirectory, e); throw Throwables.propagate(e); } } // Make a KafkaProducer for events that can be serialized to an array of bytes, // and wrap it in our KafkaSender. final KafkaSender kafkaSender = new KafkaSender(kafkaClientProvider.getDefaultProducer()); final ZooKeeperMasterModel model = new ZooKeeperMasterModel(zkClientProvider, config.getName(), kafkaSender); final ZooKeeperHealthChecker zooKeeperHealthChecker = new ZooKeeperHealthChecker(zooKeeperClient, Paths.statusMasters(), riemannFacade, TimeUnit.MINUTES, 2); environment.lifecycle().manage(zooKeeperHealthChecker); environment.healthChecks().register("zookeeper", zooKeeperHealthChecker); // Report health checks as a gauge metric environment.healthChecks().getNames().forEach(name -> environment.metrics() .register("helios." + name + ".ok", new HealthCheckGauge(environment.healthChecks(), name))); environment.lifecycle().manage(new RiemannHeartBeat(TimeUnit.MINUTES, 2, riemannFacade)); // Set up service registrar this.registrar = createServiceRegistrar(config.getServiceRegistrarPlugin(), config.getServiceRegistryAddress(), config.getDomain()); // Set up reaping of expired jobs this.expiredJobReaper = ExpiredJobReaper.newBuilder().setMasterModel(model).build(); // Set up rolling update service final ReactorFactory reactorFactory = new ReactorFactory(); this.rollingUpdateService = new RollingUpdateService(model, reactorFactory); // Set up agent reaper (de-registering hosts that have been DOWN for more than X hours) if (config.getAgentReapingTimeout() > 0) { this.agentReaper = Optional.of(new DeadAgentReaper(model, config.getAgentReapingTimeout())); } else { log.info("Reaping of dead agents disabled"); this.agentReaper = Optional.empty(); } // Set up old job reaper (removes jobs not deployed anywhere and created more than X days ago) if (config.getJobRetention() > 0) { this.oldJobReaper = Optional.of(new OldJobReaper(model, config.getJobRetention())); } else { log.info("Reaping of old jobs disabled"); this.oldJobReaper = Optional.empty(); } // Set up job history reaper (removes histories whose corresponding job doesn't exist) if (config.isJobHistoryReapingEnabled()) { this.jobHistoryReaper = Optional .of(new JobHistoryReaper(model, zkClientProvider.get("jobHistoryReaper"))); } else { log.info("Reaping of orphaned jobs disabled"); this.jobHistoryReaper = Optional.empty(); } // Set up http server environment.servlets() .addFilter("VersionResponseFilter", new VersionResponseFilter(metrics.getMasterMetrics())) .addMappingForUrlPatterns(EnumSet.of(DispatcherType.REQUEST), true, "/*"); environment.jersey().register(new ReportingResourceMethodDispatchAdapter(metrics.getMasterMetrics())); environment.jersey() .register(new JobsResource(model, metrics.getMasterMetrics(), config.getWhitelistedCapabilities())); environment.jersey().register(new HistoryResource(model, metrics.getMasterMetrics())); environment.jersey().register(new HostsResource(model)); environment.jersey().register(new MastersResource(model)); environment.jersey().register(new VersionResource()); environment.jersey().register(new UserProvider()); environment.jersey().register(new DeploymentGroupResource(model)); final DefaultServerFactory serverFactory = ServiceUtil.createServerFactory(config.getHttpEndpoint(), config.getAdminEndpoint(), false); final RequestLogFactory requestLog = new RequestLogFactory(); requestLog.setAppenders(ImmutableList.<AppenderFactory>of()); serverFactory.setRequestLogFactory(requestLog); // Enable CORS headers final FilterRegistration.Dynamic cors = environment.servlets().addFilter("CORS", CrossOriginFilter.class); // Configure CORS parameters cors.setInitParameter("allowedOrigins", "*"); cors.setInitParameter("allowedHeaders", "X-Requested-With,Content-Type,Accept,Origin"); cors.setInitParameter("allowedMethods", "OPTIONS,GET,PUT,POST,DELETE,HEAD"); // Add URL mapping cors.addMappingForUrlPatterns(EnumSet.allOf(DispatcherType.class), true, "/*"); // Enable gzip compression for POST and GET requests. Default is GET only. final GzipFilterFactory gzip = new GzipFilterFactory(); gzip.setIncludedMethods(ImmutableSet.of("GET", "POST")); serverFactory.setGzipFilterFactory(gzip); this.server = serverFactory.build(environment); setUpRequestLogging(stateDirectory); } private void setUpRequestLogging(final Path stateDirectory) { // Set up request logging final Handler originalHandler = server.getHandler(); final HandlerCollection handlerCollection; if (originalHandler instanceof HandlerCollection) { handlerCollection = (HandlerCollection) originalHandler; } else { handlerCollection = new HandlerCollection(); handlerCollection.addHandler(originalHandler); } final RequestLogHandler requestLogHandler = new RequestLogHandler(); final RequestLogImpl requestLog = new RequestLogImpl(); requestLog.setQuiet(true); if (stateDirectory.resolve(LOGBACK_ACCESS_CONFIG).toFile().exists()) { requestLog.setFileName(stateDirectory.resolve(LOGBACK_ACCESS_CONFIG).toString()); } else if (this.getClass().getResource(LOGBACK_ACCESS_RESOURCE) != null) { requestLog.setResource(LOGBACK_ACCESS_RESOURCE); } requestLogHandler.setRequestLog(requestLog); handlerCollection.addHandler(requestLogHandler); server.setHandler(handlerCollection); } @Override protected void startUp() throws Exception { logBanner(); if (!config.getNoZooKeeperMasterRegistration()) { zkRegistrar.startAsync().awaitRunning(); } expiredJobReaper.startAsync().awaitRunning(); rollingUpdateService.startAsync().awaitRunning(); agentReaper.ifPresent(reaper -> reaper.startAsync().awaitRunning()); oldJobReaper.ifPresent(reaper -> reaper.startAsync().awaitRunning()); jobHistoryReaper.ifPresent(reaper -> reaper.startAsync().awaitRunning()); try { server.start(); } catch (Exception e) { log.error("Unable to start server, shutting down", e); server.stop(); } final ServiceRegistration serviceRegistration = ServiceRegistration.newBuilder().endpoint("helios", "http", config.getHttpEndpoint().getPort(), config.getDomain(), config.getName()).build(); registrar.register(serviceRegistration); } @Override protected void shutDown() throws Exception { server.stop(); server.join(); registrar.close(); agentReaper.ifPresent(reaper -> reaper.stopAsync().awaitTerminated()); oldJobReaper.ifPresent(reaper -> reaper.stopAsync().awaitTerminated()); jobHistoryReaper.ifPresent(reaper -> reaper.stopAsync().awaitTerminated()); rollingUpdateService.stopAsync().awaitTerminated(); expiredJobReaper.stopAsync().awaitTerminated(); zkRegistrar.stopAsync().awaitTerminated(); zooKeeperClient.close(); } private void logBanner() { try { final String banner = Resources.toString(Resources.getResource("master-banner.txt"), UTF_8); log.info("\n{}", banner); } catch (IllegalArgumentException | IOException ignored) { } } /** * Create a Zookeeper client and create the control and state nodes if needed. * * @param config The service configuration. * @return A zookeeper client. */ private ZooKeeperClient setupZookeeperClient(final MasterConfig config) { ACLProvider aclProvider = null; List<AuthInfo> authorization = null; final String masterUser = config.getZookeeperAclMasterUser(); final String masterPassword = config.getZooKeeperAclMasterPassword(); final String agentUser = config.getZookeeperAclAgentUser(); final String agentDigest = config.getZooKeeperAclAgentDigest(); if (!isNullOrEmpty(masterPassword)) { if (isNullOrEmpty(masterUser)) { throw new HeliosRuntimeException("Master username must be set if a password is set"); } authorization = Lists.newArrayList( new AuthInfo("digest", String.format("%s:%s", masterUser, masterPassword).getBytes())); } if (config.isZooKeeperEnableAcls()) { if (isNullOrEmpty(masterUser) || isNullOrEmpty(masterPassword)) { throw new HeliosRuntimeException( "ZooKeeper ACLs enabled but master username and/or password not set"); } if (isNullOrEmpty(agentUser) || isNullOrEmpty(agentDigest)) { throw new HeliosRuntimeException("ZooKeeper ACLs enabled but agent username and/or digest not set"); } aclProvider = heliosAclProvider(masterUser, digest(masterUser, masterPassword), agentUser, agentDigest); } final RetryPolicy zooKeeperRetryPolicy = new ExponentialBackoffRetry(1000, 3); final CuratorFramework curator = curatorClientFactory.newClient(config.getZooKeeperConnectionString(), config.getZooKeeperSessionTimeoutMillis(), config.getZooKeeperConnectionTimeoutMillis(), zooKeeperRetryPolicy, aclProvider, authorization); final ZooKeeperClient client = new DefaultZooKeeperClient(curator, config.getZooKeeperClusterId()); client.start(); zkRegistrar = ZooKeeperRegistrarService.newBuilder().setZooKeeperClient(client) .setZooKeeperRegistrar(new MasterZooKeeperRegistrar(config.getName())).build(); // TODO: This is perhaps not the correct place to do this - but at present it's the only // place where we have access to the ACL provider. if (aclProvider != null) { // Set ACLs on the ZK root, if they aren't already set correctly. // This is handy since it avoids having to manually do this operation when setting up // a new ZK cluster. // Note that this is slightly racey -- if two masters start at the same time both might // attempt to update the ACLs but only one will succeed. That said, it's unlikely and the // effects are limited to a spurious log line. try { final List<ACL> curAcls = client.getAcl("/"); final List<ACL> wantedAcls = aclProvider.getAclForPath("/"); if (!Sets.newHashSet(curAcls).equals(Sets.newHashSet(wantedAcls))) { log.info("Current ACL's on the zookeeper root node differ from desired, updating: {} -> {}", curAcls, wantedAcls); client.getCuratorFramework().setACL().withACL(wantedAcls).forPath("/"); } } catch (Exception e) { log.error("Failed to get/set ACLs on the zookeeper root node", e); } } return client; } }