Java tutorial
/* * Copyright 2017-2019 Crown Copyright * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package uk.gov.gchq.gaffer.slider.util; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.client.api.YarnClientApplication; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.log4j.Logger; import org.apache.slider.api.ResourceKeys; import org.apache.slider.core.conf.ConfTree; import org.apache.slider.core.persist.ConfTreeSerDeser; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * This utility will auto-generate configuration files (based on a template) that can be used with Apache Slider to * deploy Gaffer onto an existing cluster managed by YARN. The configuration will be generated so that the deployed * Gaffer instance will use a specified proportion (default: 85%) of the resources (cpu, mem) available via YARN. * <p> * To run: * <pre> * {@code java -cp slider-$VERSION.jar:<slider-install>/lib/*:$(hadoop classpath) \ * uk.gov.gchq.gaffer.slider.util.AppConfigGenerator appConfig-default.json appConfig.json resources.json} * </pre> */ public class AppConfigGenerator implements Runnable { private static final Logger LOGGER = Logger.getLogger(AppConfigGenerator.class); static class SliderAppConfig { private final ConfTree appConfig; private final ConfTree resources; SliderAppConfig(final ConfTree appConfig, final ConfTree resources) { this.appConfig = appConfig; this.resources = resources; } ConfTree getAppConfig() { return this.appConfig; } ConfTree getResources() { return this.resources; } } static class AvailableResources { private final int maxCores; private final int maxMemory; private final int nodeCount; AvailableResources(final int maxCores, final int maxMemory, final int nodeCount) { this.maxCores = maxCores; this.maxMemory = maxMemory; this.nodeCount = nodeCount; } private int getMaxCores() { return this.maxCores; } private int getMaxMemory() { return this.maxMemory; } private int getNodeCount() { return this.nodeCount; } @Override public String toString() { return "AvailableResources{" + "maxCores=" + this.maxCores + ", maxMemory=" + this.maxMemory + ", nodeCount=" + this.nodeCount + '}'; } } enum COMPONENT { ACCUMULO_MASTER, ACCUMULO_TSERVER, ACCUMULO_MONITOR, ACCUMULO_GC, ACCUMULO_TRACER, ACCUMULO_PROXY } static final String ACCUMULO_TSERVER_NATIVE_MAPS_ENABLED_PROPERTY = "site.accumulo-site.tserver.memory.maps.native.enabled"; static final String ACCUMULO_TSERVER_MAX_MEMORY_PROPERTY = "site.accumulo-site.tserver.memory.maps.max"; static final String ACCUMULO_TSERVER_CONCURRENT_MINC_PROPERTY = "site.accumulo-site.tserver.compaction.minor.concurrent.max"; static final String ACCUMULO_TSERVER_CONCURRENT_MAJC_PROPERTY = "site.accumulo-site.tserver.compaction.major.concurrent.max"; static final Map<COMPONENT, String> ACCUMULO_COMPONENT_PROPERTY_LOOKUP = new HashMap<>(); static { ACCUMULO_COMPONENT_PROPERTY_LOOKUP.put(COMPONENT.ACCUMULO_MASTER, "site.accumulo-env.master_heapsize"); ACCUMULO_COMPONENT_PROPERTY_LOOKUP.put(COMPONENT.ACCUMULO_TSERVER, "site.accumulo-env.tserver_heapsize"); ACCUMULO_COMPONENT_PROPERTY_LOOKUP.put(COMPONENT.ACCUMULO_MONITOR, "site.accumulo-env.monitor_heapsize"); ACCUMULO_COMPONENT_PROPERTY_LOOKUP.put(COMPONENT.ACCUMULO_GC, "site.accumulo-env.gc_heapsize"); ACCUMULO_COMPONENT_PROPERTY_LOOKUP.put(COMPONENT.ACCUMULO_TRACER, "site.accumulo-env.other_heapsize"); ACCUMULO_COMPONENT_PROPERTY_LOOKUP.put(COMPONENT.ACCUMULO_PROXY, "site.accumulo-env.other_heapsize"); } // Command line argument parsing @Parameter(names = "-t", description = "The number of tablet servers to deploy per YARN Node") private int tserversPerNode = 1; @Parameter(names = { "-c", "--cores" }, description = "The number of cores to be used by each non-tablet server component") private int componentCores = 1; @Parameter(names = { "-m", "--mem" }, description = "The default amount of memory to be used by each component instance (in MB)") private int defaultComponentMemory = 1024; @Parameter(names = { "-u", "--usage" }, description = "The proportion of the cluster resources this application should be configured to use (as a percentage)") private int clusterUsagePercent = 85; @Parameter(names = { "-r", "--heap-container-ratio" }, description = "The ratio that should be used to calculate the size of the requests for memory from YARN, based off the Java heap size for each component") private float heapSizeToContainerMemoryRatio = 1.3f; @Parameter(names = "-s", description = "Generate the allocation so that all components could fit on a single node, otherwise the allocation will try to use as much of the resources available across the cluster as possible") private boolean singleNode = false; @Parameter(names = { "-h", "--help" }, description = "Displays this help text", help = true) private boolean help = false; @Parameter(description = "<appConfigTemplate> <appConfigOutputPath> <resourcesOutputPath>") private List<String> files = new ArrayList<>(); private String initialAppConfigPath; private String appConfigOutputPath; private String resourcesOutputPath; public void setTserversPerNode(final int tserversPerNode) { this.tserversPerNode = tserversPerNode; } public void setComponentCores(final int componentCores) { this.componentCores = componentCores; } public void setDefaultComponentMemory(final int defaultComponentMemory) { this.defaultComponentMemory = defaultComponentMemory; } public void setClusterUsagePercent(final int clusterUsagePercent) { this.clusterUsagePercent = clusterUsagePercent; } public void setSingleNode(final boolean singleNode) { this.singleNode = singleNode; } private void validateArguments() throws Exception { this.help = true; if (this.tserversPerNode <= 0) { throw new Exception("A minimum of 1 tablet server must be provisioned on each node!"); } else if (this.componentCores <= 0) { throw new Exception("Each component must be provisioned with at least 1 core!"); } else if (this.defaultComponentMemory <= 0) { throw new Exception("Components can't be provisioned with a negative amount of memory!"); } else if (this.clusterUsagePercent <= 0 || this.clusterUsagePercent > 100) { throw new Exception("Cluster usage must be provided as a percentage!"); } else if (this.files.size() != 3) { throw new Exception("Invalid number of arguments!"); } else { this.initialAppConfigPath = this.files.get(0); this.appConfigOutputPath = this.files.get(1); this.resourcesOutputPath = this.files.get(2); this.help = false; } } private int convertPropertyToNumBytes(final String value) { final String formattedValue = value.toLowerCase(); if (formattedValue.endsWith("g")) { return Integer.parseInt(formattedValue.substring(0, formattedValue.length() - 1)) * 1024; } else if (formattedValue.endsWith("m")) { return Integer.parseInt(formattedValue.substring(0, formattedValue.length() - 1)); } throw new NumberFormatException(String.format("Unable to convert %s to a number", value)); } private AvailableResources getYarnResources() throws IOException, YarnException { final Configuration config = new Configuration(); final YarnClient yarn = YarnClient.createYarnClient(); yarn.init(config); yarn.start(); // Query YARN to find out the largest container it is capable of scheduling final YarnClientApplication app = yarn.createApplication(); final Resource resources = app.getNewApplicationResponse().getMaximumResourceCapability(); // Also find out how many nodes there are in the cluster by asking for the number of registered Node Managers final YarnClusterMetrics metrics = yarn.getYarnClusterMetrics(); yarn.close(); return new AvailableResources(resources.getVirtualCores(), resources.getMemory(), metrics.getNumNodeManagers()); } private int getNativeMemoryMemoryRequirement(final ConfTree appConfig) { final String isNativeMapEnabled = appConfig.global.get(ACCUMULO_TSERVER_NATIVE_MAPS_ENABLED_PROPERTY); if (Boolean.parseBoolean(isNativeMapEnabled)) { String maxMemProperty = appConfig.global.get(ACCUMULO_TSERVER_MAX_MEMORY_PROPERTY); return this.convertPropertyToNumBytes(maxMemProperty); } return 0; } /** * Calculates how many cores and how much memory should be requested by each Accumulo Tablet server, so that as much * of the cpu and mem available in the cluster is used as possible. Note that in most cases this means your Accumulo * instance will be unable to tolerate the loss of any YARN Node Managers. * @param app Current application config * @param availableResources Resources (cpu, mem, nodes) available in the YARN cluster * @return SliderAppConfig modified with the cpu and mem that each tablet server should request * @throws IOException Not enough resources available to be split across all the requested tablet servers */ private SliderAppConfig generateSliderAppConfigForMultiNode(final SliderAppConfig app, final AvailableResources availableResources) throws IOException { final ConfTree appConfig = app.getAppConfig(); final ConfTree resources = app.getResources(); int totalCoresAvailable = availableResources.getMaxCores() * availableResources.getNodeCount(); int totalMemoryAvailable = availableResources.getMaxMemory() * availableResources.getNodeCount(); totalCoresAvailable = Math.round((float) totalCoresAvailable * ((float) this.clusterUsagePercent / 100f)); totalMemoryAvailable = Math.round((float) totalMemoryAvailable * ((float) this.clusterUsagePercent / 100f)); LOGGER.info(String.format("Trying to use %s%% of available resources across cluster = cores: %s mem: %s", this.clusterUsagePercent, totalCoresAvailable, totalMemoryAvailable)); // Slider Application Master totalCoresAvailable -= ResourceKeys.DEF_YARN_CORES; totalMemoryAvailable -= ResourceKeys.DEF_YARN_MEMORY; // Accumulo Components for (final String componentName : resources.components.keySet()) { if (!componentName.equals(COMPONENT.ACCUMULO_TSERVER.name())) { final Map<String, String> componentConfig = resources.components.get(componentName); final int instanceCount = Integer.parseInt(componentConfig.get(ResourceKeys.COMPONENT_INSTANCES)); final int cores = Integer.parseInt(componentConfig.get(ResourceKeys.YARN_CORES)); final int memory = Integer.parseInt(componentConfig.get(ResourceKeys.YARN_MEMORY)); totalCoresAvailable -= cores * instanceCount; totalMemoryAvailable -= memory * instanceCount; } } if (totalCoresAvailable <= 0 || totalMemoryAvailable <= 0) { throw new IOException(String.format("No resources left for any tablet servers! cores: %s memory: %s", totalCoresAvailable, totalMemoryAvailable)); } int tserverCores = totalCoresAvailable / (this.tserversPerNode * availableResources.getNodeCount()); int tserverMemory = totalMemoryAvailable / (this.tserversPerNode * availableResources.getNodeCount()); int tserverHeapSize = (int) Math.floor((tserverMemory - this.getNativeMemoryMemoryRequirement(appConfig)) / this.heapSizeToContainerMemoryRatio); if (tserverCores <= 0 || tserverMemory <= 0 || tserverHeapSize <= 0) { throw new IOException(String.format( "Not enough available resources to deploy %s tablet servers per node, only cores: %s memory: %s available across the cluster!", this.tserversPerNode, totalCoresAvailable, totalMemoryAvailable)); } final Map<String, String> tabletServerConfig = resources.components.get(COMPONENT.ACCUMULO_TSERVER.name()); tabletServerConfig.put(ResourceKeys.COMPONENT_INSTANCES, String.valueOf(availableResources.getNodeCount() * this.tserversPerNode)); tabletServerConfig.put(ResourceKeys.YARN_CORES, String.valueOf(tserverCores)); tabletServerConfig.put(ResourceKeys.YARN_MEMORY, String.valueOf(tserverMemory)); appConfig.global.put(ACCUMULO_COMPONENT_PROPERTY_LOOKUP.get(COMPONENT.ACCUMULO_TSERVER), String.valueOf(tserverHeapSize) + "m"); return app; } /** * Calculates how many cores and how much memory should be requested by each Accumulo Tablet server, so that (if * required) all Accumulo components could be deployed on a single YARN node. This ensures that, as long as there is * at least one YARN Node Manager available, it will be possible for your Accumulo instance to be deployed. * (NB: This only holds if all the Node Managers in your YARN cluster have the same availability of cpu and mem) * @param app Current application config * @param availableResources Resources (cpu, mem, nodes) available in the YARN cluster * @return SliderAppConfig modified with the cpu and mem that each tablet server should request * @throws IOException Not enough resources available to be split across all the requested tablet servers */ private SliderAppConfig generateSliderAppConfigForSingleNode(final SliderAppConfig app, final AvailableResources availableResources) throws IOException { final ConfTree appConfig = app.getAppConfig(); final ConfTree resources = app.getResources(); int coresRemainingPerNode = Math .round((float) availableResources.getMaxCores() * ((float) this.clusterUsagePercent / 100f)); int memoryRemainingPerNode = Math .round((float) availableResources.getMaxMemory() * ((float) this.clusterUsagePercent / 100f)); LOGGER.info(String.format("Trying to use %s%% of available resources per node = cores: %s mem: %s", this.clusterUsagePercent, coresRemainingPerNode, memoryRemainingPerNode)); // Slider Application Master coresRemainingPerNode -= ResourceKeys.DEF_YARN_CORES; memoryRemainingPerNode -= ResourceKeys.DEF_YARN_MEMORY; // Accumulo Components for (final String componentName : resources.components.keySet()) { if (!componentName.equals(COMPONENT.ACCUMULO_TSERVER.name())) { Map<String, String> componentConfig = resources.components.get(componentName); final int instanceCount = Integer.parseInt(componentConfig.get(ResourceKeys.COMPONENT_INSTANCES)); final int cores = Integer.parseInt(componentConfig.get(ResourceKeys.YARN_CORES)); final int memory = Integer.parseInt(componentConfig.get(ResourceKeys.YARN_MEMORY)); coresRemainingPerNode -= cores * instanceCount; memoryRemainingPerNode -= memory * instanceCount; } } if (coresRemainingPerNode <= 0 || memoryRemainingPerNode <= 0) { throw new IOException(String.format("No resources left for any tablet servers! cores: %s memory: %s", coresRemainingPerNode, memoryRemainingPerNode)); } int tserverCores = coresRemainingPerNode / (this.tserversPerNode * availableResources.getNodeCount()); int tserverMemory = memoryRemainingPerNode / (this.tserversPerNode * availableResources.getNodeCount()); int tserverHeapSize = (int) Math.floor((tserverMemory - this.getNativeMemoryMemoryRequirement(appConfig)) / this.heapSizeToContainerMemoryRatio); if (tserverCores <= 0 || tserverMemory <= 0 || tserverHeapSize <= 0) { throw new IOException(String.format( "Not enough available resources to deploy %s tablet servers per node, only cores: %s memory: %s available per node!", this.tserversPerNode, coresRemainingPerNode, memoryRemainingPerNode)); } final Map<String, String> tabletServerConfig = resources.components.get(COMPONENT.ACCUMULO_TSERVER.name()); tabletServerConfig.put(ResourceKeys.COMPONENT_INSTANCES, String.valueOf(availableResources.getNodeCount() * this.tserversPerNode)); tabletServerConfig.put(ResourceKeys.YARN_CORES, String.valueOf(tserverCores)); tabletServerConfig.put(ResourceKeys.YARN_MEMORY, String.valueOf(tserverMemory)); appConfig.global.put(ACCUMULO_COMPONENT_PROPERTY_LOOKUP.get(COMPONENT.ACCUMULO_TSERVER), String.valueOf(tserverHeapSize) + "m"); return app; } public SliderAppConfig generateSliderAppConfig(final ConfTree appConfig, final AvailableResources availableResources) throws IOException { final ConfTree resources = new ConfTree(); // Generate baseline YARN resource config for each Accumulo component for (int i = 0; i < COMPONENT.values().length; i++) { final COMPONENT component = COMPONENT.values()[i]; final Map<String, String> componentConfig = new HashMap<>(); componentConfig.put(ResourceKeys.COMPONENT_INSTANCES, "1"); componentConfig.put(ResourceKeys.COMPONENT_PRIORITY, String.valueOf(component.ordinal() + 1)); componentConfig.put(ResourceKeys.YARN_CORES, String.valueOf(this.componentCores)); // Start with the default memory usage for each non-tablet server component int componentMemory = this.defaultComponentMemory; // Infer how much memory is required for the component based on what its heapsize is set to final String propertyName = ACCUMULO_COMPONENT_PROPERTY_LOOKUP.get(component); if (appConfig.global.containsKey(propertyName)) { final String propertyValue = appConfig.global.get(propertyName); componentMemory = (int) Math .ceil(this.convertPropertyToNumBytes(propertyValue) * this.heapSizeToContainerMemoryRatio); } componentConfig.put(ResourceKeys.YARN_MEMORY, String.valueOf(componentMemory)); resources.components.put(component.name(), componentConfig); } // Common Config // Allow minc and majc to max out the CPU on a YARN node appConfig.global.put(ACCUMULO_TSERVER_CONCURRENT_MINC_PROPERTY, String.valueOf(availableResources.getMaxCores())); appConfig.global.put(ACCUMULO_TSERVER_CONCURRENT_MAJC_PROPERTY, String.valueOf(availableResources.getMaxCores())); // Two possible resource allocation schemes for Tablet Servers: if (this.singleNode) { return this.generateSliderAppConfigForSingleNode(new SliderAppConfig(appConfig, resources), availableResources); } else { return this.generateSliderAppConfigForMultiNode(new SliderAppConfig(appConfig, resources), availableResources); } } @Override public void run() { try { final ConfTreeSerDeser parser = new ConfTreeSerDeser(); final ConfTree initialAppConfig = parser.fromFile(new File(this.initialAppConfigPath)); LOGGER.info("Initial appConfig.json:"); LOGGER.info(initialAppConfig); AvailableResources availableClusterResources = null; availableClusterResources = this.getYarnResources(); LOGGER.info("Available Cluster Resources:"); LOGGER.info(availableClusterResources); // We query twice because for some reason YARN on EMR lies about the max resources // available per node the first time round :S // TODO: Work out why this is the case! availableClusterResources = this.getYarnResources(); LOGGER.info("Available Cluster Resources:"); LOGGER.info(availableClusterResources); final SliderAppConfig config = this.generateSliderAppConfig(initialAppConfig, availableClusterResources); LOGGER.info("Generated appConfig.json:"); LOGGER.info(config.getAppConfig()); LOGGER.info("Generated resources.json:"); LOGGER.info(config.getResources()); parser.save(config.getAppConfig(), new File(this.appConfigOutputPath)); parser.save(config.getResources(), new File(this.resourcesOutputPath)); } catch (final YarnException | IOException e) { throw new RuntimeException(e); } } public static void main(final String[] args) { final AppConfigGenerator generator = new AppConfigGenerator(); final JCommander argParser = new JCommander(generator, args); argParser.setProgramName(AppConfigGenerator.class.getSimpleName()); try { generator.validateArguments(); } catch (final Exception e) { LOGGER.error(e.getMessage()); } if (generator.help) { argParser.usage(); System.exit(1); } else { generator.run(); } } }