Example usage for org.apache.hadoop.mapreduce Cluster getClusterStatus

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Cluster getClusterStatus.

Prototype

public ClusterMetrics getClusterStatus() throws IOException, InterruptedException

Source Link

Document

Get current cluster status.

Usage

From source file:org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer.java

License:Apache License

@Override
public int run(final String[] args) {
    final Storage storage = FileSystemStorage.open(this.giraphConfiguration);
    storage.rm(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION));
    this.giraphConfiguration.setBoolean(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES,
            this.persist.equals(Persist.EDGES));
    try {/*from  w ww.java  2s.co m*/
        // store vertex and edge filters (will propagate down to native InputFormat or else GiraphVertexInputFormat will process)
        final BaseConfiguration apacheConfiguration = new BaseConfiguration();
        apacheConfiguration.setDelimiterParsingDisabled(true);
        GraphFilterAware.storeGraphFilter(apacheConfiguration, this.giraphConfiguration, this.graphFilter);

        // it is possible to run graph computer without a vertex program (and thus, only map reduce jobs if they exist)
        if (null != this.vertexProgram) {
            // a way to verify in Giraph whether the traversal will go over the wire or not
            try {
                VertexProgram.createVertexProgram(this.hadoopGraph,
                        ConfUtil.makeApacheConfiguration(this.giraphConfiguration));
            } catch (final IllegalStateException e) {
                if (e.getCause() instanceof NumberFormatException)
                    throw new NotSerializableException(
                            "The provided traversal is not serializable and thus, can not be distributed across the cluster");
            }
            // remove historic combiners in configuration propagation (this occurs when job chaining)
            if (!this.vertexProgram.getMessageCombiner().isPresent())
                this.giraphConfiguration.unset(GiraphConstants.MESSAGE_COMBINER_CLASS.getKey());
            // split required workers across system (open map slots + max threads per machine = total amount of TinkerPop workers)
            if (!this.useWorkerThreadsInConfiguration) {
                final Cluster cluster = new Cluster(GiraphGraphComputer.this.giraphConfiguration);
                int totalMappers = cluster.getClusterStatus().getMapSlotCapacity() - 1; // 1 is needed for master
                cluster.close();
                if (this.workers <= totalMappers) {
                    this.giraphConfiguration.setWorkerConfiguration(this.workers, this.workers, 100.0F);
                    this.giraphConfiguration.setNumComputeThreads(1);
                } else {
                    if (totalMappers == 0)
                        totalMappers = 1; // happens in local mode
                    int threadsPerMapper = Long
                            .valueOf(Math.round((double) this.workers / (double) totalMappers)).intValue(); // TODO: need to find least common denominator
                    this.giraphConfiguration.setWorkerConfiguration(totalMappers, totalMappers, 100.0F);
                    this.giraphConfiguration.setNumComputeThreads(threadsPerMapper);
                }
            }
            // prepare the giraph vertex-centric computing job
            final GiraphJob job = new GiraphJob(this.giraphConfiguration,
                    Constants.GREMLIN_HADOOP_GIRAPH_JOB_PREFIX + this.vertexProgram);
            job.getInternalJob().setJarByClass(GiraphGraphComputer.class);
            this.logger.info(Constants.GREMLIN_HADOOP_GIRAPH_JOB_PREFIX + this.vertexProgram);
            // handle input paths (if any)
            String inputLocation = this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION, null);
            if (null != inputLocation && FileInputFormat.class.isAssignableFrom(this.giraphConfiguration
                    .getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class))) {
                inputLocation = Constants.getSearchGraphLocation(inputLocation, storage)
                        .orElse(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION));
                FileInputFormat.setInputPaths(job.getInternalJob(), new Path(inputLocation));
            }
            // handle output paths (if any)
            String outputLocation = this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION,
                    null);
            if (null != outputLocation && FileOutputFormat.class.isAssignableFrom(this.giraphConfiguration
                    .getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, OutputFormat.class))) {
                outputLocation = Constants.getGraphLocation(
                        this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION));
                FileOutputFormat.setOutputPath(job.getInternalJob(), new Path(outputLocation));
            }
            // execute the job and wait until it completes (if it fails, throw an exception)
            if (!job.run(true))
                throw new IllegalStateException(
                        "The GiraphGraphComputer job failed -- aborting all subsequent MapReduce jobs: "
                                + job.getInternalJob().getStatus().getFailureInfo());
            // add vertex program memory values to the return memory
            for (final MemoryComputeKey memoryComputeKey : this.vertexProgram.getMemoryComputeKeys()) {
                if (!memoryComputeKey.isTransient() && storage.exists(Constants.getMemoryLocation(
                        this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                        memoryComputeKey.getKey()))) {
                    final ObjectWritableIterator iterator = new ObjectWritableIterator(this.giraphConfiguration,
                            new Path(Constants.getMemoryLocation(
                                    this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                                    memoryComputeKey.getKey())));
                    if (iterator.hasNext()) {
                        this.memory.set(memoryComputeKey.getKey(), iterator.next().getValue());
                    }
                    // vertex program memory items are not stored on disk
                    storage.rm(Constants.getMemoryLocation(
                            this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                            memoryComputeKey.getKey()));
                }
            }
            final Path path = new Path(Constants.getMemoryLocation(
                    this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                    Constants.HIDDEN_ITERATION));
            this.memory.setIteration(
                    (Integer) new ObjectWritableIterator(this.giraphConfiguration, path).next().getValue());
            storage.rm(Constants.getMemoryLocation(
                    this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                    Constants.HIDDEN_ITERATION));
        }
        // do map reduce jobs
        this.giraphConfiguration.setBoolean(Constants.GREMLIN_HADOOP_GRAPH_READER_HAS_EDGES,
                this.giraphConfiguration.getBoolean(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES, true));
        for (final MapReduce mapReduce : this.mapReducers) {
            this.memory.addMapReduceMemoryKey(mapReduce);
            MapReduceHelper.executeMapReduceJob(mapReduce, this.memory, this.giraphConfiguration);
        }

        // if no persistence, delete the graph and memory output
        if (this.persist.equals(Persist.NOTHING))
            storage.rm(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION));
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
    return 0;
}