Example usage for org.apache.hadoop.mapreduce Cluster close

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Cluster close.

Prototype

public synchronized void close() throws IOException 

Document

Close the Cluster.
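
Before the full examples below, here is a minimal sketch of the typical pattern: construct a Cluster from a Configuration, query it, and close it in a finally block so the underlying client is released even if a query throws. The class name ClusterCloseExample and the printed message are illustrative only; the Cluster and Configuration APIs are Hadoop's.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.JobStatus;

public class ClusterCloseExample {
    public static void main(String[] args) throws IOException, InterruptedException {
        // Construct a client-side view of the cluster from the default configuration.
        Cluster cluster = new Cluster(new Configuration());
        try {
            // Any queries against the cluster go here.
            JobStatus[] statuses = cluster.getAllJobStatuses();
            System.out.println("Jobs known to the cluster: " + (statuses == null ? 0 : statuses.length));
        } finally {
            // Always release the cluster's client connection.
            cluster.close();
        }
    }
}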

Usage

From source file: com.cloudera.oryx.computation.common.DistributedGenerationRunner.java

License: Open Source License

private static Collection<String> find(String instanceDir) throws IOException, InterruptedException {
    Collection<String> result = Lists.newArrayList();
    // This is where we will see Hadoop config problems first, so log extra info
    Cluster cluster;
    try {
        cluster = new Cluster(OryxConfiguration.get());
    } catch (IOException ioe) {
        log.error("Unable to init the Hadoop cluster. Check that an MR2, not MR1, cluster is configured.");
        throw ioe;
    }
    try {
        JobStatus[] statuses = cluster.getAllJobStatuses();
        if (statuses != null) {
            for (JobStatus jobStatus : statuses) {
                JobStatus.State state = jobStatus.getState();
                if (state == JobStatus.State.RUNNING || state == JobStatus.State.PREP) {
                    Job job = cluster.getJob(jobStatus.getJobID());
                    if (job != null) {
                        String jobName = job.getJobName();
                        log.info("Found running job {}", jobName);
                        if (jobName.startsWith("Oryx-" + instanceDir + '-')) {
                            result.add(jobName);
                        }
                    }
                }
            }
        }
    } finally {
        cluster.close();
    }
    return result;
}
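
Note that the Cluster is constructed outside the try block, so the close() call in the finally clause runs only once construction has succeeded; if getAllJobStatuses() or getJob() later throws, the cluster handle is still released.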

From source file: com.cloudera.oryx.computation.common.JobStep.java

License: Open Source License

private StepStatus determineStatus() throws IOException, InterruptedException {
    JobContext job = getJob();
    if (job == null) {
        return StepStatus.COMPLETED;
    }
    Cluster cluster = new Cluster(getConf());
    try {
        JobID jobID = job.getJobID();
        if (jobID == null) {
            return StepStatus.PENDING;
        }
        Job runningJob = cluster.getJob(jobID);
        if (runningJob == null) {
            return StepStatus.PENDING;
        }
        JobStatus.State state = runningJob.getJobState();
        switch (state) {
        case PREP:
            return StepStatus.PENDING;
        case RUNNING:
            return StepStatus.RUNNING;
        case SUCCEEDED:
            return StepStatus.COMPLETED;
        case FAILED:
            return StepStatus.FAILED;
        case KILLED:
            return StepStatus.CANCELLED;
        }
        throw new IllegalArgumentException("Unknown Hadoop job state " + state);
    } finally {
        cluster.close();
    }
}
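
Here every switch arm returns, yet close() still runs on each exit path because it sits in a finally block, so the Cluster is released whether the method returns a StepStatus or throws for an unrecognized job state.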

From source file: com.cloudera.oryx.computation.common.JobStep.java

License: Open Source License

/**
 * @return three progress values, in [0,1], as a {@code float[]}, representing setup, mapper and reducer progress
 */
private float[] determineProgresses() throws IOException, InterruptedException {
    if (exec == null) {
        return null;
    }
    Cluster cluster = new Cluster(getConf());
    try {
        JobID jobID = getJob().getJobID();
        if (jobID == null) {
            return null;
        }
        Job runningJob = cluster.getJob(jobID);
        if (runningJob == null) {
            return null;
        }

        return new float[] { runningJob.setupProgress(), runningJob.mapProgress(),
                runningJob.reduceProgress() };
    } finally {
        cluster.close();
    }
}
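
As in determineStatus() above, the early returns inside the try block all pass through the finally clause, so a Cluster opened just to read progress values is never leaked.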

From source file: org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer.java

License: Apache License

@Override
public int run(final String[] args) {
    final Storage storage = FileSystemStorage.open(this.giraphConfiguration);
    storage.rm(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION));
    this.giraphConfiguration.setBoolean(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES,
            this.persist.equals(Persist.EDGES));
    try {
        // store vertex and edge filters (will propagate down to native InputFormat or else GiraphVertexInputFormat will process)
        final BaseConfiguration apacheConfiguration = new BaseConfiguration();
        apacheConfiguration.setDelimiterParsingDisabled(true);
        GraphFilterAware.storeGraphFilter(apacheConfiguration, this.giraphConfiguration, this.graphFilter);

        // it is possible to run graph computer without a vertex program (and thus, only map reduce jobs if they exist)
        if (null != this.vertexProgram) {
            // a way to verify in Giraph whether the traversal will go over the wire or not
            try {
                VertexProgram.createVertexProgram(this.hadoopGraph,
                        ConfUtil.makeApacheConfiguration(this.giraphConfiguration));
            } catch (final IllegalStateException e) {
                if (e.getCause() instanceof NumberFormatException)
                    throw new NotSerializableException(
                            "The provided traversal is not serializable and thus, can not be distributed across the cluster");
            }
            // remove historic combiners in configuration propagation (this occurs when job chaining)
            if (!this.vertexProgram.getMessageCombiner().isPresent())
                this.giraphConfiguration.unset(GiraphConstants.MESSAGE_COMBINER_CLASS.getKey());
            // split required workers across system (open map slots + max threads per machine = total amount of TinkerPop workers)
            if (!this.useWorkerThreadsInConfiguration) {
                final Cluster cluster = new Cluster(GiraphGraphComputer.this.giraphConfiguration);
                int totalMappers = cluster.getClusterStatus().getMapSlotCapacity() - 1; // 1 is needed for master
                cluster.close();
                if (this.workers <= totalMappers) {
                    this.giraphConfiguration.setWorkerConfiguration(this.workers, this.workers, 100.0F);
                    this.giraphConfiguration.setNumComputeThreads(1);
                } else {
                    if (totalMappers == 0)
                        totalMappers = 1; // happens in local mode
                    int threadsPerMapper = Long
                            .valueOf(Math.round((double) this.workers / (double) totalMappers)).intValue(); // TODO: need to find least common denominator
                    this.giraphConfiguration.setWorkerConfiguration(totalMappers, totalMappers, 100.0F);
                    this.giraphConfiguration.setNumComputeThreads(threadsPerMapper);
                }
            }
            // prepare the giraph vertex-centric computing job
            final GiraphJob job = new GiraphJob(this.giraphConfiguration,
                    Constants.GREMLIN_HADOOP_GIRAPH_JOB_PREFIX + this.vertexProgram);
            job.getInternalJob().setJarByClass(GiraphGraphComputer.class);
            this.logger.info(Constants.GREMLIN_HADOOP_GIRAPH_JOB_PREFIX + this.vertexProgram);
            // handle input paths (if any)
            String inputLocation = this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION, null);
            if (null != inputLocation && FileInputFormat.class.isAssignableFrom(this.giraphConfiguration
                    .getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class))) {
                inputLocation = Constants.getSearchGraphLocation(inputLocation, storage)
                        .orElse(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION));
                FileInputFormat.setInputPaths(job.getInternalJob(), new Path(inputLocation));
            }
            // handle output paths (if any)
            String outputLocation = this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION,
                    null);
            if (null != outputLocation && FileOutputFormat.class.isAssignableFrom(this.giraphConfiguration
                    .getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, OutputFormat.class))) {
                outputLocation = Constants.getGraphLocation(
                        this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION));
                FileOutputFormat.setOutputPath(job.getInternalJob(), new Path(outputLocation));
            }
            // execute the job and wait until it completes (if it fails, throw an exception)
            if (!job.run(true))
                throw new IllegalStateException(
                        "The GiraphGraphComputer job failed -- aborting all subsequent MapReduce jobs: "
                                + job.getInternalJob().getStatus().getFailureInfo());
            // add vertex program memory values to the return memory
            for (final MemoryComputeKey memoryComputeKey : this.vertexProgram.getMemoryComputeKeys()) {
                if (!memoryComputeKey.isTransient() && storage.exists(Constants.getMemoryLocation(
                        this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                        memoryComputeKey.getKey()))) {
                    final ObjectWritableIterator iterator = new ObjectWritableIterator(this.giraphConfiguration,
                            new Path(Constants.getMemoryLocation(
                                    this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                                    memoryComputeKey.getKey())));
                    if (iterator.hasNext()) {
                        this.memory.set(memoryComputeKey.getKey(), iterator.next().getValue());
                    }
                    // vertex program memory items are not stored on disk
                    storage.rm(Constants.getMemoryLocation(
                            this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                            memoryComputeKey.getKey()));
                }
            }
            final Path path = new Path(Constants.getMemoryLocation(
                    this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                    Constants.HIDDEN_ITERATION));
            this.memory.setIteration(
                    (Integer) new ObjectWritableIterator(this.giraphConfiguration, path).next().getValue());
            storage.rm(Constants.getMemoryLocation(
                    this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION),
                    Constants.HIDDEN_ITERATION));
        }
        // do map reduce jobs
        this.giraphConfiguration.setBoolean(Constants.GREMLIN_HADOOP_GRAPH_READER_HAS_EDGES,
                this.giraphConfiguration.getBoolean(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES, true));
        for (final MapReduce mapReduce : this.mapReducers) {
            this.memory.addMapReduceMemoryKey(mapReduce);
            MapReduceHelper.executeMapReduceJob(mapReduce, this.memory, this.giraphConfiguration);
        }

        // if no persistence, delete the graph and memory output
        if (this.persist.equals(Persist.NOTHING))
            storage.rm(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION));
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
    return 0;
}
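
Unlike the previous examples, this code closes the Cluster inline, immediately after reading the map slot capacity, rather than in a finally block; the handle is short-lived and used only to size the worker configuration, though an exception from getClusterStatus() would leave it unclosed.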