List of usage examples for org.apache.hadoop.mapreduce Cluster close
public synchronized void close() throws IOException
Cluster
From source file: com.cloudera.oryx.computation.common.DistributedGenerationRunner.java
License:Open Source License
private static Collection<String> find(String instanceDir) throws IOException, InterruptedException { Collection<String> result = Lists.newArrayList(); // This is where we will see Hadoop config problems first, so log extra info Cluster cluster; try {//from w w w .j ava2 s . c o m cluster = new Cluster(OryxConfiguration.get()); } catch (IOException ioe) { log.error("Unable to init the Hadoop cluster. Check that an MR2, not MR1, cluster is configured."); throw ioe; } try { JobStatus[] statuses = cluster.getAllJobStatuses(); if (statuses != null) { for (JobStatus jobStatus : statuses) { JobStatus.State state = jobStatus.getState(); if (state == JobStatus.State.RUNNING || state == JobStatus.State.PREP) { Job job = cluster.getJob(jobStatus.getJobID()); if (job != null) { String jobName = job.getJobName(); log.info("Found running job {}", jobName); if (jobName.startsWith("Oryx-" + instanceDir + '-')) { result.add(jobName); } } } } } } finally { cluster.close(); } return result; }
From source file:com.cloudera.oryx.computation.common.JobStep.java
License:Open Source License
/**
 * Maps the state of this step's Hadoop job onto a {@code StepStatus}.
 *
 * <p>A step with no job is reported as {@code COMPLETED}; a job without an ID, or one the
 * cluster cannot find, is reported as {@code PENDING}.
 *
 * @return the status corresponding to the current job state
 * @throws IOException if the cluster cannot be contacted or closed
 * @throws InterruptedException declared by the underlying cluster calls
 * @throws IllegalArgumentException if the job reports a state not handled here
 */
private StepStatus determineStatus() throws IOException, InterruptedException {
  JobContext job = getJob();
  if (job == null) {
    return StepStatus.COMPLETED;
  }

  Cluster cluster = new Cluster(getConf());
  try {
    JobID jobID = job.getJobID();
    Job runningJob = jobID == null ? null : cluster.getJob(jobID);
    if (runningJob == null) {
      // Either no ID has been assigned yet or the cluster does not know the job.
      return StepStatus.PENDING;
    }

    JobStatus.State state = runningJob.getJobState();
    switch (state) {
      case PREP:
        return StepStatus.PENDING;
      case RUNNING:
        return StepStatus.RUNNING;
      case SUCCEEDED:
        return StepStatus.COMPLETED;
      case FAILED:
        return StepStatus.FAILED;
      case KILLED:
        return StepStatus.CANCELLED;
      default:
        throw new IllegalArgumentException("Unknown Hadoop job state " + state);
    }
  } finally {
    cluster.close();
  }
}
From source file:com.cloudera.oryx.computation.common.JobStep.java
License:Open Source License
/**
 * Reads setup, map and reduce progress of this step's Hadoop job.
 *
 * @return three progress values, in [0,1], as a {@code float[]}, representing setup, mapper and
 *  reducer progress; {@code null} when {@code exec} is unset, the job has no ID, or the cluster
 *  does not return a job for that ID
 * @throws IOException if the cluster cannot be contacted or closed
 * @throws InterruptedException declared by the underlying cluster calls
 */
private float[] determineProgresses() throws IOException, InterruptedException {
  if (exec == null) {
    return null;
  }

  Cluster cluster = new Cluster(getConf());
  try {
    JobID jobID = getJob().getJobID();
    if (jobID != null) {
      Job runningJob = cluster.getJob(jobID);
      if (runningJob != null) {
        float setup = runningJob.setupProgress();
        float map = runningJob.mapProgress();
        float reduce = runningJob.reduceProgress();
        return new float[] {setup, map, reduce};
      }
    }
    return null;
  } finally {
    cluster.close();
  }
}
From source file:org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer.java
License:Apache License
@Override public int run(final String[] args) { final Storage storage = FileSystemStorage.open(this.giraphConfiguration); storage.rm(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)); this.giraphConfiguration.setBoolean(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES, this.persist.equals(Persist.EDGES)); try {/*ww w . ja va2 s . com*/ // store vertex and edge filters (will propagate down to native InputFormat or else GiraphVertexInputFormat will process) final BaseConfiguration apacheConfiguration = new BaseConfiguration(); apacheConfiguration.setDelimiterParsingDisabled(true); GraphFilterAware.storeGraphFilter(apacheConfiguration, this.giraphConfiguration, this.graphFilter); // it is possible to run graph computer without a vertex program (and thus, only map reduce jobs if they exist) if (null != this.vertexProgram) { // a way to verify in Giraph whether the traversal will go over the wire or not try { VertexProgram.createVertexProgram(this.hadoopGraph, ConfUtil.makeApacheConfiguration(this.giraphConfiguration)); } catch (final IllegalStateException e) { if (e.getCause() instanceof NumberFormatException) throw new NotSerializableException( "The provided traversal is not serializable and thus, can not be distributed across the cluster"); } // remove historic combiners in configuration propagation (this occurs when job chaining) if (!this.vertexProgram.getMessageCombiner().isPresent()) this.giraphConfiguration.unset(GiraphConstants.MESSAGE_COMBINER_CLASS.getKey()); // split required workers across system (open map slots + max threads per machine = total amount of TinkerPop workers) if (!this.useWorkerThreadsInConfiguration) { final Cluster cluster = new Cluster(GiraphGraphComputer.this.giraphConfiguration); int totalMappers = cluster.getClusterStatus().getMapSlotCapacity() - 1; // 1 is needed for master cluster.close(); if (this.workers <= totalMappers) { this.giraphConfiguration.setWorkerConfiguration(this.workers, this.workers, 100.0F); 
this.giraphConfiguration.setNumComputeThreads(1); } else { if (totalMappers == 0) totalMappers = 1; // happens in local mode int threadsPerMapper = Long .valueOf(Math.round((double) this.workers / (double) totalMappers)).intValue(); // TODO: need to find least common denominator this.giraphConfiguration.setWorkerConfiguration(totalMappers, totalMappers, 100.0F); this.giraphConfiguration.setNumComputeThreads(threadsPerMapper); } } // prepare the giraph vertex-centric computing job final GiraphJob job = new GiraphJob(this.giraphConfiguration, Constants.GREMLIN_HADOOP_GIRAPH_JOB_PREFIX + this.vertexProgram); job.getInternalJob().setJarByClass(GiraphGraphComputer.class); this.logger.info(Constants.GREMLIN_HADOOP_GIRAPH_JOB_PREFIX + this.vertexProgram); // handle input paths (if any) String inputLocation = this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION, null); if (null != inputLocation && FileInputFormat.class.isAssignableFrom(this.giraphConfiguration .getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class))) { inputLocation = Constants.getSearchGraphLocation(inputLocation, storage) .orElse(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION)); FileInputFormat.setInputPaths(job.getInternalJob(), new Path(inputLocation)); } // handle output paths (if any) String outputLocation = this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null); if (null != outputLocation && FileOutputFormat.class.isAssignableFrom(this.giraphConfiguration .getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, OutputFormat.class))) { outputLocation = Constants.getGraphLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)); FileOutputFormat.setOutputPath(job.getInternalJob(), new Path(outputLocation)); } // execute the job and wait until it completes (if it fails, throw an exception) if (!job.run(true)) throw new IllegalStateException( "The GiraphGraphComputer job failed -- aborting all subsequent MapReduce 
jobs: " + job.getInternalJob().getStatus().getFailureInfo()); // add vertex program memory values to the return memory for (final MemoryComputeKey memoryComputeKey : this.vertexProgram.getMemoryComputeKeys()) { if (!memoryComputeKey.isTransient() && storage.exists(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), memoryComputeKey.getKey()))) { final ObjectWritableIterator iterator = new ObjectWritableIterator(this.giraphConfiguration, new Path(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), memoryComputeKey.getKey()))); if (iterator.hasNext()) { this.memory.set(memoryComputeKey.getKey(), iterator.next().getValue()); } // vertex program memory items are not stored on disk storage.rm(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), memoryComputeKey.getKey())); } } final Path path = new Path(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), Constants.HIDDEN_ITERATION)); this.memory.setIteration( (Integer) new ObjectWritableIterator(this.giraphConfiguration, path).next().getValue()); storage.rm(Constants.getMemoryLocation( this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), Constants.HIDDEN_ITERATION)); } // do map reduce jobs this.giraphConfiguration.setBoolean(Constants.GREMLIN_HADOOP_GRAPH_READER_HAS_EDGES, this.giraphConfiguration.getBoolean(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES, true)); for (final MapReduce mapReduce : this.mapReducers) { this.memory.addMapReduceMemoryKey(mapReduce); MapReduceHelper.executeMapReduceJob(mapReduce, this.memory, this.giraphConfiguration); } // if no persistence, delete the graph and memory output if (this.persist.equals(Persist.NOTHING)) storage.rm(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)); } catch (final Exception e) { throw new IllegalStateException(e.getMessage(), e); 
} return 0; }