List of usage examples for the org.apache.hadoop.mapred.JobClient constructor
public JobClient(Configuration conf) throws IOException
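Before the source-file examples below, a minimal sketch of this constructor in use (the class name JobClientExample is hypothetical; assumes Hadoop configuration files such as core-site.xml and mapred-site.xml are on the classpath so the client can reach the cluster):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;

public class JobClientExample {
    public static void main(String[] args) throws IOException {
        // Loads core-site.xml / mapred-site.xml from the classpath, if present.
        Configuration conf = new Configuration();

        // The Configuration-based constructor connects to the cluster and may
        // throw IOException.
        JobClient client = new JobClient(conf);
        try {
            ClusterStatus status = client.getClusterStatus();
            System.out.println("Map task capacity:    " + status.getMaxMapTasks());
            System.out.println("Reduce task capacity: " + status.getMaxReduceTasks());
        } finally {
            client.close();
        }
    }
}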
From source file:org.apache.accumulo.server.master.CoordinateRecoveryTask.java
License:Apache License
void cleanupOldJobs() {
    try {
        Configuration conf = CachedConfiguration.getInstance();
        @SuppressWarnings("deprecation")
        JobClient jc = new JobClient(new org.apache.hadoop.mapred.JobConf(conf));
        for (JobStatus status : jc.getAllJobs()) {
            if (!status.isJobComplete()) {
                RunningJob job = jc.getJob(status.getJobID());
                if (job.getJobName().equals(LogSort.getJobName())) {
                    log.info("found a running " + job.getJobName());
                    Configuration jobConfig = new Configuration(false);
                    log.info("fetching configuration from " + job.getJobFile());
                    jobConfig.addResource(TraceFileSystem
                            .wrap(FileUtil.getFileSystem(conf, ServerConfiguration.getSiteConfiguration()))
                            .open(new Path(job.getJobFile())));
                    if (HdfsZooInstance.getInstance().getInstanceID()
                            .equals(jobConfig.get(LogSort.INSTANCE_ID_PROPERTY))) {
                        log.info("Killing job " + job.getID().toString());
                    }
                }
            }
        }
        FileStatus[] children = fs.listStatus(new Path(ServerConstants.getRecoveryDir()));
        if (children != null) {
            for (FileStatus child : children) {
                log.info("Deleting recovery directory " + child);
                fs.delete(child.getPath(), true);
            }
        }
    } catch (IOException e) {
        log.error("Error cleaning up old Log Sort jobs", e);
    } catch (Exception e) {
        log.error("Unknown error cleaning up old jobs", e);
    }
}
From source file:org.apache.accumulo.server.test.continuous.ContinuousStatsCollector.java
License:Apache License
private static String getMRStats() throws Exception {
    Configuration conf = CachedConfiguration.getInstance();
    @SuppressWarnings("deprecation")
    JobClient jc = new JobClient(new org.apache.hadoop.mapred.JobConf(conf));
    ClusterStatus cs = jc.getClusterStatus(false);
    return "" + cs.getMapTasks() + " " + cs.getMaxMapTasks() + " " + cs.getReduceTasks() + " "
            + cs.getMaxReduceTasks() + " " + cs.getTaskTrackers() + " " + cs.getBlacklistedTrackers();
}
From source file:org.apache.accumulo.test.continuous.ContinuousStatsCollector.java
License:Apache License
private static String getMRStats() throws Exception {
    Configuration conf = CachedConfiguration.getInstance();
    // No alternatives for hadoop 20
    JobClient jc = new JobClient(new org.apache.hadoop.mapred.JobConf(conf));
    ClusterStatus cs = jc.getClusterStatus(false);
    return "" + cs.getMapTasks() + " " + cs.getMaxMapTasks() + " " + cs.getReduceTasks() + " "
            + cs.getMaxReduceTasks() + " " + cs.getTaskTrackers() + " " + cs.getBlacklistedTrackers();
}
From source file:org.apache.avro.mapred.tether.TetherJob.java
License:Apache License
/** Submit a job to the Map-Reduce framework. */
public static RunningJob submitJob(JobConf conf) throws IOException {
    setupTetherJob(conf);
    return new JobClient(conf).submitJob(conf);
}
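As a follow-up to the TetherJob example, a hedged sketch of the submit-and-wait pattern around the returned RunningJob handle (TetherJobRunner is a hypothetical helper; assumes conf is a JobConf already prepared, e.g. by setupTetherJob):

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class TetherJobRunner {
    // Hypothetical helper: submits the job and blocks until it finishes.
    public static void runToCompletion(JobConf conf) throws IOException {
        RunningJob running = new JobClient(conf).submitJob(conf);
        running.waitForCompletion(); // polls until the job completes
        if (!running.isSuccessful()) {
            throw new IOException("Job failed: " + running.getID());
        }
    }
}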
From source file:org.apache.falcon.hadoop.HadoopClientFactory.java
License:Apache License
/**
 * This method validates if the execute url is able to reach the MR endpoint.
 *
 * @param executeUrl jt url or RM url
 * @throws IOException
 */
public void validateJobClient(String executeUrl, String rmPrincipal) throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.set(MR_JT_ADDRESS_KEY, executeUrl);
    jobConf.set(YARN_RM_ADDRESS_KEY, executeUrl);
    /*
     * It is possible that the RM/JT principal can be different between clusters,
     * for example, the cluster is using a different KDC with cross-domain trust
     * with the Falcon KDC. In that case, we want to allow the user to provide
     * the RM principal similar to the NN principal.
     */
    if (UserGroupInformation.isSecurityEnabled() && StringUtils.isNotEmpty(rmPrincipal)) {
        jobConf.set(SecurityUtil.RM_PRINCIPAL, rmPrincipal);
    }
    UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
    try {
        JobClient jobClient = loginUser.doAs(new PrivilegedExceptionAction<JobClient>() {
            public JobClient run() throws Exception {
                return new JobClient(jobConf);
            }
        });
        jobClient.getClusterStatus().getMapTasks();
    } catch (InterruptedException e) {
        throw new IOException("Exception creating job client: " + e.getMessage(), e);
    }
}
From source file:org.apache.falcon.logging.DefaultTaskLogRetriever.java
License:Apache License
@Override
public List<String> retrieveTaskLogURL(String jobId) throws IOException {
    JobConf jobConf = new JobConf(getConf());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob job = jobClient.getJob(JobID.forName(jobId));
    if (job == null) {
        LOG.warn("No running job for job id: {}", jobId);
        return getFromHistory(jobId);
    }
    List<String> taskLogUrls = new ArrayList<String>();
    TaskCompletionEvent[] tasks = job.getTaskCompletionEvents(0);
    // 0th event is setup, 1st event is the launcher, 2nd event is cleanup
    if (tasks != null && tasks.length == 3 && tasks[1] != null) {
        taskLogUrls.add(tasks[1].getTaskTrackerHttp() + "/tasklog?attemptid=" + tasks[1].getTaskAttemptId()
                + "&all=true");
        return taskLogUrls;
    } else {
        LOG.warn("No running task for job: {}", jobId);
        return getFromHistory(jobId);
    }
}
From source file:org.apache.hadoop.examples.Join.java
License:Apache License
/**
 * The main driver for the join program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 * job tracker.
 */
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("join");
    job.setJarByClass(Sort.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }
    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:org.apache.hadoop.examples.RandomTextWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
    Job job = Job.getInstance(conf);
    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(RandomTextMapper.class);
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
    System.out.println("Running " + numMaps + " maps.");
    // reducer NONE
    job.setNumReduceTasks(0);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:org.apache.hadoop.examples.RandomWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path outDir = new Path(args[0]);
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
    Job job = Job.getInstance(conf);
    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    System.out.println("Running " + numMaps + " maps.");
    // reducer NONE
    job.setNumReduceTasks(0);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:org.apache.hadoop.examples.Sort.java
License:Apache License
/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 * job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits) {
                    maxSplits = Integer.MAX_VALUE;
                }
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    // Set user-supplied (possibly default) job configs
    job = Job.getInstance(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(num_reduces);
    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        FileSystem fs = inputDir.getFileSystem(conf);
        inputDir = inputDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        job.addCacheFile(partitionUri);
    }
    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}