List of usage examples for org.apache.hadoop.mapreduce Job setJobName
public void setJobName(String name) throws IllegalStateException
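Before the full examples below, a minimal sketch of the call itself. The driver class and job name here are placeholders, not taken from any of the listed sources; setJobName must be called before the job is submitted, otherwise an IllegalStateException is thrown.

// Minimal sketch (assumed names): label the job so it is identifiable in the cluster UI and logs.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(MyDriver.class);   // MyDriver is a hypothetical driver class
job.setJobName("example-job");       // must be set before submission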
From source file:com.phantom.hadoop.examples.RandomTextWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
    Job job = new Job(conf);
    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(RandomTextMapper.class);
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
    System.out.println("Running " + numMaps + " maps.");
    // reducer NONE
    job.setNumReduceTasks(0);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.phantom.hadoop.examples.RandomWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path outDir = new Path(args[0]);
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
    Job job = new Job(conf);
    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    System.out.println("Running " + numMaps + " maps.");
    // reducer NONE
    job.setNumReduceTasks(0);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.phantom.hadoop.examples.terasort.TeraChecksum.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.phantom.hadoop.examples.terasort.TeraGen.java
License:Apache License
/**
 * @param args
 *            the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.phantom.hadoop.examples.terasort.TeraSort.java
License:Apache License
public int run(String[] args) throws Exception { LOG.info("starting"); Job job = Job.getInstance(getConf()); Path inputDir = new Path(args[0]); Path outputDir = new Path(args[1]); boolean useSimplePartitioner = getUseSimplePartitioner(job); TeraInputFormat.setInputPaths(job, inputDir); FileOutputFormat.setOutputPath(job, outputDir); job.setJobName("TeraSort"); job.setJarByClass(TeraSort.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TeraInputFormat.class); job.setOutputFormatClass(TeraOutputFormat.class); if (useSimplePartitioner) { job.setPartitionerClass(SimplePartitioner.class); } else {//from w ww.j a v a 2s .c om long start = System.currentTimeMillis(); Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME); URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME); try { TeraInputFormat.writePartitionFile(job, partitionFile); } catch (Throwable e) { LOG.error(e.getMessage()); return -1; } job.addCacheFile(partitionUri); long end = System.currentTimeMillis(); System.out.println("Spent " + (end - start) + "ms computing partitions."); job.setPartitionerClass(TotalOrderPartitioner.class); } job.getConfiguration().setInt("dfs.replication", getOutputReplication(job)); TeraOutputFormat.setFinalSync(job, true); int ret = job.waitForCompletion(true) ? 0 : 1; LOG.info("done"); return ret; }
From source file:com.phantom.hadoop.examples.terasort.TeraValidate.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 1;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java
License:Open Source License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    setupTest(conf);
    Job job = Job.getInstance(conf);
    job.setJobName("mrbuildverticestest");
    MRBuildVertices.setupJob(job, new Path(testInput), new Path(testOutput));
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);
    if (!job.waitForCompletion(true))
        System.exit(1);
    cleanupTest(conf);
    System.exit(0);
}
From source file:com.philiphubbard.digraph.MRCompressChainsTest.java
License:Open Source License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    setupTest(conf);
    String inputOrig = testInput;
    String outputOrig = testOutput;
    int iter = 0;
    boolean keepGoing = true;
    MRCompressChains.beginIteration();
    while (keepGoing) {
        Job job = Job.getInstance(conf);
        job.setJobName("mrcompresschainstest");
        MRCompressChains.setupIterationJob(job, new Path(inputOrig), new Path(outputOrig));
        if (!job.waitForCompletion(true))
            System.exit(1);
        iter++;
        keepGoing = MRCompressChains.continueIteration(job, new Path(inputOrig), new Path(outputOrig));
    }
    //
    System.out.println("Number of iterations = " + iter);
    cleanupTest(conf);
    //
    System.exit(0);
}
From source file:com.philiphubbard.sabe.MRAssembler.java
License:Open Source License
public boolean run(Path inputPath, Path outputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    // Job.getInstance() copies the Configuration argument, so set its properties first.
    conf.setBoolean(MRVertex.CONFIG_ALLOW_EDGE_MULTIPLES, true);
    conf.setBoolean(MRVertex.CONFIG_COMPRESS_CHAIN_MULTIPLES_MUST_MATCH, false);
    conf.setInt(MRMerVertex.CONFIG_MER_LENGTH, vertexMerLength);
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);
    conf.setInt(MRBuildVertices.CONFIG_COVERAGE, coverage);
    conf.setInt(MRCompressChains.CONFIG_TERMINATION_COUNT, 1);

    Job buildJob = Job.getInstance(conf);
    buildJob.setJobName("mrassemblerbuild");

    Path buildInputPath = inputPath;
    Path buildOutputPath = new Path("sabe.MRAssemblerTmp");

    System.out.println("sabe.MRAssembler starting vertex construction");

    MRBuildMerVertices.setupJob(buildJob, buildInputPath, buildOutputPath);

    if (!buildJob.waitForCompletion(true))
        return false;

    //
    Path compressInputPath = new Path(buildOutputPath.toString() + "/chain");
    Path compressOutputPath = new Path(buildOutputPath.toString() + "/chainCompress");

    int iter = 0;
    boolean keepGoing = true;
    MRCompressChains.beginIteration();

    while (keepGoing) {
        Job compressJob = Job.getInstance(conf);
        compressJob.setJobName("mrassemblercompress");

        System.out.println("sabe.MRAssembler starting compression iteration " + iter);

        MRCompressMerChains.setupIterationJob(compressJob, compressInputPath, compressOutputPath);

        if (!compressJob.waitForCompletion(true))
            System.exit(1);

        iter++;
        keepGoing = MRCompressChains.continueIteration(compressJob, compressInputPath, compressOutputPath);
    }

    System.out.println("sabe.MRAssembler made " + iter + " compression iterations");

    //
    Path branchPath = new Path(buildOutputPath.toString() + "/branch");
    Path chainPath = compressOutputPath;

    FileSystem fileSystem = FileSystem.get(conf);

    Graph graph = buildCompressedGraph(conf, fileSystem, branchPath, chainPath);
    if (graph != null) {
        ArrayList<String> result = graph.assemble();

        FSDataOutputStream out = fileSystem.create(outputPath);
        for (String seq : result) {
            out.writeBytes(seq);
            out.writeBytes("\n");
        }
    }

    //
    fileSystem.delete(buildOutputPath, true);
    fileSystem.close();

    return true;
}
From source file:com.sematext.hbase.hut.RollbackUpdatesMrJob.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    conf.set("mapred.map.tasks.speculative.execution", "false");
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(RollbackUpdatesMapper.class);
    // TODO: Allow passing filter and subset of rows/columns.
    Scan s = new Scan();
    // Optional arguments.
    long startTime = args.length > 1 ? Long.parseLong(args[1]) : 0L;
    long endTime = args.length > 2 ? Long.parseLong(args[2]) : Long.MAX_VALUE;
    // TODO: consider using scan.setTimeRange() for limiting scanned data range. It may
    // not be a good way to do it if tss are artificial in HutPuts though
    // s.setTimeRange(startTime, endTime);
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MIN_TIME_ATTR,
            String.valueOf(startTime));
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MAX_TIME_ATTR,
            String.valueOf(endTime));
    s.setFilter(new HutWriteTimeRowsFilter(endTime, startTime));
    s.setCacheBlocks(false);
    // TODO: allow user change using job params
    s.setCaching(512);
    s.setCacheBlocks(false);
    LOG.info("Using scan: " + s.toString());
    // TODO: allow better limiting of data to be fetched
    if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
        s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
    }
    LOG.info("starttime (inclusive): " + startTime + " (" + new Date(startTime) + ")"
            + ", endtime (inclusive): " + endTime + " (" + new Date(endTime) + ")");
    TableMapReduceUtil.initTableMapperJob(tableName, s, RollbackUpdatesMapper.class, null, null, job);
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    // No reducers. Just write straight to output files.
    job.setNumReduceTasks(0);
    return job;
}