List of usage examples for org.apache.hadoop.mapred JobConf setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass)
From source file:com.github.gaoyangthu.demo.mapred.terasort.TeraGen.java
License:Apache License
/** * @param args the cli arguments/* w ww . jav a 2 s .c om*/ */ public int run(String[] args) throws IOException { JobConf job = (JobConf) getConf(); setNumberOfRows(job, Long.parseLong(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraGen"); job.setJarByClass(TeraGen.class); job.setMapperClass(SortGenMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(RangeInputFormat.class); job.setOutputFormat(TeraOutputFormat.class); JobClient.runJob(job); return 0; }
From source file:com.github.gaoyangthu.demo.mapred.terasort.TeraSort.java
License:Apache License
public int run(String[] args) throws Exception { LOG.info("starting"); JobConf job = (JobConf) getConf(); Path inputDir = new Path(args[0]); inputDir = inputDir.makeQualified(inputDir.getFileSystem(job)); Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME); URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME); TeraInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraSort"); job.setJarByClass(TeraSort.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(TeraInputFormat.class); job.setOutputFormat(TeraOutputFormat.class); job.setPartitionerClass(TotalOrderPartitioner.class); TeraInputFormat.writePartitionFile(job, partitionFile); DistributedCache.addCacheFile(partitionUri, job); DistributedCache.createSymlink(job); job.setInt("dfs.replication", 1); TeraOutputFormat.setFinalSync(job, true); JobClient.runJob(job);/*from w w w. j a v a 2 s .c o m*/ LOG.info("done"); return 0; }
From source file:com.hadoop.examples.geolocation.GeoLocationJob.java
License:Apache License
/** * @param args/*from w w w . ja v a 2s . c o m*/ */ public static void main(String[] args) throws Exception { JobConf conf = new JobConf(GeoLocationJob.class); conf.setJobName("geolocationgroup"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(GeoLocationMapper.class); conf.setReducerClass(GeoLocationReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); }
From source file:com.hadoop.secondarysort.SecondarySort_MapRed.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: secondarysrot <in> <out>"); System.exit(2);//ww w. ja v a2s. c o m } JobConf jobConf = new JobConf(conf); jobConf.setMapperClass(MapClass.class); jobConf.setReducerClass(Reduce.class); jobConf.setPartitionerClass(FirstPartitioner.class); jobConf.setOutputValueGroupingComparator(FirstGroupingComparator.class); jobConf.setMapOutputKeyClass(IntPair.class); jobConf.setMapOutputValueClass(IntWritable.class); jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(IntWritable.class); // // Job job = new Job(conf, "secondary sort"); // job.setJarByClass(SecondarySort_MapRed.class); // job.setMapperClass(MapClass.class); // job.setReducerClass(Reduce.class); // // // group and partition by the first int in the pair // job.setPartitionerClass(FirstPartitioner.class); // job.setGroupingComparatorClass(FirstGroupingComparator.class); // conf.setClass("mapred.output.key.comparator.class", // KeyComparator.class, RawComparator.class); // // job.setSortComparatorClass(SecondGroupingComparator.class); // // the map output is IntPair, IntWritable // job.setMapOutputKeyClass(IntPair.class); // job.setMapOutputValueClass(IntWritable.class); // // // the reduce output is Text, IntWritable // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(jobConf, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs[1])); }
From source file:com.hadoopilluminated.examples.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception { JobConf conf; int depth = 5; int width = 9; int height = 10; Class<? extends Pentomino> pentClass; if (args.length == 0) { System.out.println("pentomino <output>"); ToolRunner.printGenericCommandUsage(System.out); return -1; }/*from w w w .ja v a 2s .co m*/ conf = new JobConf(getConf()); width = conf.getInt("pent.width", width); height = conf.getInt("pent.height", height); depth = conf.getInt("pent.depth", depth); pentClass = conf.getClass("pent.class", OneSidedPentonimo.class, Pentomino.class); Path output = new Path(args[0]); Path input = new Path(output + "_input"); FileSystem fileSys = FileSystem.get(conf); try { FileInputFormat.setInputPaths(conf, input); FileOutputFormat.setOutputPath(conf, output); conf.setJarByClass(PentMap.class); conf.setJobName("dancingElephant"); Pentomino pent = ReflectionUtils.newInstance(pentClass, conf); pent.initialize(width, height); createInputDirectory(fileSys, input, pent, depth); // the keys are the prefix strings conf.setOutputKeyClass(Text.class); // the values are puzzle solutions conf.setOutputValueClass(Text.class); conf.setMapperClass(PentMap.class); conf.setReducerClass(IdentityReducer.class); conf.setNumMapTasks(2000); conf.setNumReduceTasks(1); JobClient.runJob(conf); } finally { fileSys.delete(input, true); } return 0; }
From source file:com.hadoopilluminated.examples.Grep.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 3) { System.out.println("Grep <inDir> <outDir> <regex> [<group>]"); ToolRunner.printGenericCommandUsage(System.out); return -1; }//from w w w . j a va 2s . c om Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); JobConf grepJob = new JobConf(getConf(), Grep.class); try { grepJob.setJobName("grep-search"); FileInputFormat.setInputPaths(grepJob, args[0]); grepJob.setMapperClass(RegexMapper.class); grepJob.set("mapred.mapper.regex", args[2]); if (args.length == 4) { grepJob.set("mapred.mapper.regex.group", args[3]); } grepJob.setCombinerClass(LongSumReducer.class); grepJob.setReducerClass(LongSumReducer.class); FileOutputFormat.setOutputPath(grepJob, tempDir); grepJob.setOutputFormat(SequenceFileOutputFormat.class); grepJob.setOutputKeyClass(Text.class); grepJob.setOutputValueClass(LongWritable.class); JobClient.runJob(grepJob); JobConf sortJob = new JobConf(getConf(), Grep.class); sortJob.setJobName("grep-sort"); FileInputFormat.setInputPaths(sortJob, tempDir); sortJob.setInputFormat(SequenceFileInputFormat.class); sortJob.setMapperClass(InverseMapper.class); sortJob.setNumReduceTasks(1); // write a single file FileOutputFormat.setOutputPath(sortJob, new Path(args[1])); sortJob.setOutputKeyComparatorClass // sort by decreasing freq (LongWritable.DecreasingComparator.class); JobClient.runJob(sortJob); } finally { FileSystem.get(grepJob).delete(tempDir, true); } return 0; }
From source file:com.hadoopilluminated.examples.Join.java
License:Apache License
/** * The main driver for sort program. Invoke this method to submit the * map/reduce job.//ww w . j a v a2 s . co m * * @throws IOException When there is communication problems with the job * tracker. */ @Override public int run(String[] args) throws Exception { JobConf jobConf = new JobConf(getConf(), Sort.class); jobConf.setJobName("join"); jobConf.setMapperClass(IdentityMapper.class); jobConf.setReducerClass(IdentityReducer.class); JobClient client = new JobClient(jobConf); ClusterStatus cluster = client.getClusterStatus(); int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); String sort_reduces = jobConf.get("test.sort.reduces_per_host"); if (sort_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces); } Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class; Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; Class<? extends WritableComparable> outputKeyClass = BytesWritable.class; Class<? extends Writable> outputValueClass = TupleWritable.class; String op = "inner"; List<String> otherArgs = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { num_maps = Integer.parseInt(args[++i]); } else if ("-r".equals(args[i])) { num_reduces = Integer.parseInt(args[++i]); } else if ("-inFormat".equals(args[i])) { inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class); } else if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else if ("-outKey".equals(args[i])) { outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class); } else if ("-outValue".equals(args[i])) { outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class); } else if ("-joinOp".equals(args[i])) { op = args[++i]; } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Set user-supplied (possibly default) job configs jobConf.setNumMapTasks(num_maps); jobConf.setNumReduceTasks(num_reduces); if (otherArgs.size() < 2) { System.out.println("ERROR: Wrong number of parameters: "); return printUsage(); } FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1))); List<Path> plist = new ArrayList<Path>(otherArgs.size()); for (String s : otherArgs) { plist.add(new Path(s)); } jobConf.setInputFormat(CompositeInputFormat.class); jobConf.set("mapred.join.expr", CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0]))); jobConf.setOutputFormat(outputFormatClass); jobConf.setOutputKeyClass(outputKeyClass); jobConf.setOutputValueClass(outputValueClass); Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(jobConf); Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return 0; }
From source file:com.hazelcast.jet.hadoop.impl.WriteHdfsPTest.java
License:Open Source License
@Test public void testWriteFile() throws Exception { int messageCount = 320; String mapName = randomMapName(); JetInstance instance = createJetMember(); createJetMember();/*from www . j a v a 2s . c om*/ Map<IntWritable, IntWritable> map = IntStream.range(0, messageCount).boxed() .collect(toMap(IntWritable::new, IntWritable::new)); instance.getMap(mapName).putAll(map); Path path = getPath(); JobConf conf = new JobConf(); conf.setOutputFormat(outputFormatClass); conf.setOutputCommitter(FileOutputCommitter.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(IntWritable.class); if (outputFormatClass.equals(LazyOutputFormat.class)) { LazyOutputFormat.setOutputFormatClass(conf, TextOutputFormat.class); } FileOutputFormat.setOutputPath(conf, path); Pipeline p = Pipeline.create(); p.drawFrom(Sources.map(mapName)).drainTo(HdfsSinks.hdfs(conf)) // we use higher value to increase the race chance for LazyOutputFormat .setLocalParallelism(8); Future<Void> future = instance.newJob(p).getFuture(); assertCompletesEventually(future); JobConf readJobConf = new JobConf(); readJobConf.setInputFormat(inputFormatClass); FileInputFormat.addInputPath(readJobConf, path); p = Pipeline.create(); p.drawFrom(HdfsSources.hdfs(readJobConf)).drainTo(Sinks.list("results")); future = instance.newJob(p).getFuture(); assertCompletesEventually(future); IList<Object> results = instance.getList("results"); assertEquals(messageCount, results.size()); }
From source file:com.hazelcast.jet.impl.connector.hadoop.WriteHdfsPTest.java
License:Open Source License
@Test public void testWriteFile() throws Exception { int messageCount = 20; String mapName = randomMapName(); JetInstance instance = createJetMember(); createJetMember();//from w ww . j a v a2s . co m Map<IntWritable, IntWritable> map = IntStream.range(0, messageCount).boxed() .collect(toMap(IntWritable::new, IntWritable::new)); instance.getMap(mapName).putAll(map); DAG dag = new DAG(); Vertex producer = dag.newVertex("producer", readMap(mapName)).localParallelism(1); Path path = getPath(); JobConf conf = new JobConf(); conf.setOutputFormat(outputFormatClass); conf.setOutputCommitter(FileOutputCommitter.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(conf, path); Vertex consumer = dag.newVertex("consumer", writeHdfs(conf)).localParallelism(4); dag.edge(between(producer, consumer)); Future<Void> future = instance.newJob(dag).execute(); assertCompletesEventually(future); dag = new DAG(); JobConf readJobConf = new JobConf(); readJobConf.setInputFormat(inputFormatClass); FileInputFormat.addInputPath(readJobConf, path); producer = dag.newVertex("producer", readHdfs(readJobConf)).localParallelism(8); consumer = dag.newVertex("consumer", writeList("results")).localParallelism(1); dag.edge(between(producer, consumer)); future = instance.newJob(dag).execute(); assertCompletesEventually(future); IList<Object> results = instance.getList("results"); assertEquals(messageCount, results.size()); }
From source file:com.hotels.corc.cascading.OrcFile.java
License:Apache License
/** * Sets the {@link OutputFormat} to {@link CorcOutputFormat}, sets the key and values to {@link NullWritable} and * {@link Corc} respectively./*from ww w . jav a 2 s .c o m*/ */ @Override public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { conf.setOutputFormat(CorcOutputFormat.class); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Corc.class); }