List of usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf(boolean loadDefaults)
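The loadDefaults flag controls whether the standard Hadoop resources (core-default.xml, core-site.xml, and so on) are read into the configuration; passing false gives an empty configuration that holds only what is set explicitly, which is why the Crush test cases below use it. A minimal sketch of the difference (a standalone illustration, not taken from any of the source files below):

import org.apache.hadoop.mapred.JobConf;

public class JobConfConstructorSketch {
    public static void main(String[] args) {
        // new JobConf(false) skips loading the default resources, so the
        // configuration starts empty and is fully under the caller's control.
        JobConf empty = new JobConf(false);
        empty.set("fs.default.name", "file:///");

        // new JobConf(true) loads the defaults first, like the no-arg constructor.
        JobConf withDefaults = new JobConf(true);

        // A property defined in core-default.xml is present only in the second case.
        System.out.println("without defaults: " + empty.get("io.file.buffer.size"));
        System.out.println("with defaults:    " + withDefaults.get("io.file.buffer.size"));
    }
}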
From source file:com.hadoop.secondarysort.SecondarySort_MapRed.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }

    JobConf jobConf = new JobConf(conf);
    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(Reduce.class);
    jobConf.setPartitionerClass(FirstPartitioner.class);
    jobConf.setOutputValueGroupingComparator(FirstGroupingComparator.class);

    jobConf.setMapOutputKeyClass(IntPair.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);

    // Equivalent setup using the new (org.apache.hadoop.mapreduce) API, kept
    // commented out in the original source:
    // Job job = new Job(conf, "secondary sort");
    // job.setJarByClass(SecondarySort_MapRed.class);
    // job.setMapperClass(MapClass.class);
    // job.setReducerClass(Reduce.class);
    //
    // // group and partition by the first int in the pair
    // job.setPartitionerClass(FirstPartitioner.class);
    // job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // conf.setClass("mapred.output.key.comparator.class",
    //         KeyComparator.class, RawComparator.class);
    //
    // job.setSortComparatorClass(SecondGroupingComparator.class);
    //
    // // the map output is IntPair, IntWritable
    // job.setMapOutputKeyClass(IntPair.class);
    // job.setMapOutputValueClass(IntWritable.class);
    //
    // // the reduce output is Text, IntWritable
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(jobConf, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs[1]));
}
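Note that this example configures the old-API job but stops short of submitting it. A hedged sketch of the submission step that would typically follow, continuing the main() above and using the classic JobClient entry point (org.apache.hadoop.mapred.JobClient is assumed to be imported):

    // Assumed continuation of main(): submit the configured job and block
    // until it completes. JobClient.runJob throws IOException on failure.
    JobClient.runJob(jobConf);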
From source file:com.hadoopilluminated.examples.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("pentomino <output>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentonimo.class, Pentomino.class);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Before
public void before() throws IOException {
    crush = new Crush();

    JobConf job = new JobConf(false);
    crush.setConf(job);

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.setInt("mapred.reduce.tasks", 20);
    job.setLong("dfs.block.size", 1024 * 1024 * 64);

    FileSystem fs = FileSystem.get(job);
    fs.setWorkingDirectory(new Path(tmp.getRoot().getAbsolutePath()));

    crush.setFileSystem(fs);
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Before
public void setupPartitionMap() throws IOException {
    job = new JobConf(false);
    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("crush.partition.map", tmp.getRoot().getAbsolutePath() + "/partition-map");

    fs = FileSystem.get(job);

    partitionMap = new Path(tmp.getRoot().getAbsolutePath(), "partition-map");

    partitioner = new CrushPartitioner();
}
From source file:com.hdfs.concat.crush.CrushReducerParameterizedTest.java
License:Apache License
@Before
public void setupReducer() throws IOException {
    job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_014527_r_001234");
    job.set("mapred.task.id", "attempt_201011081200_14527_r_001234_0");

    /*
     * This logic tree around compression simulates what the output formats do.
     */
    if (CompressionType.NONE == compressionType) {
        job.setBoolean("mapred.output.compress", false);
    } else {
        job.setBoolean("mapred.output.compress", true);
        job.set("mapred.output.compression.type", compressionType.name());
        job.set("mapred.output.compression.codec", CustomCompressionCodec.class.getName());
    }

    outDir = tmp.newFolder("out");
    tmp.newFolder("out/_temporary");
    workDir = tmp.newFolder("out/_temporary/_" + job.get("mapred.task.id"));

    job.set("mapred.output.dir", outDir.getAbsolutePath());
    job.set("mapred.work.output.dir", workDir.getAbsolutePath());

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");

    job.setLong("crush.timestamp", 98765);

    job.setInt("crush.num.specs", 4);

    job.set("crush.0.regex", ".+/other");
    job.set("crush.0.regex.replacement", "${crush.timestamp}-${crush.task.num}-middle-${crush.file.num}-tail");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", TextOutputFormat.class.getName());

    job.set("crush.1.regex", ".+/dir");
    job.set("crush.1.regex.replacement", "secondregex-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
    job.set("crush.1.input.format", TextInputFormat.class.getName());
    job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.2.regex", ".+/dir/([^/]+/)*(.+)");
    job.set("crush.2.regex.replacement", "thirdregex-$2-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
    job.set("crush.2.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.2.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.3.regex", ".+/text");
    job.set("crush.3.regex.replacement", "fourthregex-${crush.task.num}-${crush.timestamp}-${crush.file.num}");
    job.set("crush.3.input.format", TextInputFormat.class.getName());
    job.set("crush.3.output.format", TextOutputFormat.class.getName());

    reducer = new CrushReducer();

    reducer.configure(job);

    fs = FileSystem.get(job);
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Before
public void setupReducer() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_014527_r_001234");
    job.set("mapred.task.id", "attempt_201011081200_14527_r_001234_0");

    outDir = tmp.newFolder("out");
    tmp.newFolder("out/_temporary");

    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");

    job.setLong("crush.timestamp", 98765);

    job.setInt("crush.num.specs", 3);

    job.set("crush.0.regex", ".+/dir");
    job.set("crush.0.regex.replacement", "firstregex-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", TextOutputFormat.class.getName());

    job.set("crush.1.regex", ".+/dir/([^/]+/)*(.+)");
    job.set("crush.1.regex.replacement", "secondregex-$2-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
    job.set("crush.1.input.format", TextInputFormat.class.getName());
    job.set("crush.1.output.format", TextOutputFormat.class.getName());

    job.set("crush.2.regex", ".+/other");
    job.set("crush.2.regex.replacement", "${crush.timestamp}-${crush.task.num}-middle-${crush.file.num}-tail");
    job.set("crush.2.input.format", TextInputFormat.class.getName());
    job.set("crush.2.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();

    reducer.configure(job);
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Test
public void missingInputRegex() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", TextOutputFormat.class.getName());

    job.set("crush.1.regex.replacement", "bar");
    job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.1.output.format", TextOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No input regex: crush.1.regex".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Test
public void missingOutputRegex() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", TextOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.1.output.format", TextOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No output replacement: crush.1.regex.replacement".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Test
public void missingInputFormat() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No input format: crush.1.input.format".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Test
public void inputFormatWrongType() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", Object.class.getName());
    job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"Not a file input format: crush.1.input.format=java.lang.Object".equals(e.getMessage())) {
            throw e;
        }
    }
}