List of usage examples for org.apache.hadoop.mapred JobConf set
public void set(String name, String value)
Set the value of the name property.
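For orientation before the harvested examples, here is a minimal sketch of the call pattern. The class name JobConfSetSketch and the regex value are illustrative only (not taken from any project below); the property key mapred.mapper.regex matches the one used in the Grep examples. set stores a string-valued property in the job configuration and get reads it back.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // store a string-valued property in the job configuration
        conf.set("mapred.mapper.regex", "ERROR.*");
        // any code holding the configuration (e.g. a task) can read it back
        String regex = conf.get("mapred.mapper.regex");
        System.out.println(regex);
    }
}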
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableSnapshotInputFormat.java
License:Apache License
private static void setColumns(JobConf job) throws IOException {
    // hbase mapred API doesn't support scan at the moment.
    Scan scan = HiveHBaseInputFormatUtil.getScan(job);
    byte[][] families = scan.getFamilies();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < families.length; i++) {
        if (i > 0)
            sb.append(" ");
        sb.append(Bytes.toString(families[i]));
    }
    job.set(TableInputFormat.COLUMN_LIST, sb.toString());
}
From source file:com.github.gaoyangthu.demo.mapred.Grep.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[2]);
        if (args.length == 4)
            grepJob.set("mapred.mapper.regex.group", args[3]);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormat(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        JobClient.runJob(grepJob);

        JobConf sortJob = new JobConf(Grep.class);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormat(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class); // sort by decreasing freq

        JobClient.runJob(sortJob);
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
From source file:com.google.mr4c.hadoop.HadoopUtils.java
License:Open Source License
public static void applyToJobConf(Properties props, JobConf conf) {
    for (String name : props.stringPropertyNames()) {
        conf.set(name, props.getProperty(name));
    }
}
From source file:com.google.mr4c.hadoop.HadoopUtils.java
License:Open Source License
/**
 * @param varMap apply environment variable values from this map
 * @param vars apply existing values of these environment variables
 */
public static void applyEnvironmentVariables(JobConf conf, Map<String, String> varMap, List<String> vars) {
    Map<String, String> allMap = new HashMap<String, String>(System.getenv());
    allMap.keySet().retainAll(vars); // only the env we wanted
    allMap.putAll(varMap);
    List<String> assigns = new ArrayList<String>();
    for (String var : allMap.keySet()) {
        String val = allMap.get(var);
        if (!StringUtils.isEmpty(val)) {
            assigns.add(var + "=" + val);
        }
    }
    String value = StringUtils.join(assigns, ", ");
    conf.set(JobConf.MAPRED_MAP_TASK_ENV, value);
    conf.set(JobConf.MAPRED_REDUCE_TASK_ENV, value);
}
From source file:com.google.mr4c.hadoop.MR4CMRJob.java
License:Open Source License
private void exportProperty(MR4CConfig bbConf, JobConf jobConf, Category category, String name, String hadoopName) {
    String val = bbConf.getCategory(category).getProperty(name);
    if (!StringUtils.isEmpty(val)) {
        jobConf.set(hadoopName, val);
    }
}
From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java
License:Open Source License
public void testWithLocal() throws Exception {
    MiniMRCluster mr = null;
    try {
        JobConf jconf = new JobConf();
        jconf.set("mapred.queue.names", "default");
        mr = new MiniMRCluster(2, "file:///", 3, null, null, jconf);
        Configuration cf = mr.createJobConf();
        cf.set("io.compression.codecs", LzoCodec.class.getName());
        runWordCount(cf, false, false);
        runWordCount(cf, false, true);
        runWordCount(cf, true, false);
    } finally {
        if (mr != null) {
            mr.shutdown();
        }
    }
}
From source file:com.hadoopilluminated.examples.Grep.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[2]);
        if (args.length == 4) {
            grepJob.set("mapred.mapper.regex.group", args[3]);
        }

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormat(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        JobClient.runJob(grepJob);

        JobConf sortJob = new JobConf(getConf(), Grep.class);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormat(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class); // sort by decreasing freq

        JobClient.runJob(sortJob);
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
From source file:com.hadoopilluminated.examples.Join.java
License:Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
@Override
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Before
public void before() throws IOException {
    crush = new Crush();

    JobConf job = new JobConf(false);
    crush.setConf(job);

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.setInt("mapred.reduce.tasks", 20);
    job.setLong("dfs.block.size", 1024 * 1024 * 64);

    FileSystem fs = FileSystem.get(job);
    fs.setWorkingDirectory(new Path(tmp.getRoot().getAbsolutePath()));
    crush.setFileSystem(fs);
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Before
public void setupReducer() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_014527_r_001234");
    job.set("mapred.task.id", "attempt_201011081200_14527_r_001234_0");

    outDir = tmp.newFolder("out");
    tmp.newFolder("out/_temporary");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");

    job.setLong("crush.timestamp", 98765);
    job.setInt("crush.num.specs", 3);

    job.set("crush.0.regex", ".+/dir");
    job.set("crush.0.regex.replacement", "firstregex-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", TextOutputFormat.class.getName());

    job.set("crush.1.regex", ".+/dir/([^/]+/)*(.+)");
    job.set("crush.1.regex.replacement", "secondregex-$2-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
    job.set("crush.1.input.format", TextInputFormat.class.getName());
    job.set("crush.1.output.format", TextOutputFormat.class.getName());

    job.set("crush.2.regex", ".+/other");
    job.set("crush.2.regex.replacement", "${crush.timestamp}-${crush.task.num}-middle-${crush.file.num}-tail");
    job.set("crush.2.input.format", TextInputFormat.class.getName());
    job.set("crush.2.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();
    reducer.configure(job);
}