List of usage examples for org.apache.hadoop.mapred JobConf set
public void set(String name, String value)
Set the value of the name property.
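Before the per-project examples below, a minimal sketch of the call itself (the key "my.example.property" and its value are hypothetical, used only to illustrate the name/value pair):

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetSketch {
    public static void main(String[] args) {
        // Create an empty job configuration and set an arbitrary string property.
        JobConf conf = new JobConf();
        conf.set("my.example.property", "some-value"); // hypothetical key, for illustration only

        // Later consumers (mappers, reducers, input formats) read it back with get().
        System.out.println(conf.get("my.example.property")); // prints "some-value"
    }
}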
From source file:com.acme.extensions.data.SeedingHadoopAdapter.java
License:Apache License
@Override
public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    job.set("seed", String.valueOf(((SeededSplit) split).getSeed()));
    return super.getRecordReader(((SeededSplit) split).getChildSplit(), job, reporter);
}
From source file:com.benchmark.mapred.Grep.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.out.println("Grep <inDir> <outDir> <numreduces> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
    JobConf grepJob = new JobConf(getConf(), Grep.class);
    try {
        Date startIteration = new Date();
        grepJob.setJobName("grep-search");
        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[3]);
        if (args.length == 5)
            grepJob.set("mapred.mapper.regex.group", args[4]);
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, new Path(args[1]));
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.setNumReduceTasks(Integer.parseInt(args[2]));
        JobClient.runJob(grepJob);
        Date endIteration = new Date();
        System.out.println("The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000
                + " seconds.");
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
From source file:com.benchmark.mapred.Join.java
License:Apache License
/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");
    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);
    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }
    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    jobConf.setOutputFormat(outputFormatClass);
    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/**
 * Set the array of {@link Path}s as the list of inputs
 * for the map-reduce job.
 *
 * @param conf Configuration of the job.
 * @param inputPaths the {@link Path}s of the input directories/files
 *                   for the map-reduce job.
 */
public static void setInputPaths(JobConf conf, Path... inputPaths) {
    Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]);
    StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
    for (int i = 1; i < inputPaths.length; i++) {
        str.append(StringUtils.COMMA_STR);
        path = new Path(conf.getWorkingDirectory(), inputPaths[i]);
        str.append(StringUtils.escapeString(path.toString()));
    }
    conf.set("mapred.input.dir", str.toString());
}
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @param path {@link Path} to be added to the list of inputs for
 *             the map-reduce job.
 */
public static void addInputPath(JobConf conf, Path path) {
    path = new Path(conf.getWorkingDirectory(), path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get("mapred.input.dir");
    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + StringUtils.COMMA_STR + dirStr);
}
From source file:com.bixolabs.cascading.avro.AvroScheme.java
License:Apache License
@SuppressWarnings({ "deprecation" }) @Override/* w w w. j a v a 2s . co m*/ public void sourceInit(Tap tap, JobConf conf) { conf.set(AvroJob.INPUT_SCHEMA, getSchema().toString()); conf.setInputFormat(AvroInputFormat.class); // add AvroSerialization to io.serializations // conf.set("io.serializations", conf.get("io.serializations")+","+AvroSerialization.class.getName() ); // Collection<String> serializations = conf.getStringCollection("io.serializations"); // if (!serializations.contains(AvroSerialization.class.getName())) { // serializations.add(AvroSerialization.class.getName()); //// if (!serializations.contains("cascading.kryo.KryoSerialization")) { //// serializations.add("cascading.kryo.KryoSerialization"); } // conf.setStrings("io.serializations", serializations.toArray(new String[0])); // }; LOGGER.info(String.format("Initializing Avro scheme for source tap - scheme fields: %s", _schemeFields)); }
From source file:com.bixolabs.cascading.avro.AvroScheme.java
License:Apache License
@SuppressWarnings({ "deprecation" }) @Override// w ww .j a v a 2 s .com public void sinkInit(Tap tap, JobConf conf) { conf.set(AvroJob.OUTPUT_SCHEMA, getSchema().toString()); conf.setOutputFormat(AvroOutputFormat.class); // Since we're outputting to Avro, we need to set up output values. // TODO KKr - why don't we need to set the OutputValueClass? // TODO KKr - why do we need to set the OutputKeyComparatorClass? conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(AvroWrapper.class); conf.setOutputKeyComparatorClass(AvroKeyComparator.class); // conf.setMapOutputKeyClass(AvroKey.class); // conf.setMapOutputValueClass(AvroValue.class); // add AvroSerialization to io.serializations // Collection<String> serializations = conf.getStringCollection("io.serializations"); // if (!serializations.contains(AvroSerialization.class.getName())) { // serializations.add(AvroSerialization.class.getName()); // conf.setStrings("io.serializations", serializations.toArray(new String[0])); // } // Class<? extends Mapper> mapClass = conf.getMapperClass(); // Class<? extends Reducer> reduceClass = conf.getReducerClass(); // AvroJob.setOutputSchema(conf, getSchema()); // conf.setMapperClass(mapClass); // conf.setReducerClass(reduceClass); LOGGER.info(String.format("Initializing Avro scheme for sink tap - scheme fields: %s", _schemeFields)); }
From source file:com.bol.crazypigs.HBaseStorage15.java
License:Apache License
private JobConf initializeLocalJobConfig(Job job) {
    Properties udfProps = getUDFProperties();
    Configuration jobConf = job.getConfiguration();
    JobConf localConf = new JobConf(jobConf);
    if (udfProps.containsKey(HBASE_CONFIG_SET)) {
        for (Entry<Object, Object> entry : udfProps.entrySet()) {
            localConf.set((String) entry.getKey(), (String) entry.getValue());
        }
    } else {
        Configuration hbaseConf = HBaseConfiguration.create();
        for (Entry<String, String> entry : hbaseConf) {
            // JobConf may have some conf overriding ones in hbase-site.xml
            // So only copy hbase config not in job config to UDFContext
            // Also avoids copying core-default.xml and core-site.xml
            // props in hbaseConf to UDFContext which would be redundant.
            if (jobConf.get(entry.getKey()) == null) {
                udfProps.setProperty(entry.getKey(), entry.getValue());
                localConf.set(entry.getKey(), entry.getValue());
            }
        }
        udfProps.setProperty(HBASE_CONFIG_SET, "true");
    }
    return localConf;
}
From source file:com.cloudera.ByteCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(new Configuration());

    // Trim off the hadoop-specific args
    String[] remArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Pull in properties
    Options options = new Options();
    Option property = OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator()
            .withDescription("use value for given property").create("D");
    options.addOption(property);
    Option skipChecksums = new Option("skipChecksums", "skip checksums");
    options.addOption(skipChecksums);
    Option profile = new Option("profile", "profile tasks");
    options.addOption(profile);

    CommandLineParser parser = new BasicParser();
    CommandLine line = parser.parse(options, remArgs);

    Properties properties = line.getOptionProperties("D");
    for (Entry<Object, Object> prop : properties.entrySet()) {
        conf.set(prop.getKey().toString(), prop.getValue().toString());
        System.out.println("Set config key " + prop.getKey() + " to " + prop.getValue());
    }
    if (line.hasOption("skipChecksums")) {
        conf.setBoolean("bytecount.skipChecksums", true);
        System.out.println("Skipping checksums");
    }
    if (line.hasOption("profile")) {
        conf.setBoolean("mapred.task.profile", true);
        conf.set("mapred.task.profile.params",
                "-agentlib:hprof=cpu=samples,depth=100,interval=1ms,lineno=y,thread=y,file=%s");
        conf.set(MRJobConfig.NUM_MAP_PROFILES, "0");
        conf.set("mapred.task.profile.maps", "1");
        System.out.println("Profiling map tasks");
    }

    // Get the positional arguments out
    remArgs = line.getArgs();
    if (remArgs.length != 2) {
        System.err.println("Usage: ByteCount <inputBase> <outputBase>");
        System.exit(1);
    }
    String inputBase = remArgs[0];
    String outputBase = remArgs[1];

    Job job = Job.getInstance(conf);
    job.setInputFormatClass(ByteBufferInputFormat.class);
    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setMapperClass(ByteCountMapper.class);
    job.setReducerClass(ByteCountReducer.class);
    job.setCombinerClass(ByteCountReducer.class);
    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(inputBase));
    FileOutputFormat.setOutputPath(job, new Path(outputBase));
    job.setJarByClass(ByteCount.class);

    boolean success = job.waitForCompletion(true);

    Counters counters = job.getCounters();
    System.out.println("\tRead counters");
    printCounter(counters, READ_COUNTER.BYTES_READ);
    printCounter(counters, READ_COUNTER.LOCAL_BYTES_READ);
    printCounter(counters, READ_COUNTER.SCR_BYTES_READ);
    printCounter(counters, READ_COUNTER.ZCR_BYTES_READ);
    System.exit(success ? 0 : 1);
}
From source file:com.cloudera.circus.test.TestXTest.java
License:Open Source License
@Test
@TestHadoop
public void testHadoopMapReduce() throws Exception {
    JobConf conf = getHadoopConf();
    FileSystem fs = FileSystem.get(conf);
    JobClient jobClient = new JobClient(conf);
    try {
        Path inputDir = new Path(getHadoopTestDir(), "input");
        Path outputDir = new Path(getHadoopTestDir(), "output");

        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
        writer.write("a\n");
        writer.write("b\n");
        writer.write("c\n");
        writer.close();

        JobConf jobConf = getHadoopConf();
        jobConf.setInt("mapred.map.tasks", 1);
        jobConf.setInt("mapred.map.max.attempts", 1);
        jobConf.setInt("mapred.reduce.max.attempts", 1);
        jobConf.set("mapred.input.dir", inputDir.toString());
        jobConf.set("mapred.output.dir", outputDir.toString());
        final RunningJob runningJob = jobClient.submitJob(jobConf);
        waitFor(60 * 1000, true, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return runningJob.isComplete();
            }
        });
        Assert.assertTrue(runningJob.isSuccessful());
        Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000")));
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(outputDir, "part-00000"))));
        Assert.assertTrue(reader.readLine().trim().endsWith("a"));
        Assert.assertTrue(reader.readLine().trim().endsWith("b"));
        Assert.assertTrue(reader.readLine().trim().endsWith("c"));
        Assert.assertNull(reader.readLine());
        reader.close();
    } finally {
        fs.close();
        jobClient.close();
    }
}