List of usage examples for org.apache.hadoop.mapred JobConf get
public String get(String name)
Gets the value of the name property, or null
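Before the source-file listings, a minimal self-contained sketch of calling get directly. The property names and values below are made up for illustration; the two-argument get(name, defaultValue) overload shown alongside is inherited from Configuration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Set a property, then read it back with get(String name).
        conf.set("example.greeting", "hello");          // hypothetical key
        String greeting = conf.get("example.greeting"); // "hello"

        // A property that was never set comes back as null.
        String missing = conf.get("example.not.set");   // null

        // The overload inherited from Configuration substitutes a default for null.
        String fallback = conf.get("example.not.set", "fallback"); // "fallback"

        System.out.println(greeting + " / " + missing + " / " + fallback);
    }
}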
if no such property exists.
From source file:com.google.mr4c.hadoop.MR4CMRJobTest.java
License:Open Source License
@Test
public void testExport() throws Exception {
    JobConf jobConf = newJobConf();
    m_sourceMRJob.applyTo(jobConf);
    assertEquals(m_jar, jobConf.getJar());
    Cluster cluster = Cluster.extractFromConfig(jobConf);
    assertEquals(m_cluster, cluster);
    assertEquals("5", jobConf.get(MR4CMRJob.PROP_TASKS));
}
From source file:com.hadoopilluminated.examples.Join.java
License:Apache License
/**
 * The main driver for sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there is communication problems with the job
 *                     tracker.
 */
@Override
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    jobConf.setOutputFormat(outputFormatClass);
    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void defaults() throws Exception {
    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "20101116123015");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("20"));
    assertThat(job.get("mapred.output.compress"), equalTo("true"));
    assertThat(job.get("mapred.output.compression.type"), equalTo("BLOCK"));
    assertThat(job.get("mapred.output.compression.codec"),
            equalTo("org.apache.hadoop.io.compress.DefaultCodec"));
    assertThat(crush.getMaxFileBlocks(), equalTo(8));
    assertThat(job.get("crush.timestamp"), equalTo("20101116123015"));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement"),
            equalTo("crushed_file-20101116123015-${crush.task.num}-${crush.file.num}"));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void disableCompression() throws Exception {
    crush.createJobConfAndParseArgs("--compress=none", tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "20101116123015");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("20"));
    assertThat(job.get("mapred.output.compress"), equalTo("false"));
    assertThat(crush.getMaxFileBlocks(), equalTo(8));
    assertThat(job.get("crush.timestamp"), equalTo("20101116123015"));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement"),
            equalTo("crushed_file-20101116123015-${crush.task.num}-${crush.file.num}"));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void parse() throws Exception {
    crush.createJobConfAndParseArgs(
            "--regex", ".+/ads/.+",
            "--replacement", "foo",
            "--input-format", "org.apache.hadoop.mapred.TextInputFormat",
            "--output-format", "org.apache.hadoop.mapred.TextOutputFormat",

            "--regex", ".+/act/.+",
            "--replacement", "bar",
            "--input-format", "org.apache.hadoop.mapred.TextInputFormat",
            "--output-format", "org.apache.hadoop.mapred.SequenceFileOutputFormat",

            "--regex", ".+/bid/.+",
            "--replacement", "hello",
            "--input-format", "org.apache.hadoop.mapred.SequenceFileInputFormat",
            "--output-format", "org.apache.hadoop.mapred.TextOutputFormat",

            "--threshold", "0.5",
            "--max-file-blocks", "100",
            "--compress", "org.apache.hadoop.io.compress.DefaultCodec",

            tmp.newFolder("in").getAbsolutePath(), tmp.newFolder("out").getAbsolutePath(), "20101116123015");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("20"));
    assertThat(job.get("mapred.output.compress"), equalTo("true"));
    assertThat(job.get("mapred.output.compression.codec"),
            equalTo("org.apache.hadoop.io.compress.DefaultCodec"));
    assertThat(crush.getMaxFileBlocks(), equalTo(100));
    assertThat(job.get("crush.timestamp"), equalTo("20101116123015"));
    assertThat(job.get("crush.num.specs"), equalTo("3"));

    assertThat(job.get("crush.0.regex"), equalTo(".+/ads/.+"));
    assertThat(job.get("crush.0.regex.replacement"), equalTo("foo"));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.TextInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.TextOutputFormat"));

    assertThat(job.get("crush.1.regex"), equalTo(".+/act/.+"));
    assertThat(job.get("crush.1.regex.replacement"), equalTo("bar"));
    assertThat(job.get("crush.1.input.format"), equalTo("org.apache.hadoop.mapred.TextInputFormat"));
    assertThat(job.get("crush.1.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));

    assertThat(job.get("crush.2.regex"), equalTo(".+/bid/.+"));
    assertThat(job.get("crush.2.regex.replacement"), equalTo("hello"));
    assertThat(job.get("crush.2.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.2.output.format"), equalTo("org.apache.hadoop.mapred.TextOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void parseOldNoType() throws Exception {
    long millis = currentTimeMillis();

    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "80");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("80"));
    assertThat(Long.parseLong(job.get("crush.timestamp")), greaterThanOrEqualTo(millis));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(crush.getMaxFileBlocks(), equalTo(Integer.MAX_VALUE));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement")
            .matches("crushed_file-\\d+-\\$\\{crush.task.num\\}-\\$\\{crush.file.num\\}"), is(true));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void parseOldSequence() throws Exception {
    long millis = currentTimeMillis();

    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "80", "SEQUENCE");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("80"));
    assertThat(Long.parseLong(job.get("crush.timestamp")), greaterThanOrEqualTo(millis));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(crush.getMaxFileBlocks(), equalTo(Integer.MAX_VALUE));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement")
            .matches("crushed_file-\\d+-\\$\\{crush.task.num\\}-\\$\\{crush.file.num\\}"), is(true));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void parseOldText() throws Exception {
    long millis = currentTimeMillis();

    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "80", "TEXT");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("80"));
    assertThat(Long.parseLong(job.get("crush.timestamp")), greaterThanOrEqualTo(millis));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(crush.getMaxFileBlocks(), equalTo(Integer.MAX_VALUE));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement")
            .matches("crushed_file-\\d+-\\$\\{crush.task.num\\}-\\$\\{crush.file.num\\}"), is(true));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.TextInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.TextOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushPartitioner.java
License:Apache License
@Override
public void configure(JobConf job) {
    String path = job.get("crush.partition.map");
    int expPartitions = job.getNumReduceTasks();

    bucketToPartition = new HashMap<Text, Integer>(100);

    try {
        FileSystem fs = FileSystem.get(job);
        Reader reader = new Reader(fs, new Path(path), job);

        Text bucket = new Text();
        IntWritable partNum = new IntWritable();

        while (reader.next(bucket, partNum)) {
            int partNumValue = partNum.get();

            if (partNumValue < 0 || partNumValue >= expPartitions) {
                throw new IllegalArgumentException(
                        "Partition " + partNumValue + " not allowed with " + expPartitions + " reduce tasks");
            }

            Integer prev = bucketToPartition.put(new Text(bucket), partNumValue);

            if (null != prev) {
                throw new IllegalArgumentException("Bucket " + bucket + " appears more than once in " + path);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not read partition map from " + path, e);
    }

    if (new HashSet<Integer>(bucketToPartition.values()).size() > expPartitions) {
        throw new IllegalArgumentException(
                path + " contains more than " + expPartitions + " distinct partitions");
    }
}
From source file:com.hdfs.concat.crush.CrushReducer.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);

    this.job = job;

    taskNum = Integer.parseInt(job.get("mapred.tip.id").replaceFirst(".+_(\\d+)", "$1"));
    timestamp = Long.parseLong(job.get("crush.timestamp"));

    outDirPath = job.get("mapred.output.dir");

    if (null == outDirPath || outDirPath.isEmpty()) {
        throw new IllegalArgumentException("mapred.output.dir has no value");
    }

    /*
     * The files we write should be rooted in the "crush" subdir of the output directory to distinguish them
     * from the files created by the collector.
     */
    outDirPath = new Path(outDirPath + "/crush").toUri().getPath();

    /*
     * Configure the regular expressions and replacements we use to convert dir names to crush output file
     * names. Also get the directory data formats.
     */
    int numSpecs = job.getInt("crush.num.specs", 0);

    if (numSpecs <= 0) {
        throw new IllegalArgumentException(
                "Number of regular expressions must be zero or greater: " + numSpecs);
    }

    readCrushSpecs(numSpecs);

    placeHolderToValue.put("crush.task.num", Integer.toString(taskNum));
    placeHolderToValue.put("crush.timestamp", job.get("crush.timestamp"));

    try {
        fs = FileSystem.get(job);
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}