List of usage examples for org.apache.hadoop.mapred JobConf setInt
public void setInt(String name, int value)
Sets the value of the name property to an int.
From source file:com.github.gaoyangthu.demo.mapred.terasort.TeraSort.java
License:Apache License
public int run(String[] args) throws Exception { LOG.info("starting"); JobConf job = (JobConf) getConf(); Path inputDir = new Path(args[0]); inputDir = inputDir.makeQualified(inputDir.getFileSystem(job)); Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME); URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME); TeraInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraSort"); job.setJarByClass(TeraSort.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(TeraInputFormat.class); job.setOutputFormat(TeraOutputFormat.class); job.setPartitionerClass(TotalOrderPartitioner.class); TeraInputFormat.writePartitionFile(job, partitionFile); DistributedCache.addCacheFile(partitionUri, job); DistributedCache.createSymlink(job); job.setInt("dfs.replication", 1); TeraOutputFormat.setFinalSync(job, true); JobClient.runJob(job);// www . j a v a 2 s .c o m LOG.info("done"); return 0; }
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Before public void before() throws IOException { crush = new Crush(); JobConf job = new JobConf(false); crush.setConf(job);//from ww w .j a va 2 s . c o m job.set("fs.default.name", "file:///"); job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem"); job.setInt("mapred.reduce.tasks", 20); job.setLong("dfs.block.size", 1024 * 1024 * 64); FileSystem fs = FileSystem.get(job); fs.setWorkingDirectory(new Path(tmp.getRoot().getAbsolutePath())); crush.setFileSystem(fs); }
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Before public void setupReducer() { JobConf job = new JobConf(false); job.set("mapred.tip.id", "task_201011081200_014527_r_001234"); job.set("mapred.task.id", "attempt_201011081200_14527_r_001234_0"); outDir = tmp.newFolder("out"); tmp.newFolder("out/_temporary"); job.set("mapred.output.dir", outDir.getAbsolutePath()); job.set("fs.default.name", "file:///"); job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem"); job.setLong("crush.timestamp", 98765); job.setInt("crush.num.specs", 3); job.set("crush.0.regex", ".+/dir"); job.set("crush.0.regex.replacement", "firstregex-${crush.timestamp}-${crush.task.num}-${crush.file.num}"); job.set("crush.0.input.format", SequenceFileInputFormat.class.getName()); job.set("crush.0.output.format", TextOutputFormat.class.getName()); job.set("crush.1.regex", ".+/dir/([^/]+/)*(.+)"); job.set("crush.1.regex.replacement", "secondregex-$2-${crush.timestamp}-${crush.task.num}-${crush.file.num}"); job.set("crush.1.input.format", TextInputFormat.class.getName()); job.set("crush.1.output.format", TextOutputFormat.class.getName()); job.set("crush.2.regex", ".+/other"); job.set("crush.2.regex.replacement", "${crush.timestamp}-${crush.task.num}-middle-${crush.file.num}-tail"); job.set("crush.2.input.format", TextInputFormat.class.getName()); job.set("crush.2.output.format", SequenceFileOutputFormat.class.getName()); reducer = new CrushReducer(); reducer.configure(job);//from w w w. ja v a 2s. c o m }
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
/**
 * Spec 1 has no {@code crush.1.regex}; {@code configure} must reject the job with an
 * {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void missingInputRegex() {
    final String expectedMessage = "No input regex: crush.1.regex";

    JobConf conf = new JobConf(false);
    conf.set("mapred.tip.id", "task_201011081200_14527_r_1234");
    conf.set("fs.default.name", "file:///");
    conf.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    conf.set("mapred.output.dir", outDir.getAbsolutePath());
    conf.setLong("crush.timestamp", 98765);
    conf.setLong("dfs.block.size", 1024 * 1024 * 64L);
    conf.setInt("crush.num.specs", 2);

    // Spec 0 is complete.
    conf.set("crush.0.regex", "foo");
    conf.set("crush.0.regex.replacement", "bar");
    conf.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    conf.set("crush.0.output.format", TextOutputFormat.class.getName());

    // Spec 1 deliberately omits crush.1.regex.
    conf.set("crush.1.regex.replacement", "bar");
    conf.set("crush.1.input.format", SequenceFileInputFormat.class.getName());
    conf.set("crush.1.output.format", TextOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(conf);
        fail();
    } catch (IllegalArgumentException rejected) {
        if (expectedMessage.equals(rejected.getMessage())) {
            return;
        }
        throw rejected;
    }
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
/**
 * Spec 1 has no {@code crush.1.regex.replacement}; {@code configure} must reject the job
 * with an {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void missingOutputRegex() {
    final String expectedMessage = "No output replacement: crush.1.regex.replacement";

    JobConf conf = new JobConf(false);
    conf.set("mapred.tip.id", "task_201011081200_14527_r_1234");
    conf.set("fs.default.name", "file:///");
    conf.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    conf.set("mapred.output.dir", outDir.getAbsolutePath());
    conf.setLong("crush.timestamp", 98765);
    conf.setLong("dfs.block.size", 1024 * 1024 * 64L);
    conf.setInt("crush.num.specs", 2);

    // Spec 0 is complete.
    conf.set("crush.0.regex", "foo");
    conf.set("crush.0.regex.replacement", "bar");
    conf.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    conf.set("crush.0.output.format", TextOutputFormat.class.getName());

    // Spec 1 deliberately omits crush.1.regex.replacement.
    conf.set("crush.1.regex", "hello");
    conf.set("crush.1.input.format", SequenceFileInputFormat.class.getName());
    conf.set("crush.1.output.format", TextOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(conf);
        fail();
    } catch (IllegalArgumentException rejected) {
        if (expectedMessage.equals(rejected.getMessage())) {
            return;
        }
        throw rejected;
    }
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
/**
 * Spec 1 has no {@code crush.1.input.format}; {@code configure} must reject the job with an
 * {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void missingInputFormat() {
    final String expectedMessage = "No input format: crush.1.input.format";

    JobConf conf = new JobConf(false);
    conf.set("mapred.tip.id", "task_201011081200_14527_r_1234");
    conf.set("fs.default.name", "file:///");
    conf.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    conf.set("mapred.output.dir", outDir.getAbsolutePath());
    conf.setLong("crush.timestamp", 98765);
    conf.setLong("dfs.block.size", 1024 * 1024 * 64L);
    conf.setInt("crush.num.specs", 2);

    // Spec 0 is complete.
    conf.set("crush.0.regex", "foo");
    conf.set("crush.0.regex.replacement", "bar");
    conf.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    conf.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    // Spec 1 deliberately omits crush.1.input.format.
    conf.set("crush.1.regex", "hello");
    conf.set("crush.1.regex.replacement", "hello");
    conf.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(conf);
        fail();
    } catch (IllegalArgumentException rejected) {
        if (expectedMessage.equals(rejected.getMessage())) {
            return;
        }
        throw rejected;
    }
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Test public void inputFormatWrongType() { JobConf job = new JobConf(false); job.set("mapred.tip.id", "task_201011081200_14527_r_1234"); job.set("fs.default.name", "file:///"); job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem"); job.set("mapred.output.dir", outDir.getAbsolutePath()); job.setLong("crush.timestamp", 98765); job.setLong("dfs.block.size", 1024 * 1024 * 64L); job.setInt("crush.num.specs", 2); job.set("crush.0.regex", "foo"); job.set("crush.0.regex.replacement", "bar"); job.set("crush.0.input.format", SequenceFileInputFormat.class.getName()); job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName()); job.set("crush.1.regex", "hello"); job.set("crush.1.regex.replacement", "hello"); job.set("crush.1.input.format", Object.class.getName()); job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName()); reducer = new CrushReducer(); try {//from w w w . j a v a 2 s . c o m reducer.configure(job); fail(); } catch (IllegalArgumentException e) { if (!"Not a file input format: crush.1.input.format=java.lang.Object".equals(e.getMessage())) { throw e; } } }
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Test public void missingOutputFormat() { JobConf job = new JobConf(false); job.set("mapred.tip.id", "task_201011081200_14527_r_1234"); job.set("fs.default.name", "file:///"); job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem"); job.set("mapred.output.dir", outDir.getAbsolutePath()); job.setLong("crush.timestamp", 98765); job.setLong("dfs.block.size", 1024 * 1024 * 64L); job.setInt("crush.num.specs", 2); job.set("crush.0.regex", "foo"); job.set("crush.0.regex.replacement", "bar"); job.set("crush.0.input.format", SequenceFileInputFormat.class.getName()); job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName()); job.set("crush.1.regex", "hello"); job.set("crush.1.regex.replacement", "hello"); job.set("crush.1.input.format", SequenceFileInputFormat.class.getName()); reducer = new CrushReducer(); try {//from w w w. j ava 2 s . com reducer.configure(job); fail(); } catch (IllegalArgumentException e) { if (!"No output format: crush.1.output.format".equals(e.getMessage())) { throw e; } } }
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Test public void outputFormatWrongType() { JobConf job = new JobConf(false); job.set("mapred.tip.id", "task_201011081200_14527_r_1234"); job.set("fs.default.name", "file:///"); job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem"); job.set("mapred.output.dir", outDir.getAbsolutePath()); job.setLong("crush.timestamp", 98765); job.setLong("dfs.block.size", 1024 * 1024 * 64L); job.setInt("crush.num.specs", 2); job.set("crush.0.regex", "foo"); job.set("crush.0.regex.replacement", "bar"); job.set("crush.0.input.format", SequenceFileInputFormat.class.getName()); job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName()); job.set("crush.1.regex", "hello"); job.set("crush.1.regex.replacement", "hello"); job.set("crush.1.input.format", TextInputFormat.class.getName()); job.set("crush.1.output.format", Object.class.getName()); reducer = new CrushReducer(); try {//from ww w . jav a 2 s.c o m reducer.configure(job); fail(); } catch (IllegalArgumentException e) { if (!"Not an output format: crush.1.output.format=java.lang.Object".equals(e.getMessage())) { throw e; } } }
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerRemoteMR.java
License:Open Source License
@Override protected void partitionMatrix(MatrixObject in, String fnameNew, InputInfo ii, OutputInfo oi, long rlen, long clen, int brlen, int bclen) throws DMLRuntimeException { String jobname = "ParFor-DPMR"; long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0; JobConf job; job = new JobConf(DataPartitionerRemoteMR.class); if (_pfid >= 0) //use in parfor job.setJobName(jobname + _pfid); else //use for partition instruction job.setJobName("Partition-MR"); //maintain dml script counters Statistics.incrementNoOfCompiledMRJobs(); try {//from w ww . j a va2 s . c o m //force writing to disk (typically not required since partitioning only applied if dataset exceeds CP size) in.exportData(); //written to disk iff dirty Path path = new Path(in.getFileName()); ///// //configure the MR job MRJobConfiguration.setPartitioningInfo(job, rlen, clen, brlen, bclen, ii, oi, _format, _n, fnameNew, _keepIndexes); //set mappers, reducers, combiners job.setMapperClass(DataPartitionerRemoteMapper.class); job.setReducerClass(DataPartitionerRemoteReducer.class); if (oi == OutputInfo.TextCellOutputInfo) { //binary cell intermediates for reduced IO job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(PairWritableCell.class); } else if (oi == OutputInfo.BinaryCellOutputInfo) { job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(PairWritableCell.class); } else if (oi == OutputInfo.BinaryBlockOutputInfo) { job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(PairWritableBlock.class); //check Alignment if ((_format == PDataPartitionFormat.ROW_BLOCK_WISE_N && rlen > _n && _n % brlen != 0) || (_format == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && clen > _n && _n % bclen != 0)) { throw new DMLRuntimeException( "Data partitioning format " + _format + " requires aligned blocks."); } } //set input format job.setInputFormat(ii.inputFormatClass); //set the input path and output path FileInputFormat.setInputPaths(job, path); //set output 
path MapReduceTool.deleteFileIfExistOnHDFS(fnameNew); //FileOutputFormat.setOutputPath(job, pathNew); job.setOutputFormat(NullOutputFormat.class); ////// //set optimization parameters //set the number of mappers and reducers //job.setNumMapTasks( _numMappers ); //use default num mappers long reducerGroups = -1; switch (_format) { case ROW_WISE: reducerGroups = rlen; break; case COLUMN_WISE: reducerGroups = clen; break; case ROW_BLOCK_WISE: reducerGroups = (rlen / brlen) + ((rlen % brlen == 0) ? 0 : 1); break; case COLUMN_BLOCK_WISE: reducerGroups = (clen / bclen) + ((clen % bclen == 0) ? 0 : 1); break; case ROW_BLOCK_WISE_N: reducerGroups = (rlen / _n) + ((rlen % _n == 0) ? 0 : 1); break; case COLUMN_BLOCK_WISE_N: reducerGroups = (clen / _n) + ((clen % _n == 0) ? 0 : 1); break; default: //do nothing } job.setNumReduceTasks((int) Math.min(_numReducers, reducerGroups)); //use FLEX scheduler configuration properties /*if( ParForProgramBlock.USE_FLEX_SCHEDULER_CONF ) { job.setInt("flex.map.min", 0); job.setInt("flex.map.max", _numMappers); job.setInt("flex.reduce.min", 0); job.setInt("flex.reduce.max", _numMappers); }*/ //disable automatic tasks timeouts and speculative task exec job.setInt("mapred.task.timeout", 0); job.setMapSpeculativeExecution(false); //set up preferred custom serialization framework for binary block format if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION) MRJobConfiguration.addBinaryBlockSerializationFramework(job); //enables the reuse of JVMs (multiple tasks per MR task) if (_jvmReuse) job.setNumTasksToExecutePerJvm(-1); //unlimited //enables compression - not conclusive for different codecs (empirically good compression ratio, but significantly slower) //job.set("mapred.compress.map.output", "true"); //job.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"); //set the replication factor for the results job.setInt("dfs.replication", _replication); //set up map/reduce memory configurations (if in AM context) 
DMLConfig config = ConfigurationManager.getConfig(); DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config); //set the max number of retries per map task // disabled job-level configuration to respect cluster configuration // note: this refers to hadoop2, hence it never had effect on mr1 //job.setInt("mapreduce.map.maxattempts", _max_retry); //set unique working dir MRJobConfiguration.setUniqueWorkingDir(job); ///// // execute the MR job JobClient.runJob(job); //maintain dml script counters Statistics.incrementNoOfExecutedMRJobs(); } catch (Exception ex) { throw new DMLRuntimeException(ex); } if (DMLScript.STATISTICS && _pfid >= 0) { long t1 = System.nanoTime(); //only for parfor Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0); } }