Usage examples for org.apache.hadoop.mapred.JobConf.set(String, String)
public void set(String name, String value)
Sets the value of the name property.
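Before the source-file examples below, a minimal hedged sketch of the call itself; the property name and value here are illustrative, not taken from the examples:

    import org.apache.hadoop.mapred.JobConf;

    public class JobConfSetExample {
        public static void main(String[] args) {
            // false = do not load default configuration resources
            JobConf job = new JobConf(false);

            // set(name, value) stores the value under the given property name ...
            job.set("fs.default.name", "file:///");

            // ... and get(name) reads it back.
            System.out.println(job.get("fs.default.name")); // prints file:///
        }
    }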
From source file: com.hdfs.concat.crush.CrushReducerTest.java
License: Apache License
@Test
public void missingInputRegex() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", TextOutputFormat.class.getName());

    // crush.1.regex is deliberately missing
    job.set("crush.1.regex.replacement", "bar");
    job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.1.output.format", TextOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No input regex: crush.1.regex".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file: com.hdfs.concat.crush.CrushReducerTest.java
License: Apache License
@Test
public void missingOutputRegex() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", TextOutputFormat.class.getName());

    // crush.1.regex.replacement is deliberately missing
    job.set("crush.1.regex", "hello");
    job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.1.output.format", TextOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No output replacement: crush.1.regex.replacement".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file: com.hdfs.concat.crush.CrushReducerTest.java
License: Apache License
@Test
public void missingInputFormat() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    // crush.1.input.format is deliberately missing
    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No input format: crush.1.input.format".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file: com.hdfs.concat.crush.CrushReducerTest.java
License: Apache License
@Test
public void inputFormatWrongType() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    // Object is not a file input format, so configure() must reject it
    job.set("crush.1.input.format", Object.class.getName());
    job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"Not a file input format: crush.1.input.format=java.lang.Object".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file: com.hdfs.concat.crush.CrushReducerTest.java
License: Apache License
@Test
public void missingOutputFormat() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    // crush.1.output.format is deliberately missing
    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"No output format: crush.1.output.format".equals(e.getMessage())) {
            throw e;
        }
    }
}
From source file: com.hdfs.concat.crush.CrushReducerTest.java
License: Apache License
@Test
public void outputFormatWrongType() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_14527_r_1234");

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.setLong("crush.timestamp", 98765);
    job.setLong("dfs.block.size", 1024 * 1024 * 64L);

    job.setInt("crush.num.specs", 2);

    job.set("crush.0.regex", "foo");
    job.set("crush.0.regex.replacement", "bar");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

    job.set("crush.1.regex", "hello");
    job.set("crush.1.regex.replacement", "hello");
    job.set("crush.1.input.format", TextInputFormat.class.getName());
    // Object is not an output format, so configure() must reject it
    job.set("crush.1.output.format", Object.class.getName());

    reducer = new CrushReducer();

    try {
        reducer.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!"Not an output format: crush.1.output.format=java.lang.Object".equals(e.getMessage())) {
            throw e;
        }
    }
}
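For contrast with the six failure cases above, here is a hedged sketch of a configuration that should satisfy the validation those tests exercise: a single spec with all four per-spec properties present. It reuses the same test fixture fields (outDir, reducer) and imports as the tests; whether configure() performs additional checks beyond these is an assumption.

    @Test
    public void completeSpecPassesValidation() throws Exception {
        JobConf job = new JobConf(false);
        job.set("mapred.tip.id", "task_201011081200_14527_r_1234");
        job.set("fs.default.name", "file:///");
        job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
        job.set("mapred.output.dir", outDir.getAbsolutePath());
        job.setLong("crush.timestamp", 98765);
        job.setLong("dfs.block.size", 1024 * 1024 * 64L);

        // One spec with regex, replacement, input format, and output format
        // all set, so none of the IllegalArgumentExceptions above should fire.
        job.setInt("crush.num.specs", 1);
        job.set("crush.0.regex", "foo");
        job.set("crush.0.regex.replacement", "bar");
        job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
        job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

        reducer = new CrushReducer();
        reducer.configure(job); // expected to return without throwing
    }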
From source file: com.ibm.bi.dml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer.java
License: Open Source License
/**
 * Replaces the -Xmx token in the JVM options stored under the given
 * configuration key with the given maximum memory size.
 *
 * @param job   job configuration to update
 * @param key   property key holding the JVM options string
 * @param bytes new maximum heap size in bytes
 */
public static void setMaxMemoryOpt(JobConf job, String key, long bytes) {
    String javaOptsOld = job.get(key);
    String javaOptsNew = null;

    //StringTokenizer st = new StringTokenizer( javaOptsOld, " " );
    String[] tokens = javaOptsOld.split(" "); //account also for no ' '
    StringBuilder sb = new StringBuilder();
    for (String arg : tokens) {
        if (arg.startsWith("-Xmx")) //search for max mem
        {
            sb.append("-Xmx");
            sb.append((bytes / (1024 * 1024)));
            sb.append("M");
        } else
            sb.append(arg);

        sb.append(" ");
    }

    javaOptsNew = sb.toString().trim();
    job.set(key, javaOptsNew);
}
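A hedged usage sketch for the helper above: the key mapred.child.java.opts and the starting value are illustrative assumptions, chosen because the method rewrites the -Xmx token of a JVM options string. Note the key must already hold a value, since the helper reads it with get() before splitting.

    JobConf job = new JobConf();

    // Seed the options string the helper will rewrite (assumed key and value).
    job.set("mapred.child.java.opts", "-Xmx512M -server");

    // Replace the -Xmx token with a 1 GiB limit; other tokens pass through unchanged.
    InfrastructureAnalyzer.setMaxMemoryOpt(job, "mapred.child.java.opts", 1024L * 1024 * 1024);

    // job.get("mapred.child.java.opts") now yields "-Xmx1024M -server"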
From source file: com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java
License: Open Source License
public static AssignRowIDMRReturn runAssignRowIDMRJob(String[] inputs, InputInfo[] inputInfos, int[] brlens,
        int[] bclens, String reblockInstructions, int replication, String[] smallestFiles, boolean transform,
        String naStrings, String specFile) throws Exception {
    AssignRowIDMRReturn ret = new AssignRowIDMRReturn();
    JobConf job;
    job = new JobConf(CSVReblockMR.class);
    job.setJobName("Assign-RowID-MR");

    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false,
            ConvertTarget.CELL);
    job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, smallestFiles);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCSVReblockInstructions(job, reblockInstructions);

    //set up the replication factor for the results
    job.setInt("dfs.replication", replication);

    //set up the number of reducers
    job.setNumReduceTasks(1);

    // Print the complete instruction
    //if (LOG.isTraceEnabled())
    //    inst.printCompelteMRJobInstruction();

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVAssignRowIDMapper.class);
    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(OffsetCount.class);

    //configure reducer
    job.setReducerClass(CSVAssignRowIDReducer.class);

    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    //set up the output file
    ret.counterFile = new Path(MRJobConfiguration.constructTempOutputFilename());
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, ret.counterFile);
    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(OffsetCount.class);

    // setup properties relevant to transform
    job.setBoolean(MRJobConfiguration.TF_TRANSFORM, transform);
    if (transform) {
        if (naStrings != null)
            // Adding "dummy" string to handle the case of na_strings = ""
            job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(naStrings));
        job.set(MRJobConfiguration.TF_SPEC_FILE, specFile);
    }

    RunningJob runjob = JobClient.runJob(job);

    /* Process different counters */
    Group rgroup = runjob.getCounters().getGroup(NUM_ROWS_IN_MATRIX);
    Group cgroup = runjob.getCounters().getGroup(NUM_COLS_IN_MATRIX);
    ret.rlens = new long[inputs.length];
    ret.clens = new long[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        // rows and columns per input, reported via counters
        ret.rlens[i] = rgroup.getCounter(Integer.toString(i));
        ret.clens[i] = cgroup.getCounter(Integer.toString(i));
    }
    return ret;
}
From source file: com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java
License: Open Source License
private static JobReturn runCSVReblockJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos,
        long[] rlens, long[] clens, int[] brlens, int[] bclens, String reblockInstructions,
        String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes,
        String[] outputs, OutputInfo[] outputInfos, Path counterFile, String[] smallestFiles) throws Exception {
    JobConf job;
    job = new JobConf(ReblockMR.class);
    job.setJobName("CSV-Reblock-MR");

    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false,
            ConvertTarget.CELL);
    job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, smallestFiles);

    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);

    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCSVReblockInstructions(job, reblockInstructions);

    //set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);

    //set up the replication factor for the results
    job.setInt("dfs.replication", replication);

    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, null,
            reblockInstructions, null, otherInstructionsInReducer, resultIndexes);

    MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, null,
            reblockInstructions, null, null, otherInstructionsInReducer, resultIndexes, mapoutputIndexes, false);

    MatrixCharacteristics[] stats = ret.stats;

    //set up the number of reducers
    int numRed = WriteCSVMR.determineNumReducers(rlens, clens,
            ConfigurationManager.getConfig().getIntValue(DMLConfig.NUM_REDUCERS), ret.numReducerGroups);
    job.setNumReduceTasks(numRed);

    // Print the complete instruction
    //if (LOG.isTraceEnabled())
    //    inst.printCompelteMRJobInstruction(stats);

    // Update resultDimsUnknown based on computed "stats"
    byte[] resultDimsUnknown = new byte[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
            resultDimsUnknown[i] = (byte) 1;
        } else {
            resultDimsUnknown[i] = (byte) 0;
        }
    }

    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true,
            true);

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVReblockMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(BlockRow.class);

    //configure reducer
    job.setReducerClass(CSVReblockReducer.class);

    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    Path cachefile = new Path(counterFile, "part-00000");
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);
    job.set(ROWID_FILE_NAME, cachefile.toString());

    RunningJob runjob = JobClient.runJob(job);

    MapReduceTool.deleteFileIfExistOnHDFS(counterFile, job);

    /* Process different counters */
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        // number of non-zeros
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        // System.out.println("result #"+resultIndexes[i]+" ===>\n"+stats[i]);
    }
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
From source file: com.ibm.bi.dml.runtime.matrix.data.hadoopfix.DelegatingInputFormat.java
License: Apache License
@SuppressWarnings("unchecked")
public RecordReader<K, V> getRecordReader(InputSplit split, JobConf conf, Reporter reporter) throws IOException {

    // Find the InputFormat and then the RecordReader from the TaggedInputSplit.
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(taggedInputSplit.getInputFormatClass(), conf);

    InputSplit inputSplit = taggedInputSplit.getInputSplit();
    if (inputSplit instanceof FileSplit) {
        FileSplit fileSplit = (FileSplit) inputSplit;
        conf.set("map.input.file", fileSplit.getPath().toString());
        conf.setLong("map.input.start", fileSplit.getStart());
        conf.setLong("map.input.length", fileSplit.getLength());
    }

    return inputFormat.getRecordReader(taggedInputSplit.getInputSplit(), conf, reporter);
}
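A hedged sketch of the consuming side: an old-API mapper base class whose configure() hook reads back the three per-split properties that getRecordReader sets above. The class name is illustrative; the property names match the ones the method writes.

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;

    // Illustrative base class: captures the split metadata that the
    // delegating input format stored in the JobConf for this task.
    public class SplitAwareMapperBase extends MapReduceBase {
        protected String inputFile;
        protected long splitStart;
        protected long splitLength;

        @Override
        public void configure(JobConf conf) {
            inputFile = conf.get("map.input.file");             // path of the current split's file
            splitStart = conf.getLong("map.input.start", 0L);   // byte offset where the split begins
            splitLength = conf.getLong("map.input.length", 0L); // length of the split in bytes
        }
    }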