List of usage examples for org.apache.hadoop.mapred.JobConf.setNumReduceTasks
public void setNumReduceTasks(int n)
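The examples below come from real projects. Before them, here is a minimal, self-contained sketch (hypothetical class and paths, not taken from any project on this page) showing where setNumReduceTasks typically sits in a classic org.apache.hadoop.mapred driver; with this old API the value set here determines the number of reduce tasks, and setNumReduceTasks(0) makes the job map-only.

// Minimal hypothetical sketch; class name, paths, and task counts are placeholders.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class ReduceCountDemo {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(ReduceCountDemo.class);
        conf.setJobName("reduce-count-demo");

        // Map count is only a hint; reduce count is exact.
        // setNumReduceTasks(0) would turn this into a map-only job.
        conf.setNumMapTasks(4);
        conf.setNumReduceTasks(2);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        // Mapper/reducer classes omitted; the old API falls back to the identity classes.

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}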
From source file:TestFormatStorageInputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestFormatStorageInputFormat <input> <output>");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestFormatStorageInputFormat.class);
        conf.setJobName("TestFormatStorageInputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(FormatStorageOutputFormat.class);
        conf.set("mapred.output.compress", "false");
        conf.set("mapred.input.dir", argv[0]);

        Head head = new Head();
        initHead(head);
        head.toJobConf(conf);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new FormatStorageInputFormat();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);

        for (int i = 0; i < size; i++) {
            FormatStorageSplit split = (FormatStorageSplit) inputSplits[i];
            System.out.printf("split:" + i + "offset:" + split.getStart() + "len:" + split.getLength()
                    + "path:" + conf.get(ConstVar.InputPath) + "beginLine:" + split.getBeginLine()
                    + "endLine:" + split.getEndLine() + "\n");
        }

        {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);

                WritableComparable key;
                Writable value;
                key = currRecReader.createKey();
                value = currRecReader.createValue();

                long begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {
                    Record record = (Record) value;
                    Object row = serDe.deserialize(record);
                    count++;
                }
                long end = System.currentTimeMillis();

                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " record read over, delay " + delay + " s");
            }
            System.out.println("total delay:" + totalDelay);
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}
From source file:NgramMatrixBuilder.java
License:Apache License
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), NgramMatrixBuilder.class);
    conf.setJobName("ngrammatrixbuilder");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    TextInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    JobClient.runJob(conf);
    return 0;
}
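The -m and -r flags parsed above feed directly into setNumMapTasks and setNumReduceTasks. As a usage illustration only, here is a hedged sketch of how such a driver is typically launched; it assumes NgramMatrixBuilder extends Configured and implements Tool (which its use of getConf() and run(String[]) suggests), and the paths and counts are placeholders.

// Hypothetical launcher for the driver above; paths and task counts are placeholders.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class RunNgramMatrixBuilder {
    public static void main(String[] args) throws Exception {
        // "-r 8" ends up in conf.setNumReduceTasks(8) inside run().
        int exitCode = ToolRunner.run(new Configuration(), new NgramMatrixBuilder(),
                new String[] { "-m", "16", "-r", "8", "input/ngrams", "output/matrix" });
        System.exit(exitCode);
    }
}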
From source file:TestColumnStorageOutputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageOutputFormat <output> <count>");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageOutputFormat.class);
        conf.setJobName("TestColumnStorageOutputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);

        conf.setOutputFormat(ColumnStorageOutputFormat.class);
        conf.set("mapred.output.compress", "false");
        conf.set("mapred.output.dir", argv[0]);

        Head head = new Head();
        initHead(head);
        head.toJobConf(conf);

        Path outputPath = new Path(argv[0]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        FileSystem fs = FileSystem.get(conf);
        MyColumnOutputFormat output = new MyColumnOutputFormat(head, conf, outputPath);

        long begin = System.currentTimeMillis();
        int count = Integer.valueOf(argv[1]);
        String string = "hello konten";

        for (int i = 0; i < count; i++) {
            Record record = new Record((short) 210);
            for (short j = 0; j < 30; j++) {
                record.addValue(new FieldValue((byte) 1, (short) (j * 7 + 0)));
                record.addValue(new FieldValue((short) 2, (short) (j * 7 + 1)));
                record.addValue(new FieldValue((int) 3, (short) (j * 7 + 2)));
                record.addValue(new FieldValue((long) 4, (short) (j * 7 + 3)));
                record.addValue(new FieldValue((float) 5.5, (short) (j * 7 + 4)));
                record.addValue(new FieldValue((double) 6.6, (short) (j * 7 + 5)));
                record.addValue(new FieldValue((double) 7.7, (short) (j * 7 + 6)));
            }
            output.doWrite(record);

            if (i % 100000 == 0) {
                long end = System.currentTimeMillis();
                System.out.println(i + "record write, delay:" + (end - begin) / 1000 + "s");
            }
        }
        long end = System.currentTimeMillis();
        System.out.println(count + "record write over, delay:" + (end - begin) / 1000 + "s");
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}
From source file:BMTColumnLoader.java
License:Apache License
public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), BMTColumnLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    conf.setJobName("BMTColumnLoader");
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}
From source file:MRDriver.java
License:Apache License
public int run(String args[]) throws Exception {
    FileSystem fs = null;
    Path samplesMapPath = null;

    float epsilon = Float.parseFloat(args[0]);
    double delta = Double.parseDouble(args[1]);
    int minFreqPercent = Integer.parseInt(args[2]);
    int d = Integer.parseInt(args[3]);
    int datasetSize = Integer.parseInt(args[4]);
    int numSamples = Integer.parseInt(args[5]);
    double phi = Double.parseDouble(args[6]);
    Random rand;

    /************************ Job 1 (local FIM) Configuration ************************/
    JobConf conf = new JobConf(getConf());

    /*
     * Compute the number of required "votes" for an itemset to be
     * declared frequent.
     */
    // The +1 at the end is needed to ensure reqApproxNum > numSamples / 2.
    int reqApproxNum = (int) Math
            .floor((numSamples * (1 - phi)) - Math.sqrt(numSamples * (1 - phi) * 2 * Math.log(1 / delta))) + 1;
    int sampleSize = (int) Math.ceil((2 / Math.pow(epsilon, 2)) * (d + Math.log(1 / phi)));
    //System.out.println("reducersNum: " + numSamples + " reqApproxNum: " + reqApproxNum);

    conf.setInt("PARMM.reducersNum", numSamples);
    conf.setInt("PARMM.datasetSize", datasetSize);
    conf.setInt("PARMM.minFreqPercent", minFreqPercent);
    conf.setInt("PARMM.sampleSize", sampleSize);
    conf.setFloat("PARMM.epsilon", epsilon);

    // Set the number of reducers equal to the number of samples, to
    // maximize parallelism. Required by our Partitioner.
    conf.setNumReduceTasks(numSamples);

    // XXX: why do we disable the speculative execution? MR
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setInt("mapred.task.timeout", MR_TIMEOUT_MILLI);

    /*
     * Enable compression of map output.
     *
     * We do it for this job and not for the aggregation one because
     * each mapper there only prints out one record for each itemset,
     * so there isn't much to compress, I'd say. MR
     *
     * In Amazon MapReduce, compression of the map output seems to
     * happen by default and the Snappy codec is used, which is
     * extremely fast.
     */
    conf.setBoolean("mapred.compress.map.output", true);
    //conf.setMapOutputCompressorClass(com.hadoop.compression.lzo.LzoCodec.class);

    conf.setJarByClass(MRDriver.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    // We write the collections found in a reducer as a SequenceFile.
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(args[9]));

    // set the mapper class based on command line option
    switch (Integer.parseInt(args[7])) {
    case 1:
        System.out.println("running partition mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(PartitionMapper.class);
        break;
    case 2:
        System.out.println("running binomial mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(BinomialSamplerMapper.class);
        break;
    case 3:
        System.out.println("running coin mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(CoinFlipSamplerMapper.class);
        // Note: there is no break here, so case 3 falls through into case 4 below.
    case 4:
        System.out.println("running sampler mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(InputSamplerMapper.class);

        // create a random sample of size T*m
        rand = new Random();
        long sampling_start_time = System.nanoTime();
        int[] samples = new int[numSamples * sampleSize];
        for (int i = 0; i < numSamples * sampleSize; i++) {
            samples[i] = rand.nextInt(datasetSize);
        }

        // for each key in the sample, create a list of all T samples to which this key belongs
        Hashtable<LongWritable, ArrayList<IntWritable>> hashTable = new Hashtable<LongWritable, ArrayList<IntWritable>>();
        for (int i = 0; i < numSamples * sampleSize; i++) {
            ArrayList<IntWritable> sampleIDs = null;
            LongWritable key = new LongWritable(samples[i]);
            if (hashTable.containsKey(key))
                sampleIDs = hashTable.get(key);
            else
                sampleIDs = new ArrayList<IntWritable>();
            sampleIDs.add(new IntWritable(i % numSamples));
            hashTable.put(key, sampleIDs);
        }

        /*
         * Convert the Hashtable to a MapWritable which we will
         * write to HDFS and distribute to all Mappers using
         * DistributedCache.
         */
        MapWritable map = new MapWritable();
        for (LongWritable key : hashTable.keySet()) {
            ArrayList<IntWritable> sampleIDs = hashTable.get(key);
            IntArrayWritable sampleIDsIAW = new IntArrayWritable();
            sampleIDsIAW.set(sampleIDs.toArray(new IntWritable[sampleIDs.size()]));
            map.put(key, sampleIDsIAW);
        }

        fs = FileSystem.get(URI.create("samplesMap.ser"), conf);
        samplesMapPath = new Path("samplesMap.ser");
        FSDataOutputStream out = fs.create(samplesMapPath, true);
        map.write(out);
        out.sync();
        out.close();
        DistributedCache.addCacheFile(new URI(fs.getWorkingDirectory() + "/samplesMap.ser#samplesMap.ser"),
                conf);

        // stop the sampling timer
        long sampling_end_time = System.nanoTime();
        long sampling_runtime = (sampling_end_time - sampling_start_time) / 1000000;
        System.out.println("sampling runtime (milliseconds): " + sampling_runtime);
        break; // end switch case
    case 5:
        System.out.println("running random integer partition mapper...");
        conf.setInputFormat(WholeSplitInputFormat.class);
        Path inputFilePath = new Path(args[8]);
        WholeSplitInputFormat.addInputPath(conf, inputFilePath);
        conf.setMapperClass(RandIntPartSamplerMapper.class);

        // Compute number of map tasks.
        fs = inputFilePath.getFileSystem(conf);
        FileStatus inputFileStatus = fs.getFileStatus(inputFilePath);
        long len = inputFileStatus.getLen();
        long blockSize = inputFileStatus.getBlockSize();
        conf.setLong("mapred.min.split.size", blockSize);
        conf.setLong("mapred.max.split.size", blockSize);
        int mapTasksNum = ((int) (len / blockSize)) + 1;
        conf.setNumMapTasks(mapTasksNum);
        //System.out.println("len: " + len + " blockSize: "
        //        + blockSize + " mapTasksNum: " + mapTasksNum);

        // Extract random integer partition of total sample
        // size into up to mapTasksNum partitions.
        // XXX I'm not sure this is a correct way to do it.
        rand = new Random();
        IntWritable[][] toSampleArr = new IntWritable[mapTasksNum][numSamples];
        for (int j = 0; j < numSamples; j++) {
            IntWritable[] tempToSampleArr = new IntWritable[mapTasksNum];
            int sum = 0;
            int i;
            for (i = 0; i < mapTasksNum - 1; i++) {
                int size = rand.nextInt(sampleSize - sum);
                tempToSampleArr[i] = new IntWritable(size);
                sum += size;
                if (sum > numSamples * sampleSize) {
                    System.out.println("Something went wrong generating the sample sizes");
                    System.exit(1);
                }
                if (sum == sampleSize) {
                    break;
                }
            }
            if (i == mapTasksNum - 1) {
                tempToSampleArr[i] = new IntWritable(sampleSize - sum);
            } else {
                for (; i < mapTasksNum; i++) {
                    tempToSampleArr[i] = new IntWritable(0);
                }
            }
            Collections.shuffle(Arrays.asList(tempToSampleArr));
            for (i = 0; i < mapTasksNum; i++) {
                toSampleArr[i][j] = tempToSampleArr[i];
            }
        }
        for (int i = 0; i < mapTasksNum; i++) {
            DefaultStringifier.storeArray(conf, toSampleArr[i], "PARMM.toSampleArr_" + i);
        }
        break;
    default:
        System.err.println("Wrong Mapper ID. Can only be in [1,5]");
        System.exit(1);
        break;
    }

    /*
     * We don't use the default hash partitioner because we want to
     * maximize the parallelism. That's why we also fix the number
     * of reducers.
     */
    conf.setPartitionerClass(FIMPartitioner.class);

    conf.setReducerClass(FIMReducer.class);

    /************************ Job 2 (aggregation) Configuration ************************/
    JobConf confAggr = new JobConf(getConf());

    confAggr.setInt("PARMM.reducersNum", numSamples);
    confAggr.setInt("PARMM.reqApproxNum", reqApproxNum);
    confAggr.setInt("PARMM.sampleSize", sampleSize);
    confAggr.setFloat("PARMM.epsilon", epsilon);

    // XXX: Why do we disable speculative execution? MR
    confAggr.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    confAggr.setInt("mapred.task.timeout", MR_TIMEOUT_MILLI);

    confAggr.setJarByClass(MRDriver.class);

    confAggr.setMapOutputKeyClass(Text.class);
    confAggr.setMapOutputValueClass(DoubleWritable.class);

    confAggr.setOutputKeyClass(Text.class);
    confAggr.setOutputValueClass(Text.class);

    confAggr.setMapperClass(AggregateMapper.class);
    confAggr.setReducerClass(AggregateReducer.class);

    confAggr.setInputFormat(CombineSequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(confAggr, new Path(args[9]));

    FileOutputFormat.setOutputPath(confAggr, new Path(args[10]));

    long FIMjob_start_time = System.currentTimeMillis();
    RunningJob FIMjob = JobClient.runJob(conf);
    long FIMjob_end_time = System.currentTimeMillis();

    RunningJob aggregateJob = JobClient.runJob(confAggr);
    long aggrJob_end_time = System.currentTimeMillis();

    long FIMjob_runtime = FIMjob_end_time - FIMjob_start_time;
    long aggrJob_runtime = aggrJob_end_time - FIMjob_end_time;

    if (args[7].equals("4")) {
        // Remove samplesMap file
        fs.delete(samplesMapPath, false);
    }

    Counters counters = FIMjob.getCounters();
    Counters.Group FIMMapperStartTimesCounters = counters.getGroup("FIMMapperStart");
    long[] FIMMapperStartTimes = new long[FIMMapperStartTimesCounters.size()];
    int i = 0;
    for (Counters.Counter counter : FIMMapperStartTimesCounters) {
        FIMMapperStartTimes[i++] = counter.getCounter();
    }

    Counters.Group FIMMapperEndTimesCounters = counters.getGroup("FIMMapperEnd");
    long[] FIMMapperEndTimes = new long[FIMMapperEndTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : FIMMapperEndTimesCounters) {
        FIMMapperEndTimes[i++] = counter.getCounter();
    }

    Counters.Group FIMReducerStartTimesCounters = counters.getGroup("FIMReducerStart");
    long[] FIMReducerStartTimes = new long[FIMReducerStartTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : FIMReducerStartTimesCounters) {
        FIMReducerStartTimes[i++] = counter.getCounter();
    }

    Counters.Group FIMReducerEndTimesCounters = counters.getGroup("FIMReducerEnd");
    long[] FIMReducerEndTimes = new long[FIMReducerEndTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : FIMReducerEndTimesCounters) {
        FIMReducerEndTimes[i++] = counter.getCounter();
    }

    Counters countersAggr = aggregateJob.getCounters();
    Counters.Group AggregateMapperStartTimesCounters = countersAggr.getGroup("AggregateMapperStart");
    long[] AggregateMapperStartTimes = new long[AggregateMapperStartTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : AggregateMapperStartTimesCounters) {
        AggregateMapperStartTimes[i++] = counter.getCounter();
    }

    Counters.Group AggregateMapperEndTimesCounters = countersAggr.getGroup("AggregateMapperEnd");
    long[] AggregateMapperEndTimes = new long[AggregateMapperEndTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : AggregateMapperEndTimesCounters) {
        AggregateMapperEndTimes[i++] = counter.getCounter();
    }

    Counters.Group AggregateReducerStartTimesCounters = countersAggr.getGroup("AggregateReducerStart");
    long[] AggregateReducerStartTimes = new long[AggregateReducerStartTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : AggregateReducerStartTimesCounters) {
        AggregateReducerStartTimes[i++] = counter.getCounter();
    }

    Counters.Group AggregateReducerEndTimesCounters = countersAggr.getGroup("AggregateReducerEnd");
    long[] AggregateReducerEndTimes = new long[AggregateReducerEndTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : AggregateReducerEndTimesCounters) {
        AggregateReducerEndTimes[i++] = counter.getCounter();
    }

    long FIMMapperStartMin = FIMMapperStartTimes[0];
    for (long l : FIMMapperStartTimes) {
        if (l < FIMMapperStartMin) {
            FIMMapperStartMin = l;
        }
    }
    long FIMMapperEndMax = FIMMapperEndTimes[0];
    for (long l : FIMMapperEndTimes) {
        if (l > FIMMapperEndMax) {
            FIMMapperEndMax = l;
        }
    }
    System.out.println("FIM job setup time (milliseconds): " + (FIMMapperStartMin - FIMjob_start_time));
    System.out.println("FIMMapper total runtime (milliseconds): " + (FIMMapperEndMax - FIMMapperStartMin));

    long[] FIMMapperRunTimes = new long[FIMMapperStartTimes.length];
    long FIMMapperRunTimesSum = 0;
    for (int l = 0; l < FIMMapperStartTimes.length; l++) {
        FIMMapperRunTimes[l] = FIMMapperEndTimes[l] - FIMMapperStartTimes[l];
        FIMMapperRunTimesSum += FIMMapperRunTimes[l];
    }
    System.out.println("FIMMapper average task runtime (milliseconds): "
            + FIMMapperRunTimesSum / FIMMapperStartTimes.length);
    long FIMMapperRunTimesMin = FIMMapperRunTimes[0];
    long FIMMapperRunTimesMax = FIMMapperRunTimes[0];
    for (long l : FIMMapperRunTimes) {
        if (l < FIMMapperRunTimesMin) {
            FIMMapperRunTimesMin = l;
        }
        if (l > FIMMapperRunTimesMax) {
            FIMMapperRunTimesMax = l;
        }
    }
    System.out.println("FIMMapper minimum task runtime (milliseconds): " + FIMMapperRunTimesMin);
    System.out.println("FIMMapper maximum task runtime (milliseconds): " + FIMMapperRunTimesMax);

    long FIMReducerStartMin = FIMReducerStartTimes[0];
    for (long l : FIMReducerStartTimes) {
        if (l < FIMReducerStartMin) {
            FIMReducerStartMin = l;
        }
    }
    long FIMReducerEndMax = FIMReducerEndTimes[0];
    for (long l : FIMReducerEndTimes) {
        if (l > FIMReducerEndMax) {
            FIMReducerEndMax = l;
        }
    }
    System.out.println("FIM job shuffle phase runtime (milliseconds): "
            + (FIMReducerStartMin - FIMMapperEndMax));
    System.out.println("FIMReducer total runtime (milliseconds): " + (FIMReducerEndMax - FIMReducerStartMin));

    long[] FIMReducerRunTimes = new long[FIMReducerStartTimes.length];
    long FIMReducerRunTimesSum = 0;
    for (int l = 0; l < FIMReducerStartTimes.length; l++) {
        FIMReducerRunTimes[l] = FIMReducerEndTimes[l] - FIMReducerStartTimes[l];
        FIMReducerRunTimesSum += FIMReducerRunTimes[l];
    }
    System.out.println("FIMReducer average task runtime (milliseconds): "
            + FIMReducerRunTimesSum / FIMReducerStartTimes.length);
    long FIMReducerRunTimesMin = FIMReducerRunTimes[0];
    long FIMReducerRunTimesMax = FIMReducerRunTimes[0];
    for (long l : FIMReducerRunTimes) {
        if (l < FIMReducerRunTimesMin) {
            FIMReducerRunTimesMin = l;
        }
        if (l > FIMReducerRunTimesMax) {
            FIMReducerRunTimesMax = l;
        }
    }
    System.out.println("FIMReducer minimum task runtime (milliseconds): " + FIMReducerRunTimesMin);
    System.out.println("FIMReducer maximum task runtime (milliseconds): " + FIMReducerRunTimesMax);
    System.out.println("FIM job cooldown time (milliseconds): " + (FIMjob_end_time - FIMReducerEndMax));

    long AggregateMapperStartMin = AggregateMapperStartTimes[0];
    for (long l : AggregateMapperStartTimes) {
        if (l < AggregateMapperStartMin) {
            AggregateMapperStartMin = l;
        }
    }
    long AggregateMapperEndMax = AggregateMapperEndTimes[0];
    for (long l : AggregateMapperEndTimes) {
        if (l > AggregateMapperEndMax) {
            AggregateMapperEndMax = l;
        }
    }
    System.out.println("Aggregation job setup time (milliseconds): "
            + (AggregateMapperStartMin - FIMjob_end_time));
    System.out.println("AggregateMapper total runtime (milliseconds): "
            + (AggregateMapperEndMax - AggregateMapperStartMin));

    long[] AggregateMapperRunTimes = new long[AggregateMapperStartTimes.length];
    long AggregateMapperRunTimesSum = 0;
    for (int l = 0; l < AggregateMapperStartTimes.length; l++) {
        AggregateMapperRunTimes[l] = AggregateMapperEndTimes[l] - AggregateMapperStartTimes[l];
        AggregateMapperRunTimesSum += AggregateMapperRunTimes[l];
    }
    System.out.println("AggregateMapper average task runtime (milliseconds): "
            + AggregateMapperRunTimesSum / AggregateMapperStartTimes.length);
    long AggregateMapperRunTimesMin = AggregateMapperRunTimes[0];
    long AggregateMapperRunTimesMax = AggregateMapperRunTimes[0];
    for (long l : AggregateMapperRunTimes) {
        if (l < AggregateMapperRunTimesMin) {
            AggregateMapperRunTimesMin = l;
        }
        if (l > AggregateMapperRunTimesMax) {
            AggregateMapperRunTimesMax = l;
        }
    }
    System.out.println("AggregateMapper minimum task runtime (milliseconds): " + AggregateMapperRunTimesMin);
    System.out.println("AggregateMapper maximum task runtime (milliseconds): " + AggregateMapperRunTimesMax);

    long AggregateReducerStartMin = AggregateReducerStartTimes[0];
    for (long l : AggregateReducerStartTimes) {
        if (l < AggregateReducerStartMin) {
            AggregateReducerStartMin = l;
        }
    }
    long AggregateReducerEndMax = AggregateReducerEndTimes[0];
    for (long l : AggregateReducerEndTimes) {
        if (l > AggregateReducerEndMax) {
            AggregateReducerEndMax = l;
        }
    }
    System.out.println("Aggregate job round shuffle phase runtime (milliseconds): "
            + (AggregateReducerStartMin - AggregateMapperEndMax));
    System.out.println("AggregateReducer total runtime (milliseconds): "
            + (AggregateReducerEndMax - AggregateReducerStartMin));

    long[] AggregateReducerRunTimes = new long[AggregateReducerStartTimes.length];
    long AggregateReducerRunTimesSum = 0;
    for (int l = 0; l < AggregateReducerStartTimes.length; l++) {
        AggregateReducerRunTimes[l] = AggregateReducerEndTimes[l] - AggregateReducerStartTimes[l];
        AggregateReducerRunTimesSum += AggregateReducerRunTimes[l];
    }
    System.out.println("AggregateReducer average task runtime (milliseconds): "
            + AggregateReducerRunTimesSum / AggregateReducerStartTimes.length);
    long AggregateReducerRunTimesMin = AggregateReducerRunTimes[0];
    long AggregateReducerRunTimesMax = AggregateReducerRunTimes[0];
    for (long l : AggregateReducerRunTimes) {
        if (l < AggregateReducerRunTimesMin) {
            AggregateReducerRunTimesMin = l;
        }
        if (l > AggregateReducerRunTimesMax) {
            AggregateReducerRunTimesMax = l;
        }
    }
    System.out.println("AggregateReducer minimum task runtime (milliseconds): " + AggregateReducerRunTimesMin);
    System.out.println("AggregateReducer maximum task runtime (milliseconds): " + AggregateReducerRunTimesMax);
    System.out.println("Aggregation job cooldown time (milliseconds): "
            + (aggrJob_end_time - AggregateReducerEndMax));

    System.out.println("total runtime (all inclusive) (milliseconds): "
            + (aggrJob_end_time - FIMjob_start_time));
    System.out.println("total runtime (no FIM job setup, no aggregation job cooldown) (milliseconds): "
            + (AggregateReducerEndMax - FIMMapperStartMin));
    System.out.println("total runtime (no setups, no cooldowns) (milliseconds): "
            + (FIMReducerEndMax - FIMMapperStartMin + AggregateReducerEndMax - AggregateMapperStartMin));
    System.out.println("FIM job runtime (including setup and cooldown) (milliseconds): " + FIMjob_runtime);
    System.out.println("FIM job runtime (no setup, no cooldown) (milliseconds): "
            + (FIMReducerEndMax - FIMMapperStartMin));
    System.out.println("Aggregation job runtime (including setup and cooldown) (milliseconds): " + aggrJob_runtime);
    System.out.println("Aggregation job runtime (no setup, no cooldown) (milliseconds): "
            + (AggregateReducerEndMax - AggregateMapperStartMin));

    return 0;
}
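The FIM job above fixes the reducer count to numSamples and installs FIMPartitioner, whose source is not shown on this page. As a rough illustration only, here is what a partitioner with that contract could look like, assuming the map output key is the sample ID (an IntWritable in the range [0, numSamples)), so that each sample is processed by exactly one reducer; the class name is hypothetical.

// Hypothetical partitioner in the spirit of FIMPartitioner (the real class is not shown above):
// route each record to the reducer whose index matches the sample ID carried in the key.
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;

public class SampleIdPartitioner implements Partitioner<IntWritable, Text> {
    @Override
    public void configure(JobConf job) {
        // No configuration needed; the key already encodes the target reducer.
    }

    @Override
    public int getPartition(IntWritable key, Text value, int numPartitions) {
        // This mapping is one-to-one only because conf.setNumReduceTasks(numSamples)
        // guarantees numPartitions == numSamples, so every sample ID is a valid partition.
        return key.get() % numPartitions;
    }
}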
From source file:Text2ColumntStorageMR.java
License:Open Source License
@SuppressWarnings("deprecation") public static void main(String[] args) throws Exception { if (args.length != 3) { System.out.println("Text2ColumnStorageMR <input> <output> <columnStorageMode>"); System.exit(-1);/* www.j av a2s. c om*/ } JobConf conf = new JobConf(Text2ColumntStorageMR.class); conf.setJobName("Text2ColumnStorageMR"); conf.setNumMapTasks(1); conf.setNumReduceTasks(4); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Unit.Record.class); conf.setMapperClass(TextFileMapper.class); conf.setReducerClass(ColumnStorageReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat((Class<? extends OutputFormat>) ColumnStorageHiveOutputFormat.class); conf.set("mapred.output.compress", "flase"); Head head = new Head(); initHead(head); head.toJobConf(conf); int bt = Integer.valueOf(args[2]); FileInputFormat.setInputPaths(conf, args[0]); Path outputPath = new Path(args[1]); FileOutputFormat.setOutputPath(conf, outputPath); FileSystem fs = outputPath.getFileSystem(conf); fs.delete(outputPath, true); JobClient jc = new JobClient(conf); RunningJob rj = null; rj = jc.submitJob(conf); String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rj.isComplete()) { try { Thread.sleep(1000); } catch (InterruptedException e) { } int mapProgress = Math.round(rj.mapProgress() * 100); int reduceProgress = Math.round(rj.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.out.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } System.exit(0); }
From source file:TestTextInputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestTextInputFormat <input> <output>");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestTextInputFormat.class);
        conf.setJobName("TestTextInputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(FormatStorageOutputFormat.class);
        conf.set("mapred.output.compress", "false");
        conf.set("mapred.input.dir", argv[0]);

        LazySimpleSerDe serDe = initSerDe(conf);
        LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new TextInputFormat();
        ((TextInputFormat) inputFormat).configure(conf);
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int totalDelay = 0;
        RecordReader<WritableComparable, Writable> currRecReader = null;
        for (int i = 0; i < inputSplits.length; i++) {
            currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);

            WritableComparable key;
            Writable value;
            key = currRecReader.createKey();
            value = currRecReader.createValue();

            long begin = System.currentTimeMillis();
            int count = 0;
            while (currRecReader.next(key, value)) {
                Object row = serDe.deserialize((Text) value);
                oi.getStructFieldsDataAsList(row);
                count++;
            }
            long end = System.currentTimeMillis();

            long delay = (end - begin) / 1000;
            totalDelay += delay;
            System.out.println(count + " record read over, delay " + delay + " s");
        }
        System.out.println("total delay:" + totalDelay);

        return;
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}
From source file:LinkReverser.java
License:Apache License
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), LinkReverser.class);
    conf.setJobName("indexreverser");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    JobClient.runJob(conf);
    return 0;
}
From source file:WikipediaDocnoMappingBuilder.java
License:Apache License
@SuppressWarnings("static-access") @Override//from www .j av a 2 s. c om public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file") .create(OUTPUT_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); options.addOption(KEEP_ALL_OPTION, false, "keep all pages"); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } String inputPath = cmdline.getOptionValue(INPUT_OPTION); String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION); boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION); String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - input: " + inputPath); LOG.info(" - output file: " + outputFile); LOG.info(" - keep all pages: " + keepAll); LOG.info(" - language: " + language); // Job job = Job.getInstance(getConf()); JobConf conf = new JobConf(WikipediaDocnoMappingBuilder.class); conf.setJarByClass(WikipediaDocnoMappingBuilder.class); conf.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language)); conf.setBoolean(KEEP_ALL_OPTION, keepAll); // .getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll); if (language != null) { conf.set("wiki.language", language); } conf.setNumReduceTasks(1); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(tmpPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(IntWritable.class); conf.setInputFormat(WikipediaPageInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(tmpPath), true); // job.waitForCompletion(true); RunningJob job = JobClient.runJob(conf); job.waitForCompletion(); // JobClient jobClient = new JobClient(conf); long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue() : job.getCounters().findCounter(PageTypes.ARTICLE).getValue(); WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-00000", (int) cnt, outputFile); FileSystem.get(getConf()).delete(new Path(tmpPath), true); return 0; }
From source file:SleepJob.java
License:Apache License
public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) {
    JobConf job = new JobConf(getConf(), SleepJob.class);
    job.setNumMapTasks(numMapper);
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepJob.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepJob.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setInputFormat(SleepInputFormat.class);
    job.setPartitionerClass(SleepJob.class);
    job.setSpeculativeExecution(false);
    FileInputFormat.addInputPath(job, new Path("ignored"));
    job.setLong("sleep.job.map.sleep.time", mapSleepTime);
    job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
    job.setInt("sleep.job.map.sleep.count", mapSleepCount);
    job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    return job;
}
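For completeness, a hedged sketch of how the factory method above might be invoked; it assumes SleepJob extends Configured and implements Tool (so setConf is available), and the task counts and sleep parameters are arbitrary placeholders.

// Hypothetical caller of setupJobConf; counts and sleep times are placeholders.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class RunSleepJob {
    public static void main(String[] args) throws Exception {
        SleepJob sleepJob = new SleepJob();
        sleepJob.setConf(new Configuration()); // assumes SleepJob extends Configured

        // 8 map tasks and 4 reduce tasks; each task sleeps once for one second.
        JobConf job = sleepJob.setupJobConf(8, 4, 1000L, 1, 1000L, 1);

        // The setNumReduceTasks(4) call inside setupJobConf determines the reduce task count.
        JobClient.runJob(job);
    }
}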