List of usage examples for org.apache.hadoop.util.Progress
public Progress()
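Before the harvested examples, here is a minimal sketch of what these call sites do with a freshly constructed Progress. Only the constructor, set(float), and get() appear directly in the snippets below; complete() is a standard org.apache.hadoop.util.Progress method, but the surrounding main() is an illustrative assumption, not code from any of the listed projects.

import org.apache.hadoop.util.Progress;

public class ProgressBasics {
    public static void main(String[] args) {
        Progress progress = new Progress(); // starts at 0.0f

        // Report the work as half done, as UdaPluginRT does after sending
        // its INIT_COMMAND (mProgress.set(0.5f) in the example below).
        progress.set(0.5f);
        System.out.println("progress: " + progress.get()); // 0.5

        // Mark the work finished.
        progress.complete();
        System.out.println("progress: " + progress.get()); // 1.0
    }
}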
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" })
private void runReduce(Job job, KeyValueSorter<?, ?> sorter)
        throws ClassNotFoundException, IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0);
    Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$
                reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes()));
    }
    TaskAttemptContext context = newTaskAttemptContext(conf, id);
    OutputCommitter committer = output.getOutputCommitter(context);
    committer.setupTask(context);
    boolean succeed = false;
    try {
        ShuffleReader reader = new ShuffleReader(sorter, new Progress());
        try {
            RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            try {
                Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(),
                        sorter.getValueClass(), writer, committer,
                        (RawComparator) job.getGroupingComparator());
                reducer.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("error occurred while closing reducer input: {0} ({1})",
                        id, job.getJobName()), e);
            }
        }
        doCommitTask(context, committer);
        succeed = true;
    } finally {
        if (!succeed) {
            doAbortTask(context, committer);
        }
    }
}
From source file:com.mellanox.hadoop.mapred.UdaPlugin.java
License:Apache License
public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    // default 0 means ignoring this parameter and using -Xmx and mapred.job.shuffle.input.buffer.percent instead
    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0");
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize);
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal parameter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();
    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    // in bytes - pass the raw value we got from the xml file (with only conversion to bytes)
    mParams.add(Long.toString(maxRdmaBufferSize * 1024));
    // in bytes - passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(Long.toString(minRdmaBufferSize * 1024));
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); // "true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    // checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            // saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
            // skip local dirs that fail the disk check
        }
    }
    // sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}
From source file:org.apache.tez.runtime.library.common.sort.impl.TestTezMerger.java
License:Apache License
/**
 * Merge the data sets.
 *
 * @param pathList
 * @param rc
 * @return
 * @throws IOException
 */
private TezRawKeyValueIterator merge(List<Path> pathList, RawComparator rc) throws IOException {
    TezMerger merger = new TezMerger();
    TezRawKeyValueIterator records = merger.merge(defaultConf, localFs, IntWritable.class, LongWritable.class,
            null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), true, 4,
            new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? comparator : rc), new Reporter(),
            null, null, null, new Progress());
    return records;
}
From source file:org.apache.tez.runtime.library.common.sort.impl.TestTezMerger.java
License:Apache License
private void merge(List<Path> pathList, int mergeFactor, RawComparator rc) throws Exception {
    // Merge datasets
    TezMerger merger = new TezMerger();
    TezRawKeyValueIterator records = merger.merge(defaultConf, localFs, IntWritable.class, LongWritable.class,
            null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), true, mergeFactor,
            new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? comparator : rc), new Reporter(),
            null, null, null, new Progress());

    // Verify the merged data is correct
    Map<Integer, Integer> dataMap = Maps.newHashMap();
    int pk = -1;
    while (records.next()) {
        DataInputBuffer key = records.getKey();
        DataInputBuffer value = records.getValue();

        IntWritable k = new IntWritable();
        k.readFields(key);
        LongWritable v = new LongWritable();
        v.readFields(value);

        if (records.isSameKey()) {
            LOG.info("\tSame Key : key=" + k.get() + ", val=" + v.get());
            // More than one key should be present in the source data
            assertTrue(verificationDataSet.get(k.get()).size() > 1);
            // Ensure this is same as the previous key we saw
            assertTrue("previousKey=" + pk + ", current=" + k.get(), pk == k.get());
        } else {
            LOG.info("key=" + k.get() + ", val=" + v.get());
        }
        pk = k.get();

        int keyCount = (dataMap.containsKey(k.get())) ? (dataMap.get(k.get()) + 1) : 1;
        dataMap.put(k.get(), keyCount);
    }

    // Verify if the number of distinct entries is the same in source and the test
    assertTrue("dataMap=" + dataMap.keySet().size() + ", verificationSet=" + verificationDataSet.keySet().size(),
            dataMap.keySet().size() == verificationDataSet.keySet().size());

    // Verify with source data
    for (Integer key : verificationDataSet.keySet()) {
        assertTrue("Data size for " + key + " not matching with source; dataSize:" + dataMap.get(key).intValue()
                + ", source:" + verificationDataSet.get(key).size(),
                dataMap.get(key).intValue() == verificationDataSet.get(key).size());
    }

    // Verify if every key has the same number of repeated items in the source dataset as well
    for (Map.Entry<Integer, Integer> entry : dataMap.entrySet()) {
        assertTrue(entry.getKey() + "", verificationDataSet.get(entry.getKey()).size() == entry.getValue());
    }

    LOG.info("******************");
    verificationDataSet.clear();
}
From source file:org.apache.tez.runtime.library.common.TestValuesIterator.java
License:Apache License
private ValuesIterator createEmptyIterator(boolean inMemory) throws IOException {
    if (!inMemory) {
        streamPaths = new Path[0];
        // This will return EmptyIterator
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, null, false, -1, 1024, streamPaths,
                false, mergeFactor, tmpDir, comparator, new ProgressReporter(), null, null, null, null);
    } else {
        List<TezMerger.Segment> segments = Lists.newLinkedList();
        // This will return EmptyIterator
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, segments, mergeFactor, tmpDir,
                comparator, new ProgressReporter(), new GenericCounter("readsCounter", "y"),
                new GenericCounter("writesCounter", "y1"), new GenericCounter("bytesReadCounter", "y2"),
                new Progress());
    }
    return new ValuesIterator(rawKeyValueIterator, comparator, keyClass, valClass, conf,
            (TezCounter) new GenericCounter("inputKeyCounter", "y3"),
            (TezCounter) new GenericCounter("inputValueCounter", "y4"));
}
From source file:org.apache.tez.runtime.library.common.TestValuesIterator.java
License:Apache License
/**
 * Create sample data (in memory / disk based), merge them and return ValuesIterator
 *
 * @param inMemory
 * @return ValuesIterator
 * @throws IOException
 */
private ValuesIterator createIterator(boolean inMemory) throws IOException {
    if (!inMemory) {
        streamPaths = createFiles();
        // Merge all files to get KeyValueIterator
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, null, false, -1, 1024, streamPaths,
                false, mergeFactor, tmpDir, comparator, new ProgressReporter(), null, null, null, null);
    } else {
        List<TezMerger.Segment> segments = createInMemStreams();
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, segments, mergeFactor, tmpDir,
                comparator, new ProgressReporter(), new GenericCounter("readsCounter", "y"),
                new GenericCounter("writesCounter", "y1"), new GenericCounter("bytesReadCounter", "y2"),
                new Progress());
    }
    return new ValuesIterator(rawKeyValueIterator, comparator, keyClass, valClass, conf,
            (TezCounter) new GenericCounter("inputKeyCounter", "y3"),
            (TezCounter) new GenericCounter("inputValueCounter", "y4"));
}
From source file:skewtune.mapreduce.lib.input.MapOutputInputStream.java
License:Apache License
MapOutputInputStream(Configuration conf, TaskID reduceId, Counter inputCounter, SecretKey jobTokenSecret,
        List<MapOutputSplit> splits) throws IOException {
    if (conf.getBoolean(JobContext.MAP_OUTPUT_COMPRESS, false)) {
        Class<? extends CompressionCodec> codecClass = getMapOutputCompressorClass(conf, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        decompressor = CodecPool.getDecompressor(codec);
    } else {
        codec = null;
        decompressor = null;
    }
    this.inputCounter = inputCounter;
    this.jobTokenSecret = jobTokenSecret;
    this.reduceTaskId = reduceId;

    int totalBufSz = conf.getInt("skewtune.map.io.inputbuf", 4 * 1024 * 1024); // 4 MB
    PACKET_SIZE = conf.getInt("skewtune.map.io.packetsize", 128 * 1024); // 128 KB

    final int numBuf = totalBufSz / PACKET_SIZE;
    buffers = new ByteBuffer[numBuf];
    for (int i = 0; i < numBuf; ++i) {
        buffers[i] = ByteBuffer.allocate(PACKET_SIZE);
    }

    this.splits = splits;
    // producer and consumer may keep one buffer at their hands
    this.q = new ArrayBlockingQueue<ByteBuffer>(numBuf - 2);
    this.fetcher = new Fetcher(conf, reduceId);
    this.fetcher.start();

    progress = new Progress();
    progress.addPhases(splits.size());
}
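The constructor above allocates one Progress phase per map-output split via addPhases(splits.size()). A self-contained sketch of that phase-per-split pattern follows; the splitBytes array and the fixed 25-byte steps are made-up stand-ins for the real fetch loop, while phase(), set(float), complete(), startNextPhase(), and get() are the actual org.apache.hadoop.util.Progress calls.

import org.apache.hadoop.util.Progress;

public class PhasePerSplitDemo {
    public static void main(String[] args) {
        long[] splitBytes = { 100, 200, 50 }; // hypothetical split sizes

        Progress progress = new Progress();
        progress.addPhases(splitBytes.length); // one phase per split, as above

        for (long total : splitBytes) {
            for (long fetched = 25; fetched <= total; fetched += 25) {
                progress.phase().set((float) fetched / total); // per-split progress
                System.out.printf("overall: %.2f%n", progress.get());
            }
            progress.phase().complete(); // this split is done
            progress.startNextPhase();   // advance to the next split's phase
        }
    }
}

Because get() walks to the root of the progress tree and weights each phase equally, the printed overall value advances by one third per split here regardless of the splits' byte sizes.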