Example usage for org.apache.hadoop.util Progress Progress

Introduction

On this page you can find example usage for the org.apache.hadoop.util Progress constructor, Progress().

Prototype

public Progress() 

Document

Creates a new root node.

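Before the examples, here is a minimal, self-contained sketch (not taken from any of the sources listed under Usage) of how a root node created by this constructor is typically split into phases and updated. It assumes the standard phase API of org.apache.hadoop.util.Progress (addPhase, set, startNextPhase, complete, get); the class name ProgressDemo is just for illustration.

import org.apache.hadoop.util.Progress;

public class ProgressDemo {
    public static void main(String[] args) {
        Progress root = new Progress(); // creates a new root node

        // Split the root into two equally weighted named phases.
        Progress copyPhase = root.addPhase("copy");
        Progress sortPhase = root.addPhase("sort");

        // Half of the first of two phases: the root reports 0.25.
        copyPhase.set(0.5f);
        System.out.println(root.get()); // 0.25

        // Finish the copy phase and advance the root to the sort phase.
        copyPhase.complete();
        root.startNextPhase();

        // Sort phase fully done: the root reports 1.0.
        sortPhase.set(1.0f);
        System.out.println(root.get()); // 1.0
    }
}
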
Usage

From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License: Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
private void runReduce(Job job, KeyValueSorter<?, ?> sorter)
        throws ClassNotFoundException, IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0);
    Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$
                reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes()));
    }
    TaskAttemptContext context = newTaskAttemptContext(conf, id);
    OutputCommitter committer = output.getOutputCommitter(context);
    committer.setupTask(context);
    boolean succeed = false;
    try {
        ShuffleReader reader = new ShuffleReader(sorter, new Progress());
        try {
            RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            try {
                Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(),
                        sorter.getValueClass(), writer, committer, (RawComparator) job.getGroupingComparator());
                reducer.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("error occurred while closing reducer input: {0} ({1})", id,
                        job.getJobName()), e);
            }
        }
        doCommitTask(context, committer);
        succeed = true;
    } finally {
        if (!succeed) {
            doAbortTask(context, committer);
        }
    }
}
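
Note the Progress usage above: a fresh root node is handed straight to ShuffleReader. Since SimpleJobRunner executes the reducer in-process, there appears to be no surrounding task-level progress tree to attach it to.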

From source file: com.mellanox.hadoop.mapred.UdaPlugin.java

License: Apache License

public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0"); // default 0 means this parameter is ignored and -Xmx with mapred.job.shuffle.input.buffer.percent is used instead
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal paramter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }//from w w  w. jav  a 2 s .co  m

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in bytes - pass the raw value from the XML file (converted to bytes only)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in bytes - used to check whether rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); //"true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    //checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            //saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
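            // this directory cannot be created or accessed; skip it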
        }
    }
    //sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}
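
Note the Progress usage at the end: the plugin creates a standalone root node and immediately sets it to 0.5f once INIT_COMMAND has been sent, presumably reporting the shuffle as half done while the native UDA side completes the rest.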

From source file: org.apache.tez.runtime.library.common.sort.impl.TestTezMerger.java

License: Apache License

/**
 * Merge the data sets.
 *
 * @param pathList paths of the on-disk segments to merge
 * @param rc       raw comparator to use (the default comparator is used when null)
 * @return an iterator over the merged key/value records
 * @throws IOException
 */
private TezRawKeyValueIterator merge(List<Path> pathList, RawComparator rc) throws IOException {
    TezMerger merger = new TezMerger();
    TezRawKeyValueIterator records = merger.merge(defaultConf, localFs, IntWritable.class, LongWritable.class,
            null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), true, 4,
            new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? comparator : rc), new Reporter(),
            null, null, null, new Progress());
    return records;
}
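
In this and the following test helper, the trailing new Progress() argument is the merge-phase node that TezMerger updates as segments are consumed; the tests pass a fresh root node because they do not track phase progress themselves.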

From source file: org.apache.tez.runtime.library.common.sort.impl.TestTezMerger.java

License: Apache License

private void merge(List<Path> pathList, int mergeFactor, RawComparator rc) throws Exception {
    //Merge datasets
    TezMerger merger = new TezMerger();
    TezRawKeyValueIterator records = merger.merge(defaultConf, localFs, IntWritable.class, LongWritable.class,
            null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), true, mergeFactor,
            new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? comparator : rc), new Reporter(),
            null, null, null, new Progress());

    //Verify the merged data is correct
    Map<Integer, Integer> dataMap = Maps.newHashMap();
    int pk = -1;
    while (records.next()) {
        DataInputBuffer key = records.getKey();
        DataInputBuffer value = records.getValue();

        IntWritable k = new IntWritable();
        k.readFields(key);
        LongWritable v = new LongWritable();
        v.readFields(value);

        if (records.isSameKey()) {
            LOG.info("\tSame Key : key=" + k.get() + ", val=" + v.get());
            //More than one key should be present in the source data
            assertTrue(verificationDataSet.get(k.get()).size() > 1);
            //Ensure this is same as the previous key we saw
            assertTrue("previousKey=" + pk + ", current=" + k.get(), pk == k.get());
        } else {
            LOG.info("key=" + k.get() + ", val=" + v.get());
        }
        pk = k.get();

        int keyCount = (dataMap.containsKey(k.get())) ? (dataMap.get(k.get()) + 1) : 1;
        dataMap.put(k.get(), keyCount);
    }

    //Verify if the number of distinct entries is the same in source and the test
    assertTrue(
            "dataMap=" + dataMap.keySet().size() + ", verificationSet=" + verificationDataSet.keySet().size(),
            dataMap.keySet().size() == verificationDataSet.keySet().size());

    //Verify with source data
    for (Integer key : verificationDataSet.keySet()) {
        assertTrue(
                "Data size for " + key + " not matching with source; dataSize:" + dataMap.get(key).intValue()
                        + ", source:" + verificationDataSet.get(key).size(),
                dataMap.get(key).intValue() == verificationDataSet.get(key).size());
    }

    //Verify if every key has the same number of repeated items in the source dataset as well
    for (Map.Entry<Integer, Integer> entry : dataMap.entrySet()) {
        assertTrue(entry.getKey() + "", verificationDataSet.get(entry.getKey()).size() == entry.getValue());
    }

    LOG.info("******************");
    verificationDataSet.clear();
}

From source file: org.apache.tez.runtime.library.common.TestValuesIterator.java

License: Apache License

private ValuesIterator createEmptyIterator(boolean inMemory) throws IOException {
    if (!inMemory) {
        streamPaths = new Path[0];
        //This will return EmptyIterator
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, null, false, -1, 1024, streamPaths,
                false, mergeFactor, tmpDir, comparator, new ProgressReporter(), null, null, null, null);
    } else {
        List<TezMerger.Segment> segments = Lists.newLinkedList();
        //This will return EmptyIterator
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, segments, mergeFactor, tmpDir,
                comparator, new ProgressReporter(), new GenericCounter("readsCounter", "y"),
                new GenericCounter("writesCounter", "y1"), new GenericCounter("bytesReadCounter", "y2"),
                new Progress());
    }
    return new ValuesIterator(rawKeyValueIterator, comparator, keyClass, valClass, conf,
            (TezCounter) new GenericCounter("inputKeyCounter", "y3"),
            (TezCounter) new GenericCounter("inputValueCounter", "y4"));
}

From source file: org.apache.tez.runtime.library.common.TestValuesIterator.java

License: Apache License

/**
 * Create sample data (in-memory or disk-based), merge it, and return a ValuesIterator.
 *
 * @param inMemory whether to build in-memory segments instead of on-disk files
 * @return ValuesIterator over the merged data
 * @throws IOException
 */
private ValuesIterator createIterator(boolean inMemory) throws IOException {
    if (!inMemory) {
        streamPaths = createFiles();
        //Merge all files to get KeyValueIterator
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, null, false, -1, 1024, streamPaths,
                false, mergeFactor, tmpDir, comparator, new ProgressReporter(), null, null, null, null);
    } else {
        List<TezMerger.Segment> segments = createInMemStreams();
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, segments, mergeFactor, tmpDir,
                comparator, new ProgressReporter(), new GenericCounter("readsCounter", "y"),
                new GenericCounter("writesCounter", "y1"), new GenericCounter("bytesReadCounter", "y2"),
                new Progress());
    }
    return new ValuesIterator(rawKeyValueIterator, comparator, keyClass, valClass, conf,
            (TezCounter) new GenericCounter("inputKeyCounter", "y3"),
            (TezCounter) new GenericCounter("inputValueCounter", "y4"));
}

From source file: skewtune.mapreduce.lib.input.MapOutputInputStream.java

License: Apache License

MapOutputInputStream(Configuration conf, TaskID reduceId, Counter inputCounter, SecretKey jobTokenSecret,
        List<MapOutputSplit> splits) throws IOException {
    if (conf.getBoolean(JobContext.MAP_OUTPUT_COMPRESS, false)) {
        Class<? extends CompressionCodec> codecClass = getMapOutputCompressorClass(conf, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        decompressor = CodecPool.getDecompressor(codec);
    } else {
        codec = null;
        decompressor = null;
    }

    this.inputCounter = inputCounter;
    this.jobTokenSecret = jobTokenSecret;
    this.reduceTaskId = reduceId;

    int totalBufSz = conf.getInt("skewtune.map.io.inputbuf", 4 * 1024 * 1024); // 4 MB
    PACKET_SIZE = conf.getInt("skewtune.map.io.packetsize", 128 * 1024); // 128KB

    final int numBuf = totalBufSz / PACKET_SIZE;
    buffers = new ByteBuffer[numBuf];
    for (int i = 0; i < numBuf; ++i) {
        buffers[i] = ByteBuffer.allocate(PACKET_SIZE);
    }
    this.splits = splits;

    this.q = new ArrayBlockingQueue<ByteBuffer>(numBuf - 2); // producer and consumer may each hold on to one buffer
    this.fetcher = new Fetcher(conf, reduceId);
    this.fetcher.start();

    progress = new Progress();
    progress.addPhases(splits.size());
}