List of usage examples for org.apache.hadoop.util.Progress
public Progress()
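Before the harvested examples, here is a minimal sketch of what these call sites do with a freshly constructed Progress. Only the constructor, set(float), and get() appear directly in the snippets below; complete() is a standard org.apache.hadoop.util.Progress method, but the surrounding main() is an illustrative assumption, not code from any of the listed projects.

import org.apache.hadoop.util.Progress;

public class ProgressBasics {
    public static void main(String[] args) {
        Progress progress = new Progress(); // starts at 0.0f

        // Report the work as half done, as UdaPluginRT does after sending
        // its INIT_COMMAND (mProgress.set(0.5f) in the example below).
        progress.set(0.5f);
        System.out.println("progress: " + progress.get()); // 0.5

        // Mark the work finished.
        progress.complete();
        System.out.println("progress: " + progress.get()); // 1.0
    }
}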
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" })
private void runReduce(Job job, KeyValueSorter<?, ?> sorter)
        throws ClassNotFoundException, IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0);
    Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$
                reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes()));
    }
    TaskAttemptContext context = newTaskAttemptContext(conf, id);
    OutputCommitter committer = output.getOutputCommitter(context);
    committer.setupTask(context);
    boolean succeed = false;
    try {
        ShuffleReader reader = new ShuffleReader(sorter, new Progress());
        try {
            RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            try {
                Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(),
                        sorter.getValueClass(), writer, committer,
                        (RawComparator) job.getGroupingComparator());
                reducer.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("error occurred while closing reducer input: {0} ({1})",
                        id, job.getJobName()), e);
            }
        }
        doCommitTask(context, committer);
        succeed = true;
    } finally {
        if (!succeed) {
            doAbortTask(context, committer);
        }
    }
}
From source file:com.mellanox.hadoop.mapred.UdaPlugin.java
License:Apache License
public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    // default 0 means ignoring this parameter and using -Xmx and mapred.job.shuffle.input.buffer.percent instead
    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0");
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize);
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal parameter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();
    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    // in bytes - pass the raw value we got from the xml file (with only conversion to bytes)
    mParams.add(Long.toString(maxRdmaBufferSize * 1024));
    // in bytes - passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(Long.toString(minRdmaBufferSize * 1024));
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); // "true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    // checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            // saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
            // skip local dirs that fail the disk check
        }
    }
    // sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}
From source file:org.apache.tez.runtime.library.common.sort.impl.TestTezMerger.java
License:Apache License
/**
 * Merge the data sets.
 *
 * @param pathList
 * @param rc
 * @return
 * @throws IOException
 */
private TezRawKeyValueIterator merge(List<Path> pathList, RawComparator rc) throws IOException {
    TezMerger merger = new TezMerger();
    TezRawKeyValueIterator records = merger.merge(defaultConf, localFs, IntWritable.class, LongWritable.class,
            null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), true, 4,
            new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? comparator : rc), new Reporter(),
            null, null, null, new Progress());
    return records;
}
From source file:org.apache.tez.runtime.library.common.sort.impl.TestTezMerger.java
License:Apache License
private void merge(List<Path> pathList, int mergeFactor, RawComparator rc) throws Exception {
    // Merge datasets
    TezMerger merger = new TezMerger();
    TezRawKeyValueIterator records = merger.merge(defaultConf, localFs, IntWritable.class, LongWritable.class,
            null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), true, mergeFactor,
            new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? comparator : rc), new Reporter(),
            null, null, null, new Progress());

    // Verify the merged data is correct
    Map<Integer, Integer> dataMap = Maps.newHashMap();
    int pk = -1;
    while (records.next()) {
        DataInputBuffer key = records.getKey();
        DataInputBuffer value = records.getValue();

        IntWritable k = new IntWritable();
        k.readFields(key);
        LongWritable v = new LongWritable();
        v.readFields(value);

        if (records.isSameKey()) {
            LOG.info("\tSame Key : key=" + k.get() + ", val=" + v.get());
            // More than one key should be present in the source data
            assertTrue(verificationDataSet.get(k.get()).size() > 1);
            // Ensure this is same as the previous key we saw
            assertTrue("previousKey=" + pk + ", current=" + k.get(), pk == k.get());
        } else {
            LOG.info("key=" + k.get() + ", val=" + v.get());
        }
        pk = k.get();

        int keyCount = (dataMap.containsKey(k.get())) ? (dataMap.get(k.get()) + 1) : 1;
        dataMap.put(k.get(), keyCount);
    }

    // Verify if the number of distinct entries is the same in source and the test
    assertTrue("dataMap=" + dataMap.keySet().size() + ", verificationSet=" + verificationDataSet.keySet().size(),
            dataMap.keySet().size() == verificationDataSet.keySet().size());

    // Verify with source data
    for (Integer key : verificationDataSet.keySet()) {
        assertTrue("Data size for " + key + " not matching with source; dataSize:" + dataMap.get(key).intValue()
                + ", source:" + verificationDataSet.get(key).size(),
                dataMap.get(key).intValue() == verificationDataSet.get(key).size());
    }

    // Verify if every key has the same number of repeated items in the source dataset as well
    for (Map.Entry<Integer, Integer> entry : dataMap.entrySet()) {
        assertTrue(entry.getKey() + "", verificationDataSet.get(entry.getKey()).size() == entry.getValue());
    }

    LOG.info("******************");
    verificationDataSet.clear();
}
From source file:org.apache.tez.runtime.library.common.TestValuesIterator.java
License:Apache License
private ValuesIterator createEmptyIterator(boolean inMemory) throws IOException {
    if (!inMemory) {
        streamPaths = new Path[0];
        // This will return EmptyIterator
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, null, false, -1, 1024, streamPaths,
                false, mergeFactor, tmpDir, comparator, new ProgressReporter(), null, null, null, null);
    } else {
        List<TezMerger.Segment> segments = Lists.newLinkedList();
        // This will return EmptyIterator
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, segments, mergeFactor, tmpDir,
                comparator, new ProgressReporter(), new GenericCounter("readsCounter", "y"),
                new GenericCounter("writesCounter", "y1"), new GenericCounter("bytesReadCounter", "y2"),
                new Progress());
    }
    return new ValuesIterator(rawKeyValueIterator, comparator, keyClass, valClass, conf,
            (TezCounter) new GenericCounter("inputKeyCounter", "y3"),
            (TezCounter) new GenericCounter("inputValueCounter", "y4"));
}
From source file:org.apache.tez.runtime.library.common.TestValuesIterator.java
License:Apache License
/**
 * Create sample data (in memory / disk based), merge them and return ValuesIterator
 *
 * @param inMemory
 * @return ValuesIterator
 * @throws IOException
 */
private ValuesIterator createIterator(boolean inMemory) throws IOException {
    if (!inMemory) {
        streamPaths = createFiles();
        // Merge all files to get KeyValueIterator
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, null, false, -1, 1024, streamPaths,
                false, mergeFactor, tmpDir, comparator, new ProgressReporter(), null, null, null, null);
    } else {
        List<TezMerger.Segment> segments = createInMemStreams();
        rawKeyValueIterator = TezMerger.merge(conf, fs, keyClass, valClass, segments, mergeFactor, tmpDir,
                comparator, new ProgressReporter(), new GenericCounter("readsCounter", "y"),
                new GenericCounter("writesCounter", "y1"), new GenericCounter("bytesReadCounter", "y2"),
                new Progress());
    }
    return new ValuesIterator(rawKeyValueIterator, comparator, keyClass, valClass, conf,
            (TezCounter) new GenericCounter("inputKeyCounter", "y3"),
            (TezCounter) new GenericCounter("inputValueCounter", "y4"));
}
From source file:skewtune.mapreduce.lib.input.MapOutputInputStream.java
License:Apache License
MapOutputInputStream(Configuration conf, TaskID reduceId, Counter inputCounter, SecretKey jobTokenSecret,
        List<MapOutputSplit> splits) throws IOException {
    if (conf.getBoolean(JobContext.MAP_OUTPUT_COMPRESS, false)) {
        Class<? extends CompressionCodec> codecClass = getMapOutputCompressorClass(conf, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        decompressor = CodecPool.getDecompressor(codec);
    } else {
        codec = null;
        decompressor = null;
    }
    this.inputCounter = inputCounter;
    this.jobTokenSecret = jobTokenSecret;
    this.reduceTaskId = reduceId;

    int totalBufSz = conf.getInt("skewtune.map.io.inputbuf", 4 * 1024 * 1024); // 4 MB
    PACKET_SIZE = conf.getInt("skewtune.map.io.packetsize", 128 * 1024); // 128 KB

    final int numBuf = totalBufSz / PACKET_SIZE;
    buffers = new ByteBuffer[numBuf];
    for (int i = 0; i < numBuf; ++i) {
        buffers[i] = ByteBuffer.allocate(PACKET_SIZE);
    }

    this.splits = splits;
    // producer and consumer may keep one buffer at their hands
    this.q = new ArrayBlockingQueue<ByteBuffer>(numBuf - 2);
    this.fetcher = new Fetcher(conf, reduceId);
    this.fetcher.start();

    progress = new Progress();
    progress.addPhases(splits.size());
}
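The constructor above allocates one Progress phase per map-output split via addPhases(splits.size()). A self-contained sketch of that phase-per-split pattern follows; the splitBytes array and the fixed 25-byte steps are made-up stand-ins for the real fetch loop, while phase(), set(float), complete(), startNextPhase(), and get() are the actual org.apache.hadoop.util.Progress calls.

import org.apache.hadoop.util.Progress;

public class PhasePerSplitDemo {
    public static void main(String[] args) {
        long[] splitBytes = { 100, 200, 50 }; // hypothetical split sizes

        Progress progress = new Progress();
        progress.addPhases(splitBytes.length); // one phase per split, as above

        for (long total : splitBytes) {
            for (long fetched = 25; fetched <= total; fetched += 25) {
                progress.phase().set((float) fetched / total); // per-split progress
                System.out.printf("overall: %.2f%n", progress.get());
            }
            progress.phase().complete(); // this split is done
            progress.startNextPhase();   // advance to the next split's phase
        }
    }
}

Because get() walks to the root of the progress tree and weights each phase equally, the printed overall value advances by one third per split here regardless of the splits' byte sizes.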