org.apache.tez.mapreduce.examples.BroadcastLoadGen.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tez.mapreduce.examples.BroadcastLoadGen.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tez.mapreduce.examples;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Random;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.util.ToolRunner;
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.examples.TezExampleBase;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.KeyValueWriter;
import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig;
import org.apache.tez.runtime.library.output.UnorderedKVOutput;
import org.apache.tez.runtime.library.processor.SimpleProcessor;

import com.google.common.base.Preconditions;

/**
 * Example Tez job that exercises a broadcast edge.
 *
 * <p>The DAG built by {@link #createDAG(int, int, int)} has two vertices: a "DataGen" vertex
 * whose tasks write random {@link IntWritable} values, and a "FetchVertex" whose tasks read
 * every value delivered over the broadcast edge and print a count and a sum.
 *
 * <p>Command line: {@code BroadcastLoadGen <num_source_tasks> <total_source_data>
 * <num_destination_tasks>}.
 */
public class BroadcastLoadGen extends TezExampleBase {

    private static final Logger LOG = LoggerFactory.getLogger(BroadcastLoadGen.class);

    /**
     * Divisor used to turn a byte budget into a record count.
     * NOTE(review): presumably a 4-byte int plus per-record framing overhead of the
     * unordered KV format - confirm against the output's on-disk format.
     */
    private static final int APPROX_BYTES_PER_RECORD = 6;

    /**
     * Processor for the data-generating vertex. Writes random ints, keyed by
     * {@link NullWritable}, to its single output.
     */
    public static class InputGenProcessor extends SimpleProcessor {

        /** Approximate number of bytes this task should emit; read from the user payload. */
        final int bytesToGenerate;

        /**
         * @param context processor context; its user payload must hold the byte budget as an
         *                int at offset 0 (this is what {@code createDAG} stores there)
         */
        public InputGenProcessor(ProcessorContext context) {
            super(context);
            bytesToGenerate = context.getUserPayload().getPayload().getInt(0);
        }

        /**
         * Writes roughly {@code bytesToGenerate / 6} random ints to the only output.
         *
         * @throws IllegalArgumentException if the processor does not have exactly one output
         * @throws Exception                if obtaining the writer or writing a record fails
         */
        @Override
        public void run() throws Exception {
            Preconditions.checkArgument(getOutputs().size() == 1);
            LogicalOutput out = getOutputs().values().iterator().next();
            if (!(out instanceof UnorderedKVOutput)) {
                // Other output types are skipped (as before); make the no-op visible.
                LOG.warn("Output is not an UnorderedKVOutput ({}); no data generated",
                        out.getClass().getName());
                return;
            }

            KeyValueWriter kvWriter = ((UnorderedKVOutput) out).getWriter();
            Random random = new Random();
            int approxNumInts = bytesToGenerate / APPROX_BYTES_PER_RECORD;
            for (int i = 0; i < approxNumInts; i++) {
                kvWriter.write(NullWritable.get(), new IntWritable(random.nextInt()));
            }
        }
    }

    /**
     * Processor for the fetching vertex. Drains its single key-value input and reports how
     * many values were read and their sum.
     */
    public static class InputFetchProcessor extends SimpleProcessor {

        /**
         * @param context processor context supplied by the framework
         */
        public InputFetchProcessor(ProcessorContext context) {
            super(context);
        }

        /**
         * Reads every record from the only input and prints the task index, the number of
         * records and the sum of the values to {@code System.err}.
         *
         * @throws IllegalArgumentException if the processor does not have exactly one input
         * @throws Exception                if obtaining the reader or reading a record fails
         */
        @Override
        public void run() throws Exception {
            Preconditions.checkArgument(getInputs().size() == 1);
            KeyValueReader broadcastKvReader =
                    (KeyValueReader) getInputs().values().iterator().next().getReader();

            // The sum is kept as a long so adding many int values cannot overflow quickly.
            long sum = 0;
            int count = 0;
            while (broadcastKvReader.next()) {
                count++;
                sum += ((IntWritable) broadcastKvReader.getCurrentValue()).get();
            }

            // Task index is labelled separately so it cannot be misread as a factor of count.
            System.err.println(
                    "Task " + getContext().getTaskIndex() + ": Count = " + count + ", Sum=" + sum);
        }
    }

    /**
     * Builds the two-vertex DAG: "DataGen" broadcasting to "FetchVertex".
     *
     * @param numGenTasks         number of generator tasks; must be positive because the total
     *                            data size is divided evenly between them
     * @param totalSourceDataSize total number of bytes to generate across all generator tasks
     * @param numFetcherTasks     number of tasks in the fetching vertex
     * @return the assembled DAG named "BroadcastLoadGen"
     * @throws IllegalArgumentException if {@code numGenTasks} is not positive
     */
    private DAG createDAG(int numGenTasks, int totalSourceDataSize, int numFetcherTasks) {
        Preconditions.checkArgument(numGenTasks > 0,
                "numGenTasks must be positive, got " + numGenTasks);

        int bytesPerSource = totalSourceDataSize / numGenTasks;
        LOG.info("DataPerSourceTask(bytes)={}", bytesPerSource);

        // The per-task byte budget travels to InputGenProcessor as a 4-byte int at offset 0.
        ByteBuffer payload = ByteBuffer.allocate(4);
        payload.putInt(0, bytesPerSource);

        ProcessorDescriptor genDescriptor = ProcessorDescriptor
                .create(InputGenProcessor.class.getName())
                .setUserPayload(UserPayload.create(payload));
        Vertex broadcastVertex = Vertex.create("DataGen", genDescriptor, numGenTasks);

        ProcessorDescriptor fetchDescriptor =
                ProcessorDescriptor.create(InputFetchProcessor.class.getName());
        Vertex fetchVertex = Vertex.create("FetchVertex", fetchDescriptor, numFetcherTasks);

        // NullWritable keys, IntWritable values, compression explicitly disabled.
        UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
                .newBuilder(NullWritable.class.getName(), IntWritable.class.getName())
                .setCompression(false, null, null)
                .build();

        DAG dag = DAG.create("BroadcastLoadGen");
        dag.addVertex(broadcastVertex)
                .addVertex(fetchVertex)
                .addEdge(Edge.create(broadcastVertex, fetchVertex,
                        edgeConf.createDefaultBroadcastEdgeProperty()));
        return dag;
    }

    /**
     * Parses the three positional arguments, builds the DAG and runs it.
     *
     * @param args      {@code <num_source_tasks> <total_source_data> <num_destination_tasks>}
     * @param tezConf   Tez configuration supplied by the base class (not used directly here)
     * @param tezClient Tez client supplied by the base class (not used directly here)
     * @return the value returned by {@code runDag}
     * @throws NumberFormatException if an argument is not a valid int
     */
    @Override
    protected final int runJob(String[] args, TezConfiguration tezConf, TezClient tezClient)
            throws TezException, InterruptedException, IOException {
        LOG.info("Running: {} {}", this.getClass().getSimpleName(), StringUtils.join(args, " "));

        int numSourceTasks = Integer.parseInt(args[0]);
        int totalSourceData = Integer.parseInt(args[1]);
        int numFetcherTasks = Integer.parseInt(args[2]);
        LOG.info("Parameters: numSourceTasks={}, totalSourceDataSize(bytes)={}, numFetcherTasks={}",
                numSourceTasks, totalSourceData, numFetcherTasks);

        DAG dag = createDAG(numSourceTasks, totalSourceData, numFetcherTasks);
        // NOTE(review): the meaning of the 'false' flag is defined by TezExampleBase.runDag
        // and is not visible in this file.
        return runDag(dag, false, LOG);
    }

    /** Prints the command-line usage followed by the generic Hadoop tool options. */
    @Override
    protected void printUsage() {
        System.err.println(
                "Usage: " + "BroadcastLoadGen <num_source_tasks>  <total_source_data> <num_destination_tasks>");
        ToolRunner.printGenericCommandUsage(System.err);
    }

    /**
     * Checks that exactly three int arguments were supplied and that the source task count is
     * positive.
     *
     * @param otherArgs the positional arguments
     * @return 0 when the arguments are acceptable, 2 otherwise. NOTE(review): how the base
     *         class reacts to a non-zero result is defined in TezExampleBase - confirm there.
     */
    @Override
    protected final int validateArgs(String[] otherArgs) {
        if (otherArgs.length != 3) {
            return 2;
        }
        try {
            int numSourceTasks = Integer.parseInt(otherArgs[0]);
            Integer.parseInt(otherArgs[1]);
            Integer.parseInt(otherArgs[2]);
            // The total data size is divided by the source task count, so it must be positive.
            if (numSourceTasks <= 0) {
                return 2;
            }
        } catch (NumberFormatException e) {
            // Report malformed numbers as invalid arguments rather than failing inside runJob.
            return 2;
        }
        return 0;
    }

    /**
     * Command-line entry point; exits the JVM with the code returned by {@link ToolRunner}.
     *
     * @param args command-line arguments (generic Hadoop options followed by the three
     *             positional arguments)
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new BroadcastLoadGen(), args);
        System.exit(res);
    }
}