Example usage for org.apache.hadoop.mapred JobConf getOutputKeyClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.getOutputKeyClass.

Prototype

public Class<?> getOutputKeyClass() 

Document

Get the key class for the job output data.
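
A minimal, self-contained sketch of the typical pairing of setOutputKeyClass with getOutputKeyClass (the class name OutputKeyClassSketch is illustrative, not part of Hadoop); when no key class has been set, getOutputKeyClass falls back to LongWritable:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

// Illustrative example; not taken from any of the projects below.
public class OutputKeyClassSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();

        // Declare the types of the job's final output records.
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(IntWritable.class);

        // Read the configured key class back, e.g. to choose a serializer
        // or to pass its name to another component.
        Class<?> keyClass = jobConf.getOutputKeyClass();
        System.out.println("Output key class: " + keyClass.getName());
    }
}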

Usage

From source file: com.mellanox.hadoop.mapred.UdaPlugin.java

License: Apache License

public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0"); // default 0 means ignoring this parameter and use instead -Xmx and mapred.job.shuffle.input.buffer.percent
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal parameter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in Bytes - pass the raw value we got from xml file (with only conversion to bytes)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in Bytes . passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment          
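    // name of the job's output key class; included in the INIT_COMMAND parameters sent to the native (C++) side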
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); //"true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    //checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            //saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
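            // directory cannot be used; skip it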
        }
    }
    //sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor.java

License: Apache License

public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory classFactory) {
    String outputKeyClassName = null;
    String outputValueClassName = null;

    if (conf.getUseNewMapper()) {
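        // new (mapreduce) API: read the output key/value classes from a JobContext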
        JobContext context = new ContextFactory().createJobContext(conf);
        outputKeyClassName = context.getOutputKeyClass().getName();
        outputValueClassName = context.getOutputValueClass().getName();
    } else {
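        // old (mapred) API: read the output key/value classes directly from the JobConf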
        outputKeyClassName = conf.getOutputKeyClass().getName();
        outputValueClassName = conf.getOutputValueClass().getName();
    }

    RecordDescriptor recordDescriptor = null;
    try {
        if (classFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(outputKeyClassName),
                    (Class<? extends Writable>) Class.forName(outputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) classFactory.loadClass(outputKeyClassName),
                    (Class<? extends Writable>) classFactory.loadClass(outputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
    return recordDescriptor;
}

From source file: org.apache.blur.spark.Consumer.java

License: Apache License

private void run() {

    String checkpointDirectory = "hdfs://10.252.5.113:9000/user/hadoop/spark";

    // number of partition for Kafka Topic

    int _partitionCount = 5;

    List<JavaDStream<MessageAndMetadata>> streamsList = new ArrayList<JavaDStream<MessageAndMetadata>>(
            _partitionCount);
    JavaDStream<MessageAndMetadata> unionStreams;

    SparkConf conf = new SparkConf().setAppName("KafkaReceiver").set("spark.streaming.blockInterval", "200");

    // Path to Blur Libraries . Can be copied to each Node of Spark Cluster.

    conf.set("spark.executor.extraClassPath", "/home/apache-blur-0.2.4/lib/*");

    // Used KryoSerializer for BlurMutate and Text.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(3000));

    /*
     * Receive Kafka Stream. Create individual Receivers for each Topic
     * Partition
     */

    for (int i = 0; i < _partitionCount; i++) {

        streamsList.add(ssc.receiverStream(new KafkaReceiver(_props, i)));

    }

    /*
     * Union all the streams if there is more than 1 stream
     */

    if (streamsList.size() > 1) {
        unionStreams = ssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the 1 stream
        unionStreams = streamsList.get(0);
    }

    /*
     * Generate JavaPairDStream
     */

    JavaPairDStream<Text, BlurMutate> pairDStream = unionStreams
            .mapToPair(new PairFunction<MessageAndMetadata, Text, BlurMutate>() {

                private static final long serialVersionUID = 443235214978L;

                public Tuple2<Text, BlurMutate> call(MessageAndMetadata mmeta) {

                    /*
                     * create the BlurMutate from MessageAndMetadata
                     */

                    String message = new String(mmeta.getPayload());
                    String keyStr = DigestUtils.shaHex(message);
                    Text key = new Text((keyStr).getBytes());
                    BlurMutate mutate = new BlurMutate(BlurMutate.MUTATE_TYPE.REPLACE, keyStr, keyStr,
                            "family");
                    mutate.addColumn("message", message);

                    return new Tuple2<Text, BlurMutate>(key, mutate);
                }
            });

    pairDStream.foreachRDD(new Function2<JavaPairRDD<Text, BlurMutate>, Time, Void>() {

        private static final long serialVersionUID = 88875777435L;

        @Override
        public Void call(JavaPairRDD<Text, BlurMutate> rdd, Time time) throws Exception {

            /*
             * Blur Table Details
             */
            TableDescriptor tableDescriptor = new TableDescriptor();
            String tableUri = new Path("hdfs://10.252.5.113:9000/blur/tables/nrt").toString();
            tableDescriptor.tableUri = tableUri;
            tableDescriptor.cluster = "pearson";
            tableDescriptor.name = "nrt";
            tableDescriptor.shardCount = 9;
            Configuration conf = new Configuration();

            /*
             * Partition RDD to match Blur Table Shard Count. Used
             * Custom Partitioner to channel correct BlurMutate to
             * correct Shard.
             */

            final JavaPairRDD<Text, BlurMutate> pRdd = rdd
                    .partitionBy(new BlurSparkPartitioner(tableDescriptor.shardCount))
                    .persist(StorageLevel.MEMORY_ONLY_2());

            /*
             * Blur specific Configuration
             */

            BlurOutputFormat.setIndexLocally(conf, false);
            BlurOutputFormat.setOptimizeInFlight(conf, false);
            conf.setClass("mapreduce.reduce.class", DefaultBlurReducer.class, Reducer.class);
            conf.setClass("mapreduce.outputformat.class", BlurOutputFormat.class, OutputFormat.class);
            conf.setClass("mapreduce.partitioner.class", BlurPartitioner.class, Partitioner.class);
            conf.set("mapred.output.committer.class", BlurOutputCommitter.class.getName());
            conf.setInt("blur.output.max.document.buffer.size", 10000);

            BlurOutputFormat.setTableDescriptor(conf, tableDescriptor);

            JobConf jobConf = new JobConf(conf);

            jobConf.setNumReduceTasks(tableDescriptor.shardCount);
            jobConf.setOutputKeyClass(Text.class);
            jobConf.setOutputValueClass(BlurMutate.class);

            BlurMapReduceUtil.addAllJarsInBlurLib(conf);
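            // ship the jars containing these classes (including the configured output key/value classes) with the job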
            BlurMapReduceUtil.addDependencyJars(conf, org.apache.zookeeper.ZooKeeper.class,
                    org.apache.lucene.codecs.lucene42.Lucene42Codec.class, jobConf.getOutputKeyClass(),
                    jobConf.getOutputValueClass());

            /*
             * Write the RDD to Blur Table
             */

            if (pRdd.count() > 0)
                pRdd.saveAsNewAPIHadoopFile(tableUri, Text.class, BlurMutate.class, BlurOutputFormat.class,
                        jobConf);

            return null;
        }
    });

    // ssc.checkpoint(checkpointDirectory);
    ssc.start();
    ssc.awaitTermination();
}

From source file: org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public void configure(JobConf job) {
    super.configure(job);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transCombinerXml = job.get("transformation-combiner-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    combinerInputStepName = job.get("transformation-combiner-input-stepname");
    combinerOutputStepName = job.get("transformation-combiner-output-stepname");
    combineSingleThreaded = isCombinerSingleThreaded(job);
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    reduceSingleThreaded = isReducerSingleThreaded(job);
    String xmlVariableSpace = job.get("variableSpace");

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job.  The contents: ");

        //  deserialize from xml to variable space
        XStream xStream = new XStream();

        if (xStream != null) {
            setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: ");
            variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

            for (String variableName : variableSpace.listVariables()) {
                if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                    System.setProperty(variableName, variableSpace.getVariable(variableName));
                }
            }
        }
    } else {
        setDebugStatus(
                "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                    entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    switch (mrOperation) {
    case Combine:
        outClassK = (Class<K>) job.getMapOutputKeyClass();
        outClassV = (Class<V>) job.getMapOutputValueClass();
        break;
    case Reduce:
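        // the reducer emits the job's final output key/value types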
        outClassK = (Class<K>) job.getOutputKeyClass();
        outClassV = (Class<V>) job.getOutputValueClass();
        break;
    default:
        throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation);
    }

    if (debug) {
        System.out.println("Job configuration>");
        System.out.println("Output key class: " + outClassK.getName());
        System.out.println("Output value class: " + outClassV.getName());
    }

    //  set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println(
                "Could not retrieve the log level from the job configuration.  logLevel will not be set.");
    }

    createTrans(job);
}

From source file: org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java

License: Open Source License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java

License: Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java

License: Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: tap.core.ReducerBridge.java

License: Apache License

@Override
public void configure(JobConf conf) {
    super.configure(conf);

    isTextOutput = conf.getOutputFormat() instanceof TextOutputFormat;
    isProtoOutput = conf.getOutputFormat() instanceof TapfileOutputFormat;

    if (isProtoOutput) {
        try {
            mapOutClass = Class.forName(conf.get(Phase.MAP_OUT_CLASS));
            reduceOutClass = Class.forName(conf.get(Phase.REDUCE_OUT_CLASS));
            if (mapOutClass != reduceOutClass) {
                reduceOutKeyChanges = true;
                String groupBy = conf.get(Phase.GROUP_BY);
                String sortBy = conf.get(Phase.SORT_BY);
                reduceOutSchema = ReflectUtils.getSchema(ObjectFactory.newInstance(reduceOutClass));
                extractor = ReflectionKeyExtractor.getReflectionKeyExtractorForReduceOutKey(reduceOutSchema,
                        groupBy, sortBy);
            }
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    multiOutputPrefix = conf.get(Phase.MULTIPLE_OUTPUT_PREFIX);
    if (multiOutputPrefix == null)
        multiOutputPrefix = "out";

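    // register a named output that reuses the job's configured output format and output key/value classes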
    MultipleOutputs.addMultiNamedOutput(conf, multiOutputPrefix, conf.getOutputFormat().getClass(),
            conf.getOutputKeyClass(), conf.getOutputValueClass());

    this.multiOutput = new MultipleOutputs(conf);
}

From source file: voldemort.store.readonly.mr.serialization.JsonSequenceFileOutputFormat.java

License: Apache License

public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {

    // Shamelessly copy in hadoop code to allow us to set the metadata with
    // our schema

    // get the path of the temporary output file
    Path file = FileOutputFormat.getTaskOutputPath(job, name);

    FileSystem fs = file.getFileSystem(job);
    CompressionType compressionType = CompressionType.BLOCK;
    // find the right codec
    Class<?> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);

    // set the schema metadata
    /* begin jays code */
    SequenceFile.Metadata meta = new SequenceFile.Metadata();
    meta.set(new Text("key.schema"), new Text(getSchema("reducer.output.key.schema", job)));
    meta.set(new Text("value.schema"), new Text(getSchema("reducer.output.value.schema", job)));

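    // create the writer with the job's configured output key/value classes and the schema metadata set above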
    final SequenceFile.Writer out = SequenceFile.createWriter(fs, job, file, job.getOutputKeyClass(),
            job.getOutputValueClass(), compressionType, codec, progress, meta);
    /* end jays code */

    return new RecordWriter<BytesWritable, BytesWritable>() {

        public void write(BytesWritable key, BytesWritable value) throws IOException {

            out.append(key, value);
        }

        public void close(Reporter reporter) throws IOException {
            out.close();
        }
    };
}