Example usage for org.apache.hadoop.mapred JobConf getOutputKeyClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.getOutputKeyClass.

Prototype

public Class<?> getOutputKeyClass() 

Document

Get the key class for the job output data.
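
A minimal, self-contained sketch of the typical pairing of setOutputKeyClass with getOutputKeyClass (the class name OutputKeyClassSketch is illustrative, not part of Hadoop); when no key class has been set, getOutputKeyClass falls back to LongWritable:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

// Illustrative example; not taken from any of the projects below.
public class OutputKeyClassSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();

        // Declare the types of the job's final output records.
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(IntWritable.class);

        // Read the configured key class back, e.g. to choose a serializer
        // or to pass its name to another component.
        Class<?> keyClass = jobConf.getOutputKeyClass();
        System.out.println("Output key class: " + keyClass.getName());
    }
}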

Usage

From source file: com.mellanox.hadoop.mapred.UdaPlugin.java

License: Apache License

public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0"); // default 0 means ignoring this parameter and use instead -Xmx and mapred.job.shuffle.input.buffer.percent
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal parameter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in Bytes - pass the raw value we got from xml file (with only conversion to bytes)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in Bytes . passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment          
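    // name of the job's output key class; included in the INIT_COMMAND parameters sent to the native (C++) side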
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); //"true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    //checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            //saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
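            // directory cannot be used; skip it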
        }
    }
    //sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor.java

License: Apache License

public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory classFactory) {
    String outputKeyClassName = null;
    String outputValueClassName = null;

    if (conf.getUseNewMapper()) {
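        // new (mapreduce) API: read the output key/value classes from a JobContext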
        JobContext context = new ContextFactory().createJobContext(conf);
        outputKeyClassName = context.getOutputKeyClass().getName();
        outputValueClassName = context.getOutputValueClass().getName();
    } else {
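        // old (mapred) API: read the output key/value classes directly from the JobConf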
        outputKeyClassName = conf.getOutputKeyClass().getName();
        outputValueClassName = conf.getOutputValueClass().getName();
    }

    RecordDescriptor recordDescriptor = null;
    try {
        if (classFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(outputKeyClassName),
                    (Class<? extends Writable>) Class.forName(outputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) classFactory.loadClass(outputKeyClassName),
                    (Class<? extends Writable>) classFactory.loadClass(outputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
    return recordDescriptor;
}

From source file: org.apache.blur.spark.Consumer.java

License: Apache License

private void run() {

    String checkpointDirectory = "hdfs://10.252.5.113:9000/user/hadoop/spark";

    // number of partition for Kafka Topic

    int _partitionCount = 5;

    List<JavaDStream<MessageAndMetadata>> streamsList = new ArrayList<JavaDStream<MessageAndMetadata>>(
            _partitionCount);
    JavaDStream<MessageAndMetadata> unionStreams;

    SparkConf conf = new SparkConf().setAppName("KafkaReceiver").set("spark.streaming.blockInterval", "200");

    // Path to Blur Libraries . Can be copied to each Node of Spark Cluster.

    conf.set("spark.executor.extraClassPath", "/home/apache-blur-0.2.4/lib/*");

    // Used KryoSerializer for BlurMutate and Text.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(3000));

    /*
     * Receive Kafka Stream. Create individual Receivers for each Topic
     * Partition
     */

    for (int i = 0; i < _partitionCount; i++) {

        streamsList.add(ssc.receiverStream(new KafkaReceiver(_props, i)));

    }

    /*
     * Union all the streams if there is more than 1 stream
     */

    if (streamsList.size() > 1) {
        unionStreams = ssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the 1 stream
        unionStreams = streamsList.get(0);
    }

    /*
     * Generate JavaPairDStream
     */

    JavaPairDStream<Text, BlurMutate> pairDStream = unionStreams
            .mapToPair(new PairFunction<MessageAndMetadata, Text, BlurMutate>() {

                private static final long serialVersionUID = 443235214978L;

                public Tuple2<Text, BlurMutate> call(MessageAndMetadata mmeta) {

                    /*
                     * create the BlurMutate from MessageAndMetadata
                     */

                    String message = new String(mmeta.getPayload());
                    String keyStr = DigestUtils.shaHex(message);
                    Text key = new Text((keyStr).getBytes());
                    BlurMutate mutate = new BlurMutate(BlurMutate.MUTATE_TYPE.REPLACE, keyStr, keyStr,
                            "family");
                    mutate.addColumn("message", message);

                    return new Tuple2<Text, BlurMutate>(key, mutate);
                }
            });

    pairDStream.foreachRDD(new Function2<JavaPairRDD<Text, BlurMutate>, Time, Void>() {

        private static final long serialVersionUID = 88875777435L;

        @Override
        public Void call(JavaPairRDD<Text, BlurMutate> rdd, Time time) throws Exception {

            /*
             * Blur Table Details
             */
            TableDescriptor tableDescriptor = new TableDescriptor();
            String tableUri = new Path("hdfs://10.252.5.113:9000/blur/tables/nrt").toString();
            tableDescriptor.tableUri = tableUri;
            tableDescriptor.cluster = "pearson";
            tableDescriptor.name = "nrt";
            tableDescriptor.shardCount = 9;
            Configuration conf = new Configuration();

            /*
             * Partition RDD to match Blur Table Shard Count. Used
             * Custom Partitioner to channel correct BlurMutate to
             * correct Shard.
             */

            final JavaPairRDD<Text, BlurMutate> pRdd = rdd
                    .partitionBy(new BlurSparkPartitioner(tableDescriptor.shardCount))
                    .persist(StorageLevel.MEMORY_ONLY_2());

            /*
             * Blur specific Configuration
             */

            BlurOutputFormat.setIndexLocally(conf, false);
            BlurOutputFormat.setOptimizeInFlight(conf, false);
            conf.setClass("mapreduce.reduce.class", DefaultBlurReducer.class, Reducer.class);
            conf.setClass("mapreduce.outputformat.class", BlurOutputFormat.class, OutputFormat.class);
            conf.setClass("mapreduce.partitioner.class", BlurPartitioner.class, Partitioner.class);
            conf.set("mapred.output.committer.class", BlurOutputCommitter.class.getName());
            conf.setInt("blur.output.max.document.buffer.size", 10000);

            BlurOutputFormat.setTableDescriptor(conf, tableDescriptor);

            JobConf jobConf = new JobConf(conf);

            jobConf.setNumReduceTasks(tableDescriptor.shardCount);
            jobConf.setOutputKeyClass(Text.class);
            jobConf.setOutputValueClass(BlurMutate.class);

            BlurMapReduceUtil.addAllJarsInBlurLib(conf);
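            // ship the jars containing these classes (including the configured output key/value classes) with the job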
            BlurMapReduceUtil.addDependencyJars(conf, org.apache.zookeeper.ZooKeeper.class,
                    org.apache.lucene.codecs.lucene42.Lucene42Codec.class, jobConf.getOutputKeyClass(),
                    jobConf.getOutputValueClass());

            /*
             * Write the RDD to Blur Table
             */

            if (pRdd.count() > 0)
                pRdd.saveAsNewAPIHadoopFile(tableUri, Text.class, BlurMutate.class, BlurOutputFormat.class,
                        jobConf);

            return null;
        }
    });

    // ssc.checkpoint(checkpointDirectory);
    ssc.start();
    ssc.awaitTermination();
}

From source file: org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public void configure(JobConf job) {
    super.configure(job);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transCombinerXml = job.get("transformation-combiner-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    combinerInputStepName = job.get("transformation-combiner-input-stepname");
    combinerOutputStepName = job.get("transformation-combiner-output-stepname");
    combineSingleThreaded = isCombinerSingleThreaded(job);
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    reduceSingleThreaded = isReducerSingleThreaded(job);
    String xmlVariableSpace = job.get("variableSpace");

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job.  The contents: ");

        //  deserialize from xml to variable space
        XStream xStream = new XStream();

        if (xStream != null) {
            setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: ");
            variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

            for (String variableName : variableSpace.listVariables()) {
                if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                    System.setProperty(variableName, variableSpace.getVariable(variableName));
                }
            }
        }
    } else {
        setDebugStatus(
                "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                    entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    switch (mrOperation) {
    case Combine:
        outClassK = (Class<K>) job.getMapOutputKeyClass();
        outClassV = (Class<V>) job.getMapOutputValueClass();
        break;
    case Reduce:
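        // the reducer emits the job's final output key/value types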
        outClassK = (Class<K>) job.getOutputKeyClass();
        outClassV = (Class<V>) job.getOutputValueClass();
        break;
    default:
        throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation);
    }

    if (debug) {
        System.out.println("Job configuration>");
        System.out.println("Output key class: " + outClassK.getName());
        System.out.println("Output value class: " + outClassV.getName());
    }

    //  set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println(
                "Could not retrieve the log level from the job configuration.  logLevel will not be set.");
    }

    createTrans(job);
}

From source file: org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java

License: Open Source License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java

License: Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java

License: Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: tap.core.ReducerBridge.java

License: Apache License

@Override
public void configure(JobConf conf) {
    super.configure(conf);

    isTextOutput = conf.getOutputFormat() instanceof TextOutputFormat;
    isProtoOutput = conf.getOutputFormat() instanceof TapfileOutputFormat;

    if (isProtoOutput) {
        try {
            mapOutClass = Class.forName(conf.get(Phase.MAP_OUT_CLASS));
            reduceOutClass = Class.forName(conf.get(Phase.REDUCE_OUT_CLASS));
            if (mapOutClass != reduceOutClass) {
                reduceOutKeyChanges = true;
                String groupBy = conf.get(Phase.GROUP_BY);
                String sortBy = conf.get(Phase.SORT_BY);
                reduceOutSchema = ReflectUtils.getSchema(ObjectFactory.newInstance(reduceOutClass));
                extractor = ReflectionKeyExtractor.getReflectionKeyExtractorForReduceOutKey(reduceOutSchema,
                        groupBy, sortBy);
            }
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    multiOutputPrefix = conf.get(Phase.MULTIPLE_OUTPUT_PREFIX);
    if (multiOutputPrefix == null)
        multiOutputPrefix = "out";

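    // register a named output that reuses the job's configured output format and output key/value classes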
    MultipleOutputs.addMultiNamedOutput(conf, multiOutputPrefix, conf.getOutputFormat().getClass(),
            conf.getOutputKeyClass(), conf.getOutputValueClass());

    this.multiOutput = new MultipleOutputs(conf);
}

From source file: voldemort.store.readonly.mr.serialization.JsonSequenceFileOutputFormat.java

License: Apache License

public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {

    // Shamelessly copy in hadoop code to allow us to set the metadata with
    // our schema

    // get the path of the temporary output file
    Path file = FileOutputFormat.getTaskOutputPath(job, name);

    FileSystem fs = file.getFileSystem(job);
    CompressionType compressionType = CompressionType.BLOCK;
    // find the right codec
    Class<?> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);

    // set the schema metadata
    /* begin jays code */
    SequenceFile.Metadata meta = new SequenceFile.Metadata();
    meta.set(new Text("key.schema"), new Text(getSchema("reducer.output.key.schema", job)));
    meta.set(new Text("value.schema"), new Text(getSchema("reducer.output.value.schema", job)));

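    // create the writer with the job's configured output key/value classes and the schema metadata set above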
    final SequenceFile.Writer out = SequenceFile.createWriter(fs, job, file, job.getOutputKeyClass(),
            job.getOutputValueClass(), compressionType, codec, progress, meta);
    /* end jays code */

    return new RecordWriter<BytesWritable, BytesWritable>() {

        public void write(BytesWritable key, BytesWritable value) throws IOException {

            out.append(key, value);
        }

        public void close(Reporter reporter) throws IOException {
            out.close();
        }
    };
}