Example usage for org.apache.hadoop.mapred JobConf getOutputValueClass

Introduction

On this page you can find usage examples for org.apache.hadoop.mapred JobConf getOutputValueClass.

Prototype

public Class<?> getOutputValueClass() 

Document

Get the value class for job outputs.
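
Before the real-world examples, here is a minimal, self-contained sketch (not taken from the sources below; the class name GetOutputValueClassExample is illustrative) showing how getOutputValueClass() pairs with setOutputValueClass(): the getter returns whatever class was configured for job output values, falling back to Hadoop's default of Text when nothing was set.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class GetOutputValueClassExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Configure the output key/value types for the job.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // getOutputValueClass() returns the class set above;
        // if setOutputValueClass() was never called, it returns Text.class.
        Class<?> valueClass = conf.getOutputValueClass();
        System.out.println("Output value class: " + valueClass.getName());
        // prints: org.apache.hadoop.io.IntWritable
    }
}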

Usage

From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.DkproReducer.java

License:Apache License

@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        // create an output writable of the appropriate type
        outValue = (CASWritable) job.getOutputValueClass().newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java

License:Open Source License

@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);

        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }

        }
        replaceRecursively(engineDescription);
        this.engine = createEngine(engineDescription);

    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }

}

From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor.java

License:Apache License

public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory classFactory) {
    String outputKeyClassName = null;
    String outputValueClassName = null;

    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        outputKeyClassName = context.getOutputKeyClass().getName();
        outputValueClassName = context.getOutputValueClass().getName();
    } else {
        outputKeyClassName = conf.getOutputKeyClass().getName();
        outputValueClassName = conf.getOutputValueClass().getName();
    }

    RecordDescriptor recordDescriptor = null;
    try {
        if (classFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(outputKeyClassName),
                    (Class<? extends Writable>) Class.forName(outputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) classFactory.loadClass(outputKeyClassName),
                    (Class<? extends Writable>) classFactory.loadClass(outputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
    return recordDescriptor;
}

From source file:org.apache.blur.spark.Consumer.java

License:Apache License

private void run() {

    String checkpointDirectory = "hdfs://10.252.5.113:9000/user/hadoop/spark";

    // number of partition for Kafka Topic

    int _partitionCount = 5;

    List<JavaDStream<MessageAndMetadata>> streamsList = new ArrayList<JavaDStream<MessageAndMetadata>>(
            _partitionCount);
    JavaDStream<MessageAndMetadata> unionStreams;

    SparkConf conf = new SparkConf().setAppName("KafkaReceiver").set("spark.streaming.blockInterval", "200");

    // Path to Blur Libraries . Can be copied to each Node of Spark Cluster.

    conf.set("spark.executor.extraClassPath", "/home/apache-blur-0.2.4/lib/*");

    // Used KryoSerializer for BlurMutate and Text.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(3000));

    /*
     * Receive Kafka Stream. Create individual Receivers for each Topic
     * Partition
     */

    for (int i = 0; i < _partitionCount; i++) {

        streamsList.add(ssc.receiverStream(new KafkaReceiver(_props, i)));

    }

    /*
     * Union all the streams if there is more than 1 stream
     */

    if (streamsList.size() > 1) {
        unionStreams = ssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the 1 stream
        unionStreams = streamsList.get(0);
    }

    /*
     * Generate JavaPairDStream
     */

    JavaPairDStream<Text, BlurMutate> pairDStream = unionStreams
            .mapToPair(new PairFunction<MessageAndMetadata, Text, BlurMutate>() {

                private static final long serialVersionUID = 443235214978L;

                public Tuple2<Text, BlurMutate> call(MessageAndMetadata mmeta) {

                    /*
                     * create the BlurMutate from MessageAndMetadata
                     */

                    String message = new String(mmeta.getPayload());
                    String keyStr = DigestUtils.shaHex(message);
                    Text key = new Text((keyStr).getBytes());
                    BlurMutate mutate = new BlurMutate(BlurMutate.MUTATE_TYPE.REPLACE, keyStr, keyStr,
                            "family");
                    mutate.addColumn("message", message);

                    return new Tuple2<Text, BlurMutate>(key, mutate);
                }
            });

    pairDStream.foreachRDD(new Function2<JavaPairRDD<Text, BlurMutate>, Time, Void>() {

        private static final long serialVersionUID = 88875777435L;

        @Override
        public Void call(JavaPairRDD<Text, BlurMutate> rdd, Time time) throws Exception {

            /*
             * Blur Table Details
             */
            TableDescriptor tableDescriptor = new TableDescriptor();
            String tableUri = new Path("hdfs://10.252.5.113:9000/blur/tables/nrt").toString();
            tableDescriptor.tableUri = tableUri;
            tableDescriptor.cluster = "pearson";
            tableDescriptor.name = "nrt";
            tableDescriptor.shardCount = 9;
            Configuration conf = new Configuration();

            /*
             * Partition RDD to match Blur Table Shard Count. Used
             * Custom Partitioner to channel correct BlurMutate to
             * correct Shard.
             */

            final JavaPairRDD<Text, BlurMutate> pRdd = rdd
                    .partitionBy(new BlurSparkPartitioner(tableDescriptor.shardCount))
                    .persist(StorageLevel.MEMORY_ONLY_2());

            /*
             * Blur specific Configuration
             */

            BlurOutputFormat.setIndexLocally(conf, false);
            BlurOutputFormat.setOptimizeInFlight(conf, false);
            conf.setClass("mapreduce.reduce.class", DefaultBlurReducer.class, Reducer.class);
            conf.setClass("mapreduce.outputformat.class", BlurOutputFormat.class, OutputFormat.class);
            conf.setClass("mapreduce.partitioner.class", BlurPartitioner.class, Partitioner.class);
            conf.set("mapred.output.committer.class", BlurOutputCommitter.class.getName());
            conf.setInt("blur.output.max.document.buffer.size", 10000);

            BlurOutputFormat.setTableDescriptor(conf, tableDescriptor);

            JobConf jobConf = new JobConf(conf);

            jobConf.setNumReduceTasks(tableDescriptor.shardCount);
            jobConf.setOutputKeyClass(Text.class);
            jobConf.setOutputValueClass(BlurMutate.class);

            BlurMapReduceUtil.addAllJarsInBlurLib(conf);
            BlurMapReduceUtil.addDependencyJars(conf, org.apache.zookeeper.ZooKeeper.class,
                    org.apache.lucene.codecs.lucene42.Lucene42Codec.class, jobConf.getOutputKeyClass(),
                    jobConf.getOutputValueClass());

            /*
             * Write the RDD to Blur Table
             */

            if (pRdd.count() > 0)
                pRdd.saveAsNewAPIHadoopFile(tableUri, Text.class, BlurMutate.class, BlurOutputFormat.class,
                        jobConf);

            return null;
        }
    });

    // ssc.checkpoint(checkpointDirectory);
    ssc.start();
    ssc.awaitTermination();
}

From source file:org.dkpro.bigdata.hadoop.UIMAMapReduceBase.java

License:Open Source License

@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.inputName = job.get("mapred.input.dir");
        this.taskId = job.get("mapred.task.id");
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);

        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        if (job.getBoolean("dkpro.output.onedirpertask", true)) {
            this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        } else {
            this.working_dir = new Path("uima_output");
        }
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }

        }
        Map<String, String> variableValues = new HashMap<String, String>();
        variableValues.put("\\$dir", this.results_dir.toString());
        variableValues.put("\\$input", this.inputName);
        variableValues.put("\\$taskid", this.taskId);
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);
        if (cacheFiles != null) {
            for (Path cacheFile : cacheFiles) {
                variableValues.put("^\\$cache/" + cacheFile.getName(), cacheFile.toUri().getPath());
            }
        }
        for (final Entry<String, URL> resource : this.resourceURIs.entrySet()) {
            variableValues.put("\\$" + resource, resource.getValue().toString());
        }
        AnalysisEngineUtil.replaceVariables(engineDescription, variableValues);
        this.engine = createEngine(engineDescription);

    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }

}

From source file:org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void configure(JobConf job) {
    super.configure(job);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transCombinerXml = job.get("transformation-combiner-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    combinerInputStepName = job.get("transformation-combiner-input-stepname");
    combinerOutputStepName = job.get("transformation-combiner-output-stepname");
    combineSingleThreaded = isCombinerSingleThreaded(job);
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    reduceSingleThreaded = isReducerSingleThreaded(job);
    String xmlVariableSpace = job.get("variableSpace");

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job.  The contents: ");

        //  deserialize from xml to variable space
        XStream xStream = new XStream();

        if (xStream != null) {
            setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: ");
            variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

            for (String variableName : variableSpace.listVariables()) {
                if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                    System.setProperty(variableName, variableSpace.getVariable(variableName));
                }
            }
        }
    } else {
        setDebugStatus(
                "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                    entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    switch (mrOperation) {
    case Combine:
        outClassK = (Class<K>) job.getMapOutputKeyClass();
        outClassV = (Class<V>) job.getMapOutputValueClass();
        break;
    case Reduce:
        outClassK = (Class<K>) job.getOutputKeyClass();
        outClassV = (Class<V>) job.getOutputValueClass();
        break;
    default:
        throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation);
    }

    if (debug) {
        System.out.println("Job configuration>");
        System.out.println("Output key class: " + outClassK.getName());
        System.out.println("Output value class: " + outClassV.getName());
    }

    //  set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println(
                "Could not retrieve the log level from the job configuration.  logLevel will not be set.");
    }

    createTrans(job);
}

From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java

License:Open Source License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java

License:Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java

License:Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file:tap.core.ReducerBridge.java

License:Apache License

@Override
public void configure(JobConf conf) {
    super.configure(conf);

    isTextOutput = conf.getOutputFormat() instanceof TextOutputFormat;
    isProtoOutput = conf.getOutputFormat() instanceof TapfileOutputFormat;

    if (isProtoOutput) {
        try {
            mapOutClass = Class.forName(conf.get(Phase.MAP_OUT_CLASS));
            reduceOutClass = Class.forName(conf.get(Phase.REDUCE_OUT_CLASS));
            if (mapOutClass != reduceOutClass) {
                reduceOutKeyChanges = true;
                String groupBy = conf.get(Phase.GROUP_BY);
                String sortBy = conf.get(Phase.SORT_BY);
                reduceOutSchema = ReflectUtils.getSchema(ObjectFactory.newInstance(reduceOutClass));
                extractor = ReflectionKeyExtractor.getReflectionKeyExtractorForReduceOutKey(reduceOutSchema,
                        groupBy, sortBy);
            }
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    multiOutputPrefix = conf.get(Phase.MULTIPLE_OUTPUT_PREFIX);
    if (multiOutputPrefix == null)
        multiOutputPrefix = "out";

    MultipleOutputs.addMultiNamedOutput(conf, multiOutputPrefix, conf.getOutputFormat().getClass(),
            conf.getOutputKeyClass(), conf.getOutputValueClass());

    this.multiOutput = new MultipleOutputs(conf);
}