List of usage examples for org.apache.hadoop.mapred.JobConf.getOutputValueClass()
public Class<?> getOutputValueClass()
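Most of the examples below follow the same round trip: the job driver registers the reducer's output value type with setOutputValueClass(...), and the reducer, output format, or test code reads it back through getOutputValueClass(), often to instantiate an empty value reflectively. A minimal self-contained sketch of that pattern (the class name OutputValueClassExample is only for illustration):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;

public class OutputValueClassExample {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        // Driver side: declare the job's output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Consumer side (reducer, output format, test): read the configured
        // type back and create an empty instance of it reflectively.
        Class<?> valueClass = job.getOutputValueClass();
        Writable value = (Writable) valueClass.newInstance();
        System.out.println("output value class: " + valueClass.getName()
                + ", new instance: " + value);
    }
}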
From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.DkproReducer.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        // create an output writable of the appropriate type
        outValue = (CASWritable) job.getOutputValueClass().newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);
        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);
        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }
        }
        replaceRecursively(engineDescription);
        this.engine = createEngine(engineDescription);
    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor.java
License:Apache License
public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory classFactory) {
    String outputKeyClassName = null;
    String outputValueClassName = null;
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        outputKeyClassName = context.getOutputKeyClass().getName();
        outputValueClassName = context.getOutputValueClass().getName();
    } else {
        outputKeyClassName = conf.getOutputKeyClass().getName();
        outputValueClassName = conf.getOutputValueClass().getName();
    }
    RecordDescriptor recordDescriptor = null;
    try {
        if (classFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(outputKeyClassName),
                    (Class<? extends Writable>) Class.forName(outputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) classFactory.loadClass(outputKeyClassName),
                    (Class<? extends Writable>) classFactory.loadClass(outputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
    return recordDescriptor;
}
From source file:org.apache.blur.spark.Consumer.java
License:Apache License
private void run() {
    String checkpointDirectory = "hdfs://10.252.5.113:9000/user/hadoop/spark";

    // number of partitions for the Kafka topic
    int _partitionCount = 5;

    List<JavaDStream<MessageAndMetadata>> streamsList = new ArrayList<JavaDStream<MessageAndMetadata>>(
            _partitionCount);
    JavaDStream<MessageAndMetadata> unionStreams;

    SparkConf conf = new SparkConf().setAppName("KafkaReceiver").set("spark.streaming.blockInterval", "200");
    // Path to Blur libraries. Can be copied to each node of the Spark cluster.
    conf.set("spark.executor.extraClassPath", "/home/apache-blur-0.2.4/lib/*");
    // Use KryoSerializer for BlurMutate and Text.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(3000));

    /*
     * Receive Kafka stream. Create individual receivers for each topic
     * partition.
     */
    for (int i = 0; i < _partitionCount; i++) {
        streamsList.add(ssc.receiverStream(new KafkaReceiver(_props, i)));
    }

    /*
     * Union all the streams if there is more than 1 stream.
     */
    if (streamsList.size() > 1) {
        unionStreams = ssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the 1 stream
        unionStreams = streamsList.get(0);
    }

    /*
     * Generate JavaPairDStream.
     */
    JavaPairDStream<Text, BlurMutate> pairDStream = unionStreams
            .mapToPair(new PairFunction<MessageAndMetadata, Text, BlurMutate>() {
                private static final long serialVersionUID = 443235214978L;

                public Tuple2<Text, BlurMutate> call(MessageAndMetadata mmeta) {
                    /*
                     * create the BlurMutate from MessageAndMetadata
                     */
                    String message = new String(mmeta.getPayload());
                    String keyStr = DigestUtils.shaHex(message);
                    Text key = new Text((keyStr).getBytes());
                    BlurMutate mutate = new BlurMutate(BlurMutate.MUTATE_TYPE.REPLACE, keyStr, keyStr,
                            "family");
                    mutate.addColumn("message", message);
                    return new Tuple2<Text, BlurMutate>(key, mutate);
                }
            });

    pairDStream.foreachRDD(new Function2<JavaPairRDD<Text, BlurMutate>, Time, Void>() {
        private static final long serialVersionUID = 88875777435L;

        @Override
        public Void call(JavaPairRDD<Text, BlurMutate> rdd, Time time) throws Exception {
            /*
             * Blur table details.
             */
            TableDescriptor tableDescriptor = new TableDescriptor();
            String tableUri = new Path("hdfs://10.252.5.113:9000/blur/tables/nrt").toString();
            tableDescriptor.tableUri = tableUri;
            tableDescriptor.cluster = "pearson";
            tableDescriptor.name = "nrt";
            tableDescriptor.shardCount = 9;
            Configuration conf = new Configuration();

            /*
             * Partition the RDD to match the Blur table shard count. A custom
             * partitioner channels each BlurMutate to the correct shard.
             */
            final JavaPairRDD<Text, BlurMutate> pRdd = rdd
                    .partitionBy(new BlurSparkPartitioner(tableDescriptor.shardCount))
                    .persist(StorageLevel.MEMORY_ONLY_2());

            /*
             * Blur-specific configuration.
             */
            BlurOutputFormat.setIndexLocally(conf, false);
            BlurOutputFormat.setOptimizeInFlight(conf, false);
            conf.setClass("mapreduce.reduce.class", DefaultBlurReducer.class, Reducer.class);
            conf.setClass("mapreduce.outputformat.class", BlurOutputFormat.class, OutputFormat.class);
            conf.setClass("mapreduce.partitioner.class", BlurPartitioner.class, Partitioner.class);
            conf.set("mapred.output.committer.class", BlurOutputCommitter.class.getName());
            conf.setInt("blur.output.max.document.buffer.size", 10000);
            BlurOutputFormat.setTableDescriptor(conf, tableDescriptor);

            JobConf jobConf = new JobConf(conf);
            jobConf.setNumReduceTasks(tableDescriptor.shardCount);
            jobConf.setOutputKeyClass(Text.class);
            jobConf.setOutputValueClass(BlurMutate.class);

            BlurMapReduceUtil.addAllJarsInBlurLib(conf);
            BlurMapReduceUtil.addDependencyJars(conf, org.apache.zookeeper.ZooKeeper.class,
                    org.apache.lucene.codecs.lucene42.Lucene42Codec.class, jobConf.getOutputKeyClass(),
                    jobConf.getOutputValueClass());

            /*
             * Write the RDD to the Blur table.
             */
            if (pRdd.count() > 0)
                pRdd.saveAsNewAPIHadoopFile(tableUri, Text.class, BlurMutate.class, BlurOutputFormat.class,
                        jobConf);

            return null;
        }
    });

    // ssc.checkpoint(checkpointDirectory);
    ssc.start();
    ssc.awaitTermination();
}
From source file:org.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.inputName = job.get("mapred.input.dir");
        this.taskId = job.get("mapred.task.id");
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);
        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);
        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        if (job.getBoolean("dkpro.output.onedirpertask", true)) {
            this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        } else {
            this.working_dir = new Path("uima_output");
        }
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }
        }
        Map<String, String> variableValues = new HashMap<String, String>();
        variableValues.put("\\$dir", this.results_dir.toString());
        variableValues.put("\\$input", this.inputName);
        variableValues.put("\\$taskid", this.taskId);
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);
        if (cacheFiles != null) {
            for (Path cacheFile : cacheFiles) {
                variableValues.put("^\\$cache/" + cacheFile.getName(), cacheFile.toUri().getPath());
            }
        }
        for (final Entry<String, URL> resource : this.resourceURIs.entrySet()) {
            variableValues.put("\\$" + resource, resource.getValue().toString());
        }
        AnalysisEngineUtil.replaceVariables(engineDescription, variableValues);
        this.engine = createEngine(engineDescription);
    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
From source file:org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java
License:Apache License
@SuppressWarnings("unchecked") @Override//from w w w . j a va 2 s. c o m public void configure(JobConf job) { super.configure(job); debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$ transMapXml = job.get("transformation-map-xml"); transCombinerXml = job.get("transformation-combiner-xml"); transReduceXml = job.get("transformation-reduce-xml"); mapInputStepName = job.get("transformation-map-input-stepname"); mapOutputStepName = job.get("transformation-map-output-stepname"); combinerInputStepName = job.get("transformation-combiner-input-stepname"); combinerOutputStepName = job.get("transformation-combiner-output-stepname"); combineSingleThreaded = isCombinerSingleThreaded(job); reduceInputStepName = job.get("transformation-reduce-input-stepname"); reduceOutputStepName = job.get("transformation-reduce-output-stepname"); reduceSingleThreaded = isReducerSingleThreaded(job); String xmlVariableSpace = job.get("variableSpace"); if (!Const.isEmpty(xmlVariableSpace)) { setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job. The contents: "); // deserialize from xml to variable space XStream xStream = new XStream(); if (xStream != null) { setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: "); variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace); for (String variableName : variableSpace.listVariables()) { if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(variableName, variableSpace.getVariable(variableName)); } } } } else { setDebugStatus( "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration."); variableSpace = new Variables(); } // Check for environment variables in the userDefined variables Iterator<Entry<String, String>> iter = job.iterator(); while (iter.hasNext()) { Entry<String, String> entry = iter.next(); if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) { System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()), entry.getValue()); } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(entry.getKey(), entry.getValue()); } } MRUtil.passInformationToTransformation(variableSpace, job); switch (mrOperation) { case Combine: outClassK = (Class<K>) job.getMapOutputKeyClass(); outClassV = (Class<V>) job.getMapOutputValueClass(); break; case Reduce: outClassK = (Class<K>) job.getOutputKeyClass(); outClassV = (Class<V>) job.getOutputValueClass(); break; default: throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation); } if (debug) { System.out.println("Job configuration>"); System.out.println("Output key class: " + outClassK.getName()); System.out.println("Output value class: " + outClassV.getName()); } // set the log level to what the level of the job is String stringLogLevel = job.get("logLevel"); if (!Const.isEmpty(stringLogLevel)) { logLevel = LogLevel.valueOf(stringLogLevel); setDebugStatus("Log level set to " + stringLogLevel); } else { System.out.println( "Could not retrieve the log level from the job configuration. logLevel will not be set."); } createTrans(job); }
From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License:Open Source License
@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputValueClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();
    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java
License:Apache License
@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputValueClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();
    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java
License:Apache License
@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputValueClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();
    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}
From source file:tap.core.ReducerBridge.java
License:Apache License
@Override
public void configure(JobConf conf) {
    super.configure(conf);
    isTextOutput = conf.getOutputFormat() instanceof TextOutputFormat;
    isProtoOutput = conf.getOutputFormat() instanceof TapfileOutputFormat;

    if (isProtoOutput) {
        try {
            mapOutClass = Class.forName(conf.get(Phase.MAP_OUT_CLASS));
            reduceOutClass = Class.forName(conf.get(Phase.REDUCE_OUT_CLASS));
            if (mapOutClass != reduceOutClass) {
                reduceOutKeyChanges = true;
                String groupBy = conf.get(Phase.GROUP_BY);
                String sortBy = conf.get(Phase.SORT_BY);
                reduceOutSchema = ReflectUtils.getSchema(ObjectFactory.newInstance(reduceOutClass));
                extractor = ReflectionKeyExtractor.getReflectionKeyExtractorForReduceOutKey(reduceOutSchema,
                        groupBy, sortBy);
            }
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    multiOutputPrefix = conf.get(Phase.MULTIPLE_OUTPUT_PREFIX);
    if (multiOutputPrefix == null)
        multiOutputPrefix = "out";

    MultipleOutputs.addMultiNamedOutput(conf, multiOutputPrefix, conf.getOutputFormat().getClass(),
            conf.getOutputKeyClass(), conf.getOutputValueClass());

    this.multiOutput = new MultipleOutputs(conf);
}