Example usage for org.apache.hadoop.mapred JobConf setOutputValueClass

List of usage examples for org.apache.hadoop.mapred JobConf setOutputValueClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass) 

Source Link

Document

Set the value class for job outputs.

Usage

From source file:cascading.scheme.SequenceFile.java

License:Open Source License

@Override
public void sinkInit(Tap tap, JobConf conf) {
    conf.setOutputKeyClass(Tuple.class); // supports TapCollector
    conf.setOutputValueClass(Tuple.class); // supports TapCollector
    conf.setOutputFormat(SequenceFileOutputFormat.class);
}

From source file:cascading.scheme.TextLine.java

License:Open Source License

@Override
public void sinkInit(Tap tap, JobConf conf) throws IOException {
    if (tap.getQualifiedPath(conf).toString().endsWith(".zip"))
        throw new IllegalStateException("cannot write zip files: " + FileOutputFormat.getOutputPath(conf));

    if (getSinkCompression() == Compress.DISABLE)
        conf.setBoolean("mapred.output.compress", false);
    else if (getSinkCompression() == Compress.ENABLE)
        conf.setBoolean("mapred.output.compress", true);

    conf.setOutputKeyClass(Text.class); // be explicit
    conf.setOutputValueClass(Text.class); // be explicit
    conf.setOutputFormat(TextOutputFormat.class);
}

From source file:cascading.scheme.WritableSequenceFile.java

License:Open Source License

@Override
public void sinkInit(Tap tap, JobConf conf) {
    super.sinkInit(tap, conf);

    if (keyType != null)
        conf.setOutputKeyClass(keyType);
    else/*ww  w. j a  va2s  .  c o m*/
        conf.setOutputKeyClass(NullWritable.class);

    if (valueType != null)
        conf.setOutputValueClass(valueType);
    else
        conf.setOutputValueClass(NullWritable.class);
}

From source file:clusteringblocks.ClusteringBlocks.java

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    JobConf job = new JobConf(conf, ClusteringBlocks.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("ClusteringBlocks");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //   job.set("key.value.separator.in.input.line", "");

    JobClient.runJob(job);/*  ww w .jav a  2  s  . c  o m*/

    return 0;
}

From source file:cn.edu.xmu.dm.mapreduce.MultiFileWordCount.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();/*from ww w. j  a  v a 2 s  . c  o m*/
        return 1;
    }

    JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
    job.setJobName("MultiFileWordCount");

    // set the InputFormat of the job to our InputFormat
    job.setInputFormat(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(LongWritable.class);

    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);

    return 0;
}

From source file:cn.edu.xmu.dm.mapreduce.Sort.java

License:Apache License

/**
 * The main driver for sort program. Invoke this method to submit the
 * map/reduce job./*from  w ww  .j ava  2s  .  co m*/
 * 
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Sorter");
    job.setJarByClass(Sort.class);

    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(inputFormatClass);
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}

From source file:co.nubetech.hiho.job.DBQueryInputJob.java

License:Apache License

public void runJobs(Configuration conf, int jobCounter) throws IOException {

    try {//from w  w w  . j av a 2 s  .com
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
        logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " +
    // conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS,
    // conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
        throw new IllegalArgumentException("Wrong value of output strategy. Please correct");
    }
    if (os != OutputStrategyEnum.AVRO) {
        switch (os) {

        case DUMP: {
            // job.setMapperClass(DBImportMapper.class);
            break;
        }
        /*
         * case AVRO: { job.setMapperClass(DBInputAvroMapper.class); //
         * need avro in cp // job.setJarByClass(Schema.class); // need
         * jackson which is needed by avro - ugly! //
         * job.setJarByClass(ObjectMapper.class);
         * job.setMapOutputKeyClass(NullWritable.class);
         * job.setMapOutputValueClass(AvroValue.class);
         * job.setOutputKeyClass(NullWritable.class);
         * job.setOutputValueClass(AvroValue.class);
         * job.setOutputFormatClass(AvroOutputFormat.class);
         * 
         * AvroOutputFormat.setOutputPath(job, new
         * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; }
         */
        case DELIMITED: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        }
        case JSON: {
            // job.setMapperClass(DBImportJsonMapper.class);
            // job.setJarByClass(ObjectMapper.class);
            break;
        }
        default: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        }

        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        job.setNumReduceTasks(0);

        try {
            // job.setJarByClass(Class.forName(conf.get(
            // org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
            logger.debug("OUTPUT format class is " + job.getOutputFormatClass());

            /*
             * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
             * ReflectionUtils.newInstance(job.getOutputFormatClass(),
             * job.getConfiguration()); output.checkOutputSpecs(job);
             */
            logger.debug("Class is " + ReflectionUtils
                    .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
            job.waitForCompletion(false);
            if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
                generateHiveScript(conf, job, jobCounter);
                generatePigScript(conf, job);
            }

        }
        /*
         * catch (HIHOException h) { h.printStackTrace(); }
         */
        catch (Exception e) {
            e.printStackTrace();
        } catch (HIHOException e) {
            e.printStackTrace();
        }
    }
    // avro to be handled differently, thanks to all the incompatibilities
    // in the apis.
    else {
        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        // co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(job,
        // inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        JobConf jobConf = new JobConf(conf);

        try {
            GenericDBWritable queryWritable = getDBWritable(jobConf);
            Schema pair = DBMapper.getPairSchema(queryWritable.getColumns());

            AvroJob.setMapOutputSchema(jobConf, pair);
            GenericRecordAvroOutputFormat.setOutputPath(jobConf,
                    new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));

            co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(jobConf, inputQuery,
                    inputBoundingQuery, params);
            jobConf.setInputFormat(co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.class);
            jobConf.setMapperClass(DBInputAvroMapper.class);
            jobConf.setMapOutputKeyClass(NullWritable.class);
            jobConf.setMapOutputValueClass(AvroValue.class);
            jobConf.setOutputKeyClass(NullWritable.class);
            jobConf.setOutputValueClass(Text.class);
            jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class);
            jobConf.setJarByClass(DBQueryInputJob.class);
            jobConf.setStrings("io.serializations",
                    "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization,org.apache.avro.mapred.AvroSerialization");
            jobConf.setNumReduceTasks(0);
            /*
             * jobConf.setOutputFormat(org.apache.hadoop.mapred.
             * SequenceFileOutputFormat.class);
             * org.apache.hadoop.mapred.SequenceFileOutputFormat
             * .setOutputPath(jobConf, new
             * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
             */
            JobClient.runJob(jobConf);
        } catch (Throwable e) {
            e.printStackTrace();
        }

    }

}

From source file:com.acme.extensions.mr.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    String[] otherArgs = new GenericOptionsParser(job, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//  w w w.j  a v a 2 s.co m
    }

    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    JobClient.runJob(job);
}

From source file:com.aerospike.hadoop.examples.externaljoin.ExternalJoin.java

License:Apache License

public int run(final String[] args) throws Exception {

    log.info("run starting");

    final Configuration conf = getConf();

    JobConf job = new JobConf(conf, ExternalJoin.class);
    job.setJobName("AerospikeExternalJoin");

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    // job.setCombinerClass(Reduce.class);  // Reduce changes format.
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Session.class);

    job.setOutputFormat(SessionOutputFormat.class);

    for (int ii = 0; ii < args.length; ++ii)
        FileInputFormat.addInputPath(job, new Path(args[ii]));

    JobClient.runJob(job);/*from w  ww  . j a  v  a 2 s  .c o  m*/

    log.info("finished");
    return 0;
}

From source file:com.aerospike.hadoop.examples.generateprofiles.GenerateProfiles.java

License:Apache License

public int run(final String[] args) throws Exception {

    log.info("run starting");

    final Configuration conf = getConf();

    JobConf job = new JobConf(conf, GenerateProfiles.class);
    job.setJobName("AerospikeGenerateProfiles");

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    // job.setCombinerClass(Reduce.class);  // Reduce changes format.
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Profile.class);

    job.setOutputFormat(ProfileOutputFormat.class);

    for (int ii = 0; ii < args.length; ++ii)
        FileInputFormat.addInputPath(job, new Path(args[ii]));

    JobClient.runJob(job);//  w  w w  . j ava 2s . c om

    log.info("finished");
    return 0;
}