Example usage for org.apache.hadoop.mapred JobConf setOutputKeyClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapred.JobConf.setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) 

Document

Set the key class for the job output data.
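
A minimal, self-contained driver sketch is shown below for orientation; the class name PassThroughJob and the job name are illustrative placeholders rather than code taken from the examples on this page. It wires the identity mapper and reducer to the default TextInputFormat, so the job output keys are the LongWritable byte offsets and the values are the Text lines, and those are the classes declared via setOutputKeyClass and setOutputValueClass.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class PassThroughJob {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(PassThroughJob.class);
        conf.setJobName("pass-through");

        // Declare the key and value classes for the job output data. With the
        // default TextInputFormat and the identity mapper/reducer, the keys are
        // byte offsets (LongWritable) and the values are lines of text (Text).
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}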

Usage

From source file:com.github.gaoyangthu.demo.mapred.terasort.TeraGen.java

License:Apache License

/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException {
    JobConf job = (JobConf) getConf();
    setNumberOfRows(job, Long.parseLong(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(RangeInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    JobClient.runJob(job);
    return 0;
}

From source file:com.github.gaoyangthu.demo.mapred.terasort.TeraSort.java

License:Apache License

public int run(String[] args) throws Exception {
    LOG.info("starting");
    JobConf job = (JobConf) getConf();
    Path inputDir = new Path(args[0]);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
    URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(TeraInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TeraInputFormat.writePartitionFile(job, partitionFile);
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);
    job.setInt("dfs.replication", 1);
    TeraOutputFormat.setFinalSync(job, true);
    JobClient.runJob(job);
    LOG.info("done");
    return 0;
}

From source file:com.hadoop.examples.geolocation.GeoLocationJob.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(GeoLocationJob.class);
    conf.setJobName("geolocationgroup");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(GeoLocationMapper.class);
    conf.setReducerClass(GeoLocationReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);

}

From source file:com.hadoop.secondarysort.SecondarySort_MapRed.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);
    }

    JobConf jobConf = new JobConf(conf);
    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(Reduce.class);

    jobConf.setPartitionerClass(FirstPartitioner.class);
    jobConf.setOutputValueGroupingComparator(FirstGroupingComparator.class);

    jobConf.setMapOutputKeyClass(IntPair.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);

    //
    // Job job = new Job(conf, "secondary sort");
    // job.setJarByClass(SecondarySort_MapRed.class);
    // job.setMapperClass(MapClass.class);
    // job.setReducerClass(Reduce.class);
    //
    // // group and partition by the first int in the pair
    // job.setPartitionerClass(FirstPartitioner.class);
    // job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // conf.setClass("mapred.output.key.comparator.class",
    // KeyComparator.class, RawComparator.class);
    // // job.setSortComparatorClass(SecondGroupingComparator.class);
    // // the map output is IntPair, IntWritable
    // job.setMapOutputKeyClass(IntPair.class);
    // job.setMapOutputValueClass(IntWritable.class);
    //
    // // the reduce output is Text, IntWritable
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(jobConf, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs[1]));

    // submit the job
    JobClient.runJob(jobConf);
}

From source file:com.hadoopilluminated.examples.dancing.DistributedPentomino.java

License:Apache License

public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("pentomino <output>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentonimo.class, Pentomino.class);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}

From source file:com.hadoopilluminated.examples.Grep.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[2]);
        if (args.length == 4) {
            grepJob.set("mapred.mapper.regex.group", args[3]);
        }

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormat(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        JobClient.runJob(grepJob);

        JobConf sortJob = new JobConf(getConf(), Grep.class);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormat(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class);

        JobClient.runJob(sortJob);
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}

From source file:com.hadoopilluminated.examples.Join.java

License:Apache License

/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there are communication problems with the job
 * tracker.
 */
@Override
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}

From source file:com.hazelcast.jet.hadoop.impl.WriteHdfsPTest.java

License:Open Source License

@Test
public void testWriteFile() throws Exception {
    int messageCount = 320;
    String mapName = randomMapName();
    JetInstance instance = createJetMember();
    createJetMember();

    Map<IntWritable, IntWritable> map = IntStream.range(0, messageCount).boxed()
            .collect(toMap(IntWritable::new, IntWritable::new));
    instance.getMap(mapName).putAll(map);

    Path path = getPath();

    JobConf conf = new JobConf();
    conf.setOutputFormat(outputFormatClass);
    conf.setOutputCommitter(FileOutputCommitter.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);

    if (outputFormatClass.equals(LazyOutputFormat.class)) {
        LazyOutputFormat.setOutputFormatClass(conf, TextOutputFormat.class);
    }

    FileOutputFormat.setOutputPath(conf, path);

    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.map(mapName)).drainTo(HdfsSinks.hdfs(conf))
            // we use higher value to increase the race chance for LazyOutputFormat
            .setLocalParallelism(8);

    Future<Void> future = instance.newJob(p).getFuture();
    assertCompletesEventually(future);

    JobConf readJobConf = new JobConf();
    readJobConf.setInputFormat(inputFormatClass);
    FileInputFormat.addInputPath(readJobConf, path);

    p = Pipeline.create();
    p.drawFrom(HdfsSources.hdfs(readJobConf)).drainTo(Sinks.list("results"));

    future = instance.newJob(p).getFuture();
    assertCompletesEventually(future);

    IList<Object> results = instance.getList("results");
    assertEquals(messageCount, results.size());
}

From source file:com.hazelcast.jet.impl.connector.hadoop.WriteHdfsPTest.java

License:Open Source License

@Test
public void testWriteFile() throws Exception {
    int messageCount = 20;
    String mapName = randomMapName();
    JetInstance instance = createJetMember();
    createJetMember();

    Map<IntWritable, IntWritable> map = IntStream.range(0, messageCount).boxed()
            .collect(toMap(IntWritable::new, IntWritable::new));
    instance.getMap(mapName).putAll(map);

    DAG dag = new DAG();
    Vertex producer = dag.newVertex("producer", readMap(mapName)).localParallelism(1);

    Path path = getPath();

    JobConf conf = new JobConf();
    conf.setOutputFormat(outputFormatClass);
    conf.setOutputCommitter(FileOutputCommitter.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(conf, path);

    Vertex consumer = dag.newVertex("consumer", writeHdfs(conf)).localParallelism(4);

    dag.edge(between(producer, consumer));

    Future<Void> future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    dag = new DAG();
    JobConf readJobConf = new JobConf();
    readJobConf.setInputFormat(inputFormatClass);
    FileInputFormat.addInputPath(readJobConf, path);
    producer = dag.newVertex("producer", readHdfs(readJobConf)).localParallelism(8);

    consumer = dag.newVertex("consumer", writeList("results")).localParallelism(1);

    dag.edge(between(producer, consumer));
    future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    IList<Object> results = instance.getList("results");
    assertEquals(messageCount, results.size());
}

From source file:com.hotels.corc.cascading.OrcFile.java

License:Apache License

/**
 * Sets the {@link OutputFormat} to {@link CorcOutputFormat}, sets the key and values to {@link NullWritable} and
 * {@link Corc} respectively.
 */
@Override
public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf conf) {
    conf.setOutputFormat(CorcOutputFormat.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Corc.class);
}