Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

This page lists example usages of the org.apache.hadoop.mapreduce Job method setMapOutputKeyClass, collected from a variety of open-source projects.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the map output data.
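
If the map output key class is never set, Hadoop falls back to the job's final output key class, so an explicit call to setMapOutputKeyClass is only required when the intermediate key type differs from the final one. Below is a minimal, self-contained driver sketch illustrating that case; WordLengthDriver and its nested classes are illustrative names, not taken from any of the projects listed on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordLengthDriver {

    // Emits (word, length): the map output types are Text/IntWritable.
    public static class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String word : value.toString().split("\\s+")) {
                if (!word.isEmpty()) {
                    context.write(new Text(word), new IntWritable(word.length()));
                }
            }
        }
    }

    // Emits one formatted Text line per word: the final output types are NullWritable/Text.
    public static class FormatReducer extends Reducer<Text, IntWritable, NullWritable, Text> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            for (IntWritable v : values) {
                context.write(NullWritable.get(), new Text(key + "\t" + v.get()));
                break; // one line per word is enough for this sketch
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word-length");
        job.setJarByClass(WordLengthDriver.class);
        job.setMapperClass(WordMapper.class);
        job.setReducerClass(FormatReducer.class);

        // The intermediate types differ from the final output types,
        // so they must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that these calls must happen before the job is submitted; once the job is running, setMapOutputKeyClass throws IllegalStateException.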

Usage

From source file:com.test.hadoop.unoExample.CardDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(CardDriver.class);
    job.setJobName(this.getClass().getName());

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(CardMapper.class);
    job.setReducerClass(CardTotalReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMult.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // Num of Row of MatrixA
    int K = Integer.parseInt(args[4]); // Num of Row of MatrixB'

    int IB = Integer.parseInt(args[5]); // RowBlock Size of MatrixA
    int KB = Integer.parseInt(args[6]); // RowBlock Size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMult.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMult.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Set up the Mappers, one per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB RowBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}
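
The example above declares a custom composite key, MatrixMult.IndexPair, as the map output key class. That class is not reproduced on this page, but any class used as a map output key must implement WritableComparable, provide a no-argument constructor, and serialize its fields symmetrically. A minimal sketch of such a key follows; the field names are assumptions, not the actual IndexPair source.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class IndexPair implements WritableComparable<IndexPair> {
    public int row; // row-block index (assumed field)
    public int col; // column-block index (assumed field)

    // Hadoop instantiates keys reflectively, so a no-arg constructor is required.
    public IndexPair() {
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(row);
        out.writeInt(col);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        row = in.readInt();
        col = in.readInt();
    }

    // Determines the sort order of keys during the shuffle.
    @Override
    public int compareTo(IndexPair o) {
        int c = Integer.compare(row, o.row);
        return c != 0 ? c : Integer.compare(col, o.col);
    }

    // Keeps the default HashPartitioner consistent with equals.
    @Override
    public int hashCode() {
        return 31 * row + col;
    }

    @Override
    public boolean equals(Object o) {
        return o instanceof IndexPair && row == ((IndexPair) o).row && col == ((IndexPair) o).col;
    }
}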

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMultiplication.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // Num of Row of MatrixA
    int K = Integer.parseInt(args[4]); // Num of Row of MatrixB'

    int IB = Integer.parseInt(args[5]); // RowBlock Size of MatrixA
    int KB = Integer.parseInt(args[6]); // RowBlock Size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMultiplication.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMultiplication.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Set up the Mappers, one per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB ColBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}

From source file:com.tetsuyaodaka.hadoop.math.matrix.TransformMatrix.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    if (args[2].equals("yes")) {
        conf.set("transpose", "true"); // transpose
    } else {// w  w  w. j av a 2 s.com
        conf.set("transpose", "false"); // 
    }

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(TransformMatrix.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Set up the Mapper
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Map.class); // input matrix
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}

From source file:com.toddbodnar.simpleHadoop.distributedHadoopDriver.java

/**
 * Runs a job
 *
 * @param theJob the MapReduceJob to be run
 * @param verbose if true, output progress information
 */
public static void run(MapReduceJob theJob, boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = GetConfiguration.get();
    Job job = Job.getInstance(conf, theJob.toString());
    job.setJarByClass(distributedHadoopDriver.class);

    job.setMapperClass(theJob.getMapper().getClass());
    job.setReducerClass(theJob.getReducer().getClass());

    job.setMapOutputKeyClass(theJob.getKeyType());
    job.setMapOutputValueClass(theJob.getValueType());

    theJob.writeConfig(job.getConfiguration());

    hdfsFile input = hdfsFile.transferToHDFS(theJob.getInput().getFile());
    if (!input.equals(theJob.getInput().getFile())) {
        garbage_collector.noteCreated(input);
    }
    if (theJob.getClass().equals(join.class)) {
        join jobLeftJoin = (join) theJob;

        hdfsFile input2 = hdfsFile.transferToHDFS(jobLeftJoin.getOtherInput().getFile());
        if (!input2.equals(jobLeftJoin.getOtherInput().getFile())) {
            garbage_collector.noteCreated(input2);
        }

        Mapper[] maps = jobLeftJoin.getMapperPairs();
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class, maps[0].getClass());
        MultipleInputs.addInputPath(job, input2.getPath(), TextInputFormat.class, maps[1].getClass());
    } else {
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class);
    }

    job.getConfiguration().set(TextOutputFormat.SEPERATOR, "");

    job.setOutputFormatClass(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(job, new Path(theJob.getInput().getFile().getLocation()));
    Path out = new Path(settings.hdfs_prefix + "/TMP_TABLE_" + theJob.hashCode());
    FileOutputFormat.setOutputPath(job, out);

    boolean success = job.waitForCompletion(verbose);

    if (!success) {
        System.err.println("Error processing " + theJob);
        return;
    }

    FileSystem fs = FileSystem.get(GetConfiguration.get());

    fs.delete(new Path(out, "_SUCCESS"), false);

    table output = new table(new hdfsFile(out), theJob.getOutput().getColNames());
    output.setSeperator(theJob.getOutput().getSeperator());

    theJob.setOutput(output);

    garbage_collector.noteCreated(output.getFile());
}

From source file:com.tomslabs.grid.avro.AvroWordCount.java

License:Apache License

public static Job createSubmitableJob(final Configuration conf, final Path inputPath, final Path outputPath)
        throws IOException {

    conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA, WordCountSchema.getSchema().toString());

    conf.setInt("mapred.max.split.size", 1024000);
    conf.setInt("mapred.reduce.tasks", 10);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", true);
    final Job job = new Job(conf, "Word Count");
    job.setJarByClass(AvroWordCount.class);

    job.setInputFormatClass(AvroFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCountMapper.class);

    job.setReducerClass(WordCountReducer.class);

    job.setOutputKeyClass(GenericRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(AvroFileOutputFormat.class);
    AvroFileOutputFormat.setDeflateLevel(job, 3);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}

From source file:com.toshiba.mwcloud.gs.hadoop.mapreduce.examples.GSWordCount.java

License:Apache License

/**
 * Run a MapReduce job of WordCount.
 * @param args command argument
 * @return 0 for normal termination of the job and 1 otherwise
 * @throws Exception processing failed.
 */
public int run(String[] args) throws Exception {
    GSConf gsConf = new GSConf();
    gsConf.parseArg(args);

    Configuration conf = getConf();
    gsConf.setup(conf);

    Job job = Job.getInstance(conf, APP_NAME);
    job.setJarByClass(GSWordCount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(GSRowWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(GSRowInputFormat.class);
    job.setOutputFormatClass(GSRowOutputFormat.class);

    int res = job.waitForCompletion(true) ? 0 : 1;

    if (res == 0) {
        printResult(gsConf);
    }

    return res;
}

From source file:com.trexinhca.TrexinHCATest.java

License:Apache License

public static void main(String[] args) throws Exception {

    ks = KieServices.Factory.get();
    kContainer = ks.getKieClasspathContainer();
    ksession = TrexinHCATest.kContainer.newKieSession("MapReduceKS");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: TrexinHCATest <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("HCATest");
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(TrexinHCAReducer.class);
    job.setReducerClass(TrexinHCAReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.setJarByClass(TrexinHCATest.class);
    job.waitForCompletion(true);

}

From source file:com.twitter.algebra.matrix.multiply.ABOuterHDFSBroadcastOfA.java

License:Apache License

/**
 * Perform A x B, where A and B refer to the paths that contain matrices in
 * {@link SequenceFileInputFormat} Refer to {@link ABOuterHDFSBroadcastOfA}
 * for further details.
 * 
 * @param conf
 *          the initial configuration
 * @param matrixInputPath
 *          path to matrix A
 * @param inMemMatrixDir
 *          path to matrix B (must be small enough to fit into memory)
 * @param matrixOutputPath
 *          path to which AxB will be written
 * @param inMemMatrixNumRows
 *          B rows
 * @param inMemMatrixNumCols
 *          B cols
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, String inMemMatrixDir, Path matrixInputPath, Path matrixOutputPath,
        int inMemMatrixNumRows, int inMemMatrixNumCols)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXINMEMORY, inMemMatrixDir);
    conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(ABOuterHDFSBroadcastOfA.class);
    job.setJobName(ABOuterHDFSBroadcastOfA.class.getSimpleName());
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat})
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, inMemMatrixNumRows);

    job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);

    job.setReducerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.algebra.matrix.multiply.AtBOuterStaticMapsideJoinJob.java

License:Apache License

public void run(Configuration conf, Path atPath, Path bPath, Path outPath, int outCardinality)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setInt(OUT_CARD, outCardinality);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJobName(AtBOuterStaticMapsideJoinJob.class.getSimpleName());
    job.setJarByClass(AtBOuterStaticMapsideJoinJob.class);

    FileSystem fs = FileSystem.get(atPath.toUri(), conf);
    atPath = fs.makeQualified(atPath);
    bPath = fs.makeQualified(bPath);
    job.setInputFormatClass(CompositeInputFormat.class);
    //mapside join expression
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, atPath, bPath));

    job.setOutputFormatClass(MatrixOutputFormat.class);
    outPath = fs.makeQualified(outPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    job.setCombinerClass(MyReducer.class);

    int numReducers = conf.getInt("algebra.reduceslots.multiply", 10);
    job.setNumReduceTasks(numReducers);

    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed");
}