Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

This page lists example usages of the org.apache.hadoop.mapreduce Job method setMapOutputKeyClass, collected from a variety of open-source projects.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the map output data.
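
If the map output key class is never set, Hadoop falls back to the job's final output key class, so an explicit call to setMapOutputKeyClass is only required when the intermediate key type differs from the final one. Below is a minimal, self-contained driver sketch illustrating that case; WordLengthDriver and its nested classes are illustrative names, not taken from any of the projects listed on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordLengthDriver {

    // Emits (word, length): the map output types are Text/IntWritable.
    public static class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String word : value.toString().split("\\s+")) {
                if (!word.isEmpty()) {
                    context.write(new Text(word), new IntWritable(word.length()));
                }
            }
        }
    }

    // Emits one formatted Text line per word: the final output types are NullWritable/Text.
    public static class FormatReducer extends Reducer<Text, IntWritable, NullWritable, Text> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            for (IntWritable v : values) {
                context.write(NullWritable.get(), new Text(key + "\t" + v.get()));
                break; // one line per word is enough for this sketch
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word-length");
        job.setJarByClass(WordLengthDriver.class);
        job.setMapperClass(WordMapper.class);
        job.setReducerClass(FormatReducer.class);

        // The intermediate types differ from the final output types,
        // so they must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that these calls must happen before the job is submitted; once the job is running, setMapOutputKeyClass throws IllegalStateException.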

Usage

From source file:com.test.hadoop.unoExample.CardDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(CardDriver.class);
    job.setJobName(this.getClass().getName());

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(CardMapper.class);
    job.setReducerClass(CardTotalReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMult.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // Num of Row of MatrixA
    int K = Integer.parseInt(args[4]); // Num of Row of MatrixB'

    int IB = Integer.parseInt(args[5]); // RowBlock Size of MatrixA
    int KB = Integer.parseInt(args[6]); // RowBlock Size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMult.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMult.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Set up the Mappers, one per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB RowBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}
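
The example above declares a custom composite key, MatrixMult.IndexPair, as the map output key class. That class is not reproduced on this page, but any class used as a map output key must implement WritableComparable, provide a no-argument constructor, and serialize its fields symmetrically. A minimal sketch of such a key follows; the field names are assumptions, not the actual IndexPair source.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class IndexPair implements WritableComparable<IndexPair> {
    public int row; // row-block index (assumed field)
    public int col; // column-block index (assumed field)

    // Hadoop instantiates keys reflectively, so a no-arg constructor is required.
    public IndexPair() {
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(row);
        out.writeInt(col);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        row = in.readInt();
        col = in.readInt();
    }

    // Determines the sort order of keys during the shuffle.
    @Override
    public int compareTo(IndexPair o) {
        int c = Integer.compare(row, o.row);
        return c != 0 ? c : Integer.compare(col, o.col);
    }

    // Keeps the default HashPartitioner consistent with equals.
    @Override
    public int hashCode() {
        return 31 * row + col;
    }

    @Override
    public boolean equals(Object o) {
        return o instanceof IndexPair && row == ((IndexPair) o).row && col == ((IndexPair) o).col;
    }
}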

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMultiplication.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // Num of Row of MatrixA
    int K = Integer.parseInt(args[4]); // Num of Row of MatrixB'

    int IB = Integer.parseInt(args[5]); // RowBlock Size of MatrixA
    int KB = Integer.parseInt(args[6]); // RowBlock Size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMultiplication.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMultiplication.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Set up the Mappers, one per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB ColBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}

From source file:com.tetsuyaodaka.hadoop.math.matrix.TransformMatrix.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    if (args[2].equals("yes")) {
        conf.set("transpose", "true"); // transpose
    } else {// w  w  w. j av a 2 s.com
        conf.set("transpose", "false"); // 
    }

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(TransformMatrix.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Set up the Mapper
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Map.class); // input matrix
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}

From source file:com.toddbodnar.simpleHadoop.distributedHadoopDriver.java

/**
 * Runs a job
 *
 * @param theJob the MapReduceJob to be run
 * @param verbose if true, output progress information
 */
public static void run(MapReduceJob theJob, boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = GetConfiguration.get();
    Job job = Job.getInstance(conf, theJob.toString());
    job.setJarByClass(distributedHadoopDriver.class);

    job.setMapperClass(theJob.getMapper().getClass());
    job.setReducerClass(theJob.getReducer().getClass());

    job.setMapOutputKeyClass(theJob.getKeyType());
    job.setMapOutputValueClass(theJob.getValueType());

    theJob.writeConfig(job.getConfiguration());

    hdfsFile input = hdfsFile.transferToHDFS(theJob.getInput().getFile());
    if (!input.equals(theJob.getInput().getFile())) {
        garbage_collector.noteCreated(input);
    }
    if (theJob.getClass().equals(join.class)) {
        join jobLeftJoin = (join) theJob;

        hdfsFile input2 = hdfsFile.transferToHDFS(jobLeftJoin.getOtherInput().getFile());
        if (!input2.equals(jobLeftJoin.getOtherInput().getFile())) {
            garbage_collector.noteCreated(input2);
        }

        Mapper[] maps = jobLeftJoin.getMapperPairs();
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class, maps[0].getClass());
        MultipleInputs.addInputPath(job, input2.getPath(), TextInputFormat.class, maps[1].getClass());
    } else {
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class);
    }

    job.getConfiguration().set(TextOutputFormat.SEPERATOR, "");

    job.setOutputFormatClass(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(job, new Path(theJob.getInput().getFile().getLocation()));
    Path out = new Path(settings.hdfs_prefix + "/TMP_TABLE_" + theJob.hashCode());
    FileOutputFormat.setOutputPath(job, out);

    boolean success = job.waitForCompletion(verbose);

    if (!success) {
        System.err.println("Error processing " + theJob);
        return;
    }

    FileSystem fs = FileSystem.get(GetConfiguration.get());

    fs.delete(new Path(out, "_SUCCESS"), false);

    table output = new table(new hdfsFile(out), theJob.getOutput().getColNames());
    output.setSeperator(theJob.getOutput().getSeperator());

    theJob.setOutput(output);

    garbage_collector.noteCreated(output.getFile());
}

From source file:com.tomslabs.grid.avro.AvroWordCount.java

License:Apache License

public static Job createSubmitableJob(final Configuration conf, final Path inputPath, final Path outputPath)
        throws IOException {

    conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA, WordCountSchema.getSchema().toString());

    conf.setInt("mapred.max.split.size", 1024000);
    conf.setInt("mapred.reduce.tasks", 10);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", true);
    final Job job = new Job(conf, "Word Count");
    job.setJarByClass(AvroWordCount.class);

    job.setInputFormatClass(AvroFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCountMapper.class);

    job.setReducerClass(WordCountReducer.class);

    job.setOutputKeyClass(GenericRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(AvroFileOutputFormat.class);
    AvroFileOutputFormat.setDeflateLevel(job, 3);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}

From source file:com.toshiba.mwcloud.gs.hadoop.mapreduce.examples.GSWordCount.java

License:Apache License

/**
 * Run a MapReduce job of WordCount.
 * @param args command argument
 * @return 0 for normal termination of the job and 1 otherwise
 * @throws Exception processing failed.
 */
public int run(String[] args) throws Exception {
    GSConf gsConf = new GSConf();
    gsConf.parseArg(args);

    Configuration conf = getConf();
    gsConf.setup(conf);

    Job job = Job.getInstance(conf, APP_NAME);
    job.setJarByClass(GSWordCount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(GSRowWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(GSRowInputFormat.class);
    job.setOutputFormatClass(GSRowOutputFormat.class);

    int res = job.waitForCompletion(true) ? 0 : 1;

    if (res == 0) {
        printResult(gsConf);
    }

    return res;
}

From source file:com.trexinhca.TrexinHCATest.java

License:Apache License

public static void main(String[] args) throws Exception {

    ks = KieServices.Factory.get();
    kContainer = ks.getKieClasspathContainer();
    ksession = TrexinHCATest.kContainer.newKieSession("MapReduceKS");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: TrexinHCATest <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("HCATest");
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(TrexinHCAReducer.class);
    job.setReducerClass(TrexinHCAReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.setJarByClass(TrexinHCATest.class);
    job.waitForCompletion(true);

}

From source file:com.twitter.algebra.matrix.multiply.ABOuterHDFSBroadcastOfA.java

License:Apache License

/**
 * Perform A x B, where A and B refer to the paths that contain matrices in
 * {@link SequenceFileInputFormat} Refer to {@link ABOuterHDFSBroadcastOfA}
 * for further details.
 * 
 * @param conf
 *          the initial configuration
 * @param matrixInputPath
 *          path to matrix A
 * @param inMemMatrixDir
 *          path to matrix B (must be small enough to fit into memory)
 * @param matrixOutputPath
 *          path to which AxB will be written
 * @param inMemMatrixNumRows
 *          B rows
 * @param inMemMatrixNumCols
 *          B cols
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, String inMemMatrixDir, Path matrixInputPath, Path matrixOutputPath,
        int inMemMatrixNumRows, int inMemMatrixNumCols)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXINMEMORY, inMemMatrixDir);
    conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(ABOuterHDFSBroadcastOfA.class);
    job.setJobName(ABOuterHDFSBroadcastOfA.class.getSimpleName());
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat})
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, inMemMatrixNumRows);

    job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);

    job.setReducerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}

From source file:com.twitter.algebra.matrix.multiply.AtBOuterStaticMapsideJoinJob.java

License:Apache License

public void run(Configuration conf, Path atPath, Path bPath, Path outPath, int outCardinality)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setInt(OUT_CARD, outCardinality);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJobName(AtBOuterStaticMapsideJoinJob.class.getSimpleName());
    job.setJarByClass(AtBOuterStaticMapsideJoinJob.class);

    FileSystem fs = FileSystem.get(atPath.toUri(), conf);
    atPath = fs.makeQualified(atPath);
    bPath = fs.makeQualified(bPath);
    job.setInputFormatClass(CompositeInputFormat.class);
    //mapside join expression
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, atPath, bPath));

    job.setOutputFormatClass(MatrixOutputFormat.class);
    outPath = fs.makeQualified(outPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    job.setCombinerClass(MyReducer.class);

    int numReducers = conf.getInt("algebra.reduceslots.multiply", 10);
    job.setNumReduceTasks(numReducers);

    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed");
}