Example usage for org.apache.hadoop.mapreduce Job Job

List of usage examples for org.apache.hadoop.mapreduce Job Job

Introduction

On this page you can find example usage for the org.apache.hadoop.mapreduce.Job(Configuration) constructor.

Prototype

Job(Configuration conf) throws IOException 
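
This constructor is deprecated in newer Hadoop releases, where Job.getInstance(Configuration) is the preferred replacement. A minimal sketch of typical usage follows; the MyDriver, MyMapper, and MyReducer class names and the input/output paths are hypothetical placeholders, not part of the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Deprecated constructor; equivalent to Job.getInstance(conf) on Hadoop 2.x+.
    Job job = new Job(conf);
    job.setJobName("example");
    job.setJarByClass(MyDriver.class);    // hypothetical driver class
    job.setMapperClass(MyMapper.class);   // hypothetical Mapper subclass
    job.setReducerClass(MyReducer.class); // hypothetical Reducer subclass
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}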

Usage

From source file:ComRoughSetApproInputSampler.java

License:Apache License

/**
 * Driver for InputSampler from the command line.
 * Configures a {@link Job} instance and calls {@link #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                job.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-inFormat".equals(args[i])) {
                job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
            } else if ("-keyClass".equals(args[i])) {
                job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
            } else if ("-splitSample".equals(args[i])) {
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new SplitSampler<K, V>(numSamples, maxSplits);
            } else if ("-splitRandom".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else if ("-splitInterval".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (job.getNumReduceTasks() <= 1) {
        System.err.println("Sampler requires more than one reducer");
        return printUsage();
    }
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    if (null == sampler) {
        sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }

    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
        FileInputFormat.addInputPath(job, new Path(s));
    }
    ComRoughSetApproInputSampler.<K, V>writePartitionFile(job, sampler);

    return 0;
}
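
The partition file written by this driver is meant to be consumed by a TotalOrderPartitioner in the job that does the actual sorting. A minimal sketch of the consuming side, assuming job and outf are set up as in the example above:

    // Point the sorting job at the partition file produced by the sampler,
    // so reducer outputs are globally ordered across partitions.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), outf);
    job.setPartitionerClass(TotalOrderPartitioner.class);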

From source file:MapReduce3.java

public static void main(String[] args) throws Exception {

    String dst = "hdfs://localhost:9000/data/2006a.csv";

    // output paths; the line below is a commented-out HDFS alternative
    //  String dstOut = "hdfs://localhost:9000/mapreduce/result3/1";
    String dstOut = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/1";
    String outFiles = "/Users/wendyzhuo/NetBeansProjects/final_Hadoop/src/output3/2";
    Configuration hadoopConfig = new Configuration();

    hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());

    hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    Job job = new Job(hadoopConfig);
    Job job2 = new Job(hadoopConfig);

    FileInputFormat.addInputPath(job, new Path(dst));
    FileOutputFormat.setOutputPath(job, new Path(dstOut));
    FileInputFormat.addInputPath(job2, new Path(dstOut));
    FileOutputFormat.setOutputPath(job2, new Path(outFiles));

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job, TempMapper.class, LongWritable.class, Text.class, CompositeKey_wd.class,
            IntWritable.class, map1Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job, TempReducer.class, CompositeKey_wd.class, IntWritable.class,
            CompositeKey_wd.class, IntWritable.class, reduceConf);

    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job2, TempMapper2.class, LongWritable.class, Text.class, IntWritable.class,
            CompositeKey_wd.class, map2Conf);
    JobConf map3Conf = new JobConf(false);
    ChainReducer.setReducer(job2, TempReduce2.class, IntWritable.class, CompositeKey_wd.class, Text.class,
            IntWritable.class, map3Conf);
    //  JobClient.runJob(job);

    // Mapper and Reducer are wired above via ChainMapper/ChainReducer, so the direct setters are unnecessary:
    //        job.setMapperClass(TempMapper.class);
    //
    //        job.setReducerClass(TempReducer.class);

    // output key/value classes
    job.setOutputKeyClass(CompositeKey_wd.class);

    job.setOutputValueClass(IntWritable.class);

    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(CompositeKey_wd.class);

    //  job2.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // run the two jobs in sequence
    job.waitForCompletion(true);
    System.out.println("Finished1");
    job2.waitForCompletion(true);
    System.out.println("Finished2");

}

From source file:PerformanceEvaluation.java

License:Apache License

private void doMapReduce(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
    Job job = new Job(this.conf);
    job.setJarByClass(PerformanceEvaluation.class);
    job.setJobName("HBase Performance Evaluation");

    job.setInputFormatClass(PeInputFormat.class);
    PeInputFormat.setInputPaths(job, inputDir);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(EvaluationMapTask.class);
    job.setReducerClass(LongSumReducer.class);

    job.setNumReduceTasks(1);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));

    TableMapReduceUtil.addDependencyJars(job);
    job.waitForCompletion(true);
}

From source file:HiddenMarkovModelBuilder.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "HMM model builder";
    job.setJobName(jobName);

    job.setJarByClass(HiddenMarkovModelBuilder.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");
    job.setMapperClass(HiddenMarkovModelBuilder.StateTransitionMapper.class);
    job.setReducerClass(HiddenMarkovModelBuilder.StateTransitionReducer.class);
    job.setCombinerClass(MarkovStateTransitionModel.StateTransitionCombiner.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}

From source file:MarkovStateTransitionModel.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Markov tate transition model";
    job.setJobName(jobName);//from www.  ja v a  2  s .c om

    job.setJarByClass(MarkovStateTransitionModel.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");
    job.setMapperClass(MarkovStateTransitionModel.StateTransitionMapper.class);
    job.setReducerClass(MarkovStateTransitionModel.StateTransitionReducer.class);
    job.setCombinerClass(MarkovStateTransitionModel.StateTransitionCombiner.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}

From source file:TestBAM.java

License:Open Source License

public int run(String[] args) throws Exception {
    final Configuration conf = getConf();

    conf.set(MyOutputFormat.HEADER_FROM_FILE, args[0]);
    DistributedCache.addFileToClassPath(new Path("hdfs:///libjars/hadoop-bam-7.0.0-jar-with-dependencies.jar"),
            conf);

    final Job job = new Job(conf);

    job.setJarByClass(TestBAM.class);
    job.setMapperClass(TestBAMMapper.class);
    job.setReducerClass(TestBAMReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setInputFormatClass(AnySAMInputFormat.class);
    job.setOutputFormatClass(TestBAM.MyOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));

    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.submit();

    if (!job.waitForCompletion(true)) {
        System.err.println("sort :: Job failed.");
        return 1;
    }

    return 0;
}

From source file:Top20AZRestaurants.java

@Override
public int run(String[] args) throws Exception {
    Job job1 = new Job(getConf());
    job1.setSortComparatorClass(MyDecreasingDoubleComparator.class);
    job1.setJobName("Top20 AZ Restaurants ChainJob");
    job1.setJarByClass(Top20AZRestaurants.class);

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20Mapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            map1Conf);
    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20MapperRedo.class, Text.class, Text.class, DoubleWritable.class, Text.class,
            map2Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job1, Top20ReducerRedo.class, DoubleWritable.class, Text.class, Text.class,
            DoubleWritable.class, reduceConf);
    FileInputFormat.setInputPaths(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    boolean success = job1.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:a.b.c.MultiFileWordCount.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();
        return 2;
    }

    Job job = new Job(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    //set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    //use the defined mapper
    job.setMapperClass(MapClass.class);
    //use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:alluxio.examples.keyvalue.hadoop.CloneStoreMapReduce.java

License:Apache License

/**
 * @param args two parameters, the first is the input key-value store path, the second is the
 *    output key-value store path
 * @throws Exception if any exception happens
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // NOTE(binfan): we are using the deprecated constructor of Job instance to compile with
    // hadoop-1.0. If this is not a concern, a better way is
    //     Job job = Job.getInstance(conf);
    Job job = new Job(conf);

    job.setJobName("CloneStoreMapReduce");
    job.setJarByClass(CloneStoreMapReduce.class);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setMapperClass(CloneStoreMapper.class);
    job.setReducerClass(CloneStoreReducer.class);

    job.setInputFormatClass(KeyValueInputFormat.class);
    job.setOutputFormatClass(KeyValueOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
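
As the NOTE in this example explains, the deprecated constructor is used only for hadoop-1.0 compatibility. When that is not a concern, the non-deprecated form is a one-line change:

    // Factory method on newer Hadoop releases; replaces new Job(conf).
    Job job = Job.getInstance(conf);
    job.setJobName("CloneStoreMapReduce");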

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.MatrixMultiplicationBenchmark.java

License:Apache License

public long doBenchmark() {
    try {
        Configuration conf = null;

        switch (type) {
        // case JAVA:
        // m_matrixA.multiplyJava(m_matrixB, m_matrixC);
        // break;
        case CPU:
            conf = MatrixMultiplicationCpu.createMatrixMultiplicationCpuConf(m_conf, m_transposedMatrixAPath,
                    m_matrixBPath, m_matrixCPath, Integer.MAX_VALUE, false);
            break;
        case GPU:
            conf = MatrixMultiplicationGpu.createMatrixMultiplicationGpuConf(m_conf, m_transposedMatrixAPath,
                    m_matrixBPath, m_matrixCPath, Integer.MAX_VALUE, TILE_WIDTH, false);
            break;
        default:
            break;
        }

        Job job = new Job(conf);
        job.setJobName("MatrixMultiplication on " + type + " with n=" + n);

        long startTime = System.currentTimeMillis();
        boolean status = job.waitForCompletion(false);
        long endTime = System.currentTimeMillis() - startTime;
        System.out.println("MatrixMultiplication on " + type + " with size: " + n + " finished in "
                + (endTime / 1000.0) + " seconds with return: " + status);

        return endTime;

    } catch (Exception e) {
        e.printStackTrace();
    }

    return 0;
}