Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

This page collects example usages of the org.apache.hadoop.mapred.JobConf constructor JobConf(Configuration conf, Class exampleClass).

Prototype

public JobConf(Configuration conf, Class exampleClass) 

Document

Construct a map/reduce job configuration.
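
The conf argument supplies the base settings to copy, and exampleClass names a class whose containing jar Hadoop ships with the job so the job's classes can be found on the cluster. Below is a minimal sketch of the pattern the usage examples share; the driver class, job name, and argument handling here are illustrative, not taken from any of the sources.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class MyDriver {
    public static void main(String[] args) throws Exception {
        // Copy the base configuration; MyDriver.class tells Hadoop which jar to ship.
        JobConf job = new JobConf(new Configuration(), MyDriver.class);
        job.setJobName("my-driver");

        // Pass-through map and reduce phases; the default TextInputFormat
        // produces LongWritable offsets and Text lines.
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit and block until completion.
        JobClient.runJob(job);
    }
}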

Usage

From source file:averageprocessingtimesbytype.AverageProcessingTimesByType.java
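
A Tool-style run() implementation: the JobConf wraps the Tool's Configuration, and custom MapClass and Reduce classes process KeyValueTextInputFormat records into Text keys and IntWritable values.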

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    JobConf job = new JobConf(conf, AverageProcessingTimesByType.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("AverageProcessingTimesByType");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //   job.set("key.value.separator.in.input.line", "");

    JobClient.runJob(job);

    return 0;
}

From source file:babel.prep.PrepStep.java

License:Apache License
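
A constructor that wraps the supplied Configuration in a JobConf keyed to PrepStep before handing it to the superclass, then caches a FileSystem handle.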

public PrepStep(Configuration conf) throws IOException {
    super(new JobConf(conf, PrepStep.class));
    m_fs = FileSystem.get(getConf());
}

From source file:boa.datagen.SeqSort.java

License:Apache License
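
A sort driver built on identity map and reduce tasks over SequenceFiles: the reduce count is sized at 90% of the cluster's reduce capacity (or from test.sort.reduces_per_host), and output is block-compressed with Snappy.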

/**
 * The main driver for sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the 
 *                     job tracker.
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println(inPath);

    JobConf jobConf = new JobConf(getConf(), SeqSort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(BytesWritable.class);

    SequenceFileOutputFormat.setCompressOutput(jobConf, true);
    SequenceFileOutputFormat.setOutputCompressorClass(jobConf, SnappyCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(jobConf, CompressionType.BLOCK);

    // Set the input and output paths.
    FileInputFormat.setInputPaths(jobConf, inPath);
    FileOutputFormat.setOutputPath(jobConf, new Path(outPath));

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}

From source file:Business.DataJoin.java
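
A join driver that derives default input and output paths from the location of its class files, accepts overrides from the command line, and emits comma-separated output via mapred.textoutputformat.separator.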

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobConf job = new JobConf(conf, DataJoin.class);

    final File f = new File(MapReduceOne.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/inFiles/";
    String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outFiles/OutputOne";
    //use the arguments instead if provided.
    if (args.length > 2) {
        inFiles = args[1];
        outFiles = args[2];
    }
    Path in = new Path(inFiles);
    Path out = new Path(outFiles);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Data Join");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(ReduceClass.class);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TaggedWritable.class);
    job.set("mapred.textoutputformat.separator", ",");

    JobClient.runJob(job);
    return 0;
}

From source file:ca.etsmtl.lasi.hbasewikipedialoader.HBaseWikipediaLoader.java

License:Apache License
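
Sets up a map-only job that uses StreamXmlRecordReader to split a Wikipedia dump on <page> elements and writes BatchUpdate records to an HBase table through TableOutputFormat.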

/**
 * Sets up the actual job.
 * 
 * @param conf
 *          The current configuration.
 * @param args
 *          The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *           When setting up the job fails.
 */
public static JobConf createSubmittableJob(HBaseConfiguration conf, String[] args) throws IOException {
    JobConf jobConf = new JobConf(conf, HBaseWikipediaLoader.class);
    jobConf.setJobName(NAME);

    // Stream stuff
    jobConf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader");
    jobConf.set("stream.recordreader.begin", "<page>");
    jobConf.set("stream.recordreader.end", "</page>");

    jobConf.setSpeculativeExecution(false);

    jobConf.setMapOutputKeyClass(ImmutableBytesWritable.class);
    jobConf.setMapOutputValueClass(BatchUpdate.class);

    jobConf.setMapperClass(Map.class);

    jobConf.setNumReduceTasks(0);

    jobConf.setInputFormat(StreamInputFormat.class);
    jobConf.setOutputFormat(TableOutputFormat.class);
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
    jobConf.setOutputKeyClass(ImmutableBytesWritable.class);
    jobConf.setOutputValueClass(BatchUpdate.class);

    StreamInputFormat.setInputPaths(jobConf, new Path(args[0]));
    FileOutputFormat.setOutputPath(jobConf, new Path("/tmp/" + NAME + "-" + System.currentTimeMillis()));

    return jobConf;

}

From source file:ca.etsmtl.logti.log792.mti830.RowCounter.java

License:Apache License
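
Builds an HBase table-scan job with TableMapReduceUtil.initTableMapJob: the first argument names the output directory, the second the table, and the remainder are space-joined columns; the reduce phase is a pass-through IdentityReducer.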

/**
 * @param args
 * @return the JobConf
 * @throws IOException
 */
@SuppressWarnings({ "unused", "deprecation" })
public JobConf createSubmittableJob(String[] args) throws IOException {
    JobConf c = new JobConf(getConf(), RowCounter.class);
    c.setJobName(NAME);
    // Columns are space delimited
    StringBuilder sb = new StringBuilder();
    final int columnoffset = 2;
    for (int i = columnoffset; i < args.length; i++) {
        if (i > columnoffset) {
            sb.append(" ");
        }
        sb.append(args[i]);
    }
    // Second argument is the table name.
    TableMapReduceUtil.initTableMapJob(args[1], sb.toString(), this.getClass(), ImmutableBytesWritable.class,
            RowResult.class, c);
    c.setReducerClass(IdentityReducer.class);
    // First arg is the output directory.
    FileOutputFormat.setOutputPath(c, new Path(args[0]));
    return c;
}

From source file:cl.borrego.store.hadoop.SimpleClient.java

License:Open Source License
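
The minimal pattern: construct the JobConf, name the job, and submit it with JobClient.runJob.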

public void main() {
    // Create a new JobConf
    JobConf job = new JobConf(new Configuration(), SimpleClient.class);

    // Specify various job-specific parameters
    job.setJobName("myjob");
    // Submit the job, then poll for progress until the job is complete
    try {
        JobClient.runJob(job);
    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:clusteringblocks.ClusteringBlocks.java
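
Structurally the same driver as the first example: KeyValueTextInputFormat in, Text keys and IntWritable values out, with job-specific mapper and reducer classes.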

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    JobConf job = new JobConf(conf, ClusteringBlocks.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("ClusteringBlocks");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //   job.set("key.value.separator.in.input.line", "");

    JobClient.runJob(job);

    return 0;
}

From source file:cn.edu.xmu.dm.mapreduce.MultiFileWordCount.java

License:Apache License
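
A word-count variant that plugs a custom MyInputFormat into the job and uses LongSumReducer as both combiner and reducer.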

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();
        return 1;
    }

    JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
    job.setJobName("MultiFileWordCount");

    // set the InputFormat of the job to our InputFormat
    job.setInputFormat(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(LongWritable.class);

    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);

    return 0;
}

From source file:cn.edu.xmu.dm.mapreduce.Sort.java

License:Apache License
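
A fuller sort driver: command-line switches choose the input/output formats and key/value classes, and an optional InputSampler configures total-order partitioning through TotalOrderPartitioner and the distributed cache.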

/**
 * The main driver for sort program. Invoke this method to submit the
 * map/reduce job.
 * 
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Sorter");
    job.setJarByClass(Sort.class);
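    // Note: this new-API Job is created but never submitted; the old-API
    // JobConf below drives the actual configuration and run.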

    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(inputFormatClass);
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}