Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

This page collects example usages of the org.apache.hadoop.mapred JobConf(Configuration, Class) constructor.

Prototype

public JobConf(Configuration conf, Class exampleClass) 

Document

Construct a map/reduce job configuration.
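
The second argument (exampleClass) tells Hadoop which JAR to ship with the job: the framework locates the JAR containing that class and distributes it to the cluster. Below is a minimal driver sketch using this constructor; the class name ExampleDriver, the job name, and the argument-based paths are illustrative placeholders, not code taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class ExampleDriver {
    public static void main(String[] args) throws Exception {
        // Wrap an existing Configuration; passing ExampleDriver.class lets
        // Hadoop find the JAR that contains this job's code.
        JobConf conf = new JobConf(new Configuration(), ExampleDriver.class);
        conf.setJobName("example-job");

        // No mapper or reducer is set, so the identity classes and the default
        // text input/output formats apply: each input line is passed through,
        // keyed by its byte offset.
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}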

Usage

From source file:com.bigdata.diane.MiniTestDFSIO.java

License:Apache License

@SuppressWarnings("deprecation")
private static void runIOTest(Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
        Path outputDir, Configuration fsConfig) throws IOException {
    JobConf job = new JobConf(fsConfig, MiniTestDFSIO.class);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
}

From source file:com.chriscx.mapred.Driver.java

public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), Driver.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else {
            other_args.add(args[i]);
        }
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.chriscx.matching.Driver.java

public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), com.chriscx.mapred.Driver.class);
    conf.setJobName("Matching");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else {
            other_args.add(args[i]);
        }
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.cloudera.avro.MapredColorCount.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MapredColorCount <input path> <output path>");
        return -1;
    }

    JobConf conf = new JobConf(getConf(), MapredColorCount.class);
    conf.setJobName("colorcount");

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    AvroJob.setMapperClass(conf, ColorCountMapper.class);
    AvroJob.setReducerClass(conf, ColorCountReducer.class);

    // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set
    // relevant config options such as input/output format, map output
    // classes, and output key class.
    AvroJob.setInputSchema(conf, User.getClassSchema());
    AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.cloudera.recordservice.examples.mapreduce.MapredColorCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    org.apache.log4j.BasicConfigurator.configure();

    if (args.length != 2) {
        System.err.println("Usage: MapredColorCount <input path> <output path>");
        return -1;
    }

    JobConf conf = new JobConf(getConf(), MapredColorCount.class);
    conf.setJobName("colorcount With Generic Records");

    // RECORDSERVICE:
    // By using the recordservice AvroJob utility, we can configure at run time
    // whether or not to use the RecordService.
    // In this example, we set the conf flag to true to enable the RecordService.
    conf.setBoolean(com.cloudera.recordservice.avro.AvroJob.USE_RECORD_SERVICE_INPUT_FORMAT_CONF_KEY, true);
    com.cloudera.recordservice.avro.AvroJob.setInputFormat(conf, org.apache.avro.mapred.AvroInputFormat.class);

    // RECORDSERVICE:
    // To read from a table instead of a path, comment out setInputPaths and instead use:
    RecordServiceConfig.setInputTable(conf, "rs", "users");
    //FileInputFormat.setInputPaths(conf, new Path(args[0]));

    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    AvroJob.setMapperClass(conf, ColorCountMapper.class);
    AvroJob.setReducerClass(conf, ColorCountReducer.class);

    // Note that AvroJob.setOutputSchema sets relevant config options such as output
    // format, map output classes, and output key class.
    // There is no need to call setInputSchema when using Generic Records.
    AvroJob.setOutputSchema(conf,
            Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.csiro.hadoop.UFORecord.java

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("UFO count");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: avro UFO counter <in> <out>");
        System.exit(2);

    }

    org.apache.hadoop.mapred.FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
    Path outputPath = new Path(otherArgs[1]);
    org.apache.hadoop.mapred.FileOutputFormat.setOutputPath(conf, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath);
    Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(conf, input_schema);
    AvroJob.setMapOutputSchema(conf,
            Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));

    AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
    AvroJob.setMapperClass(conf, AvroRecordMapper.class);
    AvroJob.setReducerClass(conf, AvroRecordReducer.class);
    conf.setInputFormat(AvroInputFormat.class);
    JobClient.runJob(conf);

    return 0;
}

From source file:com.datatorrent.demos.mroperator.LogCountsPerHour.java

License:Open Source License

public int run(String[] args) throws Exception {
    // Create a configuration
    Configuration conf = getConf();

    // Create a job from the default configuration that will use the LogCountsPerHour class
    JobConf job = new JobConf(conf, LogCountsPerHour.class);

    // Define our input path as the first command line argument and our output path as the second
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    // Create File Input/Output formats for these paths (in the job)
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // Configure the job: name, mapper, reducer, and combiner
    job.setJobName("LogAveragePerHour");
    job.setMapperClass(LogMapClass.class);
    job.setReducerClass(LogReduce.class);
    job.setCombinerClass(LogReduce.class);

    // Configure the output
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(DateWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // Run the job
    JobClient.runJob(job);
    return 0;
}

From source file:com.ebay.erl.mobius.core.MobiusJob.java

License:Apache License

/**
 * Add a job, represented by the <code>aNewJob</code> object, into the execution queue.
 * <p>
 *
 * Users can use this method to add one or more job configurations to the job queue; the Mobius engine
 * analyzes the <code>aNewJob</code> objects in the queue to work out the dependencies between jobs.
 * For example, if job B's input comes from job A, then job B won't be submitted until A completes
 * successfully. If A fails, B will not be submitted.
 * <p>
 *
 * @param aNewJobConf a {@link Configuration} object that represents a Hadoop job.
 * @throws IOException
 */
protected void addToExecQueue(Configuration aNewJobConf) throws IOException {
    // Add the new job into the execution engine and resolve
    // its dependencies, if any.
    //
    // To resolve the job dependencies, we need to analyze the input
    // path of this new job.
    // 
    // The inputs of a job could be:
    // 1) if aNewJob is not a derived job (i.e., not the result of another MR job),
    // then its inputs can be retrieved from "mapred.input.dir",
    // or from {@link MultipleInputs} (e.g., when joining different types of datasets);
    // 2) if aNewJob is a derived job, its input is the output of a previous
    // MR job.

    String inputFolders = aNewJobConf.get("mapred.input.dir", "");
    if (inputFolders.length() == 0) {
        // the value of "mapred.input.dir" is empty, assuming the inputs of this job 
        // are coming from {@link MultipleInputs}.

        String multipleInputs = aNewJobConf
                .get("mapred.input.dir.mappers"/* for using old MultipleInputs, v0.20.X */, aNewJobConf.get(
                        "mapreduce.input.multipleinputs.dir.formats"/* for new MultipleInputs, v0.23.X */, ""));

        if (multipleInputs.length() > 0) {
            // the input paths of this job come from MultipleInputs; extract them.
            // The format from {@link MultipleInputs} is: hadoop_path1;corresponding_mapper1,hadoop_path2;corresponding_mapper2,...
            String[] pathAndMapperPairs = multipleInputs.split(",");
            for (String aPair : pathAndMapperPairs) {
                String[] pathToMapper = aPair.split(";");
                String path = pathToMapper[0];
                String mapper = pathToMapper[1];

                if (inputFolders.length() == 0) {
                    inputFolders = getPathOnly(path);
                } else {
                    inputFolders = inputFolders + "," + getPathOnly(path);
                }
            }
        } else {
            throw new IllegalArgumentException("Cannot find input path(s) of job: ["
                    + aNewJobConf.get("mapred.job.name") + "] from the following attributes: "
                    + "mapred.input.dir, mapred.input.dir.mappers, nor mapreduce.input.multipleinputs.dir.formats. "
                    + "Please specify the input path(s) of this job.");
        }
    } else {
        // the input path of this job is specified in mapred.input.dir
        inputFolders = getPathOnly(inputFolders);
    }

    ////////////////////////////////////////////////////////////
    // validate output path of this job, to ensure it doesn't
    // use the same folder of another job's output.
    ////////////////////////////////////////////////////////////
    String outputPath = aNewJobConf.get("mapred.output.dir", "");
    if (outputPath.isEmpty())
        throw new IllegalStateException(
                "Please specify the output directory of job:" + aNewJobConf.get("mapred.job.name"));

    if (this.isOutputOfAnotherJob(outputPath)) {
        throw new IllegalArgumentException("Job [" + aNewJobConf.get("mapred.job.name") + "]'s output ["
                + outputPath + "] is " + "the output of job[" + jobTopology.get(outputPath).getJobName() + "], "
                + "please make sure to use different output folder for each job.");
    }

    //////////////////////////////////////////////////////////////////
    // pass all the validation, start to build the dependencies.
    //////////////////////////////////////////////////////////////////
    Job newJob = new ConfigurableJob(new JobConf(aNewJobConf, this.getClass()));

    newJob.setJobName(aNewJobConf.get("mapred.job.name", aNewJobConf.get("mapreduce.job.name", "Mobius Job")));
    for (String anInputOfNewJob : inputFolders.split(",")) {
        // Added to track inputs for local PC sampling
        inputPaths.add(anInputOfNewJob);

        Job dependsOn = jobTopology.get(this.getFS().makeQualified(new Path(anInputOfNewJob)).toUri());
        if (dependsOn != null) {
            List<Job> dependingJobs = newJob.getDependingJobs();

            boolean alreadyInDependency = dependingJobs != null && dependingJobs.contains(dependsOn);
            if (alreadyInDependency) {
                // already added, do nothing.
            } else {
                LOGGER.info(newJob.getJobName() + " depends on " + dependsOn.getJobName());
                newJob.addDependingJob(dependsOn);
            }
        }
    }

    // put the output of this <code>newJob</code> into job topology
    // so that later if a job read this <code>newJob</code>'s output
    // as its input, then the system can detect the dependency.

    URI outputPathURI = this.getFS().makeQualified(new Path(outputPath)).toUri();
    LOGGER.info("Adding Job:" + newJob.getJobName() + "\tOutput:[" + outputPath.toString() + "]");
    jobTopology.put(outputPathURI, newJob);
}

From source file:com.facebook.LinkBench.LinkBenchDriverMR.java

License:Apache License

/**
 * Create a JobConf for the map/reduce job.
 * @param currentphase LOAD or REQUEST
 * @param nmappers number of mappers (loader or requester)
 */
private JobConf createJobConf(int currentphase, int nmappers) {
    final JobConf jobconf = new JobConf(getConf(), getClass());
    jobconf.setJobName("LinkBench MapReduce Driver");

    if (USE_INPUT_FILES) {
        jobconf.setInputFormat(SequenceFileInputFormat.class);
    } else {
        jobconf.setInputFormat(LinkBenchInputFormat.class);
    }
    jobconf.setOutputKeyClass(IntWritable.class);
    jobconf.setOutputValueClass(LongWritable.class);
    jobconf.setOutputFormat(SequenceFileOutputFormat.class);
    if (currentphase == LOAD) {
        jobconf.setMapperClass(LoadMapper.class);
    } else { //REQUEST
        jobconf.setMapperClass(RequestMapper.class);
    }
    jobconf.setNumMapTasks(nmappers);
    jobconf.setReducerClass(LoadRequestReducer.class);
    jobconf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobconf.setSpeculativeExecution(false);

    return jobconf;
}

From source file:com.github.gaoyangthu.demo.mapred.DBCountPageView.java

License:Apache License

@Override
// Usage: DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {

    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);

    JobConf job = new JobConf(getConf(), DBCountPageView.class);

    job.setJobName("Count Pageviews of URLs");

    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBConfiguration.configureDB(job, driverClassName, url);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);

    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    try {
        JobClient.runJob(job);

        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return 0;
}