List of usage examples for org.apache.hadoop.mapred JobConf setJobName
public void setJobName(String name)
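Before the project-specific examples below, here is a minimal, self-contained driver sketch (not taken from any of the listed source files; the class name and input/output paths are placeholders) showing where setJobName() typically sits in an old-API (org.apache.hadoop.mapred) job. The name is purely descriptive and is what appears in the job tracker / job history UI.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetJobNameExample.class);

        // Label the job; the string is only used for display and monitoring
        conf.setJobName("setJobName-example");

        // No mapper/reducer is set, so the old API falls back to the identity
        // mapper/reducer; LongWritable/Text match what TextInputFormat produces
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));  // placeholder input path
        FileOutputFormat.setOutputPath(conf, new Path(args[1])); // placeholder output path

        JobClient.runJob(conf);
    }
}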
From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java
License:Apache License
public static void fillInWordCountMRJobConf(JobConf conf) {
    String input = "select n_comment from tpch.nation";

    conf.setJobName("samplejob-wordcount");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    com.cloudera.recordservice.mr.RecordServiceConfig.setInputQuery(conf, input);
    setRandomOutputDir(conf);
}
From source file:com.csiro.hadoop.UFORecord.java
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("UFO count");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: avro UFO counter <in> <out>");
        System.exit(2);
    }

    org.apache.hadoop.mapred.FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
    Path outputPath = new Path(otherArgs[1]);
    org.apache.hadoop.mapred.FileOutputFormat.setOutputPath(conf, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath);

    Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(conf, input_schema);
    AvroJob.setMapOutputSchema(conf,
            Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));
    AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);

    AvroJob.setMapperClass(conf, AvroRecordMapper.class);
    AvroJob.setReducerClass(conf, AvroRecordReducer.class);
    conf.setInputFormat(AvroInputFormat.class);

    JobClient.runJob(conf);
    return 0;
}
From source file:com.datatorrent.demos.mroperator.LineIndexer.java
License:Open Source License
/**
 * The actual main() method for our program; this is the
 * "driver" for the MapReduce job.
 */
public static void main(String[] args) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(LineIndexer.class);

    conf.setJobName("LineIndexer");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output"));

    conf.setMapperClass(LineIndexMapper.class);
    conf.setReducerClass(LineIndexReducer.class);

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.datatorrent.demos.mroperator.LogCountsPerHour.java
License:Open Source License
public int run(String[] args) throws Exception {
    // Create a configuration
    Configuration conf = getConf();

    // Create a job from the default configuration that will use the LogCountsPerHour class
    JobConf job = new JobConf(conf, LogCountsPerHour.class);

    // Define our input path as the first command line argument and our output path as the second
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    // Create File Input/Output formats for these paths (in the job)
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // Configure the job: name, mapper, reducer, and combiner
    job.setJobName("LogAveragePerHour");
    job.setMapperClass(LogMapClass.class);
    job.setReducerClass(LogReduce.class);
    job.setCombinerClass(LogReduce.class);

    // Configure the output
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(DateWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // Run the job
    JobClient.runJob(job);
    return 0;
}
From source file:com.datatorrent.demos.mroperator.WordCount.java
License:Open Source License
public void run(String[] args) throws Exception {
    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file:com.digitalpebble.behemoth.ClassifierJob.java
License:Apache License
public int run(String[] args) throws Exception {
    Options options = new Options();
    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();
    // create the parser
    CommandLineParser parser = new GnuParser();
    options.addOption("h", "help", false, "print this message");
    options.addOption("i", "input", true, "input Behemoth corpus");
    options.addOption("o", "output", true, "output Behemoth corpus");
    options.addOption("m", "model", true, "location of the model");

    // parse the command line arguments
    CommandLine line = null;
    try {
        line = parser.parse(options, args);
        String input = line.getOptionValue("i");
        String output = line.getOptionValue("o");
        String model = line.getOptionValue("m");
        if (line.hasOption("help")) {
            formatter.printHelp("ClassifierJob", options);
            return 0;
        }
        if (model == null || input == null || output == null) {
            formatter.printHelp("ClassifierJob", options);
            return -1;
        }
    } catch (ParseException e) {
        formatter.printHelp("ClassifierJob", options);
        // return here so 'line' is never dereferenced while null
        return -1;
    }

    final FileSystem fs = FileSystem.get(getConf());

    Path inputPath = new Path(line.getOptionValue("i"));
    Path outputPath = new Path(line.getOptionValue("o"));
    String modelPath = line.getOptionValue("m");

    JobConf job = new JobConf(getConf());
    // push the model file to the DistributedCache
    DistributedCache.addCacheArchive(new URI(modelPath), job);

    job.setJarByClass(this.getClass());
    job.setJobName("ClassifierJob : " + inputPath.toString());

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BehemothDocument.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    job.setMapperClass(TextClassifierMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.set(modelNameParam, modelPath);

    try {
        JobClient.runJob(job);
    } catch (Exception e) {
        e.printStackTrace();
    }

    return 0;
}
From source file:com.digitalpebble.behemoth.commoncrawl.CorpusMerger.java
License:Apache License
public int run(String[] args) throws Exception {
    Options options = new Options();
    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();
    // create the parser
    CommandLineParser parser = new GnuParser();
    options.addOption("h", "help", false, "print this message");
    options.addOption("i", "input", true, "input Behemoth corpus");
    options.addOption("o", "output", true, "output Behemoth corpus");

    // parse the command line arguments
    CommandLine line = null;
    try {
        line = parser.parse(options, args);
        String input = line.getOptionValue("i");
        if (line.hasOption("help")) {
            formatter.printHelp("CorpusMerger", options);
            return 0;
        }
        if (input == null) {
            formatter.printHelp("CorpusMerger", options);
            return -1;
        }
    } catch (ParseException e) {
        formatter.printHelp("CorpusMerger", options);
        return -1;
    }

    Path outputPath = new Path(line.getOptionValue("o"));
    String[] paths = line.getOptionValues("i");

    JobConf job = new JobConf(getConf());
    // MUST not forget the line below
    job.setJarByClass(this.getClass());

    job.setJobName("CorpusMerger");

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    // job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(MergerReducer.class);

    for (String in : paths)
        FileInputFormat.addInputPath(job, new Path(in));

    FileOutputFormat.setOutputPath(job, outputPath);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("CorpusMerger completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        LOG.error("Exception caught", e);
        // fs.delete(outputPath, true);
    }

    return 0;
}
From source file:com.digitalpebble.behemoth.es.ESIndexerJob.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        String syntax = "com.digitalpebble.behemoth.ESIndexerJob input";
        System.err.println(syntax);
        return -1;
    }

    Path inputPath = new Path(args[0]);

    JobConf job = new JobConf(getConf());
    job.setJarByClass(this.getClass());

    job.setJobName("Indexing " + inputPath + " into ElasticSearch");

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setMapperClass(BehemothToESMapper.class);

    // disable speculative execution when writing to ES
    job.setSpeculativeExecution(false);

    // job.set("es.resource", "radio/artists"); // index used for storing data

    // use dedicated output format
    job.setOutputFormat(EsOutputFormat.class);

    FileInputFormat.addInputPath(job, inputPath);

    // no reducer : send straight to elasticsearch at end of mapping
    job.setNumReduceTasks(0);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("ESIndexerJob completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        LOG.error("Exception while running job", e);
        return -1;
    }

    return 0;
}
From source file:com.digitalpebble.behemoth.gate.GATEDriver.java
License:Apache License
public int run(String[] args) throws Exception {
    final FileSystem fs = FileSystem.get(getConf());

    if (args.length < 3 || args.length > 4) {
        String syntax = "com.digitalpebble.behemoth.gate.GATEDriver in out path_gate_file [-XML]";
        System.err.println(syntax);
        return -1;
    }

    boolean dumpGATEXML = false;
    for (String arg : args) {
        if (arg.equalsIgnoreCase("-xml"))
            dumpGATEXML = true;
    }

    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    String zip_application_path = args[2];

    // check that the GATE application has been stored on HDFS
    Path zap = new Path(zip_application_path);
    if (fs.exists(zap) == false) {
        System.err.println(
                "The GATE application " + zip_application_path + " can't be found on HDFS - aborting");
        return -1;
    }

    JobConf job = new JobConf(getConf());
    // MUST not forget the line below
    job.setJarByClass(this.getClass());

    job.setJobName("Processing " + args[0] + " with GATE application from " + zip_application_path);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    if (dumpGATEXML) {
        job.setOutputValueClass(Text.class);
        job.setMapperClass(GATEXMLMapper.class);
    } else {
        job.setOutputValueClass(BehemothDocument.class);
        job.setMapperClass(GATEMapper.class);
    }

    // detect if any filters have been defined
    // and activate the reducer accordingly
    boolean isFilterRequired = BehemothReducer.isRequired(job);
    if (isFilterRequired)
        job.setReducerClass(BehemothReducer.class);
    else {
        job.setNumReduceTasks(0);
    }

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // push the zipped_gate_application onto the DistributedCache
    DistributedCache.addCacheArchive(new URI(zip_application_path), job);
    job.set("gate.application.path", zip_application_path.toString());

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("GATEDriver completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        LOG.error("Exception caught", e);
        // leave even partial output
        // fs.delete(outputPath, true);
    }

    return 0;
}
From source file:com.digitalpebble.behemoth.io.nutch.NutchSegmentConverterJob.java
License:Apache License
public void convert(List<Path> list, Path output) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("Converting Nutch segments");

    job.setJarByClass(this.getClass());

    for (Path p : list) {
        FileInputFormat.addInputPath(job, new Path(p, Content.DIR_NAME));
    }

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(NutchSegmentConverterJob.class);

    // no reducers
    job.setNumReduceTasks(0);

    FileOutputFormat.setOutputPath(job, output);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    long start = System.currentTimeMillis();
    JobClient.runJob(job);
    long finish = System.currentTimeMillis();
    if (LOG.isInfoEnabled()) {
        LOG.info("NutchSegmentConverter completed. Timing: " + (finish - start) + " ms");
    }
}