Example usage for org.apache.hadoop.mapreduce Job setJobName

List of usage examples for org.apache.hadoop.mapreduce Job setJobName

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setJobName.

Prototype

public void setJobName(String name) throws IllegalStateException 

Document

Set the user-specified job name.
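
Before the full examples below, here is a minimal, self-contained sketch of where setJobName fits when configuring a job. The driver class name, the use of the identity Mapper, and the input/output paths are illustrative assumptions, not taken from the examples on this page. The name set here is what identifies the job in the cluster UI and logs, and the call fails with IllegalStateException if the job is in the wrong state (for example, already submitted).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // The user-specified name is shown for this job in the ResourceManager/JobTracker UI
        // and in the logs. It must be set before the job is submitted; afterwards the call
        // throws IllegalStateException.
        job.setJobName("SetJobNameExample " + args[0]);

        job.setJarByClass(SetJobNameExample.class);
        job.setMapperClass(Mapper.class);   // identity mapper; a real job would supply its own
        job.setNumReduceTasks(0);           // map-only job, so no reducer is configured
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}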

Usage

From source file:it.crs4.seal.tsv_sort.TsvSort.java

License:Apache License

public int run(String[] args) throws Exception {
    LOG.info("starting");

    TsvSortOptionParser parser = new TsvSortOptionParser();
    parser.parse(getConf(), args);

    LOG.info("Using " + parser.getNReduceTasks() + " reduce tasks");

    Job job = new Job(getConf());

    job.setJobName("TsvSort " + parser.getInputPaths().get(0));
    job.setJarByClass(TsvSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TsvInputFormat.class);
    job.setOutputFormatClass(TextValueOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);

    // output path
    FileOutputFormat.setOutputPath(job, parser.getOutputPath());

    FileSystem fs = parser.getOutputPath().getFileSystem(job.getConfiguration());
    /*
     *
     * Pick a random name for the partition file in the same directory as the
     * output path.  So, TsvSort /user/me/input /user/me/output
     * results in the partition file being placed in /user/me/_partition.lst.12340921387402174
     *
     * Why not place it directly in the input path?
     *
     *   We wouldn't be able to run two sorts on the same data at the same time.
     *   We've received complaints about this in the past, so it has proven to be
     *   a limitation in practice.
     *
     * Why not place it directly in the output path?
     *
     *   We'd have to create the output path before the output format did.
     *   For this to work we'd have to disable the FileOutputFormat's default check
     *   that verifies that the output directory doesn't exist.  This means that we'd
     *   need some other way to ensure that we're not writing to the same path where
     *   some other job wrote.
     */
    Path partitionFile;
    Random rnd = new Random();
    do {
        partitionFile = new Path(parser.getOutputPath().getParent(),
                String.format("_partition.lst.%012d", Math.abs(rnd.nextLong())));
    } while (fs.exists(partitionFile)); // this is still subject to a race condition between it and another instance of this program
    partitionFile = partitionFile.makeQualified(fs);
    LOG.info("partition file path: " + partitionFile);

    URI partitionUri = new URI(partitionFile.toString() + "#" + PARTITION_SYMLINK);
    LOG.debug("partitionUri for distributed cache: " + partitionUri);

    // input paths
    for (Path p : parser.getInputPaths())
        TsvInputFormat.addInputPath(job, p);

    LOG.info("sampling input");
    TextSampler.writePartitionFile(new TsvInputFormat(), job, partitionFile);
    LOG.info("created partitions");
    try {
        DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
        DistributedCache.createSymlink(job.getConfiguration());

        int retcode = job.waitForCompletion(true) ? 0 : 1;
        LOG.info("done");
        return retcode;
    } finally {
        LOG.debug("deleting partition file " + partitionFile);
        fs.delete(partitionFile, false);
    }
}

From source file:job.ItemPDFJob.java

License:Open Source License

/**
 *  The run method takes the command-line arguments as input and sets the MapReduce execution parameters.
 *  It returns 0 if the job completes successfully and 1 if it fails.
 *  @param args the first element of the array contains the input path,
 *  the second the output path for the ItemPDFJob run, the third the output path for the UserProfileJob run,
 *  and the fourth is optional and contains the K value with which to run the process
 *  @return the completion status of the job: 0 if execution succeeds, 1 if it fails
 *  @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    String input = GLOBALS.getTRAIN_FILE_NAME();
    String output = GLOBALS.getOUTPUT1();
    conf.set("split", GLOBALS.getSPLIT_TOKEN());
    Job pr = new Job(conf);
    pr.setJarByClass(ItemPDFJob.class);
    pr.setJobName("ItemPDF Job");
    FileInputFormat.setInputPaths(pr, new Path(input));
    FileOutputFormat.setOutputPath(pr, new Path(output));
    pr.setMapperClass(ItemProfileMapper.class);
    pr.setReducerClass(ItemProfileReducer.class);
    pr.setMapOutputKeyClass(IntWritable.class);
    pr.setMapOutputValueClass(BooleanWritable.class);
    boolean success = pr.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:job.UserProfileJob.java

License:Open Source License

/**
 *    The run method takes the command-line arguments as input and sets the execution parameters of the MapReduce job.
 *    It returns 0 if the job completes successfully and 1 if it fails.
 *    @param args the first element of the array contains the input path,
 *  the second the output path for the ItemPDFJob run, the third the output path for the UserProfileJob run,
 *  and the fourth is optional and contains the K value with which to run the process
 *  @return the completion status of the job: 0 if execution succeeds, 1 if it fails
 *  @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    String input = GLOBALS.getTRAIN_FILE_NAME();
    String output = GLOBALS.getOUTPUT2();
    conf.set("k", GLOBALS.getK() + "");
    conf.set("output1", GLOBALS.getOUTPUT1());
    Job pr = new Job(conf);
    pr.setJarByClass(UserProfileJob.class);
    pr.setJobName("UserProfile Job");
    FileInputFormat.setInputPaths(pr, new Path(input));
    FileOutputFormat.setOutputPath(pr, new Path(output));
    pr.setMapperClass(UserProfileMapper.class);
    pr.setReducerClass(UserProfileReducer.class);
    pr.setMapOutputKeyClass(IntWritable.class);
    pr.setMapOutputValueClass(PointWritable.class);
    boolean success = pr.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:layer.AutoCoder.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath0 = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath0);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);

    Configuration conf = getConf();
    initialParameters(conf);

    for (int iterations = 1; iterations < GlobalUtil.NUM_LAYER + 1; iterations++) {
        LOG.info("** Layer: " + iterations);
        try {

            Job job = Job.getInstance(conf);
            job.setJobName(AutoCoder.class.getSimpleName());
            job.setJarByClass(AutoCoder.class);
            // set the path of the information of k clusters in this iteration
            job.getConfiguration().set("sidepath", inputPath0 + "/side_output");
            job.getConfiguration().setInt("layer_ind", iterations);
            job.setNumReduceTasks(reduceTasks);

            String inputPath = inputPath0 + "/train";
            dataShuffle();

            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(ModelNode.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(ModelNode.class);

            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            job.setPartitionerClass(MyPartitioner.class);

            // Delete the output directory if it exists already.
            Path outputDir = new Path(outputPath);
            FileSystem.get(getConf()).delete(outputDir, true);

            long startTime = System.currentTimeMillis();
            job.waitForCompletion(true);
            LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

            prepareNextIteration(inputPath0, outputPath, iterations, conf, reduceTasks);
        } catch (Exception exp) {
            exp.printStackTrace();
        }
    }

    return 0;
}

From source file:mapreducemaxstock.MapReduceMaxStock.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Invalid parameters:input_path output_path");
        System.exit(-1);
    }

    //Create MapReduce job
    Job job = new Job();
    job.setJarByClass(MapReduceMaxStock.class);
    job.setJobName("MaxStockPrice");

    //Specify input and output paths
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //Specify input and output formats
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    //Specify mapper and reducer classes
    job.setMapperClass(StockPriceMapper.class);
    job.setReducerClass(StockPriceReducer.class);

    //Specify Key,Value types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);

    //submit job
    boolean jobStatus = job.waitForCompletion(true);

    System.exit(jobStatus ? 0 : 1);
}

From source file:mil.nga.giat.geowave.mapreduce.copy.StoreCopyJobRunner.java

License:Open Source License

/**
 * Main method to execute the MapReduce analytic.
 */
public int runJob() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = super.getConf();
    if (conf == null) {
        conf = new Configuration();
        setConf(conf);
    }

    GeoWaveConfiguratorBase.setRemoteInvocationParams(options.getHdfsHostPort(),
            options.getJobTrackerOrResourceManHostPort(), conf);

    final Job job = Job.getInstance(conf);

    job.setJarByClass(this.getClass());

    job.setJobName(jobName);

    job.setMapperClass(StoreCopyMapper.class);
    job.setReducerClass(StoreCopyReducer.class);

    job.setInputFormatClass(GeoWaveInputFormat.class);
    job.setOutputFormatClass(GeoWaveOutputFormat.class);

    job.setMapOutputKeyClass(GeoWaveInputKey.class);
    job.setMapOutputValueClass(ObjectWritable.class);
    job.setOutputKeyClass(GeoWaveOutputKey.class);
    job.setOutputValueClass(Object.class);
    job.setNumReduceTasks(options.getNumReducers());

    GeoWaveInputFormat.setMinimumSplitCount(job.getConfiguration(), options.getMinSplits());
    GeoWaveInputFormat.setMaximumSplitCount(job.getConfiguration(), options.getMaxSplits());

    GeoWaveInputFormat.setStoreOptions(job.getConfiguration(), inputStoreOptions);

    GeoWaveOutputFormat.setStoreOptions(job.getConfiguration(), outputStoreOptions);

    final AdapterIndexMappingStore adapterIndexMappingStore = inputStoreOptions
            .createAdapterIndexMappingStore();
    try (CloseableIterator<DataAdapter<?>> adapterIt = inputStoreOptions.createAdapterStore().getAdapters()) {
        while (adapterIt.hasNext()) {
            DataAdapter<?> dataAdapter = adapterIt.next();

            LOGGER.debug("Adding adapter to output config: "
                    + StringUtils.stringFromBinary(dataAdapter.getAdapterId().getBytes()));

            GeoWaveOutputFormat.addDataAdapter(job.getConfiguration(), dataAdapter);

            final AdapterToIndexMapping mapping = adapterIndexMappingStore
                    .getIndicesForAdapter(dataAdapter.getAdapterId());

            JobContextAdapterIndexMappingStore.addAdapterToIndexMapping(job.getConfiguration(), mapping);
        }
    }

    try (CloseableIterator<Index<?, ?>> indexIt = inputStoreOptions.createIndexStore().getIndices()) {
        while (indexIt.hasNext()) {
            Index<?, ?> index = indexIt.next();
            if (index instanceof PrimaryIndex) {
                LOGGER.debug("Adding index to output config: "
                        + StringUtils.stringFromBinary(index.getId().getBytes()));

                GeoWaveOutputFormat.addIndex(job.getConfiguration(), (PrimaryIndex) index);
            }
        }
    }

    boolean retVal = false;
    try {
        retVal = job.waitForCompletion(true);
    } catch (final IOException ex) {
        LOGGER.error("Error waiting for store copy job: ", ex);
    }

    return retVal ? 0 : 1;
}

From source file:minor_MapReduce.C4_5.java

License:Open Source License

private static void summarizeData() throws Exception {
    Job job = Job.getInstance();
    job.setJarByClass(C4_5.class);
    job.setJobName("C4.5_summarizeData");

    FileInputFormat.addInputPath(job, input_path);
    FileOutputFormat.setOutputPath(job, summarized_data_path);

    job.setMapperClass(SummarizeMapper.class);
    job.setReducerClass(SummarizeReducer.class);

    job.setOutputKeyClass(TextArrayWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.waitForCompletion(false);

    /* Store it locally */
    Option optPath = SequenceFile.Reader.file(new Path(summarized_data_path.toString() + "/part-r-00000"));
    SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(), optPath);

    TextArrayWritable key = new TextArrayWritable();
    IntWritable val = new IntWritable();

    summarized_data = new HashMap<String[], Integer>();
    while (reader.next(key, val)) {
        summarized_data.put(key.toStrings(), val.get());
    }

    reader.close();
}

From source file:model.AutoCoder.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT) + "/part*";
    String outputPath = cmdline.getOptionValue(OUTPUT);
    //String inputPath = "mingled_v2/part*";
    //String outputPath = "output";
    String dataPath = cmdline.getOptionValue(INPUT) + "/common";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();
    initialParameters(conf);

    conf.set("dataPath", dataPath);

    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");

    Job job = Job.getInstance(conf);
    job.setJobName(AutoCoder.class.getSimpleName());
    job.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job.getConfiguration().set("sidepath", inputPath + "/side_output");
    job.setNumReduceTasks(reduceTasks);

    dataShuffle();

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setMaxInputSplitSize(job, 1000 * 1024 * 1024);
    FileInputFormat.setMinInputSplitSize(job, 1000 * 1024 * 1024);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(ModelNode.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SuperModel.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks);

    return 0;
}

From source file:my.mahout.AbstractJob.java

License:Apache License

protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, String jobname)
        throws IOException {

    Job job = HadoopUtil.prepareJob(inputPath, outputPath, inputFormat, mapper, mapperKey, mapperValue,
            outputFormat, getConf());

    String name = jobname != null ? jobname
            : HadoopUtil.getCustomJobName(getClass().getSimpleName(), job, mapper, Reducer.class);

    job.setJobName(name);
    return job;

}