Example usage for org.apache.hadoop.mapreduce Job setJobName

List of usage examples for org.apache.hadoop.mapreduce Job setJobName

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setJobName.

Prototype

public void setJobName(String name) throws IllegalStateException 

Document

Set the user-specified job name.
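
Below is a minimal sketch of the call in isolation (the class name SetJobNameExample and the job name string are hypothetical, and Job.getInstance is used in place of the deprecated constructors that appear in several of the examples further down). Per the prototype above, setJobName throws IllegalStateException if the job has already left the definition phase, so the name must be set before the job is submitted:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Create a job; it stays in the DEFINE state until submission.
        Job job = Job.getInstance(conf);

        // Assign a human-readable name; this is what appears in the
        // cluster UI and in job listings.
        job.setJobName("set-job-name-example"); // hypothetical name

        System.out.println("Job name: " + job.getJobName());
    }
}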

Usage

From source file:com.moz.fiji.mapreduce.gather.FijiGatherJobBuilder.java

License:Apache License

/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    // Construct the gatherer instance.
    if (null == mGathererClass) {
        throw new JobConfigurationException("Must specify a gatherer.");
    }

    final Configuration conf = job.getConfiguration();

    // Serialize the gatherer class name into the job configuration.
    conf.setClass(FijiConfKeys.FIJI_GATHERER_CLASS, mGathererClass, FijiGatherer.class);

    if ((getJobOutput() instanceof HFileMapReduceJobOutput) && (null == mReducerClass)) {
        mReducerClass = IdentityReducer.class;
    }

    final StringBuilder name = new StringBuilder("Fiji gather: " + mGathererClass.getSimpleName());
    if (null != mReducerClass) {
        name.append(" / " + mReducerClass.getSimpleName());
    }
    job.setJobName(name.toString());

    mGatherer = ReflectionUtils.newInstance(mGathererClass, conf);
    mMapper.setConf(conf);
    mDataRequest = mGatherer.getDataRequest();

    // Construct the combiner instance (if specified).
    if (null != mCombinerClass) {
        mCombiner = ReflectionUtils.newInstance(mCombinerClass, conf);
    }

    // Construct the reducer instance (if specified).
    if (null != mReducerClass) {
        mReducer = ReflectionUtils.newInstance(mReducerClass, conf);
    }

    // Configure the table input job (requires mGatherer, mMapper and mReducer to be set):
    super.configureJob(job);

    // Some validation:
    if (getJobOutput() instanceof HFileMapReduceJobOutput) {
        if (mReducer instanceof IdentityReducer) {
            Preconditions.checkState(mGatherer.getOutputKeyClass() == HFileKeyValue.class,
                    String.format("Gatherer '%s' writing HFiles must output HFileKeyValue keys, but got '%s'",
                            mGathererClass.getName(), mGatherer.getOutputKeyClass().getName()));
            Preconditions.checkState(mGatherer.getOutputValueClass() == NullWritable.class,
                    String.format("Gatherer '%s' writing HFiles must output NullWritable values, but got '%s'",
                            mGathererClass.getName(), mGatherer.getOutputValueClass().getName()));
        }
        Preconditions.checkState(mReducer.getOutputKeyClass() == HFileKeyValue.class,
                String.format("Reducer '%s' writing HFiles must output HFileKeyValue keys, but got '%s'",
                        mReducerClass.getName(), mReducer.getOutputKeyClass().getName()));
        Preconditions.checkState(mReducer.getOutputValueClass() == NullWritable.class,
                String.format("Reducer '%s' writing HFiles must output NullWritable values, but got '%s'",
                        mReducerClass.getName(), mReducer.getOutputValueClass().getName()));
    }
}

From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License:Apache License

public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass,
        Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey, FijiRowFilter filter)
        throws Exception {
    final Job job = new Job(createConfiguration());
    final Configuration conf = job.getConfiguration();

    // Get settings for test.
    final FijiDataRequest request = FijiDataRequest.builder()
            .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build();

    job.setJarByClass(IntegrationTestFijiTableInputFormat.class);

    // Setup the InputFormat.
    FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter);
    job.setInputFormatClass(HBaseFijiTableInputFormat.class);

    // Duplicate functionality from MapReduceJobBuilder, since we are not using it here:
    final List<Path> jarFiles = Lists.newArrayList();
    final FileSystem fs = FileSystem.getLocal(conf);
    for (String cpEntry : System.getProperty("java.class.path").split(":")) {
        if (cpEntry.endsWith(".jar")) {
            jarFiles.add(fs.makeQualified(new Path(cpEntry)));
        }
    }
    DistributedCacheJars.addJarsToDistributedCache(job, jarFiles);

    // Create a test job.
    job.setJobName(jobName);

    // Setup the OutputFormat.
    TextOutputFormat.setOutputPath(job, outputFile.getParent());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Set the mapper class.
    if (null != mapperClass) {
        job.setMapperClass(mapperClass);
    }
    // Set the reducer class.
    if (null != reducerClass) {
        job.setReducerClass(reducerClass);
    }

    return job;
}

From source file:com.moz.fiji.mapreduce.pivot.FijiPivotJobBuilder.java

License:Apache License

/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    final Configuration conf = job.getConfiguration();

    if (null == mPivoterClass) {
        throw new JobConfigurationException("Must specify a FijiPivoter class.");
    }

    // Serialize the pivoter class name into the job configuration.
    conf.setClass(FijiConfKeys.FIJI_PIVOTER_CLASS, mPivoterClass, FijiPivoter.class);

    // Producers should output to HFiles.
    mMapper = new PivoterMapper();
    mReducer = new IdentityReducer<Object, Object>();

    job.setJobName("FijiPivoter: " + mPivoterClass.getSimpleName());

    mPivoter = ReflectionUtils.newInstance(mPivoterClass, job.getConfiguration());
    mDataRequest = mPivoter.getDataRequest();

    // Configure the table input job.
    super.configureJob(job);
}

From source file:com.moz.fiji.mapreduce.produce.FijiProduceJobBuilder.java

License:Apache License

/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    final Configuration conf = job.getConfiguration();

    // Construct the producer instance.
    if (null == mProducerClass) {
        throw new JobConfigurationException("Must specify a producer.");
    }

    // Serialize the producer class name into the job configuration.
    conf.setClass(FijiConfKeys.FIJI_PRODUCER_CLASS, mProducerClass, FijiProducer.class);

    // Write to the table, but make sure the output table is the same as the input table.
    if (!getInputTableURI().equals(mJobOutput.getOutputTableURI())) {
        throw new JobConfigurationException("Output table must be the same as the input table.");
    }

    // Producers should output to HFiles.
    mMapper = new ProduceMapper();
    mReducer = new IdentityReducer<Object, Object>();

    job.setJobName("Fiji produce: " + mProducerClass.getSimpleName());

    mProducer = ReflectionUtils.newInstance(mProducerClass, job.getConfiguration());
    mDataRequest = mProducer.getDataRequest();

    // Configure the table input job.
    super.configureJob(job);
}

From source file:com.mozilla.hadoop.Backup.java

License:Apache License

/**
 * @param args
 * @return
 * @throws IOException
 * @throws ParseException 
 */
public Job initJob(String[] args) throws IOException, ParseException {

    Path inputPath = null;
    Path loadPath = null;
    String outputPath = null;
    boolean useSpecifiedPaths = false;
    for (int idx = 0; idx < args.length; idx++) {
        if ("-f".equals(args[idx])) {
            useSpecifiedPaths = true;
            loadPath = new Path(args[++idx]);
        } else if (idx == args.length - 1) {
            outputPath = args[idx];
        } else {
            inputPath = new Path(args[idx]);
        }
    }

    Path mrOutputPath = new Path(NAME + "-results");

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.set("backup.input.path", inputPath.toString());
    conf.set("backup.output.path", outputPath);

    FileSystem inputFs = null;
    FileSystem outputFs = null;
    Path[] inputSources = null;
    try {
        inputFs = FileSystem.get(inputPath.toUri(), new Configuration());
        outputFs = FileSystem.get(getConf());
        if (useSpecifiedPaths) {
            inputSources = createInputSources(loadPaths(outputFs, loadPath), outputFs);
        } else {
            inputSources = createInputSources(getPaths(inputFs, inputPath, 0, 2), outputFs);
        }
    } finally {
        checkAndClose(inputFs);
        checkAndClose(outputFs);
    }

    Job job = new Job(getConf());
    job.setJobName(NAME);
    job.setJarByClass(Backup.class);

    job.setMapperClass(BackupMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);

    for (Path source : inputSources) {
        System.out.println("Adding input path: " + source.toString());
        FileInputFormat.addInputPath(job, source);
    }

    FileOutputFormat.setOutputPath(job, mrOutputPath);

    return job;
}

From source file:com.mozilla.socorro.hadoop.CrashCountToHbase.java

License:LGPL

/**
 * @param args//from   w ww . java2 s  .c o  m
 * @return
 * @throws IOException
 * @throws ParseException
 */
public Job initJob(String[] args) throws IOException {
    Job job = new Job(getConf());
    job.setJobName(NAME);
    job.setJarByClass(CrashCountToHbase.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));

    job.setMapperClass(CrashCountToHBaseMapper.class);
    job.setReducerClass(CrashCountToHBaseReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job;
}

From source file:com.mozilla.socorro.hadoop.CrashReportJob.java

License:LGPL

/**
 * @param args
 * @return
 * @throws IOException
 * @throws ParseException
 */
public static Job initJob(String jobName, Configuration conf, Class<?> mainClass,
        Class<? extends TableMapper> mapperClass, Class<? extends Reducer> combinerClass,
        Class<? extends Reducer> reducerClass, Map<byte[], byte[]> columns,
        Class<? extends WritableComparable> keyOut, Class<? extends Writable> valueOut, Path outputPath)
        throws IOException, ParseException {
    // Set both start/end time and start/stop row
    Calendar startCal = Calendar.getInstance();
    Calendar endCal = Calendar.getInstance();

    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");

    String startDateStr = conf.get(START_DATE);
    String endDateStr = conf.get(END_DATE);
    if (!StringUtils.isBlank(startDateStr)) {
        startCal.setTime(sdf.parse(startDateStr));
    }
    if (!StringUtils.isBlank(endDateStr)) {
        endCal.setTime(sdf.parse(endDateStr));
    }

    conf.setLong(START_TIME, startCal.getTimeInMillis());
    conf.setLong(END_TIME, DateUtil.getEndTimeAtResolution(endCal.getTimeInMillis(), Calendar.DATE));

    Job job = new Job(conf);
    job.setJobName(jobName);
    job.setJarByClass(mainClass);

    // input table configuration
    Scan[] scans = MultiScanTableMapReduceUtil.generateScans(startCal, endCal, columns, 100, false);
    MultiScanTableMapReduceUtil.initMultiScanTableMapperJob(TABLE_NAME_CRASH_REPORTS, scans, mapperClass,
            keyOut, valueOut, job);

    if (combinerClass != null) {
        job.setCombinerClass(combinerClass);
    }

    if (reducerClass != null) {
        job.setReducerClass(reducerClass);
    } else {
        job.setNumReduceTasks(0);
    }

    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}

From source file:com.mycompany.maprpractice.runnerClass.WordCount.java

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new org.apache.hadoop.mapreduce.Job();
    job.setJarByClass(WordCount.class);
    job.setJobName("WordCounter");

    String inputPath = "C:\\Users\\priyamdixit\\Desktop\\TestData\\wordCount.txt";
    String outputPath = "C:\\Users\\priyamdixit\\Desktop\\TestData";

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    //      FileInputFormat.addInputPath(job, new Path(args[0]));
    //      FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);

    int returnValue = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("job.isSuccessful " + job.isSuccessful());
    return returnValue;
}

From source file:com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerTool.java

License:Apache License

public int run(HBaseIndexingOptions hbaseIndexingOpts, JobProcessCallback callback) throws Exception {

    if (hbaseIndexingOpts.isDryRun) {
        return new IndexerDryRun(hbaseIndexingOpts, getConf(), System.out).run();
    }

    long programStartTime = System.currentTimeMillis();
    Configuration conf = getConf();

    IndexingSpecification indexingSpec = hbaseIndexingOpts.getIndexingSpecification();

    conf.set(HBaseIndexerMapper.INDEX_COMPONENT_FACTORY_KEY, indexingSpec.getIndexerComponentFactory());
    conf.set(HBaseIndexerMapper.INDEX_CONFIGURATION_CONF_KEY,
            new String(indexingSpec.getConfiguration(), Charsets.UTF_8));
    conf.set(HBaseIndexerMapper.INDEX_NAME_CONF_KEY, indexingSpec.getIndexerName());
    conf.set(HBaseIndexerMapper.TABLE_NAME_CONF_KEY, indexingSpec.getTableName());
    HBaseIndexerMapper.configureIndexConnectionParams(conf, indexingSpec.getIndexConnectionParams());

    IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
            indexingSpec.getIndexerComponentFactory(),
            new ByteArrayInputStream(indexingSpec.getConfiguration()), indexingSpec.getIndexConnectionParams());
    IndexerConf indexerConf = factory.createIndexerConf();

    Map<String, String> params = indexerConf.getGlobalParams();
    String morphlineFile = params.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
    if (hbaseIndexingOpts.morphlineFile != null) {
        morphlineFile = hbaseIndexingOpts.morphlineFile.getPath();
    }
    if (morphlineFile != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, new File(morphlineFile).getName());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(new File(morphlineFile), conf);
    }

    String morphlineId = params.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
    if (hbaseIndexingOpts.morphlineId != null) {
        morphlineId = hbaseIndexingOpts.morphlineId;
    }
    if (morphlineId != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
    }

    conf.setBoolean(HBaseIndexerMapper.INDEX_DIRECT_WRITE_CONF_KEY, hbaseIndexingOpts.isDirectWrite());

    if (hbaseIndexingOpts.fairSchedulerPool != null) {
        conf.set("mapred.fairscheduler.pool", hbaseIndexingOpts.fairSchedulerPool);
    }

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (hbaseIndexingOpts.log4jConfigFile != null) {
        Utils.setLogConfigFile(hbaseIndexingOpts.log4jConfigFile, getConf());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(hbaseIndexingOpts.log4jConfigFile, conf);
    }

    Job job = Job.getInstance(getConf());
    job.setJobName(getClass().getSimpleName() + "/" + HBaseIndexerMapper.class.getSimpleName());
    job.setJarByClass(HBaseIndexerMapper.class);
    //        job.setUserClassesTakesPrecedence(true);

    TableMapReduceUtil.initTableMapperJob(hbaseIndexingOpts.getScans(), HBaseIndexerMapper.class, Text.class,
            SolrInputDocumentWritable.class, job);

    // explicitly set hbase configuration on the job because the TableMapReduceUtil overwrites it with the hbase defaults
    // (see HBASE-4297 which is not really fixed in hbase 0.94.6 on all code paths)
    HBaseConfiguration.merge(job.getConfiguration(), getConf());

    int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
    //mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
    LOG.info("Cluster reports {} mapper slots", mappers);

    LOG.info("Using these parameters: " + "reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
            new Object[] { hbaseIndexingOpts.reducers, hbaseIndexingOpts.shards, hbaseIndexingOpts.fanout,
                    hbaseIndexingOpts.maxSegments });

    if (hbaseIndexingOpts.isDirectWrite()) {
        CloudSolrServer solrServer = new CloudSolrServer(hbaseIndexingOpts.zkHost);
        solrServer.setDefaultCollection(hbaseIndexingOpts.collection);

        if (hbaseIndexingOpts.clearIndex) {
            clearSolr(indexingSpec.getIndexConnectionParams());
        }

        // Run a mapper-only MR job that sends index documents directly to a live Solr instance.
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!ForkedMapReduceIndexerTool.waitForCompletion(job, hbaseIndexingOpts.isVerbose)) {
            return -1; // job failed
        }
        commitSolr(indexingSpec.getIndexConnectionParams());
        ForkedMapReduceIndexerTool.goodbye(job, programStartTime);
        return 0;
    } else {
        FileSystem fileSystem = FileSystem.get(getConf());

        if (fileSystem.exists(hbaseIndexingOpts.outputDir)) {
            if (hbaseIndexingOpts.overwriteOutputDir) {
                LOG.info("Removing existing output directory {}", hbaseIndexingOpts.outputDir);
                if (!fileSystem.delete(hbaseIndexingOpts.outputDir, true)) {
                    LOG.error("Deleting output directory '{}' failed", hbaseIndexingOpts.outputDir);
                    return -1;
                }
            } else {
                LOG.error("Output directory '{}' already exists. Run with --overwrite-output-dir to "
                        + "overwrite it, or remove it manually", hbaseIndexingOpts.outputDir);
                return -1;
            }
        }

        int exitCode = ForkedMapReduceIndexerTool.runIndexingPipeline(job, callback, getConf(),
                hbaseIndexingOpts.asOptions(), programStartTime, fileSystem, null, -1, // File-based parameters
                -1, // num mappers, only of importance for file-based indexing
                hbaseIndexingOpts.reducers);

        if (hbaseIndexingOpts.isGeneratedOutputDir()) {
            LOG.info("Deleting generated output directory " + hbaseIndexingOpts.outputDir);
            fileSystem.delete(hbaseIndexingOpts.outputDir, true);
        }
        return exitCode;
    }
}

From source file:com.nikoo28.excel.mapreduce.ExcelDriver.java

License:Apache License

/**
 * Main entry point for the example.
 *
 * @param args arguments
 * @throws Exception when something goes wrong
 */
public static void main(String[] args) throws Exception {
    logger.info("Driver started");

    Job job = new Job();
    job.setJarByClass(ExcelDriver.class);
    job.setJobName("Excel Record Reader");

    job.setMapperClass(ExcelMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(ExcelInputFormat.class);

    job.waitForCompletion(true);
}