List of usage examples for org.apache.hadoop.mapreduce Job setJobName
public void setJobName(String name) throws IllegalStateException
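Before the full examples below, a minimal sketch of the call itself. The driver class and job name here are placeholders, not taken from any of the listed sources; setJobName must be called before the job is submitted, otherwise an IllegalStateException is thrown.

// Minimal sketch (assumed names): label the job so it is identifiable in the cluster UI and logs.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(MyDriver.class);   // MyDriver is a hypothetical driver class
job.setJobName("example-job");       // must be set before submission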
From source file:com.phantom.hadoop.examples.RandomTextWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
    Job job = new Job(conf);
    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(RandomTextMapper.class);
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
    System.out.println("Running " + numMaps + " maps.");
    // reducer NONE
    job.setNumReduceTasks(0);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.phantom.hadoop.examples.RandomWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path outDir = new Path(args[0]);
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
    Job job = new Job(conf);
    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    System.out.println("Running " + numMaps + " maps.");
    // reducer NONE
    job.setNumReduceTasks(0);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.phantom.hadoop.examples.terasort.TeraChecksum.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.phantom.hadoop.examples.terasort.TeraGen.java
License:Apache License
/**
 * @param args
 *            the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.phantom.hadoop.examples.terasort.TeraSort.java
License:Apache License
public int run(String[] args) throws Exception { LOG.info("starting"); Job job = Job.getInstance(getConf()); Path inputDir = new Path(args[0]); Path outputDir = new Path(args[1]); boolean useSimplePartitioner = getUseSimplePartitioner(job); TeraInputFormat.setInputPaths(job, inputDir); FileOutputFormat.setOutputPath(job, outputDir); job.setJobName("TeraSort"); job.setJarByClass(TeraSort.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TeraInputFormat.class); job.setOutputFormatClass(TeraOutputFormat.class); if (useSimplePartitioner) { job.setPartitionerClass(SimplePartitioner.class); } else {//from w ww.j a v a 2s .c om long start = System.currentTimeMillis(); Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME); URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME); try { TeraInputFormat.writePartitionFile(job, partitionFile); } catch (Throwable e) { LOG.error(e.getMessage()); return -1; } job.addCacheFile(partitionUri); long end = System.currentTimeMillis(); System.out.println("Spent " + (end - start) + "ms computing partitions."); job.setPartitionerClass(TotalOrderPartitioner.class); } job.getConfiguration().setInt("dfs.replication", getOutputReplication(job)); TeraOutputFormat.setFinalSync(job, true); int ret = job.waitForCompletion(true) ? 0 : 1; LOG.info("done"); return ret; }
From source file:com.phantom.hadoop.examples.terasort.TeraValidate.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 1;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java
License:Open Source License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    setupTest(conf);
    Job job = Job.getInstance(conf);
    job.setJobName("mrbuildverticestest");
    MRBuildVertices.setupJob(job, new Path(testInput), new Path(testOutput));
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);
    if (!job.waitForCompletion(true))
        System.exit(1);
    cleanupTest(conf);
    System.exit(0);
}
From source file:com.philiphubbard.digraph.MRCompressChainsTest.java
License:Open Source License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    setupTest(conf);
    String inputOrig = testInput;
    String outputOrig = testOutput;
    int iter = 0;
    boolean keepGoing = true;
    MRCompressChains.beginIteration();
    while (keepGoing) {
        Job job = Job.getInstance(conf);
        job.setJobName("mrcompresschainstest");
        MRCompressChains.setupIterationJob(job, new Path(inputOrig), new Path(outputOrig));
        if (!job.waitForCompletion(true))
            System.exit(1);
        iter++;
        keepGoing = MRCompressChains.continueIteration(job, new Path(inputOrig), new Path(outputOrig));
    }
    //
    System.out.println("Number of iterations = " + iter);
    cleanupTest(conf);
    //
    System.exit(0);
}
From source file:com.philiphubbard.sabe.MRAssembler.java
License:Open Source License
public boolean run(Path inputPath, Path outputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    // Job.getInstance() copies the Configuration argument, so set its properties first.
    conf.setBoolean(MRVertex.CONFIG_ALLOW_EDGE_MULTIPLES, true);
    conf.setBoolean(MRVertex.CONFIG_COMPRESS_CHAIN_MULTIPLES_MUST_MATCH, false);
    conf.setInt(MRMerVertex.CONFIG_MER_LENGTH, vertexMerLength);
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);
    conf.setInt(MRBuildVertices.CONFIG_COVERAGE, coverage);
    conf.setInt(MRCompressChains.CONFIG_TERMINATION_COUNT, 1);

    Job buildJob = Job.getInstance(conf);
    buildJob.setJobName("mrassemblerbuild");

    Path buildInputPath = inputPath;
    Path buildOutputPath = new Path("sabe.MRAssemblerTmp");

    System.out.println("sabe.MRAssembler starting vertex construction");

    MRBuildMerVertices.setupJob(buildJob, buildInputPath, buildOutputPath);

    if (!buildJob.waitForCompletion(true))
        return false;

    //
    Path compressInputPath = new Path(buildOutputPath.toString() + "/chain");
    Path compressOutputPath = new Path(buildOutputPath.toString() + "/chainCompress");

    int iter = 0;
    boolean keepGoing = true;
    MRCompressChains.beginIteration();

    while (keepGoing) {
        Job compressJob = Job.getInstance(conf);
        compressJob.setJobName("mrassemblercompress");

        System.out.println("sabe.MRAssembler starting compression iteration " + iter);

        MRCompressMerChains.setupIterationJob(compressJob, compressInputPath, compressOutputPath);

        if (!compressJob.waitForCompletion(true))
            System.exit(1);

        iter++;
        keepGoing = MRCompressChains.continueIteration(compressJob, compressInputPath, compressOutputPath);
    }

    System.out.println("sabe.MRAssembler made " + iter + " compression iterations");

    //
    Path branchPath = new Path(buildOutputPath.toString() + "/branch");
    Path chainPath = compressOutputPath;

    FileSystem fileSystem = FileSystem.get(conf);

    Graph graph = buildCompressedGraph(conf, fileSystem, branchPath, chainPath);
    if (graph != null) {
        ArrayList<String> result = graph.assemble();

        FSDataOutputStream out = fileSystem.create(outputPath);
        for (String seq : result) {
            out.writeBytes(seq);
            out.writeBytes("\n");
        }
    }

    //
    fileSystem.delete(buildOutputPath, true);
    fileSystem.close();

    return true;
}
From source file:com.sematext.hbase.hut.RollbackUpdatesMrJob.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    conf.set("mapred.map.tasks.speculative.execution", "false");
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(RollbackUpdatesMapper.class);
    // TODO: Allow passing filter and subset of rows/columns.
    Scan s = new Scan();
    // Optional arguments.
    long startTime = args.length > 1 ? Long.parseLong(args[1]) : 0L;
    long endTime = args.length > 2 ? Long.parseLong(args[2]) : Long.MAX_VALUE;
    // TODO: consider using scan.setTimeRange() for limiting scanned data range. It may
    // not be a good way to do it if tss are artificial in HutPuts though
    // s.setTimeRange(startTime, endTime);
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MIN_TIME_ATTR,
            String.valueOf(startTime));
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MAX_TIME_ATTR,
            String.valueOf(endTime));
    s.setFilter(new HutWriteTimeRowsFilter(endTime, startTime));
    s.setCacheBlocks(false);
    // TODO: allow user change using job params
    s.setCaching(512);
    s.setCacheBlocks(false);
    LOG.info("Using scan: " + s.toString());
    // TODO: allow better limiting of data to be fetched
    if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
        s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
    }
    LOG.info("starttime (inclusive): " + startTime + " (" + new Date(startTime) + ")"
            + ", endtime (inclusive): " + endTime + " (" + new Date(endTime) + ")");
    TableMapReduceUtil.initTableMapperJob(tableName, s, RollbackUpdatesMapper.class, null, null, job);
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    // No reducers. Just write straight to output files.
    job.setNumReduceTasks(0);
    return job;
}