Example usage for org.apache.hadoop.mapred JobConf setJobName

Introduction

On this page you can find usage examples for the org.apache.hadoop.mapred JobConf method setJobName.

Prototype

public void setJobName(String name) 

Document

Set the user-specified job name.
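
A minimal standalone sketch of the call in isolation (the class and job name here are illustrative, not taken from any of the sources below); the name is a purely descriptive label that shows up in the JobTracker UI and the job history:

import org.apache.hadoop.mapred.JobConf;

public class JobNameExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.setJobName("word-count");          // illustrative name; displayed in the UI and logs
        System.out.println(conf.getJobName());  // prints "word-count"
    }
}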

Usage

From source file: com.linkedin.mapred.AbstractAvroJob.java

License: Open Source License

/**
 * Sets up various standard settings in the JobConf. You probably don't want to mess with this.
 *
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException 
 */
protected JobConf createJobConf() throws IOException, URISyntaxException {
    JobConf conf = new JobConf();

    conf.setJobName(getJobId());
    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(AvroOutputFormat.class);

    AvroOutputFormat.setDeflateLevel(conf, 9);

    String hadoop_ugi = _config.getString("hadoop.job.ugi", null);
    if (hadoop_ugi != null) {
        conf.set("hadoop.job.ugi", hadoop_ugi);
    }
    if (_config.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");

        _log.info("Running locally, no hadoop jar set.");
    }

    // set JVM options if present
    if (_config.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", _config.getString("mapred.child.java.opts"));
        _log.info("mapred.child.java.opts set to " + _config.getString("mapred.child.java.opts"));
    }

    if (_config.containsKey(INPUT_PATHS)) {
        List<String> inputPathnames = _config.getStringList(INPUT_PATHS);
        for (String pathname : inputPathnames) {
            AvroUtils.addAllSubPaths(conf, new Path(pathname));
        }
        AvroJob.setInputSchema(conf, AvroUtils.getAvroInputSchema(conf));
    }

    if (_config.containsKey(OUTPUT_PATH)) {
        Path path = new Path(_config.get(OUTPUT_PATH));
        AvroOutputFormat.setOutputPath(conf, path);

        if (_config.getBoolean("force.output.overwrite", false)) {
            FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
            fs.delete(FileOutputFormat.getOutputPath(conf), true);
        }
    }
    // set all hadoop configs
    for (String key : _config.keySet()) {
        String lowerCase = key.toLowerCase();
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, _config.get(key));
        }
    }
    return conf;
}
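
A subclass would typically take the JobConf returned by this helper, wire in its own mapper, and submit it. A hedged sketch under assumed names (the run method and MyAvroMapper are hypothetical, not part of AbstractAvroJob; AvroJob.setMapperClass and JobClient.runJob are the standard Avro and old-mapred-API calls):

// Hypothetical subclass method, not from AbstractAvroJob:
public void run() throws IOException, URISyntaxException {
    JobConf conf = createJobConf();                    // standard settings from above
    AvroJob.setMapperClass(conf, MyAvroMapper.class);  // MyAvroMapper is hypothetical
    JobClient.runJob(conf);                            // submit and wait for completion
}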

From source file: com.liveramp.hank.hadoop.HadoopDomainBuilder.java

License: Apache License

public static void main(String[] args) throws IOException, InvalidConfigurationException {
    CommandLineChecker.check(args,
            new String[] { "domain name", "config path", "jobjar", "input path", "output_path" },
            HadoopDomainBuilder.class);
    String domainName = args[0];
    CoordinatorConfigurator configurator = new YamlCoordinatorConfigurator(args[1]);
    String jobJar = args[2];
    String inputPath = args[3];
    String outputPath = args[4];

    DomainBuilderProperties properties = new DomainBuilderProperties(domainName, configurator)
            .setOutputPath(outputPath);
    JobConf conf = new JobConf();
    conf.setJar(jobJar);
    conf.setJobName(HadoopDomainBuilder.class.getSimpleName() + " Domain " + domainName + ", Output path: "
            + outputPath);
    HadoopDomainBuilder builder = new HadoopDomainBuilder(conf, inputPath, SequenceFileInputFormat.class,
            DomainBuilderMapperDefault.class);
    LOG.info("Building Hank domain " + domainName + " from input " + inputPath
            + " and coordinator configuration " + configurator);
    // TODO: Create DomainVersionProperties
    throw new NotImplementedException("TODO: Create DomainVersionProperties");
    // builder.buildHankDomain(properties, null);
}

From source file: com.liveramp.hank.hadoop.HadoopDomainCompactor.java

License: Apache License

public static void main(String[] args) throws IOException, InvalidConfigurationException {
    CommandLineChecker.check(args, new String[] { "domain name", "version to compact number",
            "mapred.task.timeout", "config path", "jobjar" }, HadoopDomainCompactor.class);
    String domainName = args[0];
    Integer versionToCompactNumber = Integer.valueOf(args[1]);
    Integer mapredTaskTimeout = Integer.valueOf(args[2]);
    CoordinatorConfigurator configurator = new YamlCoordinatorConfigurator(args[3]);
    String jobJar = args[4];

    DomainCompactorProperties properties = new DomainCompactorProperties(domainName, versionToCompactNumber,
            configurator);
    JobConf conf = new JobConf();
    conf.setJar(jobJar);
    conf.set("mapred.task.timeout", mapredTaskTimeout.toString());
    conf.setJobName(HadoopDomainCompactor.class.getSimpleName() + " Domain " + domainName + ", Version "
            + versionToCompactNumber);
    HadoopDomainCompactor compactor = new HadoopDomainCompactor(conf);
    LOG.info("Compacting Hank domain " + domainName + " version " + versionToCompactNumber
            + " with coordinator configuration " + configurator);
    compactor.buildHankDomain(properties,
            new IncrementalDomainVersionProperties.Base("Version " + versionToCompactNumber + " compacted"));
}
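
Note that both Hank tools call conf.setJar(jobJar) with a path taken from the command line rather than inferring the job jar from a class. A small sketch contrasting the two standard ways of telling Hadoop which jar to ship (the class name and path are illustrative):

import org.apache.hadoop.mapred.JobConf;

public class JarSettingSketch {                    // hypothetical illustration class
    public static void main(String[] args) {
        JobConf byPath = new JobConf();
        byPath.setJar("/path/to/job.jar");         // explicit jar path, as in the tools above
        JobConf byClass = new JobConf(JarSettingSketch.class);  // infer the jar containing the class
    }
}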

From source file: com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinJob.java

License: Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, OptimizedDataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir));
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setPartitionerClass(CompositeKeyPartitioner.class);
    job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
    job.setOutputValueGroupingComparator(CompositeKeyOnlyComparator.class);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}
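
The factory returns a fully configured JobConf that still has to be submitted; a hedged driver sketch (JobClient.runJob is the standard blocking submission call, and this main method is hypothetical):

// Hypothetical driver, not part of OptimizedDataJoinJob:
public static void main(String[] args) throws IOException {
    JobConf job = OptimizedDataJoinJob.createDataJoinJob(args);
    RunningJob running = JobClient.runJob(job);    // blocks until the job finishes
    System.out.println("Job succeeded: " + running.isSuccessful());
}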

From source file: com.me.neu.Popular_question.Runner.java

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(Runner.class);
    conf.setJobName("pop-ques");

    conf.setMapperClass(Mapper1.class);

    // conf.setOutputKeyComparatorClass(DescendingIntComparable.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setReducerClass(Reducer1.class);

    // take the input and output from the command line
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);

}

From source file: com.me.neu.popular_tag_year.Runner.java

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(Runner.class);
    conf.setJobName("tag-year");

    conf.setMapperClass(Mapper1.class);

    // conf.setOutputKeyComparatorClass(DescendingIntComparable.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setReducerClass(Reducer1.class);

    // take the input and output from the command line
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);

}

From source file: com.me.neu.stackoverflow.Runner.java

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(Runner.class);
    conf.setJobName("tag-reco");

    conf.setMapperClass(Mapper1.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setReducerClass(Reducer1.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);

}

From source file: com.mh2c.WikipediaDumpLoaderDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    // arg checks

    JobConf conf = new JobConf(getClass());
    conf.setJobName("WP dump loader");

    // Set the mapper class, but skip the reduce phase
    conf.setMapperClass(WikipediaDumpLoaderMapper.class);
    conf.setNumReduceTasks(0);
    // The object key/value pairs are text
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Stream XML into the job
    conf.setInputFormat(StreamInputFormat.class);
    StreamInputFormat.addInputPath(conf, new Path(args[0]));
    // Use the XML record reader, with each page as one record
    conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader");
    conf.set("stream.recordreader.begin", "<page>");
    conf.set("stream.recordreader.end", "</page>");
    // Emit sequence files
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
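
Since the driver overrides run(String[]), it presumably implements org.apache.hadoop.util.Tool; a hedged launcher sketch using the standard ToolRunner entry point (the Launcher class is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class Launcher {
    public static void main(String[] args) throws Exception {
        // ToolRunner strips generic options (-D key=value, -files, ...) before calling run()
        int exitCode = ToolRunner.run(new Configuration(), new WikipediaDumpLoaderDriver(), args);
        System.exit(exitCode);
    }
}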

From source file: com.mh2c.WikipediaWordCountDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    // arg checks

    JobConf conf = new JobConf(getClass());
    conf.setJobName("WP word count");

    // Set the mapper and reducer classes, and use the reducer as a combiner
    conf.setMapperClass(WikipediaWordCountMapper.class);
    conf.setReducerClass(WikipediaWordCountReducer.class);
    conf.setCombinerClass(WikipediaWordCountReducer.class);
    // The object key/value pairs are text words and integer counts
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // Read in sequence files
    conf.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(conf, new Path(args[0]));
    // Emit ordinary text files
    conf.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
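
The formats line up deliberately: the dump loader above writes SequenceFiles and this job reads them, so the two drivers chain naturally. A hedged pipeline sketch (the Pipeline class and the paths are illustrative, and both drivers are assumed to implement Tool):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class Pipeline {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] loadArgs = { "enwiki-dump.xml", "pages-seq" };    // illustrative paths
        if (ToolRunner.run(conf, new WikipediaDumpLoaderDriver(), loadArgs) == 0) {
            String[] countArgs = { "pages-seq", "word-counts" };
            ToolRunner.run(conf, new WikipediaWordCountDriver(), countArgs);
        }
    }
}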

From source file: com.mycompany.app.TestStagingDirectoryPermissions.java

License: Apache License

@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}