Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf JobConf.

Prototype

public JobConf(boolean loadDefaults)

Source Link

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.

Usage

From source file:com.alexholmes.hadooputils.test.MiniHadoop.java

License:Apache License

/**
 * Creates a {@link MiniMRCluster} and {@link MiniDFSCluster} all working within
 * the directory supplied in {@code tmpDir}.
 *
 * The DFS will be formatted regardless if there was one or not before in the
 * given location.//from www .  j ava2s. co m
 *
 * @param config the Hadoop configuration
 * @param taskTrackers number of task trackers to start
 * @param dataNodes number of data nodes to start
 * @param tmpDir the temporary directory which the Hadoop cluster will use for storage
 * @throws IOException thrown if the base directory cannot be set.
 */
public MiniHadoop(final Configuration config, final int taskTrackers, final int dataNodes, final File tmpDir)
        throws IOException {

    if (taskTrackers < 1) {
        throw new IllegalArgumentException("Invalid taskTrackers value, must be greater than 0");
    }
    if (dataNodes < 1) {
        throw new IllegalArgumentException("Invalid dataNodes value, must be greater than 0");
    }

    config.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    if (tmpDir.exists()) {
        FileUtils.forceDelete(tmpDir);
    }
    FileUtils.forceMkdir(tmpDir);

    // used by MiniDFSCluster for DFS storage
    System.setProperty("test.build.data", new File(tmpDir, "data").getAbsolutePath());

    // required by JobHistory.initLogDir
    System.setProperty("hadoop.log.dir", new File(tmpDir, "logs").getAbsolutePath());

    JobConf jobConfig = new JobConf(config);

    dfsCluster = new MiniDFSCluster(jobConfig, dataNodes, true, null);
    fileSystem = dfsCluster.getFileSystem();
    mrCluster = new MiniMRCluster(0, 0, taskTrackers, fileSystem.getUri().toString(), 1, null, null, null,
            jobConfig);
}

From source file:com.asakusafw.runtime.stage.launcher.LauncherOptionsParserTest.java

License:Apache License

/**
 * w/ libjars./*from w w  w. j av a  2 s. c o  m*/
 * @throws Exception if failed
 */
@Test
public void w_libjars() throws Exception {
    File lib = putFile("dummy.jar");
    LauncherOptions options = parse(
            new String[] { MockTool.class.getName(), LauncherOptionsParser.KEY_ARG_LIBRARIES, lib.getPath(), });
    assertClasspath(options.getApplicationClassLoader().getURLs(), "testing");
    assertThat(lib, is(inClasspath(options.getApplicationClassLoader().getURLs())));

    assertClasspath(GenericOptionsParser.getLibJars(conf), "testing");
    assertThat(lib, is(inClasspath(GenericOptionsParser.getLibJars(conf))));

    JobConf jc = new JobConf(conf);
    assertThat(jc.getJar(), is(nullValue()));
}

From source file:com.atlantbh.jmeter.plugins.hadooputilities.jobstatistics.JobLayer.java

License:Apache License

protected JobClient prepareJobClient(String jobTracker) throws IOException {
    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", jobTracker);

    JobConf jobConf = new JobConf(conf);
    JobClient client = new JobClient(jobConf);

    return client;
}

From source file:com.benchmark.mapred.dancing.DistributedPentomino.java

License:Apache License

public int run(String[] args) throws Exception {
    JobConf conf;/*w w  w . j  a va2 s .co  m*/
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("pentomino <output>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentomino.class, Pentomino.class);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}

From source file:com.benchmark.mapred.RandomTextWriter.java

License:Apache License

/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.//w  w  w  .j ava 2  s  . com
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }

    JobConf job = new JobConf(getConf());

    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormat(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(Map.class);

    JobClient client = new JobClient(job);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
    long numBytesToWritePerMap = job.getLong("test.randomtextwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0");
        return -2;
    }
    long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes",
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite);
    }

    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.setOutputFormat(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

    job.setNumMapTasks(numMaps);
    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return 0;
}

From source file:com.benchmark.mapred.RandomWriter.java

License:Apache License

/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.// w w  w .  ja v a 2s  . c om
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path outDir = new Path(args[0]);
    JobConf job = new JobConf(getConf());

    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    JobClient client = new JobClient(job);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
    long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
        return -2;
    }
    long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes",
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
    }

    job.setNumMapTasks(numMaps);
    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return 0;
}

From source file:com.bol.crazypigs.HBaseStorage15.java

License:Apache License

private JobConf initializeLocalJobConfig(Job job) {
    Properties udfProps = getUDFProperties();
    Configuration jobConf = job.getConfiguration();
    JobConf localConf = new JobConf(jobConf);
    if (udfProps.containsKey(HBASE_CONFIG_SET)) {
        for (Entry<Object, Object> entry : udfProps.entrySet()) {
            localConf.set((String) entry.getKey(), (String) entry.getValue());
        }//from  w w w  .j  a  v a 2  s .  c  om
    } else {
        Configuration hbaseConf = HBaseConfiguration.create();
        for (Entry<String, String> entry : hbaseConf) {
            // JobConf may have some conf overriding ones in hbase-site.xml
            // So only copy hbase config not in job config to UDFContext
            // Also avoids copying core-default.xml and core-site.xml
            // props in hbaseConf to UDFContext which would be redundant.
            if (jobConf.get(entry.getKey()) == null) {
                udfProps.setProperty(entry.getKey(), entry.getValue());
                localConf.set(entry.getKey(), entry.getValue());
            }
        }
        udfProps.setProperty(HBASE_CONFIG_SET, "true");
    }
    return localConf;
}

From source file:com.cloudera.avro.AvroWordCount.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: AvroWordCount <input path> <output path>");
        return -1;
    }/*from  w w  w  . j a  v a  2s.co m*/

    JobConf conf = new JobConf(AvroWordCount.class);
    conf.setJobName("wordcount");

    // We call setOutputSchema first so we can override the configuration
    // parameters it sets
    AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.cloudera.ByteCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(new Configuration());

    // Trim off the hadoop-specific args
    String[] remArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Pull in properties
    Options options = new Options();

    Option property = OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator()
            .withDescription("use value for given property").create("D");
    options.addOption(property);//from   w  w w.java  2 s. co m

    Option skipChecksums = new Option("skipChecksums", "skip checksums");
    options.addOption(skipChecksums);

    Option profile = new Option("profile", "profile tasks");
    options.addOption(profile);

    CommandLineParser parser = new BasicParser();
    CommandLine line = parser.parse(options, remArgs);

    Properties properties = line.getOptionProperties("D");
    for (Entry<Object, Object> prop : properties.entrySet()) {
        conf.set(prop.getKey().toString(), prop.getValue().toString());
        System.out.println("Set config key " + prop.getKey() + " to " + prop.getValue());
    }

    if (line.hasOption("skipChecksums")) {
        conf.setBoolean("bytecount.skipChecksums", true);
        System.out.println("Skipping checksums");
    }

    if (line.hasOption("profile")) {
        conf.setBoolean("mapred.task.profile", true);
        conf.set("mapred.task.profile.params",
                "-agentlib:hprof=cpu=samples,depth=100,interval=1ms,lineno=y,thread=y,file=%s");
        conf.set(MRJobConfig.NUM_MAP_PROFILES, "0");
        conf.set("mapred.task.profile.maps", "1");
        System.out.println("Profiling map tasks");
    }

    // Get the positional arguments out
    remArgs = line.getArgs();
    if (remArgs.length != 2) {
        System.err.println("Usage: ByteCount <inputBase> <outputBase>");
        System.exit(1);
    }
    String inputBase = remArgs[0];
    String outputBase = remArgs[1];

    Job job = Job.getInstance(conf);

    job.setInputFormatClass(ByteBufferInputFormat.class);

    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(ByteCountMapper.class);
    job.setReducerClass(ByteCountReducer.class);
    job.setCombinerClass(ByteCountReducer.class);

    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(inputBase));
    FileOutputFormat.setOutputPath(job, new Path(outputBase));

    job.setJarByClass(ByteCount.class);

    boolean success = job.waitForCompletion(true);

    Counters counters = job.getCounters();
    System.out.println("\tRead counters");
    printCounter(counters, READ_COUNTER.BYTES_READ);
    printCounter(counters, READ_COUNTER.LOCAL_BYTES_READ);
    printCounter(counters, READ_COUNTER.SCR_BYTES_READ);
    printCounter(counters, READ_COUNTER.ZCR_BYTES_READ);

    System.exit(success ? 0 : 1);
}

From source file:com.cloudera.hbase.OldWordCount.java

License:Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);/*from  w  w  w  .j a va  2 s .c  om*/
    }
    conf.setJobName("word count");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));
    JobClient.runJob(conf);
    return;
}