List of usage examples for org.apache.hadoop.mapred JobConf set
public void set(String name, String value)
Set the value of the name property.
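For orientation before the harvested examples, here is a minimal sketch of the call pattern. The class name JobConfSetSketch and the regex value are illustrative only (not taken from any project below); the property key mapred.mapper.regex matches the one used in the Grep examples. set stores a string-valued property in the job configuration and get reads it back.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // store a string-valued property in the job configuration
        conf.set("mapred.mapper.regex", "ERROR.*");
        // any code holding the configuration (e.g. a task) can read it back
        String regex = conf.get("mapred.mapper.regex");
        System.out.println(regex);
    }
}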
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableSnapshotInputFormat.java
License:Apache License
private static void setColumns(JobConf job) throws IOException {
    // hbase mapred API doesn't support scan at the moment.
    Scan scan = HiveHBaseInputFormatUtil.getScan(job);
    byte[][] families = scan.getFamilies();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < families.length; i++) {
        if (i > 0)
            sb.append(" ");
        sb.append(Bytes.toString(families[i]));
    }
    job.set(TableInputFormat.COLUMN_LIST, sb.toString());
}
From source file:com.github.gaoyangthu.demo.mapred.Grep.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[2]);
        if (args.length == 4)
            grepJob.set("mapred.mapper.regex.group", args[3]);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormat(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        JobClient.runJob(grepJob);

        JobConf sortJob = new JobConf(Grep.class);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormat(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class); // sort by decreasing freq

        JobClient.runJob(sortJob);
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
From source file:com.google.mr4c.hadoop.HadoopUtils.java
License:Open Source License
public static void applyToJobConf(Properties props, JobConf conf) {
    for (String name : props.stringPropertyNames()) {
        conf.set(name, props.getProperty(name));
    }
}
From source file:com.google.mr4c.hadoop.HadoopUtils.java
License:Open Source License
/**
 * @param varMap apply environment variable values from this map
 * @param vars apply existing values of these environment variables
 */
public static void applyEnvironmentVariables(JobConf conf, Map<String, String> varMap, List<String> vars) {
    Map<String, String> allMap = new HashMap<String, String>(System.getenv());
    allMap.keySet().retainAll(vars); // only the env we wanted
    allMap.putAll(varMap);
    List<String> assigns = new ArrayList<String>();
    for (String var : allMap.keySet()) {
        String val = allMap.get(var);
        if (!StringUtils.isEmpty(val)) {
            assigns.add(var + "=" + val);
        }
    }
    String value = StringUtils.join(assigns, ", ");
    conf.set(JobConf.MAPRED_MAP_TASK_ENV, value);
    conf.set(JobConf.MAPRED_REDUCE_TASK_ENV, value);
}
From source file:com.google.mr4c.hadoop.MR4CMRJob.java
License:Open Source License
private void exportProperty(MR4CConfig bbConf, JobConf jobConf, Category category, String name, String hadoopName) {
    String val = bbConf.getCategory(category).getProperty(name);
    if (!StringUtils.isEmpty(val)) {
        jobConf.set(hadoopName, val);
    }
}
From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java
License:Open Source License
public void testWithLocal() throws Exception {
    MiniMRCluster mr = null;
    try {
        JobConf jconf = new JobConf();
        jconf.set("mapred.queue.names", "default");
        mr = new MiniMRCluster(2, "file:///", 3, null, null, jconf);
        Configuration cf = mr.createJobConf();
        cf.set("io.compression.codecs", LzoCodec.class.getName());
        runWordCount(cf, false, false);
        runWordCount(cf, false, true);
        runWordCount(cf, true, false);
    } finally {
        if (mr != null) {
            mr.shutdown();
        }
    }
}
From source file:com.hadoopilluminated.examples.Grep.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[2]);
        if (args.length == 4) {
            grepJob.set("mapred.mapper.regex.group", args[3]);
        }

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormat(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        JobClient.runJob(grepJob);

        JobConf sortJob = new JobConf(getConf(), Grep.class);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormat(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setOutputKeyComparatorClass(LongWritable.DecreasingComparator.class); // sort by decreasing freq

        JobClient.runJob(sortJob);
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
From source file:com.hadoopilluminated.examples.Join.java
License:Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
@Override
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Before
public void before() throws IOException {
    crush = new Crush();

    JobConf job = new JobConf(false);
    crush.setConf(job);

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    job.setInt("mapred.reduce.tasks", 20);
    job.setLong("dfs.block.size", 1024 * 1024 * 64);

    FileSystem fs = FileSystem.get(job);
    fs.setWorkingDirectory(new Path(tmp.getRoot().getAbsolutePath()));
    crush.setFileSystem(fs);
}
From source file:com.hdfs.concat.crush.CrushReducerTest.java
License:Apache License
@Before
public void setupReducer() {
    JobConf job = new JobConf(false);

    job.set("mapred.tip.id", "task_201011081200_014527_r_001234");
    job.set("mapred.task.id", "attempt_201011081200_14527_r_001234_0");

    outDir = tmp.newFolder("out");
    tmp.newFolder("out/_temporary");
    job.set("mapred.output.dir", outDir.getAbsolutePath());

    job.set("fs.default.name", "file:///");
    job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");

    job.setLong("crush.timestamp", 98765);
    job.setInt("crush.num.specs", 3);

    job.set("crush.0.regex", ".+/dir");
    job.set("crush.0.regex.replacement", "firstregex-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
    job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
    job.set("crush.0.output.format", TextOutputFormat.class.getName());

    job.set("crush.1.regex", ".+/dir/([^/]+/)*(.+)");
    job.set("crush.1.regex.replacement", "secondregex-$2-${crush.timestamp}-${crush.task.num}-${crush.file.num}");
    job.set("crush.1.input.format", TextInputFormat.class.getName());
    job.set("crush.1.output.format", TextOutputFormat.class.getName());

    job.set("crush.2.regex", ".+/other");
    job.set("crush.2.regex.replacement", "${crush.timestamp}-${crush.task.num}-middle-${crush.file.num}-tail");
    job.set("crush.2.input.format", TextInputFormat.class.getName());
    job.set("crush.2.output.format", SequenceFileOutputFormat.class.getName());

    reducer = new CrushReducer();
    reducer.configure(job);
}