List of usage examples for org.apache.hadoop.mapred JobConf get
public String get(String name)
Gets the value of the name property, or null
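Before the source-file listings, a minimal self-contained sketch of calling get directly. The property names and values below are made up for illustration; the two-argument get(name, defaultValue) overload shown alongside is inherited from Configuration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Set a property, then read it back with get(String name).
        conf.set("example.greeting", "hello");          // hypothetical key
        String greeting = conf.get("example.greeting"); // "hello"

        // A property that was never set comes back as null.
        String missing = conf.get("example.not.set");   // null

        // The overload inherited from Configuration substitutes a default for null.
        String fallback = conf.get("example.not.set", "fallback"); // "fallback"

        System.out.println(greeting + " / " + missing + " / " + fallback);
    }
}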
if no such property exists.
From source file:com.google.mr4c.hadoop.MR4CMRJobTest.java
License:Open Source License
@Test
public void testExport() throws Exception {
    JobConf jobConf = newJobConf();
    m_sourceMRJob.applyTo(jobConf);
    assertEquals(m_jar, jobConf.getJar());
    Cluster cluster = Cluster.extractFromConfig(jobConf);
    assertEquals(m_cluster, cluster);
    assertEquals("5", jobConf.get(MR4CMRJob.PROP_TASKS));
}
From source file:com.hadoopilluminated.examples.Join.java
License:Apache License
/**
 * The main driver for sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there is communication problems with the job
 *                     tracker.
 */
@Override
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    jobConf.setOutputFormat(outputFormatClass);
    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void defaults() throws Exception {
    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "20101116123015");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("20"));
    assertThat(job.get("mapred.output.compress"), equalTo("true"));
    assertThat(job.get("mapred.output.compression.type"), equalTo("BLOCK"));
    assertThat(job.get("mapred.output.compression.codec"),
            equalTo("org.apache.hadoop.io.compress.DefaultCodec"));
    assertThat(crush.getMaxFileBlocks(), equalTo(8));
    assertThat(job.get("crush.timestamp"), equalTo("20101116123015"));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement"),
            equalTo("crushed_file-20101116123015-${crush.task.num}-${crush.file.num}"));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void disableCompression() throws Exception {
    crush.createJobConfAndParseArgs("--compress=none", tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "20101116123015");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("20"));
    assertThat(job.get("mapred.output.compress"), equalTo("false"));
    assertThat(crush.getMaxFileBlocks(), equalTo(8));
    assertThat(job.get("crush.timestamp"), equalTo("20101116123015"));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement"),
            equalTo("crushed_file-20101116123015-${crush.task.num}-${crush.file.num}"));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void parse() throws Exception {
    crush.createJobConfAndParseArgs(
            "--regex", ".+/ads/.+",
            "--replacement", "foo",
            "--input-format", "org.apache.hadoop.mapred.TextInputFormat",
            "--output-format", "org.apache.hadoop.mapred.TextOutputFormat",

            "--regex", ".+/act/.+",
            "--replacement", "bar",
            "--input-format", "org.apache.hadoop.mapred.TextInputFormat",
            "--output-format", "org.apache.hadoop.mapred.SequenceFileOutputFormat",

            "--regex", ".+/bid/.+",
            "--replacement", "hello",
            "--input-format", "org.apache.hadoop.mapred.SequenceFileInputFormat",
            "--output-format", "org.apache.hadoop.mapred.TextOutputFormat",

            "--threshold", "0.5",
            "--max-file-blocks", "100",
            "--compress", "org.apache.hadoop.io.compress.DefaultCodec",

            tmp.newFolder("in").getAbsolutePath(), tmp.newFolder("out").getAbsolutePath(), "20101116123015");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("20"));
    assertThat(job.get("mapred.output.compress"), equalTo("true"));
    assertThat(job.get("mapred.output.compression.codec"),
            equalTo("org.apache.hadoop.io.compress.DefaultCodec"));
    assertThat(crush.getMaxFileBlocks(), equalTo(100));
    assertThat(job.get("crush.timestamp"), equalTo("20101116123015"));
    assertThat(job.get("crush.num.specs"), equalTo("3"));

    assertThat(job.get("crush.0.regex"), equalTo(".+/ads/.+"));
    assertThat(job.get("crush.0.regex.replacement"), equalTo("foo"));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.TextInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.TextOutputFormat"));

    assertThat(job.get("crush.1.regex"), equalTo(".+/act/.+"));
    assertThat(job.get("crush.1.regex.replacement"), equalTo("bar"));
    assertThat(job.get("crush.1.input.format"), equalTo("org.apache.hadoop.mapred.TextInputFormat"));
    assertThat(job.get("crush.1.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));

    assertThat(job.get("crush.2.regex"), equalTo(".+/bid/.+"));
    assertThat(job.get("crush.2.regex.replacement"), equalTo("hello"));
    assertThat(job.get("crush.2.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.2.output.format"), equalTo("org.apache.hadoop.mapred.TextOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void parseOldNoType() throws Exception {
    long millis = currentTimeMillis();

    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "80");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("80"));
    assertThat(Long.parseLong(job.get("crush.timestamp")), greaterThanOrEqualTo(millis));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(crush.getMaxFileBlocks(), equalTo(Integer.MAX_VALUE));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement")
            .matches("crushed_file-\\d+-\\$\\{crush.task.num\\}-\\$\\{crush.file.num\\}"), is(true));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void parseOldSequence() throws Exception {
    long millis = currentTimeMillis();

    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "80", "SEQUENCE");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("80"));
    assertThat(Long.parseLong(job.get("crush.timestamp")), greaterThanOrEqualTo(millis));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(crush.getMaxFileBlocks(), equalTo(Integer.MAX_VALUE));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement")
            .matches("crushed_file-\\d+-\\$\\{crush.task.num\\}-\\$\\{crush.file.num\\}"), is(true));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.SequenceFileInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.SequenceFileOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushOptionParsingTest.java
License:Apache License
@Test
public void parseOldText() throws Exception {
    long millis = currentTimeMillis();

    crush.createJobConfAndParseArgs(tmp.newFolder("in").getAbsolutePath(),
            tmp.newFolder("out").getAbsolutePath(), "80", "TEXT");

    JobConf job = crush.getJob();

    assertThat(job.get("mapred.reduce.tasks"), equalTo("80"));
    assertThat(Long.parseLong(job.get("crush.timestamp")), greaterThanOrEqualTo(millis));
    assertThat(job.get("crush.num.specs"), equalTo("1"));
    assertThat(crush.getMaxFileBlocks(), equalTo(Integer.MAX_VALUE));
    assertThat(job.get("crush.0.regex"), equalTo(".+"));
    assertThat(job.get("crush.0.regex.replacement")
            .matches("crushed_file-\\d+-\\$\\{crush.task.num\\}-\\$\\{crush.file.num\\}"), is(true));
    assertThat(job.get("crush.0.input.format"), equalTo("org.apache.hadoop.mapred.TextInputFormat"));
    assertThat(job.get("crush.0.output.format"), equalTo("org.apache.hadoop.mapred.TextOutputFormat"));
}
From source file:com.hdfs.concat.crush.CrushPartitioner.java
License:Apache License
@Override
public void configure(JobConf job) {
    String path = job.get("crush.partition.map");
    int expPartitions = job.getNumReduceTasks();

    bucketToPartition = new HashMap<Text, Integer>(100);

    try {
        FileSystem fs = FileSystem.get(job);
        Reader reader = new Reader(fs, new Path(path), job);

        Text bucket = new Text();
        IntWritable partNum = new IntWritable();

        while (reader.next(bucket, partNum)) {
            int partNumValue = partNum.get();

            if (partNumValue < 0 || partNumValue >= expPartitions) {
                throw new IllegalArgumentException(
                        "Partition " + partNumValue + " not allowed with " + expPartitions + " reduce tasks");
            }

            Integer prev = bucketToPartition.put(new Text(bucket), partNumValue);

            if (null != prev) {
                throw new IllegalArgumentException("Bucket " + bucket + " appears more than once in " + path);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not read partition map from " + path, e);
    }

    if (new HashSet<Integer>(bucketToPartition.values()).size() > expPartitions) {
        throw new IllegalArgumentException(
                path + " contains more than " + expPartitions + " distinct partitions");
    }
}
From source file:com.hdfs.concat.crush.CrushReducer.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);

    this.job = job;

    taskNum = Integer.parseInt(job.get("mapred.tip.id").replaceFirst(".+_(\\d+)", "$1"));
    timestamp = Long.parseLong(job.get("crush.timestamp"));

    outDirPath = job.get("mapred.output.dir");

    if (null == outDirPath || outDirPath.isEmpty()) {
        throw new IllegalArgumentException("mapred.output.dir has no value");
    }

    /*
     * The files we write should be rooted in the "crush" subdir of the output directory to distinguish them
     * from the files created by the collector.
     */
    outDirPath = new Path(outDirPath + "/crush").toUri().getPath();

    /*
     * Configure the regular expressions and replacements we use to convert dir names to crush output file
     * names. Also get the directory data formats.
     */
    int numSpecs = job.getInt("crush.num.specs", 0);

    if (numSpecs <= 0) {
        throw new IllegalArgumentException(
                "Number of regular expressions must be zero or greater: " + numSpecs);
    }

    readCrushSpecs(numSpecs);

    placeHolderToValue.put("crush.task.num", Integer.toString(taskNum));
    placeHolderToValue.put("crush.timestamp", job.get("crush.timestamp"));

    try {
        fs = FileSystem.get(job);
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}