Example usage for org.apache.hadoop.mapred JobConf set

Introduction

This page collects usage examples for the org.apache.hadoop.mapred JobConf.set method.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
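
The value stored this way can be read back with the inherited Configuration getters. A minimal sketch, with an illustrative property name that is not part of any framework contract:

JobConf conf = new JobConf();

// Store an arbitrary string-valued property. The name is free-form, but
// framework-recognized keys (for example "mapred.input.dir") configure the job itself.
conf.set("my.app.greeting", "hello");

// Read it back; get(name) returns null if the property was never set.
String greeting = conf.get("my.app.greeting");            // "hello"
String missing = conf.get("my.app.other", "fallback");    // "fallback"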

Usage

From source file: com.acme.extensions.data.SeedingHadoopAdapter.java

License: Apache License

@Override
public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {

    job.set("seed", String.valueOf(((SeededSplit) split).getSeed()));
    return super.getRecordReader(((SeededSplit) split).getChildSplit(), job, reporter);
}
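
Because the seed is stored in the JobConf handed to the child record reader, the consuming side can recover it from the same object. A hedged sketch of that consumer; the key "seed" comes from the source above, everything else is illustrative:

// Illustrative consumer: recover the per-split seed stored by getRecordReader.
long seed = Long.parseLong(job.get("seed", "0"));
Random random = new Random(seed);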

From source file: com.benchmark.mapred.Grep.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.out.println("Grep <inDir> <outDir> <numreduces> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {

        Date startIteration = new Date();
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[3]);
        if (args.length == 5)
            grepJob.set("mapred.mapper.regex.group", args[4]);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, new Path(args[1]));
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.setNumReduceTasks(Integer.parseInt(args[2]));

        JobClient.runJob(grepJob);

        Date endIteration = new Date();
        System.out.println("The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000
                + " seconds.");
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}
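
The two regex properties set above are read back on the task side through the mapper's configure hook. A sketch loosely modeled on Hadoop's RegexMapper; the class name here is illustrative:

// Illustrative consumer of the properties set by the driver above.
public class RegexConfigSketch implements JobConfigurable {
    private Pattern pattern;
    private int group;

    public void configure(JobConf job) {
        pattern = Pattern.compile(job.get("mapred.mapper.regex"));
        group = job.getInt("mapred.mapper.regex.group", 0);
    }
}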

From source file: com.benchmark.mapred.Join.java

License: Apache License

/**
 * The main driver for the join program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}

From source file: com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License: Apache License

/**
 * Set the array of {@link Path}s as the list of inputs
 * for the map-reduce job.
 * 
 * @param conf Configuration of the job. 
 * @param inputPaths the {@link Path}s of the input directories/files 
 * for the map-reduce job.
 */
public static void setInputPaths(JobConf conf, Path... inputPaths) {
    Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]);
    StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
    for (int i = 1; i < inputPaths.length; i++) {
        str.append(StringUtils.COMMA_STR);
        path = new Path(conf.getWorkingDirectory(), inputPaths[i]);
        str.append(StringUtils.escapeString(path.toString()));
    }
    conf.set("mapred.input.dir", str.toString());
}
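
Since the method writes one comma-joined value, each call replaces any previously configured inputs. A usage sketch with assumed paths:

// Usage sketch: both paths end up in a single escaped, comma-separated value.
JobConf conf = new JobConf();
RealtimeInputFormat2.setInputPaths(conf, new Path("/data/a"), new Path("/data/b"));
// conf.get("mapred.input.dir") now holds both paths, each resolved
// against the working directory and escaped.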

From source file: com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License: Apache License

/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param conf The configuration of the job 
 * @param path {@link Path} to be added to the list of inputs for 
 *            the map-reduce job.
 */
public static void addInputPath(JobConf conf, Path path) {
    path = new Path(conf.getWorkingDirectory(), path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get("mapred.input.dir");
    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + StringUtils.COMMA_STR + dirStr);
}
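
Unlike setInputPaths, this method appends to any existing value, so inputs accumulate across calls. A usage sketch with assumed paths:

// Usage sketch: addInputPath appends rather than replaces.
JobConf conf = new JobConf();
RealtimeInputFormat2.addInputPath(conf, new Path("/logs/day1"));
RealtimeInputFormat2.addInputPath(conf, new Path("/logs/day2"));
// "mapred.input.dir" now lists both paths, comma-separated.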

From source file: com.bixolabs.cascading.avro.AvroScheme.java

License: Apache License

@SuppressWarnings({ "deprecation" })
@Override
public void sourceInit(Tap tap, JobConf conf) {
    conf.set(AvroJob.INPUT_SCHEMA, getSchema().toString());
    conf.setInputFormat(AvroInputFormat.class);

    // add AvroSerialization to io.serializations
    //           conf.set("io.serializations", conf.get("io.serializations")+","+AvroSerialization.class.getName() );
    //        Collection<String> serializations = conf.getStringCollection("io.serializations");
    //        if (!serializations.contains(AvroSerialization.class.getName())) {
    //            serializations.add(AvroSerialization.class.getName()); 
    ////        if (!serializations.contains("cascading.kryo.KryoSerialization")) {
    ////            serializations.add("cascading.kryo.KryoSerialization"); }
    //            conf.setStrings("io.serializations", serializations.toArray(new String[0]));
    //        };

    LOGGER.info(String.format("Initializing Avro scheme for source tap - scheme fields: %s", _schemeFields));
}

From source file: com.bixolabs.cascading.avro.AvroScheme.java

License: Apache License

@SuppressWarnings({ "deprecation" })
@Override
public void sinkInit(Tap tap, JobConf conf) {
    conf.set(AvroJob.OUTPUT_SCHEMA, getSchema().toString());
    conf.setOutputFormat(AvroOutputFormat.class);

    // Since we're outputting to Avro, we need to set up output values.
    // TODO KKr - why don't we need to set the OutputValueClass?
    // TODO KKr - why do we need to set the OutputKeyComparatorClass?
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(AvroWrapper.class);
    conf.setOutputKeyComparatorClass(AvroKeyComparator.class);
    //        conf.setMapOutputKeyClass(AvroKey.class);
    //        conf.setMapOutputValueClass(AvroValue.class);

    // add AvroSerialization to io.serializations
    //        Collection<String> serializations = conf.getStringCollection("io.serializations");
    //        if (!serializations.contains(AvroSerialization.class.getName())) {
    //            serializations.add(AvroSerialization.class.getName());
    //            conf.setStrings("io.serializations", serializations.toArray(new String[0]));
    //        }

    // Class<? extends Mapper> mapClass = conf.getMapperClass();
    // Class<? extends Reducer> reduceClass = conf.getReducerClass();
    // AvroJob.setOutputSchema(conf, getSchema());
    // conf.setMapperClass(mapClass);
    // conf.setReducerClass(reduceClass);

    LOGGER.info(String.format("Initializing Avro scheme for sink tap - scheme fields: %s", _schemeFields));
}
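
On the task side, the schema strings stored by sourceInit and sinkInit can be parsed back from the same properties. A minimal hedged sketch, assuming Avro's Schema.Parser is available:

// Hedged sketch: read back the schemas stored under the AvroJob keys used above.
Schema inputSchema = new Schema.Parser().parse(conf.get(AvroJob.INPUT_SCHEMA));
Schema outputSchema = new Schema.Parser().parse(conf.get(AvroJob.OUTPUT_SCHEMA));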

From source file: com.bol.crazypigs.HBaseStorage15.java

License: Apache License

private JobConf initializeLocalJobConfig(Job job) {
    Properties udfProps = getUDFProperties();
    Configuration jobConf = job.getConfiguration();
    JobConf localConf = new JobConf(jobConf);
    if (udfProps.containsKey(HBASE_CONFIG_SET)) {
        for (Entry<Object, Object> entry : udfProps.entrySet()) {
            localConf.set((String) entry.getKey(), (String) entry.getValue());
        }
    } else {
        Configuration hbaseConf = HBaseConfiguration.create();
        for (Entry<String, String> entry : hbaseConf) {
            // JobConf may have some conf overriding ones in hbase-site.xml
            // So only copy hbase config not in job config to UDFContext
            // Also avoids copying core-default.xml and core-site.xml
            // props in hbaseConf to UDFContext which would be redundant.
            if (jobConf.get(entry.getKey()) == null) {
                udfProps.setProperty(entry.getKey(), entry.getValue());
                localConf.set(entry.getKey(), entry.getValue());
            }
        }
        udfProps.setProperty(HBASE_CONFIG_SET, "true");
    }
    return localConf;
}

From source file: com.cloudera.ByteCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(new Configuration());

    // Trim off the hadoop-specific args
    String[] remArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Pull in properties
    Options options = new Options();

    Option property = OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator()
            .withDescription("use value for given property").create("D");
    options.addOption(property);

    Option skipChecksums = new Option("skipChecksums", "skip checksums");
    options.addOption(skipChecksums);

    Option profile = new Option("profile", "profile tasks");
    options.addOption(profile);

    CommandLineParser parser = new BasicParser();
    CommandLine line = parser.parse(options, remArgs);

    Properties properties = line.getOptionProperties("D");
    for (Entry<Object, Object> prop : properties.entrySet()) {
        conf.set(prop.getKey().toString(), prop.getValue().toString());
        System.out.println("Set config key " + prop.getKey() + " to " + prop.getValue());
    }

    if (line.hasOption("skipChecksums")) {
        conf.setBoolean("bytecount.skipChecksums", true);
        System.out.println("Skipping checksums");
    }

    if (line.hasOption("profile")) {
        conf.setBoolean("mapred.task.profile", true);
        conf.set("mapred.task.profile.params",
                "-agentlib:hprof=cpu=samples,depth=100,interval=1ms,lineno=y,thread=y,file=%s");
        conf.set(MRJobConfig.NUM_MAP_PROFILES, "0");
        conf.set("mapred.task.profile.maps", "1");
        System.out.println("Profiling map tasks");
    }

    // Get the positional arguments out
    remArgs = line.getArgs();
    if (remArgs.length != 2) {
        System.err.println("Usage: ByteCount <inputBase> <outputBase>");
        System.exit(1);
    }
    String inputBase = remArgs[0];
    String outputBase = remArgs[1];

    Job job = Job.getInstance(conf);

    job.setInputFormatClass(ByteBufferInputFormat.class);

    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(ByteCountMapper.class);
    job.setReducerClass(ByteCountReducer.class);
    job.setCombinerClass(ByteCountReducer.class);

    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(inputBase));
    FileOutputFormat.setOutputPath(job, new Path(outputBase));

    job.setJarByClass(ByteCount.class);

    boolean success = job.waitForCompletion(true);

    Counters counters = job.getCounters();
    System.out.println("\tRead counters");
    printCounter(counters, READ_COUNTER.BYTES_READ);
    printCounter(counters, READ_COUNTER.LOCAL_BYTES_READ);
    printCounter(counters, READ_COUNTER.SCR_BYTES_READ);
    printCounter(counters, READ_COUNTER.ZCR_BYTES_READ);

    System.exit(success ? 0 : 1);
}

From source file: com.cloudera.circus.test.TestXTest.java

License: Open Source License

@Test
@TestHadoop
public void testHadoopMapReduce() throws Exception {
    JobConf conf = getHadoopConf();
    FileSystem fs = FileSystem.get(conf);
    JobClient jobClient = new JobClient(conf);
    try {
        Path inputDir = new Path(getHadoopTestDir(), "input");
        Path outputDir = new Path(getHadoopTestDir(), "output");

        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
        writer.write("a\n");
        writer.write("b\n");
        writer.write("c\n");
        writer.close();

        JobConf jobConf = getHadoopConf();
        jobConf.setInt("mapred.map.tasks", 1);
        jobConf.setInt("mapred.map.max.attempts", 1);
        jobConf.setInt("mapred.reduce.max.attempts", 1);
        jobConf.set("mapred.input.dir", inputDir.toString());
        jobConf.set("mapred.output.dir", outputDir.toString());
        final RunningJob runningJob = jobClient.submitJob(jobConf);
        waitFor(60 * 1000, true, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return runningJob.isComplete();
            }
        });
        Assert.assertTrue(runningJob.isSuccessful());
        Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000")));
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(outputDir, "part-00000"))));
        Assert.assertTrue(reader.readLine().trim().endsWith("a"));
        Assert.assertTrue(reader.readLine().trim().endsWith("b"));
        Assert.assertTrue(reader.readLine().trim().endsWith("c"));
        Assert.assertNull(reader.readLine());
        reader.close();
    } finally {
        fs.close();
        jobClient.close();
    }
}