Example usage for org.apache.hadoop.mapred JobConf setBoolean

List of usage examples for org.apache.hadoop.mapred JobConf setBoolean

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf setBoolean.

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
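
For quick orientation, here is a minimal sketch of the call in isolation (the property name is invented for illustration). setBoolean stores the value as a string, and the paired getBoolean reads it back, returning the supplied default when the property is unset.

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.setBoolean("example.feature.enabled", true); // stored internally as the string "true"

        // The second argument is the default returned if the property is absent.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("enabled = " + enabled); // prints: enabled = true
    }
}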

Usage

From source file:cascading.scheme.TextLine.java

License:Open Source License

@Override
public void sinkInit(Tap tap, JobConf conf) throws IOException {
    if (tap.getQualifiedPath(conf).toString().endsWith(".zip"))
        throw new IllegalStateException("cannot write zip files: " + FileOutputFormat.getOutputPath(conf));

    if (getSinkCompression() == Compress.DISABLE)
        conf.setBoolean("mapred.output.compress", false);
    else if (getSinkCompression() == Compress.ENABLE)
        conf.setBoolean("mapred.output.compress", true);

    conf.setOutputKeyClass(Text.class); // be explicit
    conf.setOutputValueClass(Text.class); // be explicit
    conf.setOutputFormat(TextOutputFormat.class);
}
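
Note that "mapred.output.compress" is the pre-Hadoop-2 name of this property (newer releases know it as "mapreduce.output.fileoutputformat.compress"). With the old mapred API, the same flag can also be toggled through a FileOutputFormat helper; a brief equivalent sketch:

import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class CompressToggle {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Equivalent to conf.setBoolean("mapred.output.compress", true)
        FileOutputFormat.setCompressOutput(conf, true);
        System.out.println(conf.getBoolean("mapred.output.compress", false)); // true
    }
}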

From source file:cascading.tap.hadoop.HadoopMR1TapPlatformTest.java

License:Open Source License

@Test
public void testCombinedHfs() throws Exception {
    getPlatform().copyFromLocal(inputFileLower);
    getPlatform().copyFromLocal(inputFileUpper);

    Hfs sourceLower = new Hfs(new TextLine(new Fields("offset", "line")), InputData.inputFileLower);
    Hfs sourceUpper = new Hfs(new TextLine(new Fields("offset", "line")), InputData.inputFileUpper);

    // create a CombinedHfs instance on these files
    Tap source = new MultiSourceTap<Hfs, JobConf, RecordReader>(sourceLower, sourceUpper);

    FlowProcess<JobConf> process = getPlatform().getFlowProcess();
    JobConf conf = process.getConfigCopy();

    // set the combine flag
    conf.setBoolean(HfsProps.COMBINE_INPUT_FILES, true);

    conf.set("cascading.flow.platform", "hadoop"); // only supported on mr based platforms

    // test the input format and the split
    source.sourceConfInit(process, conf);

    InputFormat inputFormat = conf.getInputFormat();

    assertEquals(Hfs.CombinedInputFormat.class, inputFormat.getClass());
    InputSplit[] splits = inputFormat.getSplits(conf, 1);

    assertEquals(1, splits.length);

    validateLength(source.openForRead(process), 10);
}

From source file:com.alexholmes.hadooputils.sort.Sort.java

License:Apache License

/**
 * The driver for the sort MapReduce job.
 *
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks
 * @param numReduceTasks    number of reduce tasks
 * @param sampler           sampler, if required
 * @param codecClass        the compression codec for compressing final outputs
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs
 * @param createLzopIndexes whether or not an MR job should be launched to create LZOP indexes
 *                          for the job output files
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {

    jobConf.setJarByClass(Sort.class);
    jobConf.setJobName("sorter");

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    if (numMapTasks != null) {
        jobConf.setNumMapTasks(numMapTasks);
    }
    if (numReduceTasks != null) {
        jobConf.setNumReduceTasks(numReduceTasks);
    } else {
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }

        // Set user-supplied (possibly default) job configs
        jobConf.setNumReduceTasks(numReduces);
    }

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(SortReduce.class);

    jobConf.setInputFormat(SortInputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    if (mapCodecClass != null) {
        jobConf.setMapOutputCompressorClass(mapCodecClass);
    }

    if (codecClass != null) {
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
    }

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        FileSystem fileSystem = FileSystem.get(jobConf);

        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        }
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        }
        return true;
    }
    return false;
}

From source file:com.alexholmes.hadooputils.sort.SortInputFormat.java

License:Apache License

@Override
public void configure(final JobConf conf) {
    super.configure(conf);

    // By default, DeprecatedLzoTextInputFormat.listStatus ignores files that
    // don't end in ".lzo". Since we want to work with any file, we turn this
    // feature off.
    conf.setBoolean(LzoInputFormatCommon.IGNORE_NONLZO_KEY, false);
}

From source file:com.benchmark.mapred.terasort.TeraOutputFormat.java

License:Apache License

/**
 * Set the requirement for a final sync before the stream is closed.
 */
public static void setFinalSync(JobConf conf, boolean newValue) {
    conf.setBoolean(FINAL_SYNC_ATTRIBUTE, newValue);
}
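
The flag is read back on the task side with a paired getter; a minimal sketch of what that accessor looks like, assuming FINAL_SYNC_ATTRIBUTE names the same property:

/**
 * Does the user want a final sync at close? Defaults to false when the
 * property has not been set.
 */
public static boolean getFinalSync(JobConf conf) {
    return conf.getBoolean(FINAL_SYNC_ATTRIBUTE, false);
}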

From source file:com.chriscx.mapred.Driver.java

public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), Driver.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else {
            other_args.add(args[i]);
        }
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
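
The "wordcount.skip.patterns" flag set by this driver is read back in the mapper, which is not shown on this page. A hedged sketch of the reading side, following the classic WordCount v2 pattern, might look like this:

public void configure(JobConf job) {
    // Read the flag set by the driver; default to false when it is absent.
    if (job.getBoolean("wordcount.skip.patterns", false)) {
        try {
            // The driver added the pattern file to the distributed cache.
            Path[] patternFiles = DistributedCache.getLocalCacheFiles(job);
            // ... parse patternFiles[0] into the set of patterns to skip ...
        } catch (IOException ioe) {
            throw new RuntimeException("Could not load skip patterns", ioe);
        }
    }
}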

From source file:com.chriscx.matching.Driver.java

public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), com.chriscx.mapred.Driver.class);
    conf.setJobName("Matching");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else {
            other_args.add(args[i]);
        }
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.cloudera.ByteCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(new Configuration());

    // Trim off the hadoop-specific args
    String[] remArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Pull in properties
    Options options = new Options();

    Option property = OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator()
            .withDescription("use value for given property").create("D");
    options.addOption(property);

    Option skipChecksums = new Option("skipChecksums", "skip checksums");
    options.addOption(skipChecksums);

    Option profile = new Option("profile", "profile tasks");
    options.addOption(profile);

    CommandLineParser parser = new BasicParser();
    CommandLine line = parser.parse(options, remArgs);

    Properties properties = line.getOptionProperties("D");
    for (Entry<Object, Object> prop : properties.entrySet()) {
        conf.set(prop.getKey().toString(), prop.getValue().toString());
        System.out.println("Set config key " + prop.getKey() + " to " + prop.getValue());
    }

    if (line.hasOption("skipChecksums")) {
        conf.setBoolean("bytecount.skipChecksums", true);
        System.out.println("Skipping checksums");
    }

    if (line.hasOption("profile")) {
        conf.setBoolean("mapred.task.profile", true);
        conf.set("mapred.task.profile.params",
                "-agentlib:hprof=cpu=samples,depth=100,interval=1ms,lineno=y,thread=y,file=%s");
        conf.set(MRJobConfig.NUM_MAP_PROFILES, "0");
        conf.set("mapred.task.profile.maps", "1");
        System.out.println("Profiling map tasks");
    }

    // Get the positional arguments out
    remArgs = line.getArgs();
    if (remArgs.length != 2) {
        System.err.println("Usage: ByteCount <inputBase> <outputBase>");
        System.exit(1);
    }
    String inputBase = remArgs[0];
    String outputBase = remArgs[1];

    Job job = Job.getInstance(conf);

    job.setInputFormatClass(ByteBufferInputFormat.class);

    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(ByteCountMapper.class);
    job.setReducerClass(ByteCountReducer.class);
    job.setCombinerClass(ByteCountReducer.class);

    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(inputBase));
    FileOutputFormat.setOutputPath(job, new Path(outputBase));

    job.setJarByClass(ByteCount.class);

    boolean success = job.waitForCompletion(true);

    Counters counters = job.getCounters();
    System.out.println("\tRead counters");
    printCounter(counters, READ_COUNTER.BYTES_READ);
    printCounter(counters, READ_COUNTER.LOCAL_BYTES_READ);
    printCounter(counters, READ_COUNTER.SCR_BYTES_READ);
    printCounter(counters, READ_COUNTER.ZCR_BYTES_READ);

    System.exit(success ? 0 : 1);
}

From source file:com.cloudera.recordservice.avro.mapred.ColorCount.java

License:Apache License

/**
 * Run the MR1 color count with generic records, and return a map of favorite colors to
 * the number of users.
 */
public static java.util.Map<String, Integer> countColors() throws IOException {
    String output = TestUtil.getTempDirectory();
    Path outputPath = new Path(output);

    JobConf conf = new JobConf(ColorCount.class);
    conf.setJobName("MR1 Color Count With Generic Records");
    conf.setInt("mapreduce.job.reduces", 1);

    conf.setBoolean(com.cloudera.recordservice.avro.AvroJob.USE_RECORD_SERVICE_INPUT_FORMAT_CONF_KEY, true);
    com.cloudera.recordservice.avro.AvroJob.setInputFormat(conf, org.apache.avro.mapred.AvroInputFormat.class);

    RecordServiceConfig.setInputTable(conf, "rs", "users");
    FileOutputFormat.setOutputPath(conf, outputPath);

    AvroJob.setMapperClass(conf, Map.class);
    AvroJob.setReducerClass(conf, Reduce.class);
    AvroJob.setOutputSchema(conf,
            Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)));

    JobClient.runJob(conf);

    // Read the result and return it. Since we set the number of reducers to 1,
    // there is always just one file containing the value.
    SeekableInput input = new FsInput(new Path(output + "/part-00000.avro"), conf);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
    java.util.Map<String, Integer> colorMap = new HashMap<String, Integer>();
    for (GenericRecord datum : fileReader) {
        colorMap.put(datum.get(0).toString(), Integer.parseInt(datum.get(1).toString()));
    }
    return colorMap;
}

From source file:com.cloudera.recordservice.examples.mapreduce.MapredColorCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    org.apache.log4j.BasicConfigurator.configure();

    if (args.length != 2) {
        System.err.println("Usage: MapredColorCount <input path> <output path>");
        return -1;
    }

    JobConf conf = new JobConf(getConf(), MapredColorCount.class);
    conf.setJobName("colorcount With Generic Records");

    // RECORDSERVICE:
    // By using the RecordService AvroJob utility, we can configure at run time
    // whether or not to use the RecordService.
    // In this example, we set the conf flag to true to enable the RecordService.
    conf.setBoolean(com.cloudera.recordservice.avro.AvroJob.USE_RECORD_SERVICE_INPUT_FORMAT_CONF_KEY, true);
    com.cloudera.recordservice.avro.AvroJob.setInputFormat(conf, org.apache.avro.mapred.AvroInputFormat.class);

    // RECORDSERVICE:
    // To read from a table instead of a path, comment out setInputPaths and instead use:
    RecordServiceConfig.setInputTable(conf, "rs", "users");
    //FileInputFormat.setInputPaths(conf, new Path(args[0]));

    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    AvroJob.setMapperClass(conf, ColorCountMapper.class);
    AvroJob.setReducerClass(conf, ColorCountReducer.class);

    // Note that AvroJob.setOutputSchema sets relevant config options such as the
    // output format, map output classes, and output key class.
    // There is no need to call setInputSchema when using generic records.
    AvroJob.setOutputSchema(conf,
            Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)));

    JobClient.runJob(conf);
    return 0;
}