Usage examples for org.apache.hadoop.mapred.JobConf#getNumMapTasks

public int getNumMapTasks()
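getNumMapTasks() reads the "mapred.map.tasks" property from the job configuration (JobConf falls back to 1 when the property is not set anywhere); the value is only a hint, since the InputFormat's getSplits() ultimately decides how many map tasks run. Before the full examples below, here is a minimal sketch of the call in isolation; the class name NumMapTasksDemo is hypothetical and not from any of the source files listed:

import org.apache.hadoop.mapred.JobConf;

public class NumMapTasksDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Reads "mapred.map.tasks"; the hardcoded fallback in JobConf is 1,
        // though loaded default resources may set a different value.
        System.out.println("Configured map tasks: " + conf.getNumMapTasks());

        // setNumMapTasks() writes the same property. It remains a hint:
        // the InputFormat's getSplits() decides how many maps actually run.
        conf.setNumMapTasks(8);
        System.out.println("After setNumMapTasks(8): " + conf.getNumMapTasks());
    }
}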
From source file: org.hxx.hadoop.GeneratorRedHbase.java
License: Apache License
private RunningJob generateJob(String table, Path segment, int numLists, long topN, long curTime,
        boolean filter, boolean norm, boolean force) throws IOException {
    LOG.info("Generator: segment=" + segment);

    JobConf job = new NutchJob(getConf());
    job.setJarByClass(GeneratorRedHbase.class);
    job.setJobName("generate: from " + table + " "
            + (new SimpleDateFormat("MMdd HH:mm:ss")).format(System.currentTimeMillis()));
    // job.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 300000);

    if (numLists == -1) {
        numLists = job.getNumMapTasks(); // a partition per fetch task
    }
    if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) { // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }
    // job.setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
    job.setLong(GENERATOR_TOP_N, topN);
    job.setBoolean(GENERATOR_FILTER, filter);
    job.setBoolean(GENERATOR_NORMALISE, norm);
    job.set(GENERATL_TABLE, table);
    job.setInt(GENERATL_REDUCENUM, numLists);
    job.setInt("partition.url.seed", new Random().nextInt());

    job.setInputFormat(CodeInputFormat.class);
    job.setNumMapTasks(1);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(GenerateMark.class);
    job.setNumReduceTasks(numLists);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setOutputKeyComparatorClass(HashComparator.class);

    Path output = new Path(segment, CrawlDatum.GENERATE_DIR_NAME);
    FileOutputFormat.setOutputPath(job, output);

    return JobClient.runJob(job);
}
From source file: org.macau.util.FuzzyJoinDriver.java
License: Apache License
/**
 * Run the job and print basic information about it: the start time,
 * the finish time, and the running time (finishTime - startTime).
 *
 * @param job the job to run
 * @throws IOException
 */
public static void run(JobConf job) throws IOException {
    job.setJarByClass(FuzzyJoinDriver.class);
    //
    // print info
    //
    String ret = "FuzzyJoinDriver(" + job.getJobName() + ")\n" + " Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n"
            + " Map Jobs: " + job.getNumMapTasks() + "\n"
            + " Reduce Jobs: " + job.getNumReduceTasks() + "\n"
            + " Properties: {";
    String[][] properties = new String[][] {
            new String[] { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE },
            new String[] { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE },
            new String[] { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE },
            new String[] { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE },
            new String[] { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE },
            new String[] { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE },
            new String[] { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE },
            new String[] { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" },
            new String[] { DATA_JOININDEX_PROPERTY, "" }, };
    for (int crt = 0; crt < properties.length; crt++) {
        if (crt > 0) {
            ret += "\n ";
        }
        ret += properties[crt][0] + "=" + job.get(properties[crt][0], properties[crt][1]);
    }
    ret += "}";
    System.out.println(ret);
    //
    // run job
    //
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println(
            "The job took " + (endTime.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
}
From source file: org.pentaho.hadoop.mapreduce.MRUtil.java
License: Apache License
public static void passInformationToTransformation(final VariableSpace variableSpace, final JobConf job) {
    if (variableSpace != null && job != null) {
        variableSpace.setVariable("Internal.Hadoop.NumMapTasks", Integer.toString(job.getNumMapTasks()));
        variableSpace.setVariable("Internal.Hadoop.NumReduceTasks", Integer.toString(job.getNumReduceTasks()));
        String taskId = job.get("mapred.task.id");
        variableSpace.setVariable("Internal.Hadoop.TaskId", taskId);
        // TODO: Verify if the string range holds true for all Hadoop distributions
        // Extract the node number from the task ID.
        // The consensus currently is that it's the part after the last underscore.
        //
        // Examples:
        //   job_201208090841_9999
        //   job_201208090841_10000
        //
        String nodeNumber;
        if (Const.isEmpty(taskId)) {
            nodeNumber = "0";
        } else {
            int lastUnderscoreIndex = taskId.lastIndexOf("_");
            if (lastUnderscoreIndex >= 0) {
                nodeNumber = taskId.substring(lastUnderscoreIndex + 1);
            } else {
                nodeNumber = "0";
            }
        }
        // Get rid of leading zeroes by parsing and re-printing the number.
        variableSpace.setVariable("Internal.Hadoop.NodeNumber", Integer.toString(Integer.valueOf(nodeNumber)));
    }
}
From source file: org.pooledtimeseries.MeanChiSquareDistanceCalculation.java
License: Apache License
public static void main(String[] args) throws Exception {
    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf(baseConf, MeanChiSquareDistanceCalculation.class);
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    conf.setJobName("mean_chi_square_calculation");
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name- " + conf.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(DoubleWritable.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setOutputFormat(TextOutputFormat.class);
    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, args[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, args[0]);
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    JobClient.runJob(conf);
}
From source file: org.pooledtimeseries.SimilarityCalculation.java
License: Apache License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(196);
    System.out.println("After Map:" + conf.getNumMapTasks());

    conf.setJobName("similarity_calc");
    conf.set("meanDistsFilePath", args[2]);
    System.out.println("Job Name: " + conf.getJobName());
    conf.setJarByClass(SimilarityCalculation.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, args[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, args[0]);
    conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setMapperClass(Map.class);

    JobClient.runJob(conf);
}
From source file: org.smartfrog.services.hadoop.mapreduce.terasort.TeraInputFormat.java
License: Apache License
/**
 * Use the input splits to take samples of the input and generate sample keys.
 * By default reads 100,000 keys from 10 locations in the input, sorts them
 * and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 */
public static void writePartitionFile(JobConf conf, Path partFile) throws IOException {
    TeraInputFormat inFormat = new TeraInputFormat();
    TextSampler sampler = new TextSampler();
    Text key = new Text();
    Text value = new Text();
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(TeraConstants.SAMPLE_SIZE, 100000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    long records = 0;
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        RecordReader<Text, Text> reader = inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        while (reader.next(key, value)) {
            sampler.addKey(key);
            records += 1;
            if ((i + 1) * recordsPerSample <= records) {
                break;
            }
        }
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile,
            Text.class, NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    for (Text split : sampler.createPartitions(partitions)) {
        writer.append(split, nullValue);
    }
    writer.close();
}