Example usage for org.apache.hadoop.mapred JobConf getNumMapTasks

List of usage examples for org.apache.hadoop.mapred JobConf getNumMapTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf getNumMapTasks.

Prototype

public int getNumMapTasks() 

Document

Get the configured number of map tasks for this job.
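
Before the usage listings, a minimal, self-contained sketch of the typical call site. This is an illustration only (the class name NumMapTasksExample and the command-line input path are assumptions, not taken from the sources below): the value returned by getNumMapTasks() is commonly passed to InputFormat.getSplits(JobConf, int) as the requested number of splits, which the old-API input formats treat as a hint rather than a hard limit.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;

// Illustrative driver; any code that builds a JobConf could do the same.
public class NumMapTasksExample {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf();
        FileInputFormat.addInputPath(conf, new Path(args[0]));

        // Number of map tasks configured for the job (defaults to 1 when unset).
        int numMaps = conf.getNumMapTasks();

        // Pass the configured value as the split-count hint.
        InputFormat inputFormat = conf.getInputFormat();
        InputSplit[] splits = inputFormat.getSplits(conf, numMaps);

        System.out.println("Requested map tasks: " + numMaps);
        System.out.println("Splits generated:    " + splits.length);
    }
}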

Usage

From source file:crunch.MaxTemperature.java

License:Apache License

    @Test
    @Ignore("Needs more investigation")
    public void compression() throws IOException {
        int recordLength = 1024;
        Path input = new Path("input.bz2");
        createFile(input, 24, recordLength);
        System.out.println(">>>>>>" + fs.getLength(input));

        JobConf job = new JobConf();
        job.set("fs.default.name", fs.getUri().toString());
        FileInputFormat.addInputPath(job, input);
        InputFormat<LongWritable, Text> inputFormat = job.getInputFormat();
        InputSplit[] splits = inputFormat.getSplits(job, job.getNumMapTasks());

        System.out.println(Arrays.asList(splits));
        assertThat(splits.length, is(2));
        checkSplit(splits[0], 0, 4096);
        checkSplit(splits[1], 4096, 4096);

        checkRecordReader(inputFormat, splits[0], job, recordLength, 0, 4);
        checkRecordReader(inputFormat, splits[1], job, recordLength, 5, 12);

    }

From source file:edu.brown.cs.mapreduce.BenchmarkBase.java

License:Open Source License

public void runJob(JobConf _conf) throws Exception {
    String ret = "BenchmarkBase(" + _conf.getJobName() + ")\n" + "\tInput Path:  {";
    Path inputs[] = FileInputFormat.getInputPaths(_conf);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0)
            ret += ", ";
        ret += inputs[ctr].toString();
    }
    ret += "}\n";

    ret += "\tOutput Path: " + FileOutputFormat.getOutputPath(_conf) + "\n" + "\tMap Jobs:    "
            + _conf.getNumMapTasks() + "\n" + "\tReduce Jobs: " + _conf.getNumReduceTasks() + "\n"
            + "\tProperties:  " + this.options;
    System.out.println(ret);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(_conf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println(
            "The job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
    this.last_job = _conf;
    return;
}

From source file:edu.iu.common.MultiFileInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    int numMaps = jobConf.getNumMapTasks();
    LOG.info("NUMBER OF FILES: " + files.size());
    LOG.info("NUMBER OF MAPS: " + numMaps);
    int avg = files.size() / numMaps;
    int rest = files.size() % numMaps;
    int tmp = 0;
    long length = 0;
    List<Path> pathList = null;
    Set<String> hostSet = null;
    // Random random = new Random(System.nanoTime());
    for (FileStatus file : files) {
        if (tmp == 0) {
            pathList = new ArrayList<Path>();
            hostSet = new HashSet<String>();
        }
        if (tmp < avg) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            tmp++;
            if (tmp == avg && rest == 0) {
                LOG.info("Split on host: " + getHostsString(hostSet));
                splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
                tmp = 0;
                length = 0;
            }
        } else if (tmp == avg && rest > 0) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            rest--;
            LOG.info("Split on host: " + getHostsString(hostSet));
            splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
            tmp = 0;
            length = 0;
        }
    }
    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(NUM_INPUT_FILES, numMaps);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}

From source file:edu.iu.fileformat.MultiFileInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Generate splits
    List<InputSplit> splits = new ArrayList<>();
    List<FileStatus> files = listStatus(job);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    int numMaps = jobConf.getNumMapTasks();
    LOG.info("NUMBER OF FILES: " + files.size());
    LOG.info("NUMBER OF MAPS: " + numMaps);
    // randomizeFileListOrder(files);
    int avg = files.size() / numMaps;
    int rest = files.size() % numMaps;
    int tmp = 0;
    long length = 0;
    List<Path> pathList = null;
    Set<String> hostSet = null;
    for (FileStatus file : files) {
        if (tmp == 0) {
            pathList = new ArrayList<>();
            hostSet = new HashSet<>();
        }
        if (tmp < avg) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            tmp++;
            if (tmp == avg && rest == 0) {
                LOG.info("Split on host: " + getHostsString(hostSet));
                splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
                tmp = 0;
                length = 0;
            }
        } else if (tmp == avg && rest > 0) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            rest--;
            LOG.info("Split on host: " + getHostsString(hostSet));
            splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
            tmp = 0;
            length = 0;
        }
    }
    // Save the number of input files in the
    // job-conf
    job.getConfiguration().setLong(NUM_INPUT_FILES, numMaps);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}

From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java

License:Apache License

public JobInfo getInfo(Submission submission, RunningJob job, JobConf conf)
        throws NotFoundException, InternalException {

    JobInfo info = new JobInfo();

    info.setNativeID(submission.getHadoopID());
    info.setName(job.getJobName());
    info.setTest(false);

    if (conf == null)
        // Can't proceed any further if configuration is unavailable
        return info;

    info.setRequestedMapTasks(conf.getNumMapTasks());
    info.setRequestedReduceTasks(conf.getNumReduceTasks());
    info.setMapper(conf.get(CONF_MAPPER));
    info.setReducer(conf.get(CONF_REDUCER));
    info.setNumericSort(conf.getBoolean(CONF_NUMERIC, false));
    info.setInputPath(
            JobServiceHandler.relativizePath(_homeDir, FileInputFormat.getInputPaths(conf)[0]).toString());
    info.setOutputPath(
            JobServiceHandler.relativizePath(_homeDir, FileOutputFormat.getOutputPath(conf)).toString());

    return info;
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.FuzzyJoinDriver.java

License:Apache License

public static void run(JobConf job) throws IOException {
    job.setJarByClass(FuzzyJoinDriver.class);
    //
    // print info
    //
    String ret = "FuzzyJoinDriver(" + job.getJobName() + ")\n" + "  Input Path:  {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + "  Map Jobs:    "
            + job.getNumMapTasks() + "\n" + "  Reduce Jobs: " + job.getNumReduceTasks() + "\n"
            + "  Properties:  {";
    String[][] properties = new String[][] {
            new String[] { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE },
            new String[] { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE },
            new String[] { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE },
            new String[] { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE },
            new String[] { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE },
            new String[] { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE },
            new String[] { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE },
            new String[] { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" },
            new String[] { DATA_JOININDEX_PROPERTY, "" }, };
    for (int crt = 0; crt < properties.length; crt++) {
        if (crt > 0) {
            ret += "\n                ";
        }
        ret += properties[crt][0] + "=" + job.get(properties[crt][0], properties[crt][1]);
    }
    ret += "}";
    System.out.println(ret);
    //
    // run job
    //
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println(
            "The job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

private InputSplit[] getOldInputSplits(JobConf conf) throws IOException {
    InputFormat inputFormat = conf.getInputFormat();
    return inputFormat.getSplits(conf, conf.getNumMapTasks());
}

From source file:edu.ucsb.cs.knn.KnnDriver.java

License:Apache License

/**
 * Submit the configured job to Hadoop JobTracker to start the process.
 */
public static void run(JobConf job) throws IOException {

    job.setJarByClass(KnnDriver.class); // This method sets the jar
    String ret = stars() + "\nKnnDriver(" + job.getJobName() + ")\n" + "  Input Path:  {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + "  Map Tasks:    "
            + job.getNumMapTasks() + "\n" + "  Reduce Tasks: " + job.getNumReduceTasks() + "\n";
    ret += "  Threshold:    " + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE) + "\n";
    System.out.println(ret);
    //
    // run job
    //
    JobClient.runJob(job);
}

From source file:edu.ucsb.cs.utilities.JobSubmitter.java

License:Apache License

public static void run(JobConf job, String title, float Threshold) {

    String ret = stars() + "\n [" + title + "]\n" + stars() + "\n  Running job:  " + job.getJobName()
            + "\n  Input Path:   {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path:  " + FileOutputFormat.getOutputPath(job) + "\n" + "  Num. of mappers: "
            + job.getNumMapTasks() + "\n" + "  Num. of reducers: " + job.getNumReduceTasks() + "\n";
    if (Threshold != -1)
        ret += "  Threshold: " + Threshold + "\n";
    //      for (int ctr = 0; ctr < Properties.requiredParameters.size(); ctr++)//check
    //         ret += Properties.requiredParameters.get(ctr) + "\n";
    System.out.println(ret);
    try {
        Date startTime = new Date();
        JobClient.runJob(job);
        Date end_time = new Date();
        System.err.println(
                "Job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");

    } catch (IOException e) {
        System.err.println("ERROR: While submitting the job :(");
        e.printStackTrace();
    }
}

From source file:gov.nasa.jpl.memex.pooledtimeseries.MeanChiSquareDistanceCalculation.java

License:Apache License

public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    job.setJarByClass(MeanChiSquareDistanceCalculation.class);

    job.setJobName("mean_chi_square_calculation");
    System.out.println("Job ID" + job.getJobID());
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));
    System.out.println("Caching video-metric-bak.tgz");
    job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz"));
    URI[] cacheFiles = job.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        System.out.println("Cache file ->" + cacheFiles[0]);
    }
    System.out.println("Cached video-metric-bak.tgz");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.waitForCompletion(true);

}