List of usage examples for org.apache.hadoop.mapred.JobConf.getNumMapTasks()
public int getNumMapTasks()
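getNumMapTasks() returns the configured map-task count hint (the mapreduce.job.maps / mapred.map.tasks property; the default is 1). It is only a hint: as the examples below show, the value is typically logged or passed to InputFormat.getSplits(), and the actual number of map tasks is the number of splits the InputFormat produces. A minimal sketch of that pattern (the class name and the input path taken from args are illustrative, not from the examples below):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;

public class NumMapTasksSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.setNumMapTasks(4);                                  // hint only; the default is 1
        FileInputFormat.setInputPaths(conf, new Path(args[0])); // illustrative input path
        // The hint is handed to the InputFormat, which decides the real split count.
        InputSplit[] splits = conf.getInputFormat().getSplits(conf, conf.getNumMapTasks());
        System.out.println("Requested maps: " + conf.getNumMapTasks()
                + ", actual splits: " + splits.length);
    }
}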
From source file: crunch.MaxTemperature.java
License: Apache License

@Test
@Ignore("Needs more investigation")
public void compression() throws IOException {
    int recordLength = 1024;
    Path input = new Path("input.bz2");
    createFile(input, 24, recordLength);
    System.out.println(">>>>>>" + fs.getLength(input));
    JobConf job = new JobConf();
    job.set("fs.default.name", fs.getUri().toString());
    FileInputFormat.addInputPath(job, input);
    InputFormat<LongWritable, Text> inputFormat = job.getInputFormat();
    InputSplit[] splits = inputFormat.getSplits(job, job.getNumMapTasks());
    System.out.println(Arrays.asList(splits));
    assertThat(splits.length, is(2));
    checkSplit(splits[0], 0, 4096);
    checkSplit(splits[1], 4096, 4096);
    checkRecordReader(inputFormat, splits[0], job, recordLength, 0, 4);
    checkRecordReader(inputFormat, splits[1], job, recordLength, 5, 12);
}
From source file: edu.brown.cs.mapreduce.BenchmarkBase.java
License: Open Source License

public void runJob(JobConf _conf) throws Exception {
    String ret = "BenchmarkBase(" + _conf.getJobName() + ")\n" + "\tInput Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(_conf);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0)
            ret += ", ";
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "\tOutput Path: " + FileOutputFormat.getOutputPath(_conf) + "\n"
            + "\tMap Jobs: " + _conf.getNumMapTasks() + "\n"
            + "\tReduce Jobs: " + _conf.getNumReduceTasks() + "\n"
            + "\tProperties: " + this.options;
    System.out.println(ret);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(_conf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took "
            + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
    this.last_job = _conf;
    return;
}
From source file: edu.iu.common.MultiFileInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    int numMaps = jobConf.getNumMapTasks();
    LOG.info("NUMBER OF FILES: " + files.size());
    LOG.info("NUMBER OF MAPS: " + numMaps);
    int avg = files.size() / numMaps;
    int rest = files.size() % numMaps;
    int tmp = 0;
    long length = 0;
    List<Path> pathList = null;
    Set<String> hostSet = null;
    // Random random = new Random(System.nanoTime());
    for (FileStatus file : files) {
        if (tmp == 0) {
            pathList = new ArrayList<Path>();
            hostSet = new HashSet<String>();
        }
        if (tmp < avg) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            tmp++;
            if (tmp == avg && rest == 0) {
                LOG.info("Split on host: " + getHostsString(hostSet));
                splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
                tmp = 0;
                length = 0;
            }
        } else if (tmp == avg && rest > 0) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            rest--;
            LOG.info("Split on host: " + getHostsString(hostSet));
            splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
            tmp = 0;
            length = 0;
        }
    }
    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(NUM_INPUT_FILES, numMaps);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}
From source file: edu.iu.fileformat.MultiFileInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Generate splits
    List<InputSplit> splits = new ArrayList<>();
    List<FileStatus> files = listStatus(job);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    int numMaps = jobConf.getNumMapTasks();
    LOG.info("NUMBER OF FILES: " + files.size());
    LOG.info("NUMBER OF MAPS: " + numMaps);
    // randomizeFileListOrder(files);
    int avg = files.size() / numMaps;
    int rest = files.size() % numMaps;
    int tmp = 0;
    long length = 0;
    List<Path> pathList = null;
    Set<String> hostSet = null;
    for (FileStatus file : files) {
        if (tmp == 0) {
            pathList = new ArrayList<>();
            hostSet = new HashSet<>();
        }
        if (tmp < avg) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            tmp++;
            if (tmp == avg && rest == 0) {
                LOG.info("Split on host: " + getHostsString(hostSet));
                splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
                tmp = 0;
                length = 0;
            }
        } else if (tmp == avg && rest > 0) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            rest--;
            LOG.info("Split on host: " + getHostsString(hostSet));
            splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
            tmp = 0;
            length = 0;
        }
    }
    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(NUM_INPUT_FILES, numMaps);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}
From source file: edu.stolaf.cs.wmrserver.HadoopEngine.java
License: Apache License

public JobInfo getInfo(Submission submission, RunningJob job, JobConf conf)
        throws NotFoundException, InternalException {
    JobInfo info = new JobInfo();
    info.setNativeID(submission.getHadoopID());
    info.setName(job.getJobName());
    info.setTest(false);

    // Can't proceed any further if configuration is unavailable
    if (conf == null)
        return info;

    info.setRequestedMapTasks(conf.getNumMapTasks());
    info.setRequestedReduceTasks(conf.getNumReduceTasks());
    info.setMapper(conf.get(CONF_MAPPER));
    info.setReducer(conf.get(CONF_REDUCER));
    info.setNumericSort(conf.getBoolean(CONF_NUMERIC, false));
    info.setInputPath(
            JobServiceHandler.relativizePath(_homeDir, FileInputFormat.getInputPaths(conf)[0]).toString());
    info.setOutputPath(
            JobServiceHandler.relativizePath(_homeDir, FileOutputFormat.getOutputPath(conf)).toString());

    return info;
}
From source file: edu.uci.ics.fuzzyjoin.hadoop.FuzzyJoinDriver.java
License: Apache License

public static void run(JobConf job) throws IOException {
    job.setJarByClass(FuzzyJoinDriver.class);
    //
    // print info
    //
    String ret = "FuzzyJoinDriver(" + job.getJobName() + ")\n" + " Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n"
            + " Map Jobs: " + job.getNumMapTasks() + "\n"
            + " Reduce Jobs: " + job.getNumReduceTasks() + "\n"
            + " Properties: {";
    String[][] properties = new String[][] {
            new String[] { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE },
            new String[] { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE },
            new String[] { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE },
            new String[] { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE },
            new String[] { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE },
            new String[] { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE },
            new String[] { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE },
            new String[] { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" },
            new String[] { DATA_JOININDEX_PROPERTY, "" }, };
    for (int crt = 0; crt < properties.length; crt++) {
        if (crt > 0) {
            ret += "\n ";
        }
        ret += properties[crt][0] + "=" + job.get(properties[crt][0], properties[crt][1]);
    }
    ret += "}";
    System.out.println(ret);
    //
    // run job
    //
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took "
            + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
}
From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License: Apache License

private InputSplit[] getOldInputSplits(JobConf conf) throws IOException {
    InputFormat inputFormat = conf.getInputFormat();
    return inputFormat.getSplits(conf, conf.getNumMapTasks());
}
From source file: edu.ucsb.cs.knn.KnnDriver.java
License: Apache License

/**
 * Submit the configured job to Hadoop JobTracker to start the process.
 */
public static void run(JobConf job) throws IOException {
    job.setJarByClass(KnnDriver.class); // This method sets the jar

    String ret = stars() + "\nKnnDriver(" + job.getJobName() + ")\n" + " Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n"
            + " Map Tasks: " + job.getNumMapTasks() + "\n"
            + " Reduce Tasks: " + job.getNumReduceTasks() + "\n";
    ret += " Threshold: " + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE) + "\n";
    System.out.println(ret);
    //
    // run job
    //
    JobClient.runJob(job);
}
From source file: edu.ucsb.cs.utilities.JobSubmitter.java
License: Apache License

public static void run(JobConf job, String title, float Threshold) {
    String ret = stars() + "\n [" + title + "]\n" + stars() + "\n Running job: " + job.getJobName()
            + "\n Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n"
            + " Num. of mappers: " + job.getNumMapTasks() + "\n"
            + " Num. of reducers: " + job.getNumReduceTasks() + "\n";
    if (Threshold != -1)
        ret += " Threshold: " + Threshold + "\n";
    // for (int ctr = 0; ctr < Properties.requiredParameters.size(); ctr++) // check
    //     ret += Properties.requiredParameters.get(ctr) + "\n";
    System.out.println(ret);

    try {
        Date startTime = new Date();
        JobClient.runJob(job);
        Date end_time = new Date();
        System.err.println("Job took "
                + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
    } catch (IOException e) {
        System.err.println("ERROR: While submitting the job :(");
        e.printStackTrace();
    }
}
From source file: gov.nasa.jpl.memex.pooledtimeseries.MeanChiSquareDistanceCalculation.java
License: Apache License

public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    job.setJarByClass(MeanChiSquareDistanceCalculation.class);
    job.setJobName("mean_chi_square_calculation");

    System.out.println("Job ID" + job.getJobID());
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    System.out.println("Caching video-metric-bak.tgz");
    job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz"));
    URI[] cacheFiles = job.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        System.out.println("Cache file ->" + cacheFiles[0]);
    }
    System.out.println("Cached video-metric-bak.tgz");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.waitForCompletion(true);
}
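Note that in this last example setNumMapTasks(96) is called on a standalone JobConf that is never handed to the Job built from baseConf, so only the "mapreduce.job.maps" value set on baseConf can reach the submitted job. A small sketch, assuming Hadoop 2.x property names (the class name is hypothetical), of setting the same hint on the configuration the Job actually uses and reading it back with getNumMapTasks():

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;

public class MapHintSketch {
    public static void main(String[] args) throws Exception {
        Configuration baseConf = new Configuration();
        // Same property that JobConf.setNumMapTasks(96) writes on Hadoop 2.x
        baseConf.setInt("mapreduce.job.maps", 96);
        JobConf wrapped = new JobConf(baseConf);
        System.out.println("Map task hint: " + wrapped.getNumMapTasks()); // expected: 96
        // The hint travels with baseConf into the submitted job
        Job job = Job.getInstance(baseConf);
        System.out.println(job.getConfiguration().getInt("mapreduce.job.maps", 1));
    }
}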