List of usage examples for org.apache.hadoop.mapred JobConf getJobName
public String getJobName()
From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java
License:Apache License
private static void writePartitionFile(JobConf job, Sampler sampler) { try {/* ww w.jav a2 s . c o m*/ //////////////////////////////////////////////// // first, getting samples from the data sources //////////////////////////////////////////////// LOGGER.info("Running local sampling for job [" + job.getJobName() + "]"); InputFormat inf = job.getInputFormat(); Object[] samples = sampler.getSample(inf, job); LOGGER.info("Samples retrieved, sorting..."); //////////////////////////////////////////////// // sort the samples //////////////////////////////////////////////// RawComparator comparator = job.getOutputKeyComparator(); Arrays.sort(samples, comparator); if (job.getBoolean("mobius.print.sample", false)) { PrintWriter pw = new PrintWriter( new OutputStreamWriter(new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream( new File(job.get("mobius.sample.file", "./samples.txt.gz"))))))); for (Object obj : samples) { pw.println(obj); } pw.flush(); pw.close(); } //////////////////////////////////////////////// // start to write partition files //////////////////////////////////////////////// FileSystem fs = FileSystem.get(job); Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job))); while (fs.exists(partitionFile)) { partitionFile = new Path(partitionFile.toString() + "." 
+ System.currentTimeMillis()); } fs.deleteOnExit(partitionFile); TotalOrderPartitioner.setPartitionFile(job, partitionFile); LOGGER.info("write partition file to:" + partitionFile.toString()); int reducersNbr = job.getNumReduceTasks(); Set<Object> wroteSamples = new HashSet<Object>(); SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class, NullWritable.class); float avgReduceSize = samples.length / reducersNbr; int lastBegin = 0; for (int i = 0; i < samples.length;) { // trying to distribute the load for every reducer evenly, // dividing the <code>samples</code> into a set of blocks // separated by boundaries, objects that selected from the // <code>samples</code> array, and each blocks should have // about the same size. // find the last index of element that equals to samples[i], as // such element might appear multiple times in the samples. int upperBound = Util.findUpperBound(samples, samples[i], comparator); int lowerBound = i;//Util.findLowerBound(samples, samples[i], comparator); // the repeat time of samples[i], if the key itself is too big // select it as boundary int currentElemSize = upperBound - lowerBound + 1; if (currentElemSize > avgReduceSize * 2) // greater than two times of average reducer size { // the current element is too big, greater than // two times of the <code>avgReduceSize</code>, // put itself as boundary writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get()); wroteSamples.add(((DataJoinKey) samples[i]).getKey()); //pw.println(samples[i]); // immediate put the next element to the boundary, // the next element starts at <code> upperBound+1 // </code>, to prevent the current one consume even // more. 
if (upperBound + 1 < samples.length) { writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get()); wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey()); //pw.println(samples[upperBound+1]); // move on to the next element of <code>samples[upperBound+1]/code> lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1; i = lastBegin; } else { break; } } else { // current element is small enough to be consider // with previous group int size = upperBound - lastBegin; if (size > avgReduceSize) { // by including the current elements, we have // found a block that's big enough, select it // as boundary writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get()); wroteSamples.add(((DataJoinKey) samples[i]).getKey()); //pw.println(samples[i]); i = upperBound + 1; lastBegin = i; } else { i = upperBound + 1; } } } writer.close(); // if the number of wrote samples doesn't equals to number of // reducer minus one, then it means the key spaces is too small // hence TotalOrderPartitioner won't work, it works only if // the partition boundaries are distinct. // // we need to change the number of reducers if (wroteSamples.size() + 1 != reducersNbr) { LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size() + ", reducer size:" + (reducersNbr)); LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1)); // add 1 because the wrote samples define boundary, ex, if // the sample size is two with two element [300, 1000], then // there should be 3 reducers, one for handling i<300, one // for n300<=i<1000, and another one for 1000<=i job.setNumReduceTasks((wroteSamples.size() + 1)); } samples = null; } catch (IOException e) { LOGGER.error(e.getMessage(), e); throw new RuntimeException(e); } }
From source file:com.ricemap.spateDB.core.SpatialSite.java
License:Apache License
/**
 * Writes the given cells to a temporary file, adds that file to the
 * distributed cache of the job, and stores the file's name in the job
 * configuration under {@code OUTPUT_CELLS}.
 *
 * <p>The file name is built from the job name, a random integer below one
 * million, and the {@code .cells} suffix; a new random number is drawn until
 * the name does not exist on the job's file system. The file is marked for
 * deletion on exit once it has been written.
 *
 * @param job       job configuration to update
 * @param cellsInfo cells to serialize; the count is written first, then each cell
 * @throws IOException if the file system cannot be accessed or writing fails
 */
public static void setCells(JobConf job, CellInfo[] cellsInfo) throws IOException {
    Path tempFile;
    FileSystem fs = FileSystem.get(job);
    do {
        tempFile = new Path(job.getJobName() + "_" + (int) (Math.random() * 1000000) + ".cells");
    } while (fs.exists(tempFile));

    FSDataOutputStream out = fs.create(tempFile);
    try {
        out.writeInt(cellsInfo.length);
        for (CellInfo cell : cellsInfo) {
            cell.write(out);
        }
    } finally {
        // close the stream even when serializing a cell fails, so it is not leaked
        out.close();
    }

    fs.deleteOnExit(tempFile);

    DistributedCache.addCacheFile(tempFile.toUri(), job);
    job.set(OUTPUT_CELLS, tempFile.getName());
    LOG.info("Partitioning file into " + cellsInfo.length + " cells");
}
From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java
License:Apache License
public HServerJobClient(JobConf conf) throws IOException { //super(conf); -- do not attempt to initialize cluster jobID = JobID.forName("job_" + conf.getJobName() + "_0"); jobConf = conf;/* w ww .ja v a2 s. c om*/ }
From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java
License:Apache License
@Override public RunningJob submitJob(final JobConf job) throws IOException { ensureInvocationGridPresent();//from w ww. ja v a 2 s . c o m ExecutorService async = Executors.newSingleThreadExecutor(); final JobID jobID = JobID.forName("job_" + job.getJobName() + "_0"); Future jobSubmitted = async.submit(new Callable<Object>() { @Override public Object call() throws Exception { try { JobScheduler.getInstance().runOldApiJob(job, jobID, sortEnabled, null, grid); } finally { if (unloadGrid) { grid.unload(); } } return null; } }); async.shutdown(); //Will shut down after task is done return new HServerRunningJob(jobID, jobSubmitted); }
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public void runJob(JobConf _conf) throws Exception { String ret = "BenchmarkBase(" + _conf.getJobName() + ")\n" + "\tInput Path: {"; Path inputs[] = FileInputFormat.getInputPaths(_conf); for (int ctr = 0; ctr < inputs.length; ctr++) { if (ctr > 0) ret += ", "; ret += inputs[ctr].toString();// w w w.j a v a2s . c o m } ret += "}\n"; ret += "\tOutput Path: " + FileOutputFormat.getOutputPath(_conf) + "\n" + "\tMap Jobs: " + _conf.getNumMapTasks() + "\n" + "\tReduce Jobs: " + _conf.getNumReduceTasks() + "\n" + "\tProperties: " + this.options; System.out.println(ret); Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(_conf); Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println( "The job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds."); this.last_job = _conf; return; }
From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark3.java
License:Open Source License
public int run(String[] args) throws Exception { BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args); Date startTime = new Date(); System.out.println("Job started: " + startTime); // ------------------------------------------- // Phase #1//from ww w .jav a2 s . c om // ------------------------------------------- JobConf p1_job = base.getJobConf(); p1_job.setJobName(p1_job.getJobName() + ".Phase1"); Path p1_output = new Path(base.getOutputPath().toString() + "/phase1"); FileOutputFormat.setOutputPath(p1_job, p1_output); // // Make sure we have our properties // String required[] = { BenchmarkBase.PROPERTY_START_DATE, BenchmarkBase.PROPERTY_STOP_DATE }; for (String req : required) { if (!base.getOptions().containsKey(req)) { System.err.println("ERROR: The property '" + req + "' is not set"); System.exit(1); } } // FOR p1_job.setInputFormat( base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class); if (base.getSequenceFile()) p1_job.setOutputFormat(SequenceFileOutputFormat.class); p1_job.setOutputKeyClass(Text.class); p1_job.setOutputValueClass(Text.class); p1_job.setMapperClass( base.getTupleData() ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableMap.class : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextMap.class); p1_job.setReducerClass( base.getTupleData() ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableReduce.class : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextReduce.class); p1_job.setCompressMapOutput(base.getCompress()); // ------------------------------------------- // Phase #2 // ------------------------------------------- JobConf p2_job = base.getJobConf(); p2_job.setJobName(p2_job.getJobName() + ".Phase2"); p2_job.setInputFormat( base.getSequenceFile() ? 
SequenceFileInputFormat.class : KeyValueTextInputFormat.class); if (base.getSequenceFile()) p2_job.setOutputFormat(SequenceFileOutputFormat.class); p2_job.setOutputKeyClass(Text.class); p2_job.setOutputValueClass(Text.class); p2_job.setMapperClass(IdentityMapper.class); p2_job.setReducerClass( base.getTupleData() ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TupleWritableReduce.class : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TextReduce.class); p2_job.setCompressMapOutput(base.getCompress()); p2_job.setNumMapTasks(60); // ------------------------------------------- // Phase #3 // ------------------------------------------- JobConf p3_job = base.getJobConf(); p3_job.setJobName(p3_job.getJobName() + ".Phase3"); p3_job.setNumReduceTasks(1); p3_job.setInputFormat( base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class); p3_job.setOutputKeyClass(Text.class); p3_job.setOutputValueClass(Text.class); //p3_job.setMapperClass(Phase3Map.class); p3_job.setMapperClass(IdentityMapper.class); p3_job.setReducerClass( base.getTupleData() ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TupleWritableReduce.class : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TextReduce.class); // // Execute #1 // base.runJob(p1_job); // // Execute #2 // Path p2_output = new Path(base.getOutputPath().toString() + "/phase2"); FileOutputFormat.setOutputPath(p2_job, p2_output); FileInputFormat.setInputPaths(p2_job, p1_output); base.runJob(p2_job); // // Execute #3 // Path p3_output = new Path(base.getOutputPath().toString() + "/phase3"); FileOutputFormat.setOutputPath(p3_job, p3_output); FileInputFormat.setInputPaths(p3_job, p2_output); base.runJob(p3_job); // There does need to be a combine if (base.getCombine()) base.runCombine(); return 0; }
From source file:edu.uci.ics.fuzzyjoin.hadoop.FuzzyJoinDriver.java
License:Apache License
public static void run(JobConf job) throws IOException { job.setJarByClass(FuzzyJoinDriver.class); ///* w ww . j a va2 s . co m*/ // print info // String ret = "FuzzyJoinDriver(" + job.getJobName() + ")\n" + " Input Path: {"; Path inputs[] = FileInputFormat.getInputPaths(job); for (int ctr = 0; ctr < inputs.length; ctr++) { if (ctr > 0) { ret += "\n "; } ret += inputs[ctr].toString(); } ret += "}\n"; ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + " Map Jobs: " + job.getNumMapTasks() + "\n" + " Reduce Jobs: " + job.getNumReduceTasks() + "\n" + " Properties: {"; String[][] properties = new String[][] { new String[] { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE }, new String[] { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY, "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE }, new String[] { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE }, new String[] { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE }, new String[] { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE }, new String[] { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE }, new String[] { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE }, new String[] { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" }, new String[] { DATA_JOININDEX_PROPERTY, "" }, }; for (int crt = 0; crt < properties.length; crt++) { if (crt > 0) { ret += "\n "; } ret += properties[crt][0] + "=" + job.get(properties[crt][0], properties[crt][1]); } ret += "}"; System.out.println(ret); // // run job // Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(job); Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println( "The job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds."); }
From source file:edu.uci.ics.hyracks.hadoop.compat.driver.CompatibilityLayer.java
License:Apache License
/**
 * Submits a single job configuration to Hyracks.
 *
 * <p>The application name is the job name followed by the current time in
 * milliseconds. The job specification is produced by the Hadoop adapter
 * from a one-element list holding {@code conf}.
 *
 * @param conf     configuration of the job to submit
 * @param userLibs user libraries passed through to the Hyracks client
 * @return the handle returned by the Hyracks client
 * @throws Exception whatever specification building or submission raises
 */
public HyracksRunningJob submitJob(JobConf conf, Set<String> userLibs) throws Exception {
    List<JobConf> singleJob = new ArrayList<JobConf>();
    singleJob.add(conf);

    String appName = conf.getJobName() + System.currentTimeMillis();
    JobSpecification jobSpec = hadoopAdapter.getJobSpecification(singleJob);
    return hyracksClient.submitJob(appName, jobSpec, userLibs);
}
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
/**
 * Prints a summary of the job (name, input paths, output path and the
 * similarity threshold) to standard error, runs the job synchronously, and
 * reports the elapsed seconds on standard error.
 *
 * @param job configuration of the job to execute
 * @throws IOException if running the job fails
 */
public static void run(JobConf job) throws IOException {
    String heading = stars() + "\n Running job: " + job.getJobName() + "\n Input Path: {";
    StringBuilder info = new StringBuilder(heading);

    String separator = "";
    for (Path input : FileInputFormat.getInputPaths(job)) {
        info.append(separator).append(input.toString());
        separator = "\n ";
    }
    info.append("}\n");
    info.append(" Output Path: ").append(FileOutputFormat.getOutputPath(job)).append("\n");
    info.append(" Threshold: ").append(job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE))
            .append("\n");
    System.err.println(info.toString());

    Date started = new Date();
    JobClient.runJob(job);
    Date finished = new Date();

    long elapsedMillis = finished.getTime() - started.getTime();
    System.err.println("Similarity job took " + elapsedMillis / (float) 1000.0 + " seconds.");
}
From source file:edu.ucsb.cs.knn.KnnDriver.java
License:Apache License
/** * Submit the configured job to Hadoop JobTracker to start the process. *//*from w w w . j a v a2s . c o m*/ public static void run(JobConf job) throws IOException { job.setJarByClass(KnnDriver.class); // This method sets the jar String ret = stars() + "\nKnnDriver(" + job.getJobName() + ")\n" + " Input Path: {"; Path inputs[] = FileInputFormat.getInputPaths(job); for (int ctr = 0; ctr < inputs.length; ctr++) { if (ctr > 0) { ret += "\n "; } ret += inputs[ctr].toString(); } ret += "}\n"; ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + " Map Tasks: " + job.getNumMapTasks() + "\n" + " Reduce Tasks: " + job.getNumReduceTasks() + "\n"; ret += " Threshold: " + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE) + "\n"; System.out.println(ret); // // run job // JobClient.runJob(job); }