List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks
public void setNumReduceTasks(int n)
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
private static JobConf createJobConf(final Configuration conf) { JobConf jobconf = new JobConf(conf, DistCp.class); jobconf.setJobName(NAME);//w ww . java2 s.c om // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. jobconf.setMapSpeculativeExecution(false); jobconf.setInputFormat(CopyInputFormat.class); jobconf.setOutputKeyClass(Text.class); jobconf.setOutputValueClass(Text.class); jobconf.setMapperClass(CopyFilesMapper.class); jobconf.setNumReduceTasks(0); return jobconf; }
From source file:fr.mines_nantes.atlanmod.monitoring.frameworks.TeraGen.java
License:Apache License
/** * @param args the cli arguments//w w w. jav a2 s.co m */ public int run(String[] args) throws IOException { JobConf job = (JobConf) getConf(); setNumberOfRows(job, Long.parseLong(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraGen"); job.setJarByClass(TeraGen.class); job.setMapperClass(SortGenMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(RangeInputFormat.class); //job.setOutputFormat(TeraOutputFormat.class); JobClient.runJob(job); return 0; }
From source file:gaffer.accumulostore.operation.hdfs.handler.job.factory.SampleDataForSplitPointsJobFactory.java
License:Apache License
protected void setupJobConf(final JobConf jobConf, final SampleDataForSplitPoints operation, final Store store) throws IOException { jobConf.set(SCHEMA, new String(store.getSchema().toJson(false), CommonConstants.UTF_8)); jobConf.set(MAPPER_GENERATOR, operation.getMapperGeneratorClassName()); jobConf.set(VALIDATE, String.valueOf(operation.isValidate())); jobConf.set(PROPORTION_TO_SAMPLE, String.valueOf(operation.getProportionToSample())); jobConf.set(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS, ((AccumuloStore) store).getKeyPackage().getKeyConverter().getClass().getName()); Integer numTasks = operation.getNumMapTasks(); if (null != numTasks) { jobConf.setNumMapTasks(numTasks); }//from w w w. j a v a2 s . co m jobConf.setNumReduceTasks(1); }
From source file:gaffer.accumulostore.operation.hdfs.handler.job.SampleDataForSplitPointsJobFactory.java
License:Apache License
protected void setupJobConf(final JobConf jobConf, final SampleDataForSplitPoints operation, final Store store) throws IOException { jobConf.set(SCHEMA, new String(store.getSchema().toJson(false), CommonConstants.UTF_8)); jobConf.set(MAPPER_GENERATOR, operation.getMapperGeneratorClassName()); jobConf.set(VALIDATE, String.valueOf(operation.isValidate())); jobConf.set(PROPORTION_TO_SAMPLE, String.valueOf(operation.getProportionToSample())); Integer numTasks = operation.getNumMapTasks(); if (null != numTasks) { jobConf.setNumMapTasks(numTasks); }//ww w. ja v a 2 s . co m numTasks = operation.getNumReduceTasks(); if (null != numTasks) { jobConf.setNumReduceTasks(numTasks); } }
From source file:gaffer.operation.simple.hdfs.handler.AbstractAddElementsFromHdfsJobFactory.java
License:Apache License
/**
 * Transfers the settings of an {@code AddElementsFromHdfs} operation into
 * the supplied job configuration.
 *
 * <p>Stores the data schema JSON, the store schema JSON, the mapper
 * generator class name and the validate flag. Map and reduce task counts
 * are only applied when the operation provides them (non-null).
 *
 * @param jobConf   the job configuration to populate
 * @param operation the operation supplying the settings
 * @param store     the store supplying the data and store schemas
 * @throws IOException if a schema JSON cannot be decoded with the configured charset
 */
protected void setupJobConf(final JobConf jobConf, final AddElementsFromHdfs operation,
        final Store store) throws IOException {
    final String dataSchemaJson = new String(store.getDataSchema().toJson(false), UTF_8_CHARSET);
    jobConf.set(DATA_SCHEMA, dataSchemaJson);

    final String storeSchemaJson = new String(store.getStoreSchema().toJson(false), UTF_8_CHARSET);
    jobConf.set(STORE_SCHEMA, storeSchemaJson);

    final String mapperGenerator = operation.getMapperGeneratorClassName();
    jobConf.set(MAPPER_GENERATOR, mapperGenerator);

    final String validateFlag = String.valueOf(operation.isValidate());
    jobConf.set(VALIDATE, validateFlag);

    // Both task counts are optional on the operation.
    final Integer requestedMapTasks = operation.getNumMapTasks();
    if (requestedMapTasks != null) {
        jobConf.setNumMapTasks(requestedMapTasks);
    }

    final Integer requestedReduceTasks = operation.getNumReduceTasks();
    if (requestedReduceTasks != null) {
        jobConf.setNumReduceTasks(requestedReduceTasks);
    }
}
From source file:gaffer.operation.simple.hdfs.handler.job.factory.AbstractAddElementsFromHdfsJobFactory.java
License:Apache License
/**
 * Transfers the settings of an {@code AddElementsFromHdfs} operation into
 * the supplied job configuration, logging each value as it is applied.
 *
 * <p>Stores the schema JSON, the mapper generator class name and the
 * validate flag. Map and reduce task counts are only applied when the
 * operation provides them (non-null).
 *
 * @param jobConf   the job configuration to populate
 * @param operation the operation supplying the settings
 * @param store     the store supplying the schema
 * @throws IOException if the schema JSON cannot be decoded with the configured charset
 */
protected void setupJobConf(final JobConf jobConf, final AddElementsFromHdfs operation,
        final Store store) throws IOException {
    LOGGER.info("Setting up job conf");

    // Serialise the schema once and reuse it for both the conf entry and the
    // log line, rather than converting it to JSON twice.
    final String schemaJson = new String(store.getSchema().toJson(false), CommonConstants.UTF_8);
    jobConf.set(SCHEMA, schemaJson);
    LOGGER.info("Added {} {} to job conf", SCHEMA, schemaJson);

    final String mapperGenerator = operation.getMapperGeneratorClassName();
    jobConf.set(MAPPER_GENERATOR, mapperGenerator);
    LOGGER.info("Added {} of {} to job conf", MAPPER_GENERATOR, mapperGenerator);

    jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
    LOGGER.info("Added {} option of {} to job conf", VALIDATE, operation.isValidate());

    // Both task counts are optional on the operation.
    Integer numTasks = operation.getNumMapTasks();
    if (null != numTasks) {
        jobConf.setNumMapTasks(numTasks);
        LOGGER.info("Set number of map tasks to {} on job conf", numTasks);
    }

    numTasks = operation.getNumReduceTasks();
    if (null != numTasks) {
        jobConf.setNumReduceTasks(numTasks);
        LOGGER.info("Set number of reduce tasks to {} on job conf", numTasks);
    }
}
From source file:gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.java
public static void main(String[] args) { //paths and input handling Path inputRatingsPath = new Path(args[0]); //movieid, userid, rating (text files) Path job1OutputPath = new Path("/user/hduser/partialResults"); Path partialDistancesPath = new Path("/user/hduser/partialResults/part-*"); //member_nonMember \t partialDistance (sequence files) Path candidateMoviesPath = new Path("/user/hduser/partialResults/candidateMovies-*"); //candidateMovieId, nonMemberUserId_rating (text files) Path userSimilaritiesPath = new Path("/user/hduser/userSimilarities"); //similarity of each group member to his friends (text files) Path finalScoresPath = new Path(args[1]); //movieId \t outputScore int numReduceTasks = 56; //defaultValue if (args.length == 3) { numReduceTasks = Integer.parseInt(args[2]); }/*w w w . jav a 2s . com*/ final float friendsSimThresh = 0.8f; String groupFilePath = "/user/hduser/group.txt"; //one-line csv file with user ids (text file) if (args.length < 2 || args.length > 3) { System.err.println( "Incorrect input. 
Example usage: hadoop jar ~/GroupRecs/MultiUser.jar inputPath outputPath [numReduceTasks]"); return; } //JOB 1// JobClient client = new JobClient(); JobConf conf = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class); try { FileSystem fs = FileSystem.get(conf); if (fs.exists(job1OutputPath)) { fs.delete(job1OutputPath, true); } if (fs.exists(userSimilaritiesPath)) { fs.delete(userSimilaritiesPath, true); } if (fs.exists(finalScoresPath)) { fs.delete(finalScoresPath, true); } } catch (IOException ex) { Logger.getLogger(MultiUserMain.class.getName()).log(Level.SEVERE, null, ex); } conf.setJobName("Multi-user approach - Job 1"); System.out.println("Starting Job 1 (Multi-user approach)..."); conf.setMapOutputKeyClass(VIntWritable.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(ByteWritable.class); conf.setInputFormat(TextInputFormat.class); //conf.setOutputFormat(TextOutputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); FileInputFormat.setInputPaths(conf, inputRatingsPath); //user ratings FileOutputFormat.setOutputPath(conf, job1OutputPath); //partial distances MultipleOutputs.addNamedOutput(conf, "candidateMovies", SequenceFileOutputFormat.class, VIntWritable.class, Text.class); //movieId, userId_rating conf.setMapperClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Mapper.class); conf.setReducerClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Reducer.class); conf.setNumReduceTasks(numReduceTasks); try { DistributedCache.addCacheFile(new URI(groupFilePath), conf); // group } catch (URISyntaxException e1) { System.err.println(e1.toString()); } conf.setInt("mapred.task.timeout", 6000000); client.setConf(conf); RunningJob job; try { job = JobClient.runJob(conf); job.waitForCompletion(); } catch (Exception e) { System.err.println(e); } //JOB 2// System.out.println("Starting Job 2 (Multi-user 
approach)..."); JobClient client2 = new JobClient(); JobConf conf2 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class); conf2.setJobName("Multi-user approach - Job 2"); conf2.setMapOutputKeyClass(Text.class); //user pair (member_nonMember), where nonMember is in friends conf2.setMapOutputValueClass(ByteWritable.class);//similarity part unsquared conf2.setOutputKeyClass(Text.class); //user pair (member_nonMember), where nonMember is in friends conf2.setOutputValueClass(DoubleWritable.class);//similarity conf2.setInputFormat(SequenceFileInputFormat.class); //conf2.setInputFormat(TextInputFormat.class); conf2.setOutputFormat(TextOutputFormat.class); //conf2.setOutputFormat(SequenceFileOutputFormat.class); //SequenceFileOutputFormat.setOutputCompressionType(conf2, SequenceFile.CompressionType.BLOCK); FileInputFormat.setInputPaths(conf2, partialDistancesPath); //Job 1 output FileOutputFormat.setOutputPath(conf2, userSimilaritiesPath); //Job 2 output (similarity of each group member to his friends) conf2.setMapperClass(IdentityMapper.class); conf2.setReducerClass(gr.forth.ics.isl.grouprecsmr.job2.Job2ReducerMulti.class); int numSimilaritiesPartitions = numReduceTasks; conf2.setNumReduceTasks(numSimilaritiesPartitions); conf2.setFloat("friendsSimThreshold", friendsSimThresh); conf2.setInt("mapred.task.timeout", 6000000); conf2.set("io.sort.mb", "500"); client2.setConf(conf2); RunningJob job2; try { job2 = JobClient.runJob(conf2); job2.waitForCompletion(); } catch (Exception e) { System.err.println(e); } //JOB 3// System.out.println("Starting Job 3 (Multi-user approach)..."); JobClient client3 = new JobClient(); JobConf conf3 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class); conf3.setJobName("Multi-user approach - Job 3"); conf3.setMapOutputKeyClass(VIntWritable.class); conf3.setMapOutputValueClass(Text.class); conf3.setOutputKeyClass(VIntWritable.class); conf3.setOutputValueClass(DoubleWritable.class); 
conf3.setInputFormat(SequenceFileInputFormat.class); //conf3.setInputFormat(TextInputFormat.class); conf3.setOutputFormat(TextOutputFormat.class); //conf3.setOutputFormat(SequenceFileOutputFormat.class); //SequenceFileOutputFormat.setOutputCompressionType(conf3,SequenceFile.CompressionType.BLOCK); try { DistributedCache.addCacheFile(new URI(groupFilePath), conf3); } catch (URISyntaxException ex) { System.err.println("Could not add group file to distributed cache. " + ex); } for (int i = 0; i < numSimilaritiesPartitions; i++) { String reduceId = String.format("%05d", i); //5-digit int with leading try { DistributedCache.addCacheFile(new URI(userSimilaritiesPath.toString() + "/part-" + reduceId), conf3); } catch (URISyntaxException ex) { System.err.println("Could not add similarities files to distributed cache. " + ex); } } FileInputFormat.setInputPaths(conf3, candidateMoviesPath); //Job 1 output (candidate movies) FileOutputFormat.setOutputPath(conf3, finalScoresPath); //Job 3 output (movie \t outputScore) // conf3.setMapperClass(IdentityMapper.class); conf3.setMapperClass(gr.forth.ics.isl.grouprecsmr.job3.Job3MapperMulti.class); //filtering out ratings from non-Friends conf3.setReducerClass(gr.forth.ics.isl.grouprecsmr.job3.Job3ReducerMulti.class); conf3.setInt("mapred.task.timeout", 6000000); conf3.set("io.sort.mb", "500"); conf3.setNumReduceTasks(numReduceTasks); client3.setConf(conf3); RunningJob job3; try { job3 = JobClient.runJob(conf3); job3.waitForCompletion(); } catch (Exception e) { System.err.println(e); } }
From source file:graphbuilding.GenomixDriver.java
License:Apache License
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath) throws IOException { JobConf conf = new JobConf(GenomixDriver.class); conf.setInt("sizeKmer", sizeKmer); if (defaultConfPath != null) { conf.addResource(new Path(defaultConfPath)); }//from w w w .ja v a 2 s . co m conf.setJobName("Genomix Graph Building"); conf.setMapperClass(GenomixMapper.class); conf.setReducerClass(GenomixReducer.class); conf.setCombinerClass(GenomixCombiner.class); conf.setMapOutputKeyClass(Kmer.class); conf.setMapOutputValueClass(KmerCountValue.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setOutputKeyClass(Kmer.class); conf.setOutputValueClass(KmerCountValue.class); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setNumReduceTasks(numReducers); FileSystem dfs = FileSystem.get(conf); dfs.delete(new Path(outputPath), true); JobClient.runJob(conf); }
From source file:hadoop.UIUCWikifierAppHadoop.java
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, UIUCWikifierAppHadoop.class); // System.out.println("Run.. Envinronment Variables"); // java.util.Map<String,String> env = System.getenv(); //// w w w. j a va 2s. c o m // System.out.println("Printing environment variables"); // for(String k : env.keySet()){ // System.out.println(k + "\t" + env.get(k)); // } // String jlpValue = System.getProperty("java.library.path"); // System.out.println("java.library.path=" + jlpValue); // System.setProperty("java.library.path", jlpValue + ":" + "/home/jgilme1/bin/gurobi550/linux64/lib"); //process command line options Path in = new Path(args[0]); Path out = new Path(args[1]); //change current working directory to hdfs path.. job.setJobName("entitylinker"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(DistributeInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setMapperClass(Map.class); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setNumReduceTasks(0); job.setNumMapTasks(Integer.parseInt(args[2])); job.set("mapreduce.input.fileinputformat.split.minsize", "0"); job.set("mapred.child.java.opts", "-Xmx16g"); job.setNumTasksToExecutePerJvm(-1); //job.setMemoryForMapTask(new Long(12288)); //job.set(JobConf.MAPRED_MAP_TASK_ULIMIT, "12582912"); String gurobiHomeVariable = "GUROBI_HOME"; String gurobiHomeValue = "/home/jgilme1/bin/gurobi560/linux64"; String pathVariable = "PATH"; String newPathValue = gurobiHomeValue + "/bin"; String ldLibraryPathVariable = "LD_LIBRARY_PATH"; String ldLibraryPathValue = gurobiHomeValue + "/lib"; String grbLicenseFileVariable = "GRB_LICENSE_FILE"; String grbLicenseFileValue = "/scratch6/usr/jgilme1/gurobiLicense/gurobi.lic"; StringBuilder newEnvironment = new StringBuilder(); newEnvironment.append(gurobiHomeVariable); newEnvironment.append("="); 
newEnvironment.append(gurobiHomeValue); newEnvironment.append(","); newEnvironment.append(pathVariable); newEnvironment.append("="); newEnvironment.append("$" + pathVariable + ":"); newEnvironment.append(newPathValue); newEnvironment.append(","); newEnvironment.append(ldLibraryPathVariable); newEnvironment.append("=$" + ldLibraryPathVariable + ":"); newEnvironment.append(ldLibraryPathValue); newEnvironment.append(","); newEnvironment.append(grbLicenseFileVariable); newEnvironment.append("="); newEnvironment.append(grbLicenseFileValue); //System.out.println(newEnvironment.toString()); job.set(JobConf.MAPRED_MAP_TASK_ENV, newEnvironment.toString()); DistributedCache.addCacheArchive(new URI("/user/jgilme1/entitylinking/Wikifier2013.tar.gz"), job); JobClient.runJob(job); return 0; }
From source file:hibench.DataGenerator.java
License:Apache License
public void sumUpZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException { LOG.info("Summing up Zipfian Id Distirubtion..."); JobConf job = new JobConf(WebDataGen.class); String jobname = fin.getName() + " -> " + fout.getName(); job.setJobName(jobname);//from w w w.j a va 2 s . co m zipf.setJobConf(job); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(SumUpZipfMapper.class); job.setReducerClass(SumUpZipfReducer.class); job.setNumReduceTasks(1); // Important to sequentially accumulate the required space job.setInputFormat(NLineInputFormat.class); job.setOutputFormat(TextOutputFormat.class); DataPaths.checkHdfsFile(fout, false); FileInputFormat.setInputPaths(job, fin); FileOutputFormat.setOutputPath(job, fout); LOG.info("Running Job: " + jobname); LOG.info("Dummy file: " + fin); LOG.info("Zipfian sum up file as Ouput: " + fout); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); }