List of usage examples for org.apache.hadoop.mapred JobConf setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass)
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
private static JobConf createJobConf(final Configuration conf) { JobConf jobconf = new JobConf(conf, DistCp.class); jobconf.setJobName(NAME);/* w w w.j av a 2s.com*/ // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. jobconf.setMapSpeculativeExecution(false); jobconf.setInputFormat(CopyInputFormat.class); jobconf.setOutputKeyClass(Text.class); jobconf.setOutputValueClass(Text.class); jobconf.setMapperClass(CopyFilesMapper.class); jobconf.setNumReduceTasks(0); return jobconf; }
From source file:fr.mines_nantes.atlanmod.monitoring.frameworks.TeraGen.java
License:Apache License
/** * @param args the cli arguments//from w w w . ja v a2 s. c om */ public int run(String[] args) throws IOException { JobConf job = (JobConf) getConf(); setNumberOfRows(job, Long.parseLong(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraGen"); job.setJarByClass(TeraGen.class); job.setMapperClass(SortGenMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(RangeInputFormat.class); //job.setOutputFormat(TeraOutputFormat.class); JobClient.runJob(job); return 0; }
From source file:fr.worf.hadoop.scratchpad.Scratch2MapReduce.java
License:Apache License
/** * @param args the command line arguments * @throws java.io.IOException//from ww w. j a va2 s .c o m */ public static void main(String[] args) throws IOException { JobConf job = new JobConf(Scratch2MapReduce.class); job.setJobName("wordcount"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setInputFormat(TextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path("/home/slash/test/testfile1.txt")); FileOutputFormat.setOutputPath(job, new Path("/home/slash/test/testfile2.txt")); RunningJob runJob = JobClient.runJob(job); }
From source file:gov.nasa.jpl.memex.pooledtimeseries.GradientTimeSeries.java
License:Apache License
public static void main(String[] args) throws Exception { System.loadLibrary(Core.NATIVE_LIBRARY_NAME); Configuration baseConf = new Configuration(); baseConf.set("mapred.reduce.tasks", "0"); JobConf conf = new JobConf(baseConf, GradientTimeSeries.class); conf.setJobName("gradient_time_series"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(MultiFileOutput.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);//from w ww. ja va 2 s . c o m }
From source file:gov.nasa.jpl.memex.pooledtimeseries.OpticalTimeSeries.java
License:Apache License
public static void main(String[] args) throws Exception { System.loadLibrary(Core.NATIVE_LIBRARY_NAME); LOG.info("Loaded- " + Core.NATIVE_LIBRARY_NAME); Configuration baseConf = new Configuration(); baseConf.set("mapred.reduce.tasks", "0"); JobConf conf = new JobConf(baseConf, OpticalTimeSeries.class); conf.setJobName("optical_time_series"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(MultiFileOutput.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);//from w w w . j a va 2s . c om }
From source file:gov.nih.ncgc.hadoop.BioIsostere.java
public int run(String[] args) throws Exception { JobConf jobConf = new JobConf(getConf(), BioIsostere.class); jobConf.setJobName(BioIsostere.class.getSimpleName()); jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(IntWritable.class); jobConf.setMapOutputKeyClass(Text.class); jobConf.setMapOutputValueClass(MoleculePairWritable.class); jobConf.setMapperClass(BioisostereMapper.class); jobConf.setReducerClass(MoleculePairReducer.class); // jobConf.setInputFormat(TextInputFormat.class); jobConf.setInt("mapred.line.input.format.linespermap", 10); jobConf.setInputFormat(NLineInputFormat.class); jobConf.setOutputFormat(TextOutputFormat.class); if (args.length != 3) { System.err.println("Usage: bisos <datafile> <out> <license file>"); System.exit(2);/*from w w w . ja v a 2 s. c om*/ } FileInputFormat.setInputPaths(jobConf, new Path(args[0])); FileOutputFormat.setOutputPath(jobConf, new Path(args[1])); // make the license file available vis dist cache DistributedCache.addCacheFile(new Path(args[2]).toUri(), jobConf); long start = System.currentTimeMillis(); JobClient.runJob(jobConf); double duration = (System.currentTimeMillis() - start) / 1000.0; System.out.println("Total runtime was " + duration + "s"); return 0; }
From source file:gov.nih.ncgc.hadoop.SmartsSearch.java
public int run(String[] args) throws Exception { JobConf jobConf = new JobConf(getConf(), HeavyAtomCount.class); jobConf.setJobName("smartsSearch"); jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(IntWritable.class); jobConf.setMapperClass(MoleculeMapper.class); jobConf.setCombinerClass(SmartsMatchReducer.class); jobConf.setReducerClass(SmartsMatchReducer.class); jobConf.setInputFormat(TextInputFormat.class); jobConf.setOutputFormat(TextOutputFormat.class); jobConf.setNumMapTasks(5);// w ww . j a v a 2s.c om if (args.length != 4) { System.err.println("Usage: ss <in> <out> <pattern> <license file>"); System.exit(2); } FileInputFormat.setInputPaths(jobConf, new Path(args[0])); FileOutputFormat.setOutputPath(jobConf, new Path(args[1])); jobConf.setStrings("pattern", args[2]); // make the license file available vis dist cache DistributedCache.addCacheFile(new Path(args[3]).toUri(), jobConf); JobClient.runJob(jobConf); return 0; }
From source file:gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.java
public static void main(String[] args) { //paths and input handling Path inputRatingsPath = new Path(args[0]); //movieid, userid, rating (text files) Path job1OutputPath = new Path("/user/hduser/partialResults"); Path partialDistancesPath = new Path("/user/hduser/partialResults/part-*"); //member_nonMember \t partialDistance (sequence files) Path candidateMoviesPath = new Path("/user/hduser/partialResults/candidateMovies-*"); //candidateMovieId, nonMemberUserId_rating (text files) Path userSimilaritiesPath = new Path("/user/hduser/userSimilarities"); //similarity of each group member to his friends (text files) Path finalScoresPath = new Path(args[1]); //movieId \t outputScore int numReduceTasks = 56; //defaultValue if (args.length == 3) { numReduceTasks = Integer.parseInt(args[2]); }//from w w w .j ava 2 s .c om final float friendsSimThresh = 0.8f; String groupFilePath = "/user/hduser/group.txt"; //one-line csv file with user ids (text file) if (args.length < 2 || args.length > 3) { System.err.println( "Incorrect input. Example usage: hadoop jar ~/GroupRecs/MultiUser.jar inputPath outputPath [numReduceTasks]"); return; } //JOB 1// JobClient client = new JobClient(); JobConf conf = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class); try { FileSystem fs = FileSystem.get(conf); if (fs.exists(job1OutputPath)) { fs.delete(job1OutputPath, true); } if (fs.exists(userSimilaritiesPath)) { fs.delete(userSimilaritiesPath, true); } if (fs.exists(finalScoresPath)) { fs.delete(finalScoresPath, true); } } catch (IOException ex) { Logger.getLogger(MultiUserMain.class.getName()).log(Level.SEVERE, null, ex); } conf.setJobName("Multi-user approach - Job 1"); System.out.println("Starting Job 1 (Multi-user approach)..."); conf.setMapOutputKeyClass(VIntWritable.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(ByteWritable.class); conf.setInputFormat(TextInputFormat.class); //conf.setOutputFormat(TextOutputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); FileInputFormat.setInputPaths(conf, inputRatingsPath); //user ratings FileOutputFormat.setOutputPath(conf, job1OutputPath); //partial distances MultipleOutputs.addNamedOutput(conf, "candidateMovies", SequenceFileOutputFormat.class, VIntWritable.class, Text.class); //movieId, userId_rating conf.setMapperClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Mapper.class); conf.setReducerClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Reducer.class); conf.setNumReduceTasks(numReduceTasks); try { DistributedCache.addCacheFile(new URI(groupFilePath), conf); // group } catch (URISyntaxException e1) { System.err.println(e1.toString()); } conf.setInt("mapred.task.timeout", 6000000); client.setConf(conf); RunningJob job; try { job = JobClient.runJob(conf); job.waitForCompletion(); } catch (Exception e) { System.err.println(e); } //JOB 2// System.out.println("Starting Job 2 (Multi-user approach)..."); JobClient client2 = new JobClient(); JobConf conf2 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class); conf2.setJobName("Multi-user approach - Job 2"); conf2.setMapOutputKeyClass(Text.class); //user pair (member_nonMember), where nonMember is in friends conf2.setMapOutputValueClass(ByteWritable.class);//similarity part unsquared conf2.setOutputKeyClass(Text.class); //user pair (member_nonMember), where nonMember is in friends conf2.setOutputValueClass(DoubleWritable.class);//similarity conf2.setInputFormat(SequenceFileInputFormat.class); //conf2.setInputFormat(TextInputFormat.class); conf2.setOutputFormat(TextOutputFormat.class); //conf2.setOutputFormat(SequenceFileOutputFormat.class); //SequenceFileOutputFormat.setOutputCompressionType(conf2, SequenceFile.CompressionType.BLOCK); FileInputFormat.setInputPaths(conf2, partialDistancesPath); //Job 1 output FileOutputFormat.setOutputPath(conf2, userSimilaritiesPath); //Job 2 output (similarity of each group member to his friends) conf2.setMapperClass(IdentityMapper.class); conf2.setReducerClass(gr.forth.ics.isl.grouprecsmr.job2.Job2ReducerMulti.class); int numSimilaritiesPartitions = numReduceTasks; conf2.setNumReduceTasks(numSimilaritiesPartitions); conf2.setFloat("friendsSimThreshold", friendsSimThresh); conf2.setInt("mapred.task.timeout", 6000000); conf2.set("io.sort.mb", "500"); client2.setConf(conf2); RunningJob job2; try { job2 = JobClient.runJob(conf2); job2.waitForCompletion(); } catch (Exception e) { System.err.println(e); } //JOB 3// System.out.println("Starting Job 3 (Multi-user approach)..."); JobClient client3 = new JobClient(); JobConf conf3 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class); conf3.setJobName("Multi-user approach - Job 3"); conf3.setMapOutputKeyClass(VIntWritable.class); conf3.setMapOutputValueClass(Text.class); conf3.setOutputKeyClass(VIntWritable.class); conf3.setOutputValueClass(DoubleWritable.class); conf3.setInputFormat(SequenceFileInputFormat.class); //conf3.setInputFormat(TextInputFormat.class); conf3.setOutputFormat(TextOutputFormat.class); //conf3.setOutputFormat(SequenceFileOutputFormat.class); //SequenceFileOutputFormat.setOutputCompressionType(conf3,SequenceFile.CompressionType.BLOCK); try { DistributedCache.addCacheFile(new URI(groupFilePath), conf3); } catch (URISyntaxException ex) { System.err.println("Could not add group file to distributed cache. " + ex); } for (int i = 0; i < numSimilaritiesPartitions; i++) { String reduceId = String.format("%05d", i); //5-digit int with leading try { DistributedCache.addCacheFile(new URI(userSimilaritiesPath.toString() + "/part-" + reduceId), conf3); } catch (URISyntaxException ex) { System.err.println("Could not add similarities files to distributed cache. " + ex); } } FileInputFormat.setInputPaths(conf3, candidateMoviesPath); //Job 1 output (candidate movies) FileOutputFormat.setOutputPath(conf3, finalScoresPath); //Job 3 output (movie \t outputScore) // conf3.setMapperClass(IdentityMapper.class); conf3.setMapperClass(gr.forth.ics.isl.grouprecsmr.job3.Job3MapperMulti.class); //filtering out ratings from non-Friends conf3.setReducerClass(gr.forth.ics.isl.grouprecsmr.job3.Job3ReducerMulti.class); conf3.setInt("mapred.task.timeout", 6000000); conf3.set("io.sort.mb", "500"); conf3.setNumReduceTasks(numReduceTasks); client3.setConf(conf3); RunningJob job3; try { job3 = JobClient.runJob(conf3); job3.waitForCompletion(); } catch (Exception e) { System.err.println(e); } }
From source file:graphbuilding.GenomixDriver.java
License:Apache License
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath) throws IOException { JobConf conf = new JobConf(GenomixDriver.class); conf.setInt("sizeKmer", sizeKmer); if (defaultConfPath != null) { conf.addResource(new Path(defaultConfPath)); }//from w ww.j av a 2 s. c o m conf.setJobName("Genomix Graph Building"); conf.setMapperClass(GenomixMapper.class); conf.setReducerClass(GenomixReducer.class); conf.setCombinerClass(GenomixCombiner.class); conf.setMapOutputKeyClass(Kmer.class); conf.setMapOutputValueClass(KmerCountValue.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setOutputKeyClass(Kmer.class); conf.setOutputValueClass(KmerCountValue.class); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setNumReduceTasks(numReducers); FileSystem dfs = FileSystem.get(conf); dfs.delete(new Path(outputPath), true); JobClient.runJob(conf); }
From source file:hadoop.UIUCWikifierAppHadoop.java
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, UIUCWikifierAppHadoop.class); // System.out.println("Run.. Envinronment Variables"); // java.util.Map<String,String> env = System.getenv(); ////ww w . j ava2s . c om // System.out.println("Printing environment variables"); // for(String k : env.keySet()){ // System.out.println(k + "\t" + env.get(k)); // } // String jlpValue = System.getProperty("java.library.path"); // System.out.println("java.library.path=" + jlpValue); // System.setProperty("java.library.path", jlpValue + ":" + "/home/jgilme1/bin/gurobi550/linux64/lib"); //process command line options Path in = new Path(args[0]); Path out = new Path(args[1]); //change current working directory to hdfs path.. job.setJobName("entitylinker"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(DistributeInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setMapperClass(Map.class); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setNumReduceTasks(0); job.setNumMapTasks(Integer.parseInt(args[2])); job.set("mapreduce.input.fileinputformat.split.minsize", "0"); job.set("mapred.child.java.opts", "-Xmx16g"); job.setNumTasksToExecutePerJvm(-1); //job.setMemoryForMapTask(new Long(12288)); //job.set(JobConf.MAPRED_MAP_TASK_ULIMIT, "12582912"); String gurobiHomeVariable = "GUROBI_HOME"; String gurobiHomeValue = "/home/jgilme1/bin/gurobi560/linux64"; String pathVariable = "PATH"; String newPathValue = gurobiHomeValue + "/bin"; String ldLibraryPathVariable = "LD_LIBRARY_PATH"; String ldLibraryPathValue = gurobiHomeValue + "/lib"; String grbLicenseFileVariable = "GRB_LICENSE_FILE"; String grbLicenseFileValue = "/scratch6/usr/jgilme1/gurobiLicense/gurobi.lic"; StringBuilder newEnvironment = new StringBuilder(); newEnvironment.append(gurobiHomeVariable); newEnvironment.append("="); newEnvironment.append(gurobiHomeValue); newEnvironment.append(","); newEnvironment.append(pathVariable); newEnvironment.append("="); newEnvironment.append("$" + pathVariable + ":"); newEnvironment.append(newPathValue); newEnvironment.append(","); newEnvironment.append(ldLibraryPathVariable); newEnvironment.append("=$" + ldLibraryPathVariable + ":"); newEnvironment.append(ldLibraryPathValue); newEnvironment.append(","); newEnvironment.append(grbLicenseFileVariable); newEnvironment.append("="); newEnvironment.append(grbLicenseFileValue); //System.out.println(newEnvironment.toString()); job.set(JobConf.MAPRED_MAP_TASK_ENV, newEnvironment.toString()); DistributedCache.addCacheArchive(new URI("/user/jgilme1/entitylinking/Wikifier2013.tar.gz"), job); JobClient.runJob(job); return 0; }