List of usage examples for org.apache.hadoop.mapred JobConf setOutputValueClass
public void setOutputValueClass(Class<?> theClass)
From source file:graphbuilding.GenomixDriver.java
License:Apache License
/**
 * Configures the "Genomix Graph Building" job and runs it through {@code JobClient.runJob}.
 *
 * <p>Whatever currently exists at {@code outputPath} is deleted recursively before the job
 * is submitted.
 *
 * @param inputPath       location of the text input
 * @param outputPath      location the sequence-file output is written to; removed first
 * @param numReducers     number of reduce tasks to configure
 * @param sizeKmer        value stored in the job configuration under the key {@code "sizeKmer"}
 * @param defaultConfPath optional additional configuration resource; skipped when {@code null}
 * @throws IOException propagated from the file-system and job-client calls
 */
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
        throws IOException {
    JobConf conf = new JobConf(GenomixDriver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }

    conf.setJobName("Genomix Graph Building");
    conf.setMapperClass(GenomixMapper.class);
    conf.setReducerClass(GenomixReducer.class);
    conf.setCombinerClass(GenomixCombiner.class);

    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(KmerCountValue.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Kmer.class);
    conf.setOutputValueClass(KmerCountValue.class);

    Path output = new Path(outputPath);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, output);
    conf.setNumReduceTasks(numReducers);

    // Resolve the file system from the output path itself rather than from the default
    // file system, so that an output path qualified with another scheme/authority is
    // cleaned on the file system it actually lives on.
    FileSystem outputFs = output.getFileSystem(conf);
    outputFs.delete(output, true);

    JobClient.runJob(conf);
}
From source file:hadoop.UIUCWikifierAppHadoop.java
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, UIUCWikifierAppHadoop.class); // System.out.println("Run.. Envinronment Variables"); // java.util.Map<String,String> env = System.getenv(); ////ww w . j a v a2s . c o m // System.out.println("Printing environment variables"); // for(String k : env.keySet()){ // System.out.println(k + "\t" + env.get(k)); // } // String jlpValue = System.getProperty("java.library.path"); // System.out.println("java.library.path=" + jlpValue); // System.setProperty("java.library.path", jlpValue + ":" + "/home/jgilme1/bin/gurobi550/linux64/lib"); //process command line options Path in = new Path(args[0]); Path out = new Path(args[1]); //change current working directory to hdfs path.. job.setJobName("entitylinker"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(DistributeInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setMapperClass(Map.class); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setNumReduceTasks(0); job.setNumMapTasks(Integer.parseInt(args[2])); job.set("mapreduce.input.fileinputformat.split.minsize", "0"); job.set("mapred.child.java.opts", "-Xmx16g"); job.setNumTasksToExecutePerJvm(-1); //job.setMemoryForMapTask(new Long(12288)); //job.set(JobConf.MAPRED_MAP_TASK_ULIMIT, "12582912"); String gurobiHomeVariable = "GUROBI_HOME"; String gurobiHomeValue = "/home/jgilme1/bin/gurobi560/linux64"; String pathVariable = "PATH"; String newPathValue = gurobiHomeValue + "/bin"; String ldLibraryPathVariable = "LD_LIBRARY_PATH"; String ldLibraryPathValue = gurobiHomeValue + "/lib"; String grbLicenseFileVariable = "GRB_LICENSE_FILE"; String grbLicenseFileValue = "/scratch6/usr/jgilme1/gurobiLicense/gurobi.lic"; StringBuilder newEnvironment = new StringBuilder(); newEnvironment.append(gurobiHomeVariable); newEnvironment.append("="); 
newEnvironment.append(gurobiHomeValue); newEnvironment.append(","); newEnvironment.append(pathVariable); newEnvironment.append("="); newEnvironment.append("$" + pathVariable + ":"); newEnvironment.append(newPathValue); newEnvironment.append(","); newEnvironment.append(ldLibraryPathVariable); newEnvironment.append("=$" + ldLibraryPathVariable + ":"); newEnvironment.append(ldLibraryPathValue); newEnvironment.append(","); newEnvironment.append(grbLicenseFileVariable); newEnvironment.append("="); newEnvironment.append(grbLicenseFileValue); //System.out.println(newEnvironment.toString()); job.set(JobConf.MAPRED_MAP_TASK_ENV, newEnvironment.toString()); DistributedCache.addCacheArchive(new URI("/user/jgilme1/entitylinking/Wikifier2013.tar.gz"), job); JobClient.runJob(job); return 0; }
From source file:hadoopProcesses.testJob.java
public static void start(String[] args) { try {// w w w . ja va 2s . co m JobConf conf = new JobConf(WordCount.class); conf.setJobName("wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); map Map = new map(); conf.setMapperClass(Map.getClass()); reducer Reduce = new reducer(); conf.setCombinerClass(Reduce.getClass()); conf.setReducerClass(Reduce.getClass()); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[1])); Path outputDir = new Path(args[2]); outputDir.getFileSystem(conf).delete(outputDir, true); FileSystem fs = FileSystem.get(conf); fs.delete(outputDir, true); FileOutputFormat.setOutputPath(conf, outputDir); JobClient.runJob(conf); FileSystem FS = FileSystem.get(conf); Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000"); if (FS.exists(src)) { System.out.println("\t\t------ Results ------ "); /* BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src))); String line; line = br.readLine(); while (line != null) { System.out.println("\t" + line); line = br.readLine(); } */ List<String> FileList = (new fileInteractions()).readLines(src, conf); for (String LocString : FileList) { System.out.println(LocString); } } } catch (Exception Exp) { Exp.printStackTrace(); } }
From source file:hibench.DataGenerator.java
License:Apache License
public void sumUpZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException { LOG.info("Summing up Zipfian Id Distirubtion..."); JobConf job = new JobConf(WebDataGen.class); String jobname = fin.getName() + " -> " + fout.getName(); job.setJobName(jobname);//from w ww .j a va 2s. co m zipf.setJobConf(job); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(SumUpZipfMapper.class); job.setReducerClass(SumUpZipfReducer.class); job.setNumReduceTasks(1); // Important to sequentially accumulate the required space job.setInputFormat(NLineInputFormat.class); job.setOutputFormat(TextOutputFormat.class); DataPaths.checkHdfsFile(fout, false); FileInputFormat.setInputPaths(job, fin); FileOutputFormat.setOutputPath(job, fout); LOG.info("Running Job: " + jobname); LOG.info("Dummy file: " + fin); LOG.info("Zipfian sum up file as Ouput: " + fout); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); }
From source file:hibench.DataGenerator.java
License:Apache License
/**
 * Configures and runs the map-only job that creates the Zipfian id distribution, then
 * records the job's {@code MAP_OUTPUT_RECORDS} counter on {@code zipf}.
 *
 * @param fin  input file for the job
 * @param fout output location for the job
 * @param zipf Zipf settings; configures the job and receives the element count afterwards
 * @throws IOException propagated from the path check, the job run or the counter lookup
 */
public void createZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException {
    LOG.info("Creating Zipfian Id Distirubtion...");

    final JobConf zipfJob = new JobConf(WebDataGen.class);
    final String title = fin.getName() + " -> " + fout.getName();
    zipfJob.setJobName(title);

    zipf.setJobConf(zipfJob);

    zipfJob.setOutputKeyClass(Text.class);
    zipfJob.setOutputValueClass(Text.class);

    // Map-only job.
    zipfJob.setMapperClass(CreateZipfDistrMapper.class);
    zipfJob.setNumReduceTasks(0);

    zipfJob.setInputFormat(NLineInputFormat.class);
    zipfJob.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(zipfJob, fin);
    FileOutputFormat.setOutputPath(zipfJob, fout);

    DataPaths.checkHdfsFile(fout, false);

    LOG.info("Running Job: " + title);
    LOG.info("Zipfian Sum File: " + fin);
    LOG.info("Zipfian Id distribution as Ouput: " + fout);
    RunningJob finished = JobClient.runJob(zipfJob);
    LOG.info("Finished Running Job: " + title);

    // The number of map output records is the number of virtual elements created.
    long virtualElements = finished.getCounters().getCounter(MAP_OUTPUT_RECORDS);
    LOG.info("Created " + virtualElements + " virtual zipfian elements");
    zipf.setVirtElems(virtualElements);
}
From source file:hibench.DataGenerator.java
License:Apache License
public void replaceIds(Path fcontent, Path fids, Path fjoin, ZipfRandom zipf) throws IOException { LOG.info("Replace Virtual Zipfian Ids with real Ids..."); JobConf job = new JobConf(WebDataGen.class); String jobname = fcontent.getName() + " JOIN " + fids.getName() + " -> " + fjoin.getName(); job.setJobName(jobname);/* w ww .j a v a 2s . c o m*/ job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); MultipleInputs.addInputPath(job, fids, TextInputFormat.class, TagRecordsMapper.class); MultipleInputs.addInputPath(job, fcontent, TextInputFormat.class, ReverseContentMapper.class); job.setOutputFormat(TextOutputFormat.class); // use combiner to avoid too many inputs for reducer job.setCombinerClass(ConcatTextCombiner.class); job.setReducerClass(JoinContentWithZipfReducer.class); if (zipf.reds > 0) { job.setNumReduceTasks(zipf.reds); } else { job.setNumReduceTasks(DataOptions.getMaxNumReduce()); } FileOutputFormat.setOutputPath(job, fjoin); LOG.info("Running Job: " + jobname); LOG.info("Zipfian Id distribution: " + fids); LOG.info("Content file with virtual Ids: " + fcontent); LOG.info("Joint result file: " + fjoin); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); }
From source file:hibench.DataGenerator.java
License:Apache License
public void createHtmlPages(Path dummy, HtmlConf html) throws IOException { LOG.info("Creating Html Pages..."); Path fout = new Path(dummy.getParent(), "tmp"); JobConf job = new JobConf(WebDataGen.class); String jobname = "Create html pages to " + fout.getName(); job.setJobName(jobname);//from w w w .j a va2s.c o m html.setJobConf(job); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(CreateHtmlPagesMapper.class); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, dummy); // first create result files under tmp folder FileOutputFormat.setOutputPath(job, fout); // begin from dummy file job.setInputFormat(NLineInputFormat.class); // use MultipleTextOutputFormat to produce three out files defined // in PathConf, i.e., LINK, PAGE_ZLINK_TABLE, PAGE_ZWORD_TABLE job.setOutputFormat(HtmlMultipleTextOutputFormat.class); LOG.info("Running Job: " + jobname); LOG.info("Dummy file: " + dummy); LOG.info("Multiple result Html files as <links, words, urls>"); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); // Move result files under tmp into parent path // and remove the empty tmp path finally DataPaths.moveFilesToParent(fout); }
From source file:hibench.HiveDataGenerator.java
License:Apache License
private void createRankingsTable() throws IOException { LOG.info("Creating table rankings..."); JobConf job = new JobConf(WebDataGen.class); String jobname = "Create " + paths.dname + " rankings"; job.setJobName(jobname);/*from w w w . jav a 2 s . c o m*/ job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapOutputKeyClass(Text.class); job.setCombinerClass(ConcatTextCombiner.class); job.setReducerClass(CountRankingAndReplaceIdReducer.class); if (options.reds > 0) { job.setNumReduceTasks(options.reds); } else { job.setNumReduceTasks(DataOptions.getMaxNumReduce()); } // job.setNumReduceTasks(options.agents/2); /*** * need to join result with LINK table so that to replace * url ids with real contents */ MultipleInputs.addInputPath(job, paths.getPath(DataPaths.T_LINK_PAGE), TextInputFormat.class, MyIdentityMapper.class); MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class, TagRecordsMapper.class); if (options.SEQUENCE_OUT) { job.setOutputFormat(SequenceFileOutputFormat.class); } else { job.setOutputFormat(TextOutputFormat.class); } if (null != options.codecClass) { job.set("mapred.output.compression.type", "BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.codecClass); } FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.RANKINGS)); LOG.info("Running Job: " + jobname); LOG.info("Table link-page file " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input"); LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output"); LOG.info("Ouput file " + paths.getResult(DataPaths.RANKINGS)); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); LOG.info("Cleaning temp files..."); paths.cleanTempFiles(paths.getResult(DataPaths.RANKINGS)); }
From source file:hibench.HiveDataGenerator.java
License:Apache License
private void createUserVisitsTable() throws IOException, URISyntaxException { LOG.info("Creating user visits..."); JobConf job = new JobConf(WebDataGen.class); String jobname = "Create " + paths.dname + " uservisits"; job.setJobName(jobname);/*from w w w . j ava 2 s.co m*/ /*** * Set distributed cache file for table generation, * cache files include: * 1. user agents * 2. country code and language code * 3. search keys */ DistributedCache.addCacheFile(paths.getPath(DataPaths.uagentf).toUri(), job); DistributedCache.addCacheFile(paths.getPath(DataPaths.countryf).toUri(), job); DistributedCache.addCacheFile(paths.getPath(DataPaths.searchkeyf).toUri(), job); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapOutputKeyClass(Text.class); visit.setJobConf(job); job.setInputFormat(TextInputFormat.class); MultipleInputs.addInputPath(job, paths.getPath(DataPaths.DUMMY), NLineInputFormat.class, CreateRandomAccessMapper.class); MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class, TagRecordsMapper.class); job.setCombinerClass(CreateUserVisitsCombiner.class); job.setReducerClass(CreateUserVisitsReducer.class); if (options.reds > 0) { job.setNumReduceTasks(options.reds); } else { job.setNumReduceTasks(DataOptions.getMaxNumReduce()); } // job.setNumReduceTasks(options.agents/2); if (options.SEQUENCE_OUT) { job.setOutputFormat(SequenceFileOutputFormat.class); } else { job.setOutputFormat(TextOutputFormat.class); } if (null != options.codecClass) { job.set("mapred.output.compression.type", "BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.codecClass); } FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.USERVISITS)); LOG.info("Running Job: " + jobname); LOG.info("Dummy file " + paths.getPath(DataPaths.DUMMY) + " as input"); LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output"); LOG.info("Ouput file " + 
paths.getResult(DataPaths.USERVISITS)); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); LOG.info("Cleaning temp files..."); paths.cleanTempFiles(paths.getResult(DataPaths.USERVISITS)); }
From source file:hibench.PageRankDataGenerator.java
License:Apache License
private void createPageRankNodes() throws IOException { LOG.info("Creating PageRank nodes...", null); JobConf job = new JobConf(WebDataGen.class); String jobname = "Create " + paths.dname + " pagerank nodes"; job.setJobName(jobname);//from ww w . jav a 2 s . co m job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS)); job.setInputFormat(TextInputFormat.class); if (options.PAGERANK_NODE_BALANCE) { /*** * Balance the output order of nodes, to prevent the running * of pagerank bench from potential data skew */ job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setMapperClass(BalancedLinkNodesMapper.class); job.setReducerClass(BalancedLinkNodesReducer.class); // job.setPartitionerClass(ModulusPartitioner.class); if (options.reds > 0) { job.setNumReduceTasks(options.reds); } else { job.setNumReduceTasks(DataOptions.getMaxNumReduce()); } } else { job.setMapOutputKeyClass(Text.class); job.setMapperClass(OutputLinkNodesMapper.class); job.setNumReduceTasks(0); } if (options.SEQUENCE_OUT) { job.setOutputFormat(SequenceFileOutputFormat.class); } else { job.setOutputFormat(TextOutputFormat.class); } if (null != options.codecClass) { job.set("mapred.output.compression.type", "BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.codecClass); } FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS)); LOG.info("Running Job: " + jobname); LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input"); LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output"); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); LOG.info("Cleaning temp files..."); paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS)); }