List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat
public void setOutputFormat(Class<? extends OutputFormat> theClass)
From source file:com.dynamicalsoftware.hadoop.mapreduce.SanFranciscoCrime.java
License:Apache License
private static void generate(String name, Class mapper, String input, String output) throws IOException { JobConf conf = new JobConf(SanFranciscoCrime.class); conf.setJobName(name);/*w ww . j a va 2s .c o m*/ conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(mapper); conf.setReducerClass(ReduceByWeek.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf); }
From source file:com.dynamicalsoftware.hadoop.mapreduce.SanFranciscoCrimePrepOlap.java
License:Apache License
/** * sets up and runs the hadoop map/reduce job itself * @param name contains the name of the job itself * @param mapper identified which mapper class to use * @param input is the fully qualified path to the raw crime data * @param output is the fully qualified path to where the generated data should reside * @throws IOException/*from w ww .j av a 2s . c o m*/ */ private static void generate(String name, Class mapper, String input, String output) throws IOException { JobConf conf = new JobConf(SanFranciscoCrimePrepOlap.class); conf.setJobName(name); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(mapper); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf); }
From source file:com.ebay.nest.FormattedSequenceFile.java
License:Apache License
@Override public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setOutputFormat(SequenceFileOutputFormat.class); }
From source file:com.example.hadoop.mapreduce.test.MapReduceTest.java
License:Open Source License
public static void main(String[] args) throws IOException { String input = HDFS_PATH + "/input/README.txt"; String input2 = HDFS_PATH + "/input/README2.txt"; String output = HDFS_PATH + "/test/output"; // ?mapreduce??? if (HdfsClient.exists(output)) { HdfsClient.rm(output);// w w w.java 2s . c om } JobConf conf = new JobConf(MapReduceTest.class); conf.setJobName("MapReduceTest"); conf.addResource("classpath:/hadoop/core-site.xml"); conf.addResource("classpath:/hadoop/hdfs-site.xml"); conf.addResource("classpath:/hadoop/mapred-site.xml"); // mapper conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); // reducer conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); // mapper conf.setMapperClass(MapperTest.class); // combiner?????mapper??reducer? conf.setCombinerClass(ReducerTest.class); // reducer conf.setReducerClass(ReducerTest.class); // MapReduce? conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); // MapReduce? FileInputFormat.setInputPaths(conf, new Path[] { new Path(input), new Path(input2) }); // MapReduce? FileOutputFormat.setOutputPath(conf, new Path(output)); try { JobClient.runJob(conf); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.facebook.LinkBench.LinkBenchDriverMR.java
License:Apache License
/** * create JobConf for map reduce job/*from w w w. j a va 2 s. co m*/ * @param currentphase LOAD or REQUEST * @param nmappers number of mappers (loader or requester) */ private JobConf createJobConf(int currentphase, int nmappers) { final JobConf jobconf = new JobConf(getConf(), getClass()); jobconf.setJobName("LinkBench MapReduce Driver"); if (USE_INPUT_FILES) { jobconf.setInputFormat(SequenceFileInputFormat.class); } else { jobconf.setInputFormat(LinkBenchInputFormat.class); } jobconf.setOutputKeyClass(IntWritable.class); jobconf.setOutputValueClass(LongWritable.class); jobconf.setOutputFormat(SequenceFileOutputFormat.class); if (currentphase == LOAD) { jobconf.setMapperClass(LoadMapper.class); } else { //REQUEST jobconf.setMapperClass(RequestMapper.class); } jobconf.setNumMapTasks(nmappers); jobconf.setReducerClass(LoadRequestReducer.class); jobconf.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. jobconf.setSpeculativeExecution(false); return jobconf; }
From source file:com.firewallid.io.HBaseWrite.java
public void save(String tableName, JavaPairRDD<String, String> savePairRDD, String destColumn) throws IOException { /* Check hbase table */ if (!HBaseTableUtils.istableExists(tableName)) { throw new TableNotFoundException(); }/*www . ja v a2 s .co m*/ /* Check column family */ if (!HBaseTableUtils.isFamilyExists(tableName, destColumn.split(":")[0])) { throw new NoSuchColumnFamilyException(); } /* Save to HBase */ JobConf jobConf = new JobConf(); jobConf.setOutputFormat(TableOutputFormat.class); jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName); savePairRDD.mapToPair((Tuple2<String, String> t) -> convertRowToPut(t._1, destColumn, t._2)) .filter((Tuple2<ImmutableBytesWritable, Put> t1) -> t1 != null).saveAsHadoopDataset(jobConf); }
From source file:com.firewallid.io.HBaseWrite.java
public void save(String tableName, JavaPairRDD<String, Map<String, String>> savePairRDD, List<String> destFamilys) throws IOException { /* Check hbase table */ if (!HBaseTableUtils.istableExists(tableName)) { throw new TableNotFoundException(); }//from w ww .ja va2s . c o m /* Check column family */ if (!HBaseTableUtils.isFamilyExists(tableName, destFamilys)) { throw new NoSuchColumnFamilyException(); } /* Save to HBase */ JobConf jobConf = new JobConf(); jobConf.setOutputFormat(TableOutputFormat.class); jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName); savePairRDD.mapToPair((Tuple2<String, Map<String, String>> t) -> convertRowToPut(t)) .filter((Tuple2<ImmutableBytesWritable, Put> t1) -> t1 != null).saveAsHadoopDataset(jobConf); }
From source file:com.github.gaoyangthu.demo.mapred.Grep.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 3) { System.out.println("Grep <inDir> <outDir> <regex> [<group>]"); ToolRunner.printGenericCommandUsage(System.out); return -1; }// www . j a v a2 s . c om Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); JobConf grepJob = new JobConf(getConf(), Grep.class); try { grepJob.setJobName("grep-search"); FileInputFormat.setInputPaths(grepJob, args[0]); grepJob.setMapperClass(RegexMapper.class); grepJob.set("mapred.mapper.regex", args[2]); if (args.length == 4) grepJob.set("mapred.mapper.regex.group", args[3]); grepJob.setCombinerClass(LongSumReducer.class); grepJob.setReducerClass(LongSumReducer.class); FileOutputFormat.setOutputPath(grepJob, tempDir); grepJob.setOutputFormat(SequenceFileOutputFormat.class); grepJob.setOutputKeyClass(Text.class); grepJob.setOutputValueClass(LongWritable.class); JobClient.runJob(grepJob); JobConf sortJob = new JobConf(Grep.class); sortJob.setJobName("grep-sort"); FileInputFormat.setInputPaths(sortJob, tempDir); sortJob.setInputFormat(SequenceFileInputFormat.class); sortJob.setMapperClass(InverseMapper.class); sortJob.setNumReduceTasks(1); // write a single file FileOutputFormat.setOutputPath(sortJob, new Path(args[1])); sortJob.setOutputKeyComparatorClass // sort by decreasing freq (LongWritable.DecreasingComparator.class); JobClient.runJob(sortJob); } finally { FileSystem.get(grepJob).delete(tempDir, true); } return 0; }
From source file:com.github.gaoyangthu.demo.mapred.PiEstimator.java
License:Apache License
/** * Run a map/reduce job for estimating Pi. * * @return the estimated value of Pi//from ww w . ja v a2 s . co m */ public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException { //setup job conf jobConf.setJobName(PiEstimator.class.getSimpleName()); jobConf.setInputFormat(SequenceFileInputFormat.class); jobConf.setOutputKeyClass(BooleanWritable.class); jobConf.setOutputValueClass(LongWritable.class); jobConf.setOutputFormat(SequenceFileOutputFormat.class); jobConf.setMapperClass(PiMapper.class); jobConf.setNumMapTasks(numMaps); jobConf.setReducerClass(PiReducer.class); jobConf.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. jobConf.setSpeculativeExecution(false); //setup input/output directories final Path inDir = new Path(TMP_DIR, "in"); final Path outDir = new Path(TMP_DIR, "out"); FileInputFormat.setInputPaths(jobConf, inDir); FileOutputFormat.setOutputPath(jobConf, outDir); final FileSystem fs = FileSystem.get(jobConf); if (fs.exists(TMP_DIR)) { throw new IOException( "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first."); } if (!fs.mkdirs(inDir)) { throw new IOException("Cannot create input directory " + inDir); } try { //generate an input file for each map task for (int i = 0; i < numMaps; ++i) { final Path file = new Path(inDir, "part" + i); final LongWritable offset = new LongWritable(i * numPoints); final LongWritable size = new LongWritable(numPoints); final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class, LongWritable.class, CompressionType.NONE); try { writer.append(offset, size); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i); } //start a map/reduce job System.out.println("Starting Job"); final long startTime = System.currentTimeMillis(); JobClient.runJob(jobConf); final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Job Finished in " + duration + " seconds"); //read outputs Path inFile = new Path(outDir, "reduce-out"); LongWritable numInside = new LongWritable(); LongWritable numOutside = new LongWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf); try { reader.next(numInside, numOutside); } finally { reader.close(); } //compute estimated value return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())) .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints)); } finally { fs.delete(TMP_DIR, true); } }
From source file:com.github.gaoyangthu.demo.mapred.terasort.TeraGen.java
License:Apache License
/** * @param args the cli arguments/*w ww . j a v a 2s. com*/ */ public int run(String[] args) throws IOException { JobConf job = (JobConf) getConf(); setNumberOfRows(job, Long.parseLong(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraGen"); job.setJarByClass(TeraGen.class); job.setMapperClass(SortGenMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(RangeInputFormat.class); job.setOutputFormat(TeraOutputFormat.class); JobClient.runJob(job); return 0; }