List of usage examples for org.apache.hadoop.mapreduce.Job#setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException
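By default, the map output key and value classes are the same as the job's final output classes set via setOutputKeyClass/setOutputValueClass, so setMapOutputKeyClass only needs to be called when the mapper emits a different type than the reducer. It must be called before the job is submitted; calling it on a running job throws IllegalStateException. The following is a minimal illustrative sketch, not taken from the examples below; AverageLength, LengthMapper, and AvgReducer are hypothetical names introduced here.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical example: average word length per word.
public class AverageLength {

    // Mapper emits <Text, IntWritable>: word -> its length.
    public static class LengthMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String word : value.toString().split("\\s+")) {
                if (!word.isEmpty()) {
                    context.write(new Text(word), new IntWritable(word.length()));
                }
            }
        }
    }

    // Reducer emits <Text, DoubleWritable>: word -> average length.
    public static class AvgReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0, count = 0;
            for (IntWritable v : values) {
                sum += v.get();
                count++;
            }
            context.write(key, new DoubleWritable((double) sum / count));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "average length");
        job.setJarByClass(AverageLength.class);
        job.setMapperClass(LengthMapper.class);
        job.setReducerClass(AvgReducer.class);
        // The map output value type (IntWritable) differs from the final
        // output value type (DoubleWritable), so the map output classes
        // must be declared explicitly before submission.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The examples below show the same pattern in real projects; note how each one pairs setMapOutputKeyClass/setMapOutputValueClass with the final output classes.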
From source file:com.phantom.hadoop.examples.DBCountPageView.java
License:Apache License
@Override
// Usage: DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;
    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }
    initialize(driverClassName, url);
    Configuration conf = getConf();
    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = new Job(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}
From source file:com.phantom.hadoop.examples.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.philiphubbard.digraph.MRBuildVertices.java
License:Open Source License
public static void setupJob(Job job, Path inputPath, Path outputPath) throws IOException {
    job.setJarByClass(MRBuildVertices.class);
    job.setMapperClass(MRBuildVertices.Mapper.class);
    job.setCombinerClass(MRBuildVertices.Reducer.class);
    job.setReducerClass(MRBuildVertices.Reducer.class);
    // map output and final output share the same key/value types
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
}
From source file:com.philiphubbard.digraph.MRCompressChains.java
License:Open Source License
public static void setupIterationJob(Job job, Path inputPathOrig, Path outputPathOrig) throws IOException {
    job.setJarByClass(MRCompressChains.class);
    job.setMapperClass(MRCompressChains.Mapper.class);
    job.setCombinerClass(MRCompressChains.Reducer.class);
    job.setReducerClass(MRCompressChains.Reducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // after the first iteration, read the previous iteration's output
    Path inputPath;
    if (iter == 0)
        inputPath = inputPathOrig;
    else
        inputPath = new Path(outputPathOrig.toString() + (iter - 1));
    Path outputPath = new Path(outputPathOrig.toString() + iter);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
}
From source file:com.pinterest.terrapin.hadoop.examples.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    TerrapinUploaderOptions options = TerrapinUploaderOptions.initFromSystemProperties();
    // Create the job, setting the input path, the map output key and value
    // classes, and the mapper and reducer.
    Job job = Job.getInstance(super.getConf(), "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    // Wrap in a Hadoop loader job to write the data to a Terrapin fileset.
    return new HadoopJobLoader(options, job).waitForCompletion() ? 0 : 1;
}
From source file:com.pivotal.gfxd.demo.mapreduce.LoadAverage.java
License:Open Source License
/**
 * This method assumes fs.default.name is passed as args[0].
 *
 * @param args
 * @return
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("Starting MapReduce Job");
    GfxdDataSerializable.initTypes();
    Configuration conf = new Configuration();
    //Configuration conf = getConf();

    Path outputPath = new Path("/output");
    String hdfsHomeDir = "/sensorStore"; //args[1];
    String tableName = "RAW_SENSOR";
    String outTableName = "LOAD_AVERAGES_SHADOW";
    String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");

    // conf.set("fs.default.name", args[0]);
    String hdfsUrl = conf.get("fs.defaultFS");
    FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

    // Retrieve last run timestamp
    long now = System.currentTimeMillis();
    long lastStart = getLastStart(hdfs);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
    conf.setLong(RowInputFormat.END_TIME_MILLIS, now);
    conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
    conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

    // print config to troubleshoot possible issues
    // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

    Job job = Job.getInstance(conf, "LoadAverage");
    job.setNumReduceTasks(1);
    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setJarByClass(LoadAverage.class);
    job.setMapperClass(LoadAverageMapper.class);
    job.setReducerClass(LoadAverageReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LoadKey.class);

    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(RowOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(LoadAverageModel.class);

    boolean jobSuccess = job.waitForCompletion(true);
    if (jobSuccess) {
        writeLastStart(hdfs, now);
    }
    return jobSuccess ? 0 : 1;
}
From source file:com.pivotal.hawq.mapreduce.parquet.HAWQParquetOutputDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "HAWQParquetOutputFormat");
    job.setJarByClass(HAWQParquetOutputDriver.class);
    job.setOutputFormatClass(HAWQParquetOutputFormat.class);

    /*
    // int2 int4 int8
    HAWQSchema schema = new HAWQSchema("t_int",
        HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT2, "col_short"),
        HAWQSchema.optional_field(HAWQPrimitiveField.PrimitiveType.INT4, "col_int"),
        HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT8, "col_long"));
    job.setMapperClass(WriteIntMapper.class);
    */

    /*
    // varchar
    HAWQSchema schema = new HAWQSchema("t_varchar",
        HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.VARCHAR, "col_varchar"));
    job.setMapperClass(WriteVarcharMapper.class);
    */

    /*
    // float4 float8
    HAWQSchema schema = new HAWQSchema("t_floating",
        HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT4, "col_float"),
        HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT8, "col_long"));
    job.setMapperClass(WriteFloatingNumberMapper.class);
    */

    // boolean
    // HAWQSchema schema = new HAWQSchema("t_boolean",
    //     HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BOOL, "col_bool"));
    // job.setMapperClass(WriteBooleanMapper.class);

    // byte array
    HAWQSchema schema = new HAWQSchema("t_bytea",
        HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BYTEA, "col_bytea"));
    job.setMapperClass(WriteByteArrayMapper.class);

    HAWQParquetOutputFormat.setSchema(job, schema);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    HAWQParquetOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Void.class);
    job.setMapOutputValueClass(HAWQRecord.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.placeiq.piqconnect.BlocksBuilder.java
License:Apache License
protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    Runner.setCompression(job);
    return job;
}
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);
    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    setCompression(job);
    return job;
}
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);
    setCompression(job);
    return job;
}