List of usage examples for org.apache.hadoop.mapreduce.Job.setInputFormatClass
public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException
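setInputFormatClass selects the InputFormat implementation that splits the job's input and turns it into the key/value pairs fed to the mappers (TextInputFormat is the default). The IllegalStateException is thrown if the method is called after the job has been submitted, so it must be invoked while the job is still being defined. Before the source-file examples, here is a minimal, self-contained sketch of typical usage; the class name SetInputFormatExample and the argument paths are illustrative placeholders, not taken from any of the examples below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetInputFormatExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setInputFormatClass-example");
        job.setJarByClass(SetInputFormatExample.class);

        // The InputFormat decides how the input is split and how records are
        // read. It must be set while the job is still in the DEFINE state;
        // after submission, setInputFormatClass throws IllegalStateException.
        job.setInputFormatClass(TextInputFormat.class);

        // Identity mapper: TextInputFormat produces LongWritable offsets and
        // Text lines, which are passed through unchanged.
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        TextOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The same pattern appears in every example below: construct the Job, pick the InputFormat, then wire up the mapper, reducer, and output classes before submitting.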
From source file:com.pivotal.gfxd.demo.mapreduce.LoadAverage.java
License:Open Source License
/**
 * This method assumes fs.default.name is passed as args[0].
 *
 * @param args
 * @return
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("Starting MapReduce Job");
    GfxdDataSerializable.initTypes();
    Configuration conf = new Configuration();
    //Configuration conf = getConf();

    Path outputPath = new Path("/output");
    String hdfsHomeDir = "/sensorStore"; //args[1];
    String tableName = "RAW_SENSOR";
    String outTableName = "LOAD_AVERAGES_SHADOW";
    String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");

    // conf.set("fs.default.name", args[0]);
    String hdfsUrl = conf.get("fs.defaultFS");
    FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

    // Retrieve last run timestamp
    long now = System.currentTimeMillis();
    long lastStart = getLastStart(hdfs);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
    conf.setLong(RowInputFormat.END_TIME_MILLIS, now);
    conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
    conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

    // print config to troubleshoot possible issues
    // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

    Job job = Job.getInstance(conf, "LoadAverage");
    job.setNumReduceTasks(1);
    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setJarByClass(LoadAverage.class);
    job.setMapperClass(LoadAverageMapper.class);
    job.setReducerClass(LoadAverageReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LoadKey.class);

    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(RowOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(LoadAverageModel.class);

    boolean jobSuccess = job.waitForCompletion(true);
    if (jobSuccess) {
        writeLastStart(hdfs, now);
    }
    return jobSuccess ? 0 : 1;
}
From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        printUsage();
    }

    String dbURL = args[0];
    String tableName = args[1];
    String outputPath = args[2];
    String username = (args.length >= 4) ? args[3] : null;
    String password = (args.length >= 5) ? args[4] : null;

    Job job = new Job(getConf(), "HAWQInputFormatDemo");
    job.setJarByClass(HAWQInputFormatDemo.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), dbURL, username, password, tableName);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();
    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}
From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo2.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }

    String metadataFile = args[0];
    String outputPath = args[1];

    Job job = new Job(getConf(), "HAWQInputFormatDemo2");
    job.setJarByClass(HAWQInputFormatDemo2.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();
    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}
From source file:com.pivotal.hawq.mapreduce.MapReduceClusterDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3 && args.length != 4) {
        System.err.printf("Usage: %s [generic options] <tableName> <dburl> <output> [<mapper_classname>]\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String tableName = args[0];
    String dbUrl = args[1];
    Path outputPath = new Path(args[2]);
    Class<? extends Mapper> mapperClass = (args.length == 3) ? HAWQTableMapper.class
            : (Class<? extends Mapper>) Class.forName(args[3]);

    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf(), "job_read_" + tableName);
    job.setJarByClass(MapReduceClusterDriver.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), dbUrl, null, null, tableName);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(mapperClass);
    job.setReducerClass(HAWQTableReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pivotal.hawq.mapreduce.MapReduceLocalDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2 && args.length != 3) {
        System.err.printf("Usage: %s [generic options] <metadata_file> <output> [<mapper_classname>]\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String metadataFile = args[0];
    Path outputPath = new Path(args[1]);
    Class<? extends Mapper> mapperClass = (args.length == 2) ? HAWQTableMapper.class
            : (Class<? extends Mapper>) Class.forName(args[2]);

    // delete previous output
    FileSystem fs = FileSystem.getLocal(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(MapReduceLocalDriver.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(mapperClass);
    job.setReducerClass(HAWQTableReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pivotal.hawq.mapreduce.pt.HAWQInputFormatPerformanceTest_TPCH.java
License:Apache License
private int runMapReduceJob() throws Exception {
    Path outputPath = new Path("/output");

    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(HAWQInputFormatPerformanceTest_TPCH.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    long startTime = System.currentTimeMillis();
    HAWQInputFormat.setInput(job.getConfiguration(), MRFormatConfiguration.TEST_DB_URL, null, null, tableName);
    metadataExtractTime = System.currentTimeMillis() - startTime;

    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(TPCHTableMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Void.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);

    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    setCompression(job);
    return job;
}
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);
    setCompression(job);
    return job;
}
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob3(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.FinalResultBuilder");
    job.setJarByClass(Runner.class);

    job.setMapperClass(FinalResultBuilder._Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(VLongWritable.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);
    setCompression(job);
    return job;
}
From source file:com.rim.logdriver.util.Cat.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}