List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath
public static void setOutputPath(Job job, Path outputDir)
From source file:com.igalia.metamail.jobs.MessagesByTimePeriod.java
License:Open Source License
private static Job setupJob() throws IOException, InterruptedException, ClassNotFoundException { Configuration config = HBaseConfiguration.create(); Job job = new Job(config, "MessagesByTimePeriod"); job.setJarByClass(MessagesByTimePeriod.class); Scan scan = new Scan(); scan.setCaching(500);//from w w w.j av a2s .co m scan.setCacheBlocks(false); // don't set to true for MR jobs // Mapper TableMapReduceUtil.initTableMapperJob(mailsTable, // input HBase table name scan, // Scan instance to control CF and attribute selection MessagesByTimePeriod.MessagesByTimePeriodMapper.class, Text.class, IntWritable.class, job); // Reducer job.setReducerClass(MessagesByTimePeriod.MessagesByTimePeriodReducer.class); job.setNumReduceTasks(1); FileOutputFormat.setOutputPath(job, new Path(MessagesByTimePeriod.MAIL_OUT)); return job; }
From source file:com.igalia.wordcount.WordCount.java
License:Open Source License
/**
 * Runs a classic word-count over fixed HDFS paths and blocks until it finishes.
 *
 * @param arg0 unused command-line arguments
 * @return 0 on success, 1 on failure
 */
public int run(String[] arg0) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(WordCount.class);
    job.setJobName("wordcount");

    // Output types shared by mapper and reducer.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // NOTE(review): input/output paths are hard-coded; presumably fine for a demo.
    FileInputFormat.setInputPaths(job, new Path("/tmp/wordcount/in"));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/wordcount/out"));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCount.java
License:Apache License
/**
 * Runs one word-count job per test column, reading from Cassandra. Results go
 * either to the filesystem or back into Cassandra depending on the first
 * argument ({@code OUTPUT_REDUCER_VAR}=filesystem|cassandra).
 *
 * @param args optional; args[0] may select the output reducer type
 * @return always 0
 */
public int run(String[] args) throws Exception {
    String outputReducerType = "cassandra";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] parts = args[0].split("=");
        if (parts != null && parts.length == 2)
            outputReducerType = parts[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "text" + i;
        getConf().set(CONF_COLUMN_NAME, columnName);

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            // Filesystem output: plain Text/IntWritable pairs, one directory per column.
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            // Cassandra output: reducer writes mutations into the output column family.
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
        }

        // Input always comes from Cassandra, restricted to the current column.
        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, INPUT_COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        job.waitForCompletion(true);
    }
    return 0;
}
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCountCounters.java
License:Apache License
public int run(String[] args) throws Exception { Job job = new Job(getConf(), "wordcountcounters"); job.setJarByClass(WordCountCounters.class); job.setMapperClass(SumMapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX)); job.setInputFormatClass(ColumnFamilyInputFormat.class); ConfigHelper.setRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCountCounters.COUNTER_COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate() .setSlice_range(new SliceRange().setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER) .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).setCount(100)); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); job.waitForCompletion(true);// ww w .j a va 2s . c o m return 0; }
From source file:com.impetus.code.examples.hadoop.mapred.earthquake.EarthQuakeAnalyzer.java
License:Apache License
/**
 * Entry point: runs the earthquake analysis job over fixed resource paths and
 * exits with the job's success status.
 */
public static void main(String[] args) throws Throwable {
    Job job = new Job();
    job.setJarByClass(EarthQuakeAnalyzer.class);

    FileInputFormat.addInputPath(job, new Path("src/main/resources/eq/input"));
    FileOutputFormat.setOutputPath(job, new Path("src/main/resources/eq/output"));

    job.setMapperClass(EarthQuakeMapper.class);
    job.setReducerClass(EarthQuakeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Exit code 0 on success, 1 on failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.impetus.code.examples.hadoop.mapred.weather.MaxTemp.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MaxTemperature <input path> <output path>"); System.exit(-1);//from w w w .jav a 2s .c om } Job job = new Job(); job.setJarByClass(MaxTemp.class); job.setJobName("Max temperature"); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(MaxTempMapper.class); job.setReducerClass(MaxTempReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); }
From source file:com.intel.hadoop.hbase.dot.KEY.java
License:Apache License
private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass, String mrTableName) throws IOException, ClassNotFoundException, InterruptedException { this.conf.set(KEY.INPUT_TABLE, mrTableName); Job job = new Job(this.conf); job.setJobName("Generate Data for [" + mrTableName + "]"); job.setJarByClass(GenerateTestTable.class); job.setInputFormatClass(inputFormatClass); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(LongWritable.class); FileSystem fs = FileSystem.get(conf); Path path = new Path("/tmp", "tempout"); fs.delete(path, true);//from w w w.j av a 2 s. c o m FileOutputFormat.setOutputPath(job, path); job.setMapperClass(mapperClass); job.setNumReduceTasks(0); TableMapReduceUtil.addDependencyJars(job); // Add a Class from the hbase.jar so it gets registered too. TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class); TableMapReduceUtil.initCredentials(job); job.waitForCompletion(true); }
From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java
License:Apache License
/** * Sets up the actual job./* w ww . j av a2 s. c om*/ * * @param conf The current configuration. * @param args The command line parameters. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException, ClassNotFoundException { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. String actualSeparator = conf.get(SEPARATOR_CONF_KEY); if (actualSeparator != null) { conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes())); } // See if a non-default Mapper was set String mapperClassName = conf.get(MAPPER_CONF_KEY); Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER; String tableName = args[0]; Path inputDir = new Path(args[1]); Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(mapperClass); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(mapperClass); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); if (hfileOutPath != null) { if (!doesTableExist(tableName)) { createTable(conf, tableName); } HTable table = new HTable(conf, tableName); job.setReducerClass(PutSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); HFileOutputFormat.configureIncrementalLoad(job, table); } else { // No reducers. Just write straight to table. Call initTableReducerJob // to set up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); } TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /* Guava used by TsvParser */); return job; }
From source file:com.j.distributed.counter.CounterJob.java
/**
 * Runs the distributed counter job: count words from options[0] into options[1].
 *
 * @param options options[0] = input path, options[1] = output path
 * @return 0 on success, 1 on failure
 */
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());

    job.setMapperClass(CounterMapper.class);
    // The reducer is associative/commutative here, so it doubles as the combiner.
    job.setCombinerClass(CounterReducer.class);
    job.setReducerClass(CounterReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(options[0]));
    FileOutputFormat.setOutputPath(job, new Path(options[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.j.distributed.sorter.SorterJob.java
/**
 * Runs the sorter job: sorts counter output (options[1]) by count, descending,
 * into options[2].
 *
 * @param options options[1] = input path, options[2] = output path
 * @return 0 on success, 1 on failure
 */
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());

    job.setMapperClass(SorterMapper.class);
    job.setCombinerClass(SorterReducer.class);
    job.setReducerClass(SorterReducer.class);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    // Largest counts first: sort keys with the decreasing LongWritable comparator.
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    FileInputFormat.addInputPath(job, new Path(options[1]));
    FileOutputFormat.setOutputPath(job, new Path(options[2]));

    return job.waitForCompletion(true) ? 0 : 1;
}