List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
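Every example on this page builds a Job with this two-argument constructor and then submits it, usually with waitForCompletion(true). For orientation, here is a minimal, self-contained driver sketch; it is not taken from any of the source files below, and it uses the identity Mapper and Reducer so it runs without any custom classes.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job(Configuration, String) copies the configuration and names the job.
        Job job = new Job(conf, "minimal-example");
        job.setJarByClass(MinimalJobDriver.class);
        job.setMapperClass(Mapper.class);   // identity mapper
        job.setReducerClass(Reducer.class); // identity reducer
        // With the default TextInputFormat, the identity map emits <LongWritable, Text>.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit and block until the job finishes; exit non-zero on failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}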
From source file:com.igalia.metamail.jobs.MessagesByTimePeriod.java
License:Open Source License
private static Job setupJob() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration config = HBaseConfiguration.create();
    Job job = new Job(config, "MessagesByTimePeriod");
    job.setJarByClass(MessagesByTimePeriod.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false); // don't set to true for MR jobs

    // Mapper
    TableMapReduceUtil.initTableMapperJob(mailsTable, // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            MessagesByTimePeriod.MessagesByTimePeriodMapper.class, Text.class, IntWritable.class, job);

    // Reducer
    job.setReducerClass(MessagesByTimePeriod.MessagesByTimePeriodReducer.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path(MessagesByTimePeriod.MAIL_OUT));

    return job;
}
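setupJob() only configures the job and hands it back; the caller is still responsible for submitting it. A hypothetical driver, assuming a main method in the same class (not shown in the source file), might look like this:

public static void main(String[] args) throws Exception {
    // Submit the configured job and block until it finishes.
    Job job = setupJob();
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}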
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    String outputReducerType = "cassandra";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "text" + i;
        getConf().set(CONF_COLUMN_NAME, columnName);

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);

        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, INPUT_COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        job.waitForCompletion(true);
    }
    return 0;
}
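This run(String[]) method is the Tool interface entry point (it calls getConf()), so the class would normally be launched through ToolRunner, which parses generic Hadoop options (-D, -conf, ...) before delegating to run(). A hypothetical main method, assuming WordCount extends Configured implements Tool (the class declaration is not shown above), would be:

public static void main(String[] args) throws Exception {
    // org.apache.hadoop.util.ToolRunner handles the generic options, then calls run().
    int exitCode = ToolRunner.run(new Configuration(), new WordCount(), args);
    System.exit(exitCode);
}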
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCountCounters.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));

    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE,
            WordCountCounters.COUNTER_COLUMN_FAMILY);

    SlicePredicate predicate = new SlicePredicate()
            .setSlice_range(new SliceRange().setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).setCount(100));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);
    return 0;
}
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/**
 * Create Job object for submitting it, with all the configuration
 *
 * @return Reference to job object.
 * @throws IOException - Exception if any
 */
protected Job createJob() throws IOException {
    String jobName = "distcp";
    String userChosenName = getConf().get("mapred.job.name");
    if (userChosenName != null)
        jobName += ": " + userChosenName;

    Job job = new Job(getConf(), jobName);
    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
    job.setJarByClass(CopyMapper.class);
    configureOutputFormat(job);

    job.setMapperClass(CopyMapper.class);
    job.setReducerClass(Reducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(CopyOutputFormat.class);

    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
    job.getConfiguration().set(DistCpConstants.CONF_LABEL_NUM_MAPS, String.valueOf(inputOptions.getMaxMaps()));

    if (inputOptions.getSslConfigurationFile() != null) {
        setupSSLConfig(job.getConfiguration());
    }

    inputOptions.appendToConf(job.getConfiguration());
    return job;
}
From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
    if (actualSeparator != null) {
        conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(mapperClass);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(mapperClass);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(PutSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}
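When BULK_OUTPUT_CONF_KEY is set, the job only writes HFiles under hfileOutPath; loading them into the table is a separate step that is not part of createSubmittableJob. A hedged sketch of that follow-up, assuming the same HBase generation as the HTable and HFileOutputFormat calls above:

// Hypothetical follow-up (not in DotImportTsv): run the job, then bulk-load the HFiles.
Job job = createSubmittableJob(conf, args);
if (job.waitForCompletion(true) && hfileOutPath != null) {
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    loader.doBulkLoad(new Path(hfileOutPath), new HTable(conf, tableName));
}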
From source file:com.javiertordable.mrif.MapReduceQuadraticSieve.java
License:Apache License
/**
 * Setup the MapReduce parameters and run it.
 *
 * Tool parses the command line arguments for us.
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    // Check the arguments. We need the integer to attempt to factor.
    if (args.length < 1) {
        System.out.println("Please indicate the integer to factor");
        LOGGER.severe("No integer to factor. Exit.");
        System.exit(1);
    }

    // Parse N and add it to the job configuration, so that the workers can
    // access it as well.
    BigInteger N = new BigInteger(args[0]);
    LOGGER.info("Attempting factorization of: " + N.toString());
    conf.set(INTEGER_TO_FACTOR_NAME, N.toString());

    // Obtain the factor base for the integer N.
    FactorBaseArray factorBase = SieveInput.factorBase(N);
    LOGGER.info("Factor base of size: " + factorBase.size());
    conf.set(FACTOR_BASE_NAME, factorBase.toString());

    // Prepare the input of the mapreduce.
    LOGGER.info("Sieve of size: " + SieveInput.fullSieveIntervalSize(N));
    try {
        // Write the full sieve interval to disk.
        SieveInput.writeFullSieveInterval(N, "input/" + INPUT_FILE_NAME);
    } catch (FileNotFoundException e) {
        System.out.println("Unable to open the file for writing.");
    } catch (IOException e) {
        System.out.println("Unable to write to the output file.");
    }

    // Configure the classes of the mapreducer
    Job job = new Job(conf, "QuadraticSieve");
    job.setJarByClass(MapReduceQuadraticSieve.class);
    job.setMapperClass(SieveMapper.class);
    job.setReducerClass(FindSquaresReducer.class);

    // Output will be two pairs of strings:
    // <"Factor1", "59">
    // <"Factor2", "101">
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path("input/"));
    FileOutputFormat.setOutputPath(job, new Path("output/"));

    // Submit the job.
    job.waitForCompletion(true);
    return 0;
}
From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, AverageJob.class.getSimpleName());
    job.setJarByClass(AverageJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Average Job");
    job.setMapperClass(AverageMapper.class);
    job.setCombinerClass(AverageCombiner.class);
    job.setReducerClass(AverageReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageMultipleOutputJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, AverageMultipleOutputJob.class.getSimpleName());
    job.setJarByClass(AverageMultipleOutputJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Multiple Output Job");
    job.setMapperClass(AverageMapper.class);
    job.setReducerClass(AverageMultipleOutputReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleOutputs.addNamedOutput(job, "greaterThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "lessThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
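The named outputs registered above ("greaterThan1000" and "lessThan1000") are only written if the reducer opens a MultipleOutputs instance and routes records to them. The actual AverageMultipleOutputReducer is not shown on this page; the following is a hypothetical sketch of that pattern, with the 1000 threshold and the averaging logic assumed for illustration.

// Hypothetical reducer sketch; types match the job configuration above.
public static class AverageMultipleOutputReducer
        extends Reducer<IntWritable, IntWritable, Text, DoubleWritable> {

    private MultipleOutputs<Text, DoubleWritable> multipleOutputs;

    @Override
    protected void setup(Context context) {
        multipleOutputs = new MultipleOutputs<Text, DoubleWritable>(context);
    }

    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        long count = 0;
        for (IntWritable value : values) {
            sum += value.get();
            count++;
        }
        double average = count == 0 ? 0.0 : (double) sum / count;
        // Route each record to one of the named outputs registered in run().
        String namedOutput = average > 1000 ? "greaterThan1000" : "lessThan1000";
        multipleOutputs.write(namedOutput, new Text(key.toString()), new DoubleWritable(average));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        multipleOutputs.close();
    }
}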
From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.BloomFilterJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, BloomFilterJob.class.getSimpleName());
    job.setJarByClass(BloomFilterJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample BloomFilter Job");
    job.setMapperClass(BloomFilterMapper.class);
    job.setReducerClass(BloomFilterReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);

    /*
     * We want our reducer to output the final BloomFilter as a binary file. I think
     * Hadoop doesn't have this format [check later], so using NullOutputFormat.class.
     *
     * In general life gets a little more dangerous when you deviate from MapReduce's input/output
     * framework and start working with your own files. Your tasks are no longer guaranteed to be idempotent
     * and you'll need to understand how various failure scenarios can affect your tasks. For example, your files
     * may only be partially written when some tasks are restarted. Our example here is safe(r) because all the file
     * operations take place together only once in the close() method and in only one reducer. A more
     * careful/paranoid implementation would check each individual file operation more closely.
     */
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BloomFilter.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
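The comment above describes confining all file operations to a single reducer that writes the final BloomFilter once. The real BloomFilterReducer is not shown on this page; the following is a hypothetical sketch of that pattern using Hadoop's org.apache.hadoop.util.bloom.BloomFilter, with the filter parameters and output path assumed purely for illustration.

// Hypothetical reducer sketch: OR the partial filters together, then serialize the
// result to a single HDFS file during cleanup() (the one-time file I/O the comment describes).
public static class BloomFilterReducer extends Reducer<Text, BloomFilter, Text, BloomFilter> {

    // Vector size, hash count, and hash type are illustrative values only.
    private final BloomFilter combined = new BloomFilter(10000, 6, Hash.MURMUR_HASH);

    @Override
    protected void reduce(Text key, Iterable<BloomFilter> values, Context context) {
        for (BloomFilter filter : values) {
            combined.or(filter); // merge the per-mapper partial filters
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException {
        // All file operations happen once, here, in the single reducer.
        Configuration conf = context.getConfiguration();
        FileSystem fs = FileSystem.get(conf);
        FSDataOutputStream out = fs.create(new Path("bloomfilter/final.bf")); // assumed path
        try {
            combined.write(out); // BloomFilter implements Writable
        } finally {
            out.close();
        }
    }
}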
From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.ChainJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, ChainJob.class.getSimpleName());
    job.setJobName("Sample Chain Job");
    job.setJarByClass(ChainJob.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    ChainMapper.addMapper(job, ReverseMapper.class, Text.class, Text.class, Text.class, Text.class,
            new Configuration(false));
    ChainMapper.addMapper(job, AverageMapper.class, Text.class, Text.class, Text.class, AverageWritable.class,
            new Configuration(false));
    ChainReducer.setReducer(job, AverageReducer.class, Text.class, AverageWritable.class, Text.class,
            DoubleWritable.class, new Configuration(false));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}