List of usage examples for org.apache.hadoop.mapreduce Job setJarByClass
public void setJarByClass(Class<?> cls)
From source file:com.impetus.code.examples.hadoop.mapred.weather.MaxTemp.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MaxTemperature <input path> <output path>"); System.exit(-1);// www. ja v a 2 s. c o m } Job job = new Job(); job.setJarByClass(MaxTemp.class); job.setJobName("Max temperature"); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(MaxTempMapper.class); job.setReducerClass(MaxTempReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); }
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/**
 * Builds the job object that will be submitted, with all of its configuration applied.
 *
 * <p>The job name is "distcp"; when the configuration defines {@code mapred.job.name}, that
 * value is appended after a ": " separator.
 *
 * @return the configured job
 * @throws IOException if creating or configuring the job fails
 */
protected Job createJob() throws IOException {
    final String userChosenName = getConf().get("mapred.job.name");
    final String jobName = (userChosenName == null)
            ? "distcp"
            : "distcp" + ": " + userChosenName;

    final Job job = new Job(getConf(), jobName);
    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
    job.setJarByClass(CopyMapper.class);
    configureOutputFormat(job);

    // Mapper and reducer.
    job.setMapperClass(CopyMapper.class);
    job.setReducerClass(Reducer.class);

    // Intermediate and final key/value types.
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(CopyOutputFormat.class);

    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
    job.getConfiguration().set(
            DistCpConstants.CONF_LABEL_NUM_MAPS,
            String.valueOf(inputOptions.getMaxMaps()));

    final boolean sslConfigured = inputOptions.getSslConfigurationFile() != null;
    if (sslConfigured) {
        setupSSLConfig(job.getConfiguration());
    }

    inputOptions.appendToConf(job.getConfiguration());
    return job;
}
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
protected Job createJob(Path inputPath, long totalSize) throws IOException { String jobName = getName();//from w w w . j ava2 s . com Configuration conf = currentCluster.getHadoopConf(); conf.set(ConduitConstants.AUDIT_ENABLED_KEY, System.getProperty(ConduitConstants.AUDIT_ENABLED_KEY)); Job job = new Job(conf); job.setJobName(jobName); // DistributedCache.addFileToClassPath(inputFormatJarDestPath, // job.getConfiguration()); job.getConfiguration().set("tmpjars", inputFormatJarDestPath.toString() + "," + auditUtilJarDestPath.toString()); LOG.debug("Adding file [" + inputFormatJarDestPath + "] to distributed cache"); job.setInputFormatClass(UniformSizeInputFormat.class); Class<? extends Mapper<Text, FileStatus, NullWritable, Text>> mapperClass = getMapperClass(); job.setJarByClass(mapperClass); job.setMapperClass(mapperClass); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); // setting identity reducer job.setReducerClass(Reducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, tmpCounterOutputPath); job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false"); job.getConfiguration().set(LOCALSTREAM_TMP_PATH, tmpPath.toString()); job.getConfiguration().set(SRC_FS_DEFAULT_NAME_KEY, srcCluster.getHadoopConf().get(FS_DEFAULT_NAME_KEY)); // set configurations needed for UniformSizeInputFormat int numMaps = getNumMapsForJob(totalSize); job.getConfiguration().setInt(DistCpConstants.CONF_LABEL_NUM_MAPS, numMaps); job.getConfiguration().setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalSize); job.getConfiguration().set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, inputPath.toString()); LOG.info("Expected number of maps [" + numMaps + "] Total data size [" + totalSize + "]"); return job; }
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
private Job createJob(Path inputPath) throws IOException { String jobName = "localstream"; Configuration conf = cluster.getHadoopConf(); Job job = new Job(conf); job.setJobName(jobName);//w w w . j a va 2 s . c om KeyValueTextInputFormat.setInputPaths(job, inputPath); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setJarByClass(CopyMapper.class); job.setMapperClass(CopyMapper.class); job.setNumReduceTasks(0); job.setOutputFormatClass(NullOutputFormat.class); job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false"); job.getConfiguration().set("localstream.tmp.path", tmpPath.toString()); return job; }
From source file:com.intel.hadoop.hbase.dot.KEY.java
License:Apache License
private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass, String mrTableName) throws IOException, ClassNotFoundException, InterruptedException { this.conf.set(KEY.INPUT_TABLE, mrTableName); Job job = new Job(this.conf); job.setJobName("Generate Data for [" + mrTableName + "]"); job.setJarByClass(GenerateTestTable.class); job.setInputFormatClass(inputFormatClass); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(LongWritable.class); FileSystem fs = FileSystem.get(conf); Path path = new Path("/tmp", "tempout"); fs.delete(path, true);// w w w . j a v a2 s . c o m FileOutputFormat.setOutputPath(job, path); job.setMapperClass(mapperClass); job.setNumReduceTasks(0); TableMapReduceUtil.addDependencyJars(job); // Add a Class from the hbase.jar so it gets registered too. TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class); TableMapReduceUtil.initCredentials(job); job.waitForCompletion(true); }
From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java
License:Apache License
/** * Sets up the actual job.//ww w .jav a 2 s. com * * @param conf The current configuration. * @param args The command line parameters. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException, ClassNotFoundException { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. String actualSeparator = conf.get(SEPARATOR_CONF_KEY); if (actualSeparator != null) { conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes())); } // See if a non-default Mapper was set String mapperClassName = conf.get(MAPPER_CONF_KEY); Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER; String tableName = args[0]; Path inputDir = new Path(args[1]); Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(mapperClass); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(mapperClass); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); if (hfileOutPath != null) { if (!doesTableExist(tableName)) { createTable(conf, tableName); } HTable table = new HTable(conf, tableName); job.setReducerClass(PutSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); HFileOutputFormat.configureIncrementalLoad(job, table); } else { // No reducers. Just write straight to table. Call initTableReducerJob // to set up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); } TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /* Guava used by TsvParser */); return job; }
From source file:com.j.distributed.counter.CounterJob.java
/**
 * Configures the counter job, runs it and waits for the outcome.
 *
 * @param options {@code options[0]} is the input path, {@code options[1]} the output path
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be set up or run
 */
@Override
public int run(String... options) throws Exception {
    final Class<?> jobClass = getClass();
    final Job job = Job.getInstance(getConf(), jobClass.toString());
    job.setJarByClass(jobClass);

    // The reducer doubles as the combiner.
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(CounterReducer.class);
    job.setReducerClass(CounterReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(options[0]));
    FileOutputFormat.setOutputPath(job, new Path(options[1]));

    final boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
From source file:com.j.distributed.sorter.SorterJob.java
/**
 * Configures the sorter job, runs it and waits for the outcome.
 *
 * <p>Keys are ordered with {@code LongWritable.DecreasingComparator}.
 *
 * @param options {@code options[1]} is the input path, {@code options[2]} the output path;
 *     {@code options[0]} is not read here
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be set up or run
 */
@Override
public int run(String... options) throws Exception {
    final Class<?> jobClass = getClass();
    final Job job = Job.getInstance(getConf(), jobClass.toString());
    job.setJarByClass(jobClass);

    // The reducer doubles as the combiner.
    job.setMapperClass(SorterMapper.class);
    job.setCombinerClass(SorterReducer.class);
    job.setReducerClass(SorterReducer.class);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    FileInputFormat.addInputPath(job, new Path(options[1]));
    FileOutputFormat.setOutputPath(job, new Path(options[2]));

    final boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
From source file:com.javiertordable.mrif.MapReduceQuadraticSieve.java
License:Apache License
/** * Setup the MapReduce parameters and run it. * * Tool parses the command line arguments for us. *//*from www . ja v a2s . c o m*/ public int run(String[] args) throws Exception { Configuration conf = getConf(); // Check the arguments. we need the integer to attempt to factor. if (args.length < 1) { System.out.println("Please indicate the integer to factor"); LOGGER.severe("No integer to factor. Exit."); System.exit(1); } // Parse N and add it to the job configuration, so that the workers can // access it as well. BigInteger N = new BigInteger(args[0]); LOGGER.info("Attempting factorization of: " + N.toString()); conf.set(INTEGER_TO_FACTOR_NAME, N.toString()); // Obtain the factor base for the integer N. FactorBaseArray factorBase = SieveInput.factorBase(N); LOGGER.info("Factor base of size: " + factorBase.size()); conf.set(FACTOR_BASE_NAME, factorBase.toString()); // Prepare the input of the mapreduce. LOGGER.info("Sieve of size: " + SieveInput.fullSieveIntervalSize(N)); try { // Write the full sieve interval to disk. SieveInput.writeFullSieveInterval(N, "input/" + INPUT_FILE_NAME); } catch (FileNotFoundException e) { System.out.println("Unable to open the file for writing."); } catch (IOException e) { System.out.println("Unable to write to the output file."); } // Configure the classes of the mapreducer Job job = new Job(conf, "QuadraticSieve"); job.setJarByClass(MapReduceQuadraticSieve.class); job.setMapperClass(SieveMapper.class); job.setReducerClass(FindSquaresReducer.class); // Output will be two pairs of strings: // <"Factor1", "59"> // <"Factor2", "101"> job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path("input/")); FileOutputFormat.setOutputPath(job, new Path("output/")); // Submit the job. job.waitForCompletion(true); return 0; }
From source file:com.jbw.jobcontrol.Patent.java
@Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); Job job1 = Job.getInstance(conf); job1.setJobName("test"); job1.setJarByClass(Patent.class); ChainMapper.addMapper(job1, InverseMapper.class, LongWritable.class, Text.class, Text.class, Text.class, conf);//from w w w .ja v a 2 s. com ChainMapper.addMapper(job1, CountMapper.class, Text.class, Text.class, Text.class, IntWritable.class, conf); job1.setReducerClass(IntSumReducer.class); Job job2 = Job.getInstance(); ControlledJob cjob1 = new ControlledJob(job1.getConfiguration()); ControlledJob cjob2 = new ControlledJob(job2.getConfiguration()); cjob2.addDependingJob(cjob1); JobControl jc = new JobControl("process job"); jc.addJob(cjob1); jc.addJob(cjob2); Thread t = new Thread(jc); t.start(); while (true) { for (ControlledJob j : jc.getRunningJobList()) { break; } break; } return 0; }