List of usage examples for org.apache.hadoop.mapreduce Job getInstance
public static Job getInstance(Configuration conf, String jobName) throws IOException
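Before the per-project examples, here is a minimal, self-contained sketch of the overloads these examples actually call, Job.getInstance(Configuration) and Job.getInstance(Configuration, String jobName). The class name, job name, and input/output paths below are placeholders chosen for illustration, not taken from any of the source files listed on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobGetInstanceSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Job.getInstance(conf) is also available when no display name is needed;
        // the String argument only sets the name shown in logs and the web UI.
        Job job = Job.getInstance(conf, "job-getinstance-sketch");

        job.setJarByClass(JobGetInstanceSketch.class);

        // No mapper or reducer is set, so the identity Mapper/Reducer run:
        // TextInputFormat emits (LongWritable offset, Text line) pairs, which
        // are passed straight through to the output.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Placeholder input/output paths taken from the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}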
From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_TABLE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_INPUT_FORMAT);
        options.addOption(OPTION_INPUT_DELIM);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        String tableName = getOptionValue(OPTION_TABLE_NAME).toUpperCase();
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        String inputFormat = getOptionValue(OPTION_INPUT_FORMAT);
        String inputDelim = getOptionValue(OPTION_INPUT_DELIM);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        // ----------------------------------------------------------------------------

        System.out.println("Starting: " + job.getJobName());

        setupMapInput(input, inputFormat, inputDelim);
        setupReduceOutput(output);

        // pass table and columns
        MetadataManager metaMgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
        TableDesc table = metaMgr.getTableDesc(tableName);
        job.getConfiguration().set(BatchConstants.TABLE_NAME, tableName);
        job.getConfiguration().set(BatchConstants.TABLE_COLUMNS, getColumns(table));

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.InvertedIndexJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_INPUT_FORMAT);
        options.addOption(OPTION_INPUT_DELIM);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        String inputFormat = getOptionValue(OPTION_INPUT_FORMAT);
        String inputDelim = getOptionValue(OPTION_INPUT_DELIM);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        // ----------------------------------------------------------------------------

        System.out.println("Starting: " + job.getJobName());

        CubeInstance cube = getCube(cubeName);
        setupMapInput(input, inputFormat, inputDelim);
        setupReduceOutput(output, cube.getInvertedIndexDesc().getSharding());
        attachMetadata(cube);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.RandomKeyDistributionJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_KEY_CLASS);
        options.addOption(OPTION_REGION_MB);
        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        job.setJarByClass(this.getClass());
        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        String keyClass = getOptionValue(OPTION_KEY_CLASS);
        Class<?> keyClz = Class.forName(keyClass);

        int regionMB = Integer.parseInt(getOptionValue(OPTION_REGION_MB));

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RandomKeyDistributionMapper.class);
        job.setMapOutputKeyClass(keyClz);
        job.setMapOutputValueClass(NullWritable.class);

        // Reducer - only one
        job.setReducerClass(RandomKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(keyClz);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        // total map input MB
        double totalMapInputMB = this.getTotalMapInputMB();
        int regionCount = Math.max(1, (int) (totalMapInputMB / regionMB));
        int mapSampleNumber = 1000;
        System.out.println("Total Map Input MB: " + totalMapInputMB);
        System.out.println("Region Count: " + regionCount);

        // set job configuration
        job.getConfiguration().set(BatchConstants.MAPPER_SAMPLE_NUMBER, String.valueOf(mapSampleNumber));
        job.getConfiguration().set(BatchConstants.REGION_NUMBER, String.valueOf(regionCount));

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.leon.hadoop.loganalyse.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.littlehotspot.hadoop.mr.box.BoxLog.java
License:Open Source License
@Override
public int run(String[] arg) throws Exception {
    try {
        // An optional third argument overrides the box log line pattern
        if (arg.length > 2) {
            BOX_LOG_FORMAT_REGEX = Pattern.compile(arg[2]);
        }

        Job job = Job.getInstance(this.getConf(), BoxLog.class.getSimpleName());
        job.setJarByClass(BoxLog.class);

        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(BoxMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(BoxReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.mobile.MobileLog.java
License:Open Source License
@Override
public int run(String[] arg) throws Exception {
    try {
        Job job = Job.getInstance(this.getConf(), MobileLog.class.getSimpleName());
        job.setJarByClass(MobileLog.class);

        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(MobileMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(MobileReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFScheduler.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    try {
        // initialize common MapReduce parameters
        CommonVariables.initMapReduce(this.getConf(), args);

        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // an optional regex overrides the mapper input format pattern
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(CDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(GeneralReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.hdfs2hbase.api.user.UserScheduler.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    try {
        // initialize common MapReduce parameters and the HBase helper
        CommonVariables.initMapReduce(this.getConf(), args);
        CommonVariables.hBaseHelper = new HBaseHelper(this.getConf());

        // read job parameters
        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // an optional regex overrides the mapper input format pattern
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        job.setMapperClass(UserMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(UserReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean state = job.waitForCompletion(true);
        if (!state) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.mapr.db.utils.ImportCSV_MR.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println("MapR-DB JSON Tables - Import CSV" + "\nUsage:\n"
                + "\tParam 1: JSON Table Path (MapR-FS)\n"
                + "\tParam 2: Text File Path (Local-FS)\n"
                + "\tParam 3: Text File Delimiter (Local-FS)\n"
                + "\tParam 4: Schema File Path (Local-FS)\n");
        System.exit(-1);
    }

    outputTable = args[0].toString().trim();
    inputDir = args[1].toString().trim();
    delimiter = args[2].toString().trim();
    schemaFile = args[3].toString().trim();

    BasicConfigurator.configure();
    Logger.getRootLogger().setLevel(Level.ERROR);

    ImportCSV_MR imp = new ImportCSV_MR();
    imp.readSchema(schemaFile);
    imp.printSchema();

    Job job = Job.getInstance(conf, "ImportCSV_MR");
    job.setJarByClass(ImportCSV_MR.class);
    job.setMapperClass(MyMapper.class);

    conf = job.getConfiguration();
    conf.setStrings("io.serializations",
            new String[] { conf.get("io.serializations"), JSONDocumentSerialization.class.getName() });

    conf.set("countColumnsInSchema", String.valueOf(countColumnsInSchema));
    conf.set("delimiter", delimiter);
    conf.set("tablePath", outputTable);

    String valueTypes[] = valueTypesInSchema.toArray(new String[valueTypesInSchema.size()]);
    conf.setStrings("valueTypesInSchema", valueTypes);
    String columnNames[] = columnNamesInSchema.toArray(new String[columnNamesInSchema.size()]);
    conf.setStrings("columnNamesInSchema", columnNames);

    // Deciding the appropriate Input format class along with their input path
    FileInputFormat.addInputPath(job, new Path(inputDir));
    job.setInputFormatClass(TextInputFormat.class);

    // Mapper output record key and value class
    job.setMapOutputKeyClass(ByteBufWritableComparable.class);
    job.setMapOutputValueClass(DBDocumentImpl.class);

    // Deciding the appropriate Output format class along with their input path
    conf.set("maprdb.mapred.outputtable", outputTable);
    job.setOutputFormatClass(TableOutputFormat.class);

    // Reducer output record key and value class
    job.setNumReduceTasks(0);

    // Submit once and reuse the result; calling waitForCompletion a second time would fail
    boolean isJobSuccessful = job.waitForCompletion(true);
    System.exit(isJobSuccessful ? 0 : 1);
    return 0;
}
From source file:com.marklogic.mapreduce.examples.BinaryReader.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: BinaryReader configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = Job.getInstance(conf, "binary reader");
    job.setJarByClass(BinaryReader.class);
    job.setInputFormatClass(DocumentInputFormat.class);
    job.setMapperClass(DocMapper.class);
    job.setMapOutputKeyClass(DocumentURI.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(BinaryOutputFormat.class);
    job.setOutputKeyClass(DocumentURI.class);
    job.setOutputValueClass(BytesWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}