List of usage examples for org.apache.hadoop.mapreduce.lib.jobcontrol ControlledJob setJob
public synchronized void setJob(Job job)
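The examples below all follow the same shape: configure a plain Job, attach it to a ControlledJob with setJob(), declare ordering with addDependingJob(), and hand everything to a JobControl. A minimal, self-contained sketch of that pattern (the class name, job names, and polling interval here are illustrative placeholders, not taken from any example below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

public class SetJobSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job first = Job.getInstance(conf, "first job");
        // ... set mapper/reducer/input/output on `first` here ...
        ControlledJob controlledFirst = new ControlledJob(conf);
        controlledFirst.setJob(first); // attach the configured Job to its ControlledJob wrapper

        Job second = Job.getInstance(conf, "second job");
        // ... configure `second`, typically reading the output of `first` ...
        ControlledJob controlledSecond = new ControlledJob(conf);
        controlledSecond.setJob(second);
        controlledSecond.addDependingJob(controlledFirst); // scheduled only after `first` succeeds

        JobControl jobControl = new JobControl("example jobs");
        jobControl.addJob(controlledFirst);
        jobControl.addJob(controlledSecond);

        new Thread(jobControl).start(); // JobControl implements Runnable
        while (!jobControl.allFinished()) {
            Thread.sleep(5000); // poll until every job completes or fails
        }
        jobControl.stop(); // terminate the scheduling thread
    }
}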
From source file:clustering.inverted_index.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir "
                + "[decimal_number] [pruning_threshold]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);
    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }
    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */
    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);
    job1.setMapperClass(Mapper.class); // identity mapper
    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the inverted index */
    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job2, normDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);
    job2.setMapperClass(Mapper.class); // identity mapper
    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);
    return job2.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.link_back.WorkflowDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.err.printf("usage: %s init_input_dir simhash_intermediate_dir mst_dir output_dir\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String pre_output = args[3] + "/pre";
    String step1_output = args[3] + "/step1";
    String step2_output = args[3] + "/final";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("link back jobs");

    /* pre job */
    Driver preDriver = new Driver();
    String[] preArgs = {args[0], pre_output};
    Job preJob = preDriver.configJob(preArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1 job */
    clustering.link_back.step1.Driver step1Driver = new clustering.link_back.step1.Driver();
    String[] step1Args = {args[2], args[1], step1_output};
    Job step1Job = step1Driver.configJob(step1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(step1Job);
    jobControl.addJob(controlledJob1);

    /* step 2 job, depends on both the pre job and the step 1 job */
    clustering.link_back.step2.Driver driver2 = new clustering.link_back.step2.Driver();
    String[] args2 = {pre_output, step1_output, step2_output};
    Job job2 = driver2.configJob(args2);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledPreJob);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);
    return job2.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.mst.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s similarity_result_dir document_count_file output_dir "
                + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_OutputDir = new Path(args[2] + "/step1");
    Path resultDir = new Path(args[2] + "/result");
    URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);
    if (args.length > 3) {
        conf.setDouble("final.threshold", Double.valueOf(args[3]));
    } else {
        conf.setDouble("final.threshold", 0.2d);
    }
    if (args.length > 4) {
        conf.setInt("reduce.task.num", Integer.valueOf(args[4]));
    } else {
        conf.setInt("reduce.task.num", 5);
    }

    JobControl jobControl = new JobControl("mst jobs");

    /* step 1, split and calculate the child msts */
    Job childJob = Job.getInstance(conf, "mst child job");
    childJob.setJarByClass(Driver.class);
    childJob.addCacheFile(docCntFile);

    if (args.length > 5 && args[5].equals("0")) {
        FileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(KeyValueTextInputFormat.class);
    } else {
        SequenceFileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class);
    }
    FileOutputFormat.setOutputPath(childJob, step1_OutputDir);

    childJob.setMapperClass(ChildMapper.class);
    childJob.setMapOutputKeyClass(DoubleWritable.class);
    childJob.setMapOutputValueClass(Text.class);
    childJob.setPartitionerClass(ChildPartitioner.class);
    childJob.setReducerClass(ChildReducer.class);
    childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1));
    childJob.setOutputKeyClass(DoubleWritable.class);
    childJob.setOutputValueClass(Text.class);

    ControlledJob controlledChildJob = new ControlledJob(conf);
    controlledChildJob.setJob(childJob);
    jobControl.addJob(controlledChildJob);

    /* step 2, merge step 1's output and calculate the final mst */
    Job finalJob = Job.getInstance(conf, "mst final job");
    finalJob.setJarByClass(FinalReducer.class);
    finalJob.addCacheFile(docCntFile);

    FileInputFormat.addInputPath(finalJob, step1_OutputDir);
    finalJob.setInputFormatClass(KeyValueTextInputFormat.class);
    finalJob.setMapperClass(FinalMapper.class);
    finalJob.setMapOutputKeyClass(DoubleWritable.class);
    finalJob.setMapOutputValueClass(Text.class);
    finalJob.setReducerClass(FinalReducer.class);
    finalJob.setOutputKeyClass(IntWritable.class);
    finalJob.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(finalJob, resultDir);

    ControlledJob finalControlledJob = new ControlledJob(conf);
    finalControlledJob.setJob(finalJob);
    finalControlledJob.addDependingJob(controlledChildJob);
    jobControl.addJob(finalControlledJob);

    // run jobs
    MapReduceUtils.runJobs(jobControl);
    return finalJob.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.simhash.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_outputDir = new Path(args[1] + "/step1");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);
    if (args.length > 2) {
        conf.setInt("simhash.threshold", Integer.valueOf(args[2]));
    } else {
        conf.setInt("simhash.threshold", 3);
    }

    JobControl jobControl = new JobControl("simhash jobs");

    Job job1 = Job.getInstance(conf, "simhash step1 job");
    job1.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);
    job1.setMapperClass(Step1Mapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setReducerClass(Step1Reducer.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job1, step1_outputDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    Job job2 = Job.getInstance(conf, "simhash step2 job");
    job2.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job2, step1_outputDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);
    job2.setMapperClass(Step2Mapper.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setReducerClass(Step2Reducer.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result"));

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    long starttime = System.currentTimeMillis();
    clustering.Utils.MapReduceUtils.runJobs(jobControl);
    boolean complete = job2.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds");
    return complete ? 0 : 1;
}
From source file:clustering.tf_idf.WorkflowDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir output_dir [gname_weight]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    String docCntDir = args[1] + "/docCnt";
    String step1_outputDir = args[1] + "/step1";
    String step2_outputDir = args[1] + "/step2";
    String step3_outputDir = args[1] + "/result";

    Configuration conf = getConf();
    conf = initConf(conf);

    JobControl jobControl = new JobControl("tf-idf jobs");

    /* pre step, count the number of documents in the corpus */
    DocCntDriver docCntDriver = new DocCntDriver();
    String[] preJobArgs = new String[2];
    preJobArgs[0] = args[0];
    preJobArgs[1] = docCntDir;
    Job preJob = docCntDriver.configJob(preJobArgs);

    ControlledJob controlledPreJob = new ControlledJob(conf);
    controlledPreJob.setJob(preJob);
    jobControl.addJob(controlledPreJob);

    /* step 1, calculate the term count of each document */
    TermCntDriver termCntDriver = new TermCntDriver();
    String[] job1Args = new String[2];
    job1Args[0] = args[0];
    job1Args[1] = step1_outputDir;
    Job job1 = termCntDriver.configJob(job1Args);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate the term frequency of each document */
    TermFreqDriver termFreqDriver = new TermFreqDriver();
    String gnameWeight = args.length > 2 ? args[2] : "1.0";
    conf.setDouble("gname.weight", Double.valueOf(gnameWeight));
    String[] job2Args = args.length > 2 ? new String[3] : new String[2];
    job2Args[0] = step1_outputDir;
    job2Args[1] = step2_outputDir;
    if (args.length > 2) {
        job2Args[2] = args[2];
    }
    Job job2 = termFreqDriver.configJob(job2Args);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    /* step 3, calculate tf_idf; depends on step 2 and the pre step */
    TF_IDF_Driver tf_idf_driver = new TF_IDF_Driver();
    String[] job3Args = new String[3];
    job3Args[0] = docCntDir;
    job3Args[1] = step2_outputDir;
    job3Args[2] = step3_outputDir;
    Job job3 = tf_idf_driver.configJob(job3Args);

    ControlledJob controlledJob3 = new ControlledJob(conf);
    controlledJob3.setJob(job3);
    controlledJob3.addDependingJob(controlledJob2);
    controlledJob3.addDependingJob(controlledPreJob);
    jobControl.addJob(controlledJob3);

    // run jobs
    runJobs(jobControl);
    return job3.waitForCompletion(true) ? 0 : 1;
}
From source file:org.hf.mls.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob.java
License:Apache License
public Map<String, ControlledJob> getJobs(String[] args) throws Exception {
    Map<String, ControlledJob> cJobs = new HashMap<String, ControlledJob>();
    ControlledJob cNormsAndTranspose = null;
    ControlledJob cPairwiseSimilarity = null;
    ControlledJob cAsMatrix = null;

    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix", false);
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
    addOption("maxSimilaritiesPerRow", "m",
            "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')',
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
    addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?",
            String.valueOf(false));
    addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return null;
    }

    String similarityClassnameArg = getOption("similarityClassname");
    String similarityClassname;
    try {
        similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname();
    } catch (IllegalArgumentException iae) {
        similarityClassname = similarityClassnameArg;
    }

    // Clear the output and temp paths if the overwrite option has been set
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getTempPath());   // clear the temp path
        HadoopUtil.delete(getConf(), getOutputPath()); // clear the output path
    }

    int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
    boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : NO_THRESHOLD;

    Path weightsPath = getTempPath("weights");
    Path normsPath = getTempPath("norms.bin");
    Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin");
    Path maxValuesPath = getTempPath("maxValues.bin");
    Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job normsAndTranspose = prepareJob(new Path(getInputPath(), "ratingMatrix"), weightsPath,
                VectorNormMapper.class, IntWritable.class, VectorWritable.class,
                MergeVectorsReducer.class, IntWritable.class, VectorWritable.class);
        normsAndTranspose.setCombinerClass(MergeVectorsCombiner.class);
        Configuration normsAndTransposeConf = normsAndTranspose.getConfiguration();
        normsAndTransposeConf.set(THRESHOLD, String.valueOf(threshold));
        normsAndTransposeConf.set(NORMS_PATH, normsPath.toString());
        normsAndTransposeConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        normsAndTransposeConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        normsAndTransposeConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        /* this job depends on the previous job: countObservations */
        cNormsAndTranspose = new ControlledJob(new Configuration());
        cNormsAndTranspose.setJob(normsAndTranspose);
        cJobs.put("normsAndTranspose", cNormsAndTranspose);
    }
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath,
                CooccurrencesMapper.class, IntWritable.class, VectorWritable.class,
                SimilarityReducer.class, IntWritable.class, VectorWritable.class);
        pairwiseSimilarity.setCombinerClass(VectorSumReducer.class);
        Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
        pairwiseConf.set(THRESHOLD, String.valueOf(threshold));
        pairwiseConf.set(NORMS_PATH, normsPath.toString());
        pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        pairwiseConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        pairwiseConf.set("prepPath", getInputPath().toString()); // add the preparation path
        pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity);
        /* depends on the normsAndTranspose job */
        cPairwiseSimilarity = new ControlledJob(new Configuration());
        cPairwiseSimilarity.setJob(pairwiseSimilarity);
        if (null != cNormsAndTranspose) {
            cPairwiseSimilarity.addDependingJob(cNormsAndTranspose);
        }
        cJobs.put("pairwiseSimilarity", cPairwiseSimilarity);
    }
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job asMatrix = prepareJob(pairwiseSimilarityPath, getOutputPath(),
                UnsymmetrifyMapper.class, IntWritable.class, VectorWritable.class,
                MergeToTopKSimilaritiesReducer.class, IntWritable.class, VectorWritable.class);
        asMatrix.setCombinerClass(MergeToTopKSimilaritiesReducer.class);
        asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
        /* depends on the pairwiseSimilarity job */
        cAsMatrix = new ControlledJob(new Configuration());
        cAsMatrix.setJob(asMatrix);
        if (null != cPairwiseSimilarity) {
            cAsMatrix.addDependingJob(cPairwiseSimilarity);
        }
        cJobs.put("asMatrix", cAsMatrix);
    }
    return cJobs;
}
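Unlike the other examples, getJobs() only builds and returns the ControlledJobs; nothing here runs them. A hedged sketch of how a caller might drive the returned map with a JobControl (the helper name runRowSimilarity and its signature are assumptions for illustration, not from the source):

// Hypothetical driver code, not part of the source: runs whatever jobs getJobs() built.
static boolean runRowSimilarity(RowSimilarityJob job, String[] args) throws Exception {
    Map<String, ControlledJob> cJobs = job.getJobs(args);
    if (cJobs == null) {
        return false; // argument parsing failed
    }
    JobControl control = new JobControl("row similarity jobs");
    for (ControlledJob cJob : cJobs.values()) {
        control.addJob(cJob); // inter-job dependencies were already declared via addDependingJob()
    }
    new Thread(control).start();
    while (!control.allFinished()) {
        Thread.sleep(5000);
    }
    boolean succeeded = control.getFailedJobList().isEmpty();
    control.stop();
    return succeeded;
}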
From source file:tv.icntv.grade.film.dbcollect.TableConcurrencyJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    String[] tables = configuration.get("hbase.cdn.tables").split(",");
    JobControl jobControl = new JobControl("data init");
    for (String table : tables) {
        // job 1: export the HBase table
        String hbaseDbDirectory = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table);
        HadoopUtils.deleteIfExist(hbaseDbDirectory);
        Job tableJob = new Job(configuration, "icntv grade init " + table);
        TableMapReduceUtil.initTableMapperJob(table, new Scan(), TableInitMapper.class,
                Text.class, Text.class, tableJob);
        MapReduceUtils.initReducerJob(new Path(hbaseDbDirectory), TableInitReducer.class, tableJob);

        ControlledJob tableControlledJob = new ControlledJob(configuration);
        tableControlledJob.setJob(tableJob);

        // job 2: db collect, depending on job 1
        String dbDirectory = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        HadoopUtils.deleteIfExist(dbDirectory);
        Configuration conf = getConf();
        Job db = new Job(conf, "icntv db collect " + table);
        conf.setLong("mapred.min.split.size", 512 * 2014 * 1024L); // "2014" looks like a typo for 1024; see the note after this example
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class,
                this.getClass(), db, new Path(strings[1]));
        FileOutputFormat.setOutputPath(db, new Path(dbDirectory));
        db.setNumReduceTasks(0);

        ControlledJob dbControlledJob = new ControlledJob(conf);
        dbControlledJob.setJob(db);
        dbControlledJob.addDependingJob(tableControlledJob);

        jobControl.addJob(tableControlledJob);
        jobControl.addJob(dbControlledJob);
    }
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    return 0;
}
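A caveat on the mapred.min.split.size line above (the next example has the same issue): new Job(conf, ...) takes a private copy of the Configuration, so setting the property on conf after the job is constructed may never reach the job. A minimal sketch of the two orderings that do take effect; the 1024 value is an assumption, correcting what looks like a 2014/1024 transposition in the source:

// Hypothetical fix, not from the source: make the split size visible to the job.
conf.setLong("mapred.min.split.size", 512 * 1024 * 1024L); // set BEFORE new Job(conf, ...) copies conf
Job db = new Job(conf, "icntv db collect " + table);
// ...or set it on the job's own copy after construction:
db.getConfiguration().setLong("mapred.min.split.size", 512 * 1024 * 1024L);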
From source file:tv.icntv.grade.film.dbcollect.TableInitJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    JobControl jobControl = new JobControl("init data");
    for (String table : strings) {
        String dbPath = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        String hbasePath = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table);

        // table job
        Job tableJob = new Job(configuration, "icntv grade init");
        Scan scan = new Scan();
        HadoopUtils.deleteIfExist(hbasePath);
        HadoopUtils.deleteIfExist(dbPath);
        TableMapReduceUtil.initTableMapperJob(table, scan, TableInitMapper.class,
                Text.class, Text.class, tableJob);
        MapReduceUtils.initReducerJob(new Path(hbasePath), TableInitReducer.class, tableJob);

        ControlledJob firstControll = new ControlledJob(configuration);
        firstControll.setJob(tableJob);

        // db collect job, depending on the table job
        Job db = new Job(configuration, "icntv db collect");
        configuration.setLong("mapred.min.split.size", 512 * 2014 * 1024L); // same likely 2014/1024 typo and conf-copy caveat as above
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class,
                this.getClass(), db, new Path(hbasePath));
        FileOutputFormat.setOutputPath(db, new Path(dbPath));
        db.setNumReduceTasks(0);

        ControlledJob secondaryController = new ControlledJob(configuration);
        secondaryController.setJob(db);
        secondaryController.addDependingJob(firstControll);

        jobControl.addJob(firstControll);
        jobControl.addJob(secondaryController);
    }
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    logger.info("job control succeeded, successful job size=" + jobControl.getSuccessfulJobList().size());
    return 0;
}
From source file:tv.icntv.grade.film.grade.GradeJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = this.getConf();

    // num job: count film seeds
    Job numJob = new Job(configuration, "calculate film seed num job");
    Path[] paths = getPaths(strings[0].split(","));
    MapReduceUtils.initMapperJob(NumCountMapper.class, Text.class, LongWritable.class,
            this.getClass(), numJob, paths);
    // TableMapReduceUtil.initTableReducerJob(strings[1], NumCountReducer.class, numJob);

    ControlledJob controlledJob3 = new ControlledJob(configuration);
    controlledJob3.setJob(numJob);

    // time job: calculate the film time median
    Job timeJob = new Job(configuration, "calculate film time middle job");
    MapReduceUtils.initMapperJob(TimeMaper.class, Text.class, Text.class, this.getClass(), timeJob, paths);
    timeJob.setCombinerClass(TimeCombiner.class);
    MapReduceUtils.initReducerJob(new Path(strings[2]), TimeReducer.class, timeJob);

    ControlledJob controlledJob4 = new ControlledJob(configuration);
    controlledJob4.setJob(timeJob);

    // correlation job over user history
    Job correlate = new Job(configuration, "icntv correlate job");
    MapReduceUtils.initMapperJob(UserHistoryMapper.class, Text.class, Text.class,
            this.getClass(), correlate, paths);
    MapReduceUtils.initReducerJob(new Path(strings[3]), UserHistoryReducer.class, correlate);

    ControlledJob correlateController = new ControlledJob(configuration);
    correlateController.setJob(correlate);

    // the three jobs are independent, so no addDependingJob() calls here
    JobControl jobControl = new JobControl("unit grade");
    jobControl.addJob(controlledJob3);
    jobControl.addJob(controlledJob4);
    jobControl.addJob(correlateController);

    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    // Counter counter = correlate.getCounters().findCounter(groupName, countName);
    // HadoopUtils.save(new Path(configuration.get("icntv.temp.file")), counter.getValue());
    return 0;
}
From source file:tv.icntv.log.stb.GenerateStbLogJob.java
License:Apache License
@Override
public boolean run(Map<String, String> maps) throws Exception {
    Configuration configuration = getConf();

    // user login job
    Job userLogin = Job.getInstance(configuration, "user login job");
    userLogin.setMapperClass(ParserLoginMapper.class);
    userLogin.setJarByClass(this.getClass());
    userLogin.setOutputKeyClass(NullWritable.class);
    userLogin.setOutputValueClass(Text.class);
    Path userLoginOutput = new Path(maps.get(USER_LOGIN_JOB_OUTPUT));
    FileInputFormat.addInputPath(userLogin, new Path(maps.get(USER_LOGIN_JOB_INPUT)));
    FileOutputFormat.setOutputPath(userLogin, userLoginOutput);
    userLogin.setNumReduceTasks(0);
    ControlledJob userControlledJob = new ControlledJob(configuration);
    userControlledJob.setJob(userLogin);

    // player job
    Job player = Job.getInstance(configuration, "player job");
    player.setJarByClass(getClass());
    player.setMapperClass(PlayerMapper.class);
    player.setOutputValueClass(Text.class);
    player.setOutputKeyClass(NullWritable.class);
    FileInputFormat.addInputPath(player, new Path(maps.get(PLAYER_JOB_INPUT)));
    FileOutputFormat.setOutputPath(player, new Path(maps.get(PLAYER_JOB_OUTPUT)));
    player.setNumReduceTasks(0);
    ControlledJob playControlledJob = new ControlledJob(configuration);
    playControlledJob.setJob(player);

    // content view job
    Job contentView = Job.getInstance(configuration, "content view job");
    contentView.setJarByClass(getClass());
    contentView.setMapperClass(ContentViewMapperBack.class);
    contentView.setOutputKeyClass(NullWritable.class);
    contentView.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(contentView, new Path(maps.get(CONTENT_VIEW_JOB_INPUT)));
    FileOutputFormat.setOutputPath(contentView, new Path(maps.get(CONTENT_VIEW_JOB_OUTPUT)));
    contentView.setNumReduceTasks(0);
    ControlledJob contentViewControlledJob = new ControlledJob(configuration);
    contentViewControlledJob.setJob(contentView);

    // replay (look back) job
    Job replay = Job.getInstance(configuration, "reply job");
    replay.setJarByClass(getClass());
    replay.setMapperClass(ReplayMapper.class);
    replay.setOutputKeyClass(NullWritable.class);
    replay.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(replay, new Path(maps.get(LOOK_BACK_JOB_INPUT)));
    FileOutputFormat.setOutputPath(replay, new Path(maps.get(LOOK_BACK_JOB_OUTPUT)));
    replay.setNumReduceTasks(0);
    ControlledJob replayControlledJob = new ControlledJob(configuration);
    replayControlledJob.setJob(replay);

    // EPG log job
    Job logEpg = Job.getInstance(configuration, "log epg job");
    logEpg.setJarByClass(getClass());
    logEpg.setMapperClass(EPGOperateMapper.class);
    logEpg.setOutputKeyClass(NullWritable.class);
    logEpg.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(logEpg, new Path(maps.get(LOG_EPG_JOB_INPUT)));
    FileOutputFormat.setOutputPath(logEpg, new Path(maps.get(LOG_EPG_JOB_OUTPUT)));
    logEpg.setNumReduceTasks(0);
    ControlledJob logEpgControlledJob = new ControlledJob(configuration);
    logEpgControlledJob.setJob(logEpg);

    // cdn stb job
    Job cdn = Job.getInstance(configuration, "cdn stb job");
    cdn.setJarByClass(this.getClass());
    cdn.setMapperClass(CdnStbMapper.class);
    cdn.setOutputValueClass(Text.class);
    cdn.setOutputKeyClass(NullWritable.class);
    FileInputFormat.addInputPath(cdn, new Path(maps.get(CDN_JOB_INPUT)));
    FileOutputFormat.setOutputPath(cdn, new Path(maps.get(CDN_JOB_OUTPUT)));
    cdn.setNumReduceTasks(0);
    ControlledJob cdnControlledJob = new ControlledJob(configuration);
    cdnControlledJob.setJob(cdn);

    // cdn adapter job
    Job cdnAdapterJob = Job.getInstance(configuration, "cdn adapter job");
    cdnAdapterJob.setJarByClass(getClass());
    cdnAdapterJob.setMapperClass(CdnAdapterMapper.class);
    cdnAdapterJob.setOutputKeyClass(NullWritable.class);
    cdnAdapterJob.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(cdnAdapterJob, new Path(maps.get(CDN_ADAPTER_JOB_INPUT)));
    FileOutputFormat.setOutputPath(cdnAdapterJob, new Path(maps.get(CDN_ADAPTER_JOB_OUTPUT)));
    cdnAdapterJob.setNumReduceTasks(0);
    ControlledJob cdnAdapterControlleredJob = new ControlledJob(configuration);
    cdnAdapterControlleredJob.setJob(cdnAdapterJob);

    // all seven jobs are independent; the JobControl simply runs them as one batch
    JobControl jobControl = new JobControl("stb log parser, e.g. userLogin, devicePlayer, contentView, logEpg, cdn");
    jobControl.addJob(userControlledJob);
    jobControl.addJob(playControlledJob);
    jobControl.addJob(contentViewControlledJob);
    jobControl.addJob(replayControlledJob);
    jobControl.addJob(logEpgControlledJob);
    jobControl.addJob(cdnControlledJob);
    jobControl.addJob(cdnAdapterControlleredJob);

    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    return jobControl.getFailedJobList().isEmpty();
}