List of usage examples for the org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob constructor ControlledJob(Configuration)
public ControlledJob(Configuration conf) throws IOException
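All of the project examples below follow the same basic pattern: build a Job, wrap its Configuration in a ControlledJob, register the ControlledJob with a JobControl, run the JobControl on a thread, and poll allFinished() before stopping it. The following minimal sketch shows that pattern in isolation; the class name ControlledJobSketch, the group name, and the argument-based paths are placeholders, not taken from any of the source files below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ControlledJobSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example job");
        job.setJarByClass(ControlledJobSketch.class);
        // Mapper/reducer setup is omitted; args[0] and args[1] are placeholder paths.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Wrap the job's Configuration in a ControlledJob, then attach the Job itself.
        ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
        controlledJob.setJob(job);

        // JobControl runs its registered jobs (respecting dependencies) when executed on a thread.
        JobControl jobControl = new JobControl("example-group");
        jobControl.addJob(controlledJob);

        Thread runner = new Thread(jobControl);
        runner.start();
        while (!jobControl.allFinished()) {
            Thread.sleep(1000);
        }
        jobControl.stop();
    }
}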
From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.SigmaSqJob.java
License: Apache License

public Double call() throws NectarException {
    // TODO Auto-generated method stub
    double value = 0;
    JobControl jobControl = new JobControl("sigmajob");
    try {
        job = new Job();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.setJarByClass(SigmaSqJob.class);
    log.info("Sigma square Job initialized");
    log.warn("Sigma square job: Processing...Do not terminate/close");
    log.debug("Sigma square job: Mapping process started");
    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, DoubleWritable.class,
                Text.class, NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, SigmaSqMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.getConfiguration().set("fields.spec", "" + column);
    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
        String trace = new String();
        log.error(e.toString());
        for (StackTraceElement s : e.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Sigma square Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Sigma square job: Mapping process completed");
    log.debug("Sigma square job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    jobControl.stop();
    try {
        fs = FileSystem.get(job.getConfiguration());
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Sigma square Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Sigma square job: Reducing process completed");
    log.info("Sigma square Job completed\n");
    return value;
}
From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.SigmaXYJob.java
License: Apache License

public Double call() throws NectarException {
    double value = 0;
    JobControl jobControl = new JobControl("sigmajob");
    try {
        job = new Job();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.setJarByClass(SigmaXYJob.class);
    log.info("SigmaXY Job initialized");
    log.warn("SigmaXY job: Processing...Do not terminate/close");
    log.debug("SigmaXY job: Mapping process started");
    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, LongWritable.class,
                Text.class, NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, SigmaXYMapper.class, NullWritable.class, Text.class, Text.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.getConfiguration().set("fields.spec", x + "," + y);
    job.setReducerClass(DoubleSumReducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
        String trace = new String();
        log.error(e.toString());
        for (StackTraceElement s : e.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("SigmaXY Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("SigmaXY job: Mapping process completed");
    log.debug("SigmaXY job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    jobControl.stop();
    FileSystem fs;
    try {
        fs = FileSystem.get(job.getConfiguration());
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine = bufferedReader.readLine();
        String[] fields = valueLine.split("\t");
        value = Double.parseDouble(fields[1]);
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("SigmaXY Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("SigmaXY job: Reducing process completed");
    log.info("SigmaXY Job completed\n");
    return value;
}
From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.SortJob.java
License: Apache License

public Double[] call() throws NectarException {
    // TODO Auto-generated method stub
    JobControl jobControl = new JobControl("Sortjob");
    try {
        job = new Job();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.setJarByClass(SortJob.class);
    log.info("Sorting Job initialized");
    log.warn("Sorting job: Processing...Do not terminate/close");
    log.debug("Sorting job: Mapping process started");
    try {
        ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, LongWritable.class,
                Text.class, NullWritable.class, Text.class, job.getConfiguration());
        ChainMapper.addMapper(job, SortMapper.class, NullWritable.class, Text.class, DoubleWritable.class,
                DoubleWritable.class, job.getConfiguration());
    } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }
    job.getConfiguration().set("fields.spec", "" + column);
    job.setReducerClass(Reducer.class);
    try {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
        fs = FileSystem.get(job.getConfiguration());
        if (!fs.exists(new Path(inputFilePath))) {
            throw new NectarException("Exception occured:File " + inputFilePath + " not found ");
        }
    } catch (Exception e2) {
        // TODO Auto-generated catch block
        String trace = new String();
        log.error(e2.toString());
        for (StackTraceElement s : e2.getStackTrace()) {
            trace += "\n\t at " + s.toString();
        }
        log.debug(trace);
        log.debug("Sorting Job terminated abruptly\n");
        throw new NectarException();
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    log.debug("Sorting job: Mapping process completed");
    log.debug("Sorting job: Reducing process started");
    try {
        controlledJob = new ControlledJob(job.getConfiguration());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    try {
        FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
        String valueLine;
        while ((valueLine = bufferedReader.readLine()) != null) {
            String[] fields = valueLine.split("\t");
            value.add(Double.parseDouble(fields[1]));
        }
        bufferedReader.close();
        in.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        log.error("Exception occured: Output file cannot be read.");
        log.debug(e.getMessage());
        log.debug("Sorting Job terminated abruptly\n");
        throw new NectarException();
    }
    log.debug("Sorting job: Reducing process completed");
    log.info("Sorting Job completed\n");
    return value.toArray(new Double[value.size()]);
}
From source file: com.zinnia.nectar.regression.hadoop.primitive.jobs.YDiffJob.java
License: Apache License

public Double call() throws Exception {
    JobControl jobControl = new JobControl("YDiff job");
    Job job = new Job();
    job.setJarByClass(YDiffJob.class);
    ChainMapper.addMapper(job, FieldSeperator.FieldSeperationMapper.class, DoubleWritable.class, Text.class,
            NullWritable.class, Text.class, job.getConfiguration());
    ChainMapper.addMapper(job, YDiffMapper.class, NullWritable.class, Text.class, Text.class,
            DoubleWritable.class, job.getConfiguration());
    String fieldSpec = getFieldSpecForColumns();
    job.getConfiguration().set("fields.spec", fieldSpec);
    job.getConfiguration().setStrings("paramValues", paramValues);
    job.setReducerClass(DoubleSumReducer.class);
    FileInputFormat.addInputPath(job, new Path(inputFilePath));
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    jobControl.addJob(controlledJob);
    Thread thread = new Thread(jobControl);
    thread.start();
    while (!jobControl.allFinished()) {
        Thread.sleep(10000);
    }
    jobControl.stop();
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FSDataInputStream in = fs.open(new Path(outputFilePath + "/part-r-00000"));
    BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
    String valueLine = bufferedReader.readLine();
    String[] fields = valueLine.split("\t");
    double value = Double.parseDouble(fields[1]);
    bufferedReader.close();
    in.close();
    return value;
}
From source file: main.Driver.java

private ControlledJob setSortingJob(String input, String output, String outputFileName) throws Exception {
    _configuration.set(LicenseOutputFormat.OUTPUT_FILE_NAME, outputFileName);
    ControlledJob jc = new ControlledJob(_configuration);
    Job job = jc.getJob();
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(input));
    job.setOutputFormatClass(LicenseOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setJarByClass(Driver.class);
    return jc;
}
From source file: main.Driver.java

private ControlledJob setMRJob(String input1, String input2, String output) throws IOException {
    _configuration.set(LicenseOutputFormat.NAMES, LicenseNameWritable.class.getName());
    _configuration.set(LicenseOutputFormat.LICENSE, LicenseTypeWritable.class.getName());
    String joinExpression = CompositeInputFormat.compose("inner", LicenseInputFormat.class, new Path(input1),
            new Path(input2));
    System.out.println(joinExpression);
    _configuration.set("mapreduce.join.expr", joinExpression);
    ControlledJob controlledJob = new ControlledJob(_configuration);
    Job job = controlledJob.getJob();
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(MapSideMapper.class);
    job.setReducerClass(MapSideReducer.class);
    job.setInputFormatClass(CompositeInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setJarByClass(Driver.class);
    return controlledJob;
}
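The two Driver excerpts above only construct ControlledJobs; the code that chains and submits them is not part of the excerpt. A plausible wiring sketch, assuming the join output feeds the sorting job (only setMRJob and setSortingJob come from the excerpt; all other names, paths, and the enclosing method that declares throws Exception are hypothetical):

// Hypothetical wiring of the two ControlledJobs built above.
ControlledJob joinJob = setMRJob("/data/names", "/data/licenses", "/tmp/joined");   // placeholder paths
ControlledJob sortJob = setSortingJob("/tmp/joined", "/data/out", "licenses.txt");  // placeholder paths
sortJob.addDependingJob(joinJob);   // the sorting job starts only after the join job succeeds

JobControl control = new JobControl("license-pipeline");
control.addJob(joinJob);
control.addJob(sortJob);

Thread runner = new Thread(control);
runner.start();
while (!control.allFinished()) {
    Thread.sleep(1000);
}
control.stop();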
From source file: org.hf.mls.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob.java
License: Apache License

public Map<String, ControlledJob> getJobs(String[] args) throws Exception {
    Map<String, ControlledJob> cJobs = new HashMap<String, ControlledJob>();
    ControlledJob cNormsAndTranspose = null;
    ControlledJob cPairwiseSimilarity = null;
    ControlledJob cAsMatrix = null;

    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix", false);
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
    addOption("maxSimilaritiesPerRow", "m",
            "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')',
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
    addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?",
            String.valueOf(false));
    addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return null;
    }

    String similarityClassnameArg = getOption("similarityClassname");
    String similarityClassname;
    try {
        similarityClassname = VectorSimilarityMeasures.valueOf(similarityClassnameArg).getClassname();
    } catch (IllegalArgumentException iae) {
        similarityClassname = similarityClassnameArg;
    }

    // Clear the output and temp paths if the overwrite option has been set
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        // Clear the temp path
        HadoopUtil.delete(getConf(), getTempPath());
        // Clear the output path
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
    boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : NO_THRESHOLD;

    Path weightsPath = getTempPath("weights");
    Path normsPath = getTempPath("norms.bin");
    Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin");
    Path maxValuesPath = getTempPath("maxValues.bin");
    Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job normsAndTranspose = prepareJob(new Path(getInputPath(), "ratingMatrix"), weightsPath,
                VectorNormMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
                IntWritable.class, VectorWritable.class);
        normsAndTranspose.setCombinerClass(MergeVectorsCombiner.class);
        Configuration normsAndTransposeConf = normsAndTranspose.getConfiguration();
        normsAndTransposeConf.set(THRESHOLD, String.valueOf(threshold));
        normsAndTransposeConf.set(NORMS_PATH, normsPath.toString());
        normsAndTransposeConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        normsAndTransposeConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        normsAndTransposeConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        /**
         * this job is depending the last job --- countObservations
         */
        cNormsAndTranspose = new ControlledJob(new Configuration());
        cNormsAndTranspose.setJob(normsAndTranspose);
        cJobs.put("normsAndTranspose", cNormsAndTranspose);
    }
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, CooccurrencesMapper.class,
                IntWritable.class, VectorWritable.class, SimilarityReducer.class, IntWritable.class,
                VectorWritable.class);
        pairwiseSimilarity.setCombinerClass(VectorSumReducer.class);
        Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
        pairwiseConf.set(THRESHOLD, String.valueOf(threshold));
        pairwiseConf.set(NORMS_PATH, normsPath.toString());
        pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        pairwiseConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        //add prePath
        pairwiseConf.set("prepPath", getInputPath().toString());
        pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity);
        /**
         * depending on normsAndTranspose job
         */
        cPairwiseSimilarity = new ControlledJob(new Configuration());
        cPairwiseSimilarity.setJob(pairwiseSimilarity);
        if (null != cNormsAndTranspose) {
            cPairwiseSimilarity.addDependingJob(cNormsAndTranspose);
        }
        cJobs.put("pairwiseSimilarity", cPairwiseSimilarity);
    }
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job asMatrix = prepareJob(pairwiseSimilarityPath, getOutputPath(), UnsymmetrifyMapper.class,
                IntWritable.class, VectorWritable.class, MergeToTopKSimilaritiesReducer.class,
                IntWritable.class, VectorWritable.class);
        asMatrix.setCombinerClass(MergeToTopKSimilaritiesReducer.class);
        asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
        /**
         * depending on pairwiseSimilarity job
         */
        cAsMatrix = new ControlledJob(new Configuration());
        cAsMatrix.setJob(asMatrix);
        if (null != cPairwiseSimilarity) {
            cAsMatrix.addDependingJob(cPairwiseSimilarity);
        }
        cJobs.put("asMatrix", cAsMatrix);
    }
    return cJobs;
}
From source file: tv.icntv.grade.film.dbcollect.TableConcurrencyJob.java
License: Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    String[] tables = configuration.get("hbase.cdn.tables").split(",");
    JobControl jobControl = new JobControl("data init");
    for (String table : tables) {
        String hbaseDbDirectory = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(),
                table);
        HadoopUtils.deleteIfExist(hbaseDbDirectory);
        Job tableJob = new Job(configuration, "icntv grade init " + table);
        TableMapReduceUtil.initTableMapperJob(table, new Scan(), TableInitMapper.class, Text.class,
                Text.class, tableJob);
        MapReduceUtils.initReducerJob(new Path(hbaseDbDirectory), TableInitReducer.class, tableJob);
        // controlled job
        ControlledJob tableControlledJob = new ControlledJob(configuration);
        tableControlledJob.setJob(tableJob);

        String dbDirectory = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        HadoopUtils.deleteIfExist(dbDirectory);
        Configuration conf = getConf();
        Job db = new Job(conf, "icntv db collect " + table);
        conf.setLong("mapred.min.split.size", 512 * 2014 * 1024L);
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class, this.getClass(), db,
                new Path(strings[1]));
        FileOutputFormat.setOutputPath(db, new Path(dbDirectory));
        db.setNumReduceTasks(0);
        ControlledJob dbControlledJob = new ControlledJob(conf);
        dbControlledJob.setJob(db);
        dbControlledJob.addDependingJob(tableControlledJob);
        //controlledJob.
        jobControl.addJob(tableControlledJob);
        jobControl.addJob(dbControlledJob);
    }
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    return 0;
}
From source file: tv.icntv.grade.film.dbcollect.TableInitJob.java
License: Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    JobControl jobControl = new JobControl("init data");
    for (String table : strings) {
        String dbPath = String.format(configuration.get("hdfs.directory.base.db"), new Date(), table);
        // String[] arrays = new String[]{table, //input table
        //         String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table), // db
        // };
        String hbasePath = String.format(configuration.get("hdfs.directory.from.hbase"), new Date(), table);
        //table job
        Job tableJob = new Job(configuration, "icntv grade init");
        Scan scan = new Scan();
        HadoopUtils.deleteIfExist(hbasePath);
        HadoopUtils.deleteIfExist(dbPath);
        TableMapReduceUtil.initTableMapperJob(table, scan, TableInitMapper.class, Text.class, Text.class,
                tableJob);
        MapReduceUtils.initReducerJob(new Path(hbasePath), TableInitReducer.class, tableJob);
        ControlledJob firstControll = new ControlledJob(configuration);
        firstControll.setJob(tableJob);
        // tableJob.waitForCompletion(true);

        Job db = new Job(configuration, "icntv db collect");
        configuration.setLong("mapred.min.split.size", 512 * 2014 * 1024L);
        MapReduceUtils.initMapperJob(DefaultHbaseMapper.class, Text.class, Text.class, this.getClass(), db,
                new Path(hbasePath));
        FileOutputFormat.setOutputPath(db, new Path(dbPath));
        db.setNumReduceTasks(0);
        ControlledJob secondaryController = new ControlledJob(configuration);
        secondaryController.setJob(db);
        secondaryController.addDependingJob(firstControll);
        jobControl.addJob(firstControll);
        jobControl.addJob(secondaryController);
    }
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    logger.info("job controller successed job size=" + jobControl.getSuccessfulJobList().size());
    // db.waitForCompletion(true);
    return 0;
}
From source file: tv.icntv.grade.film.grade.GradeJob.java
License: Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = this.getConf();
    //num job
    Job numJob = new Job(configuration, "calculate film seed num job ");
    Path[] paths = getPaths(strings[0].split(","));
    MapReduceUtils.initMapperJob(NumCountMapper.class, Text.class, LongWritable.class, this.getClass(),
            numJob, paths);
    // TableMapReduceUtil.initTableReducerJob(strings[1], NumCountReducer.class, numJob);
    ControlledJob controlledJob3 = new ControlledJob(configuration);
    controlledJob3.setJob(numJob);

    //time job
    Job timeJob = new Job(configuration, "calculate film time middle job");
    MapReduceUtils.initMapperJob(TimeMaper.class, Text.class, Text.class, this.getClass(), timeJob, paths);
    timeJob.setCombinerClass(TimeCombiner.class);
    MapReduceUtils.initReducerJob(new Path(strings[2]), TimeReducer.class, timeJob);
    ControlledJob controlledJob4 = new ControlledJob(configuration);
    controlledJob4.setJob(timeJob);

    // ?
    Job correlate = new Job(configuration, "icntv correlate job");
    MapReduceUtils.initMapperJob(UserHistoryMapper.class, Text.class, Text.class, this.getClass(), correlate,
            paths);
    MapReduceUtils.initReducerJob(new Path(strings[3]), UserHistoryReducer.class, correlate);
    ControlledJob correlateController = new ControlledJob(configuration);
    correlateController.setJob(correlate);
    // controlledJob3.getDependentJobs().add()

    JobControl jobControl = new JobControl("unit grade");
    jobControl.addJob(controlledJob3);
    jobControl.addJob(controlledJob4);
    // jobControl.addJob(controlledJob5);
    jobControl.addJob(correlateController);
    new Thread(jobControl).start();
    while (!jobControl.allFinished()) {
        Thread.sleep(5000);
    }
    // Counter counter = correlate.getCounters().findCounter(groupName, countName);
    // HadoopUtils.save(new Path(configuration.get("icntv.temp.file")), counter.getValue());
    return 0;
}