List of usage examples for org.apache.hadoop.mapred.JobConf.setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass)
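setMapOutputValueClass declares the class of the values the mapper emits when it differs from the job's final output value class; if it is never called, the map output value class defaults to the one given to setOutputValueClass. Before the real-world examples below, here is a minimal, self-contained sketch of the typical situation that requires the call (not taken from any of the source files on this page; TokenMapper, SumReducer, and MapOutputValueDemo are illustrative names written for this sketch): the mapper emits IntWritable counts while the reducer writes Text values.

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

public class MapOutputValueDemo {

    // Mapper emits <Text, IntWritable>: one count per whitespace-separated token.
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> out,
                Reporter reporter) throws IOException {
            for (String token : value.toString().split("\\s+")) {
                out.collect(new Text(token), ONE);
            }
        }
    }

    // Reducer emits <Text, Text>: the final value type differs from the
    // intermediate IntWritable, which is what setMapOutputValueClass is for.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, Text> {
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, Text> out,
                Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            out.collect(key, new Text("count=" + sum));
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MapOutputValueDemo.class);
        conf.setJobName("map-output-value-demo");

        // The map output classes default to the job's output classes, so they
        // only need to be set when the intermediate types differ, as here.
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(TokenMapper.class);
        conf.setReducerClass(SumReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}

The BtJob and itBtJob examples below show the same distinction under a combiner: a combiner must consume and produce the map output types, so the intermediate SparseRowBlockWritable has to be declared with setMapOutputValueClass even though the job's final output value class is VectorWritable.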
From source file:name.abhijitsarkar.hadoop.citation.CitationCombiner.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("citation-combiner");
    /* This is to set the separator byte for KeyValueTextInputFormat */
    conf.set("key.value.separator.in.input.line", ",");
    conf.setMapperClass(CitationMapper.class);
    conf.setReducerClass(CitationReducer.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
    return 0;
}
From source file:net.peacesoft.nutch.crawl.RaovatPostDeleteDuplicates.java
License:Apache License
public void dedup(String solrUrl, boolean noCommit) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("RaovatPostDeleteDuplicates: starting at " + sdf.format(start));
    LOG.info("RaovatPostDeleteDuplicates: Solr url: " + solrUrl);

    JobConf job = new NutchJob(getConf());
    job.set(ReSolrConstants.SERVER_URL, solrUrl);
    job.setBoolean("noCommit", noCommit);
    job.setInputFormat(RaovatPostDeleteDuplicates.SolrInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(RaovatPostDeleteDuplicates.SolrRecord.class);
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(RaovatPostDeleteDuplicates.class);

    JobClient.runJob(job);

    long end = System.currentTimeMillis();
    LOG.info("RaovatPostDeleteDuplicates: finished at " + sdf.format(end) + ", elapsed: "
            + TimingUtil.elapsedTime(start, end));
}
From source file:net.peacesoft.nutch.crawl.ReIndexerMapReduce.java
License:Apache License
public static void initMRJob(Path inputDb, Path crawlDb, Path linkDb, Collection<Path> segments, JobConf job) {
    LOG.info("IndexerMapReduce: crawldb: " + crawlDb);
    if (linkDb != null) {
        LOG.info("IndexerMapReduce: linkdb: " + linkDb);
    }
    for (final Path segment : segments) {
        LOG.info("IndexerMapReduces: adding segment: " + segment);
        FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.FETCH_DIR_NAME));
        FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.PARSE_DIR_NAME));
        FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));
        FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));
    }
    FileInputFormat.addInputPath(job, new Path(inputDb, CrawlDb.CURRENT_NAME));
    FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
    if (linkDb != null) {
        FileInputFormat.addInputPath(job, new Path(linkDb, LinkDb.CURRENT_NAME));
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(IndexerMapReduce.class);
    job.setReducerClass(IndexerMapReduce.class);
    job.setOutputFormat(IndexerOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NutchWritable.class);
    job.setOutputValueClass(NutchWritable.class);
}
From source file:nl.tudelft.graphalytics.mapreducev2.MapReduceJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    FileSystem dfs = FileSystem.get(getConf());
    String inPath = inputPath;
    while (!isFinished()) {
        iteration++;

        // Prepare job configuration
        JobConf jobConfiguration = new JobConf(this.getConf());
        jobConfiguration.setJarByClass(this.getClass());
        jobConfiguration.setMapOutputKeyClass(getMapOutputKeyClass());
        jobConfiguration.setMapOutputValueClass(getMapOutputValueClass());
        jobConfiguration.setMapperClass(getMapperClass());
        if (getCombinerClass() != null)
            jobConfiguration.setCombinerClass(getCombinerClass());
        jobConfiguration.setReducerClass(getReducerClass());
        jobConfiguration.setOutputKeyClass(getOutputKeyClass());
        jobConfiguration.setOutputValueClass(getOutputValueClass());
        jobConfiguration.setInputFormat(getInputFormatClass());
        jobConfiguration.setOutputFormat(getOutputFormatClass());
        if (getNumMappers() != -1)
            jobConfiguration.setNumMapTasks(getNumMappers());
        if (getNumReducers() != -1)
            jobConfiguration.setNumReduceTasks(getNumReducers());
        setConfigurationParameters(jobConfiguration);

        // Set the input and output paths
        String outPath = intermediatePath + "/iteration-" + iteration;
        FileInputFormat.addInputPath(jobConfiguration, new Path(inPath));
        FileOutputFormat.setOutputPath(jobConfiguration, new Path(outPath));

        // Execute the current iteration
        RunningJob jobExecution = JobClient.runJob(jobConfiguration);
        jobExecution.waitForCompletion();

        // Remove the output of the previous job (unless it is the input graph)
        if (iteration != 1) {
            dfs.delete(new Path(inPath), true);
        }
        inPath = outPath;

        processJobOutput(jobExecution);
    }

    // Rename the last job output to the specified output path
    try {
        dfs.mkdirs(new Path(outputPath).getParent());
        dfs.rename(new Path(inPath), new Path(outputPath));
    } catch (Exception e) {
        LOG.warn("Failed to rename MapReduce job output.", e);
    }
    return 0;
}
From source file:nthu.scopelab.tsqr.ssvd.ABtDenseOutJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPath, Path inputBt, Path outputPath, int k, int p,
        int reduceTasks, int mis) throws Exception {
    JobConf job = new JobConf(conf, ABtDenseOutJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.set(PROP_BT_PATH, inputBt.toString());
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setJobName("ABtDenseOutJob");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(LMatrixWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(LMatrixWritable.class);
    job.setMapperClass(ABtMapper.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));
    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, inputPath);
    RunningJob rj = JobClient.runJob(job);
}
From source file:nthu.scopelab.tsqr.ssvd.BtJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;
    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, BtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);

    FileSystem.get(job).delete(btPath, true);
    FileOutputFormat.setOutputPath(job, btPath);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("BtJob");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));
    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);
    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }

    RunningJob rj = JobClient.runJob(job);
    System.out.println("Btjob Job ID: " + rj.getJobID().toString());
}
From source file:nthu.scopelab.tsqr.ssvd.itBtJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;
    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, itBtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);

    FileSystem.get(job).delete(btPath, true);
    FileOutputFormat.setOutputPath(job, btPath);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("itBtJob");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));
    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);
    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }

    RunningJob rj = JobClient.runJob(job);
    System.out.println("itBtJob Job ID: " + rj.getJobID().toString());
}
From source file:nthu.scopelab.tsqr.ssvd.UJob.java
License:Apache License
public void start(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath,
        int k, boolean uHalfSigma, int mis) throws ClassNotFoundException, InterruptedException, IOException {
    String input = "";
    JobConf job = new JobConf(conf, UJob.class);
    jobclient = new JobClient(job);
    job.setJobName("UJob");
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setMapperClass(MultiplyMapper.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LMatrixWritable.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LMatrixWritable.class);

    FileSystem fs = FileSystem.get(job);
    fileGather fgather = new fileGather(
            new Path(inputPathQ.toString().substring(0, inputPathQ.toString().lastIndexOf("/") - 1)), "Q-", fs);
    mis = Checker.checkMis(mis, fgather.getInputSize(), fs);
    job.setNumMapTasks(fgather.recNumMapTasks(mis));
    job.setNumReduceTasks(0);

    job.set("mapreduce.output.basename", OUTPUT_U);
    job.set(PROP_UHAT_PATH, inputUHatPath.toString());
    job.set(PROP_SIGMA_PATH, sigmaPath.toString());
    if (uHalfSigma) {
        job.set(PROP_U_HALFSIGMA, "y");
    }
    job.setInt(QJob.PROP_K, k);

    FileSystem.get(job).delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    FileInputFormat.setInputPaths(job, inputPathQ);

    //JobClient.runJob(job);
    jobid = jobclient.submitJob(job).getID();
}
From source file:org.ahanna.DoubleConversionMapper.java
License:Apache License
public static void main(String[] args) {
    JobConf conf = new JobConf(DoubleConversion.class);
    conf.setJobName("DoubleConversation");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(DoubleConversionMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    // KeyValueTextInputFormat treats each line as an input record,
    // and splits the line by the tab character to separate it into key and value
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        // do nothing
    }
}
From source file:org.apache.ambari.servicemonitor.jobs.FileUsingJobRunner.java
License:Apache License
public int run(String[] args) throws Exception {
    // Configuration processed by ToolRunner
    Configuration conf = getConf();
    CommandLine commandLine = getCommandLine();

    // Create a JobConf using the processed conf
    JobConf jobConf = new JobConf(conf, FileUsingJobRunner.class);

    // tune the config
    if (jobConf.get(JobKeys.RANGEINPUTFORMAT_ROWS) == null) {
        jobConf.setInt(JobKeys.RANGEINPUTFORMAT_ROWS, 1);
    }

    // Process custom command-line options
    String name = OptionHelper.getStringOption(commandLine, "n", "File Using Job");
    if (commandLine.hasOption('x')) {
        // delete the output directory
        String destDir = jobConf.get(JobKeys.MAPRED_OUTPUT_DIR);
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(destDir), true);
    }

    // Specify various job-specific parameters
    jobConf.setMapperClass(FileUsingMapper.class);
    jobConf.setReducerClass(FileUsingReducer.class);
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setInputFormat(RangeInputFormat.class);
    //jobConf.setPartitionerClass(SleepJob.class);
    jobConf.setSpeculativeExecution(false);
    jobConf.setJobName(name);
    jobConf.setJarByClass(this.getClass());
    FileInputFormat.addInputPath(jobConf, new Path("ignored"));

    // Submit the job, then poll for progress until the job is complete
    RunningJob runningJob = JobClient.runJob(jobConf);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : 1;
}