List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException
From source file:com.test.hadoop.unoExample.CardDriver.java
License:Apache License
/**
 * Configures and runs the MapReduce job that pairs {@code CardMapper} with
 * {@code CardTotalReducer}.
 *
 * @param args exactly two entries: the input path followed by the output path
 * @return 0 if the job completed successfully, 1 if it failed, or -1 if the
 *         number of arguments is not two
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    // Guard clause: anything other than "input output" is rejected up front.
    if (args.length != 2) {
        System.err.println("Incorrect number of arguments. Expected: input output");
        return -1;
    }
    final String inputDir = args[0];
    final String outputDir = args[1];

    Job job = new Job(getConf());
    job.setJarByClass(CardDriver.class);
    job.setJobName(this.getClass().getName());

    // Where the job reads from and writes to.
    FileInputFormat.setInputPaths(job, new Path(inputDir));
    FileOutputFormat.setOutputPath(job, new Path(outputDir));

    // Processing classes.
    job.setMapperClass(CardMapper.class);
    job.setReducerClass(CardTotalReducer.class);

    // Intermediate (map-side) key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Final (reduce-side) key/value types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMult.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Date startProc = new Date(System.currentTimeMillis()); System.out.println("process started at " + startProc); Configuration conf = new Configuration(); int I = Integer.parseInt(args[3]); // Num of Row of MatrixA int K = Integer.parseInt(args[4]); // Num of Row of MatrixB' int IB = Integer.parseInt(args[5]); // RowBlock Size of MatrixA int KB = Integer.parseInt(args[6]); // RowBlock Size of MatrixB' int M = 0;// w w w.ja v a 2 s.c o m if (I % IB == 0) { M = I / IB; } else { M = I / IB + 1; } int N = 0; if (K % KB == 0) { N = K / KB; } else { N = K / KB + 1; } conf.set("I", args[3]); // Num of Row of MatrixA conf.set("K", args[4]); // Num of Row of MatrixB' conf.set("IB", args[5]); // RowBlock Size of MatrixA conf.set("KB", args[6]); // RowBlock Size of MatrixB' conf.set("M", new Integer(M).toString()); conf.set("N", new Integer(N).toString()); Job job = new Job(conf, "MatrixMultiplication"); job.setJarByClass(MatrixMult.class); job.setReducerClass(Reduce.class); job.setMapOutputKeyClass(MatrixMult.IndexPair.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Mapper????? MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path System.out.println("num of MatrixA RowBlock(M) is " + M); System.out.println("num of MatrixB RowBlock(N) is " + N); boolean success = job.waitForCompletion(true); Date endProc = new Date(System.currentTimeMillis()); System.out.println("process ended at " + endProc); System.out.println(success); }
From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMultiplication.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Date startProc = new Date(System.currentTimeMillis()); System.out.println("process started at " + startProc); Configuration conf = new Configuration(); int I = Integer.parseInt(args[3]); // Num of Row of MatrixA int K = Integer.parseInt(args[4]); // Num of Row of MatrixB' int IB = Integer.parseInt(args[5]); // RowBlock Size of MatrixA int KB = Integer.parseInt(args[6]); // RowBlock Size of MatrixB' int M = 0;//from w w w . j a va 2 s .c o m if (I % IB == 0) { M = I / IB; } else { M = I / IB + 1; } int N = 0; if (K % KB == 0) { N = K / KB; } else { N = K / KB + 1; } conf.set("I", args[3]); // Num of Row of MatrixA conf.set("K", args[4]); // Num of Row of MatrixB' conf.set("IB", args[5]); // RowBlock Size of MatrixA conf.set("KB", args[6]); // RowBlock Size of MatrixB' conf.set("M", new Integer(M).toString()); conf.set("N", new Integer(N).toString()); Job job = new Job(conf, "MatrixMultiplication"); job.setJarByClass(MatrixMultiplication.class); job.setReducerClass(Reduce.class); job.setMapOutputKeyClass(MatrixMultiplication.IndexPair.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Mapper????? MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path System.out.println("num of MatrixA RowBlock(M) is " + M); System.out.println("num of MatrixB ColBlock(N) is " + N); boolean success = job.waitForCompletion(true); Date endProc = new Date(System.currentTimeMillis()); System.out.println("process ended at " + endProc); System.out.println(success); }
From source file:com.tetsuyaodaka.hadoop.math.matrix.TransformMatrix.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Date startProc = new Date(System.currentTimeMillis()); System.out.println("process started at " + startProc); Configuration conf = new Configuration(); if (args[2].equals("yes")) { conf.set("transpose", "true"); // transpose } else {// w w w. j av a 2 s.com conf.set("transpose", "false"); // } Job job = new Job(conf, "MatrixMultiplication"); job.setJarByClass(TransformMatrix.class); job.setReducerClass(Reduce.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); // Mapper????? MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Map.class); // matrixA FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path boolean success = job.waitForCompletion(true); Date endProc = new Date(System.currentTimeMillis()); System.out.println("process ended at " + endProc); System.out.println(success); }
From source file:com.toddbodnar.simpleHadoop.distributedHadoopDriver.java
/** * Runs a job/*from w w w.j a v a 2 s. co m*/ * * @param theJob the MapReduceJob to be run * @param verbose if true, output progress information */ public static void run(MapReduceJob theJob, boolean verbose) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = GetConfiguration.get(); Job job = Job.getInstance(conf, theJob.toString()); job.setJarByClass(distributedHadoopDriver.class); job.setMapperClass(theJob.getMapper().getClass()); job.setReducerClass(theJob.getReducer().getClass()); job.setMapOutputKeyClass(theJob.getKeyType()); job.setMapOutputValueClass(theJob.getValueType()); theJob.writeConfig(job.getConfiguration()); hdfsFile input = hdfsFile.transferToHDFS(theJob.getInput().getFile()); if (!input.equals(theJob.getInput().getFile())) { garbage_collector.noteCreated(input); } if (theJob.getClass().equals(join.class)) { join jobLeftJoin = (join) theJob; hdfsFile input2 = hdfsFile.transferToHDFS(jobLeftJoin.getOtherInput().getFile()); if (!input2.equals(jobLeftJoin.getOtherInput().getFile())) { garbage_collector.noteCreated(input2); } Mapper maps[] = jobLeftJoin.getMapperPairs(); MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class, maps[0].getClass()); MultipleInputs.addInputPath(job, input2.getPath(), TextInputFormat.class, maps[1].getClass()); } else { MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class); } job.getConfiguration().set(TextOutputFormat.SEPERATOR, ""); job.setOutputFormatClass(TextOutputFormat.class); //FileInputFormat.setInputPaths(job, new Path(theJob.getInput().getFile().getLocation())); Path out = new Path(settings.hdfs_prefix + "/TMP_TABLE_" + theJob.hashCode()); FileOutputFormat.setOutputPath(job, out); boolean success = job.waitForCompletion(true); if (!success) { System.err.println("Error processing " + theJob); return; } FileSystem fs = FileSystem.get(GetConfiguration.get()); fs.delete(new Path(out, "_SUCCESS"), false); table output = new table(new 
hdfsFile(out), theJob.getOutput().getColNames()); output.setSeperator(theJob.getOutput().getSeperator()); theJob.setOutput(output); garbage_collector.noteCreated(output.getFile()); }
From source file:com.tomslabs.grid.avro.AvroWordCount.java
License:Apache License
public static Job createSubmitableJob(final Configuration conf, final Path inputPath, final Path outputPath) throws IOException { conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA, WordCountSchema.getSchema().toString()); conf.setInt("mapred.max.split.size", 1024000); conf.setInt("mapred.reduce.tasks", 10); conf.setBoolean("mapred.reduce.tasks.speculative.execution", true); final Job job = new Job(conf, "Word Count"); job.setJarByClass(AvroWordCount.class); job.setInputFormatClass(AvroFileInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setOutputKeyClass(GenericRecord.class); job.setOutputValueClass(NullWritable.class); job.setOutputFormatClass(AvroFileOutputFormat.class); AvroFileOutputFormat.setDeflateLevel(job, 3); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); return job;// w w w. j a va 2 s.com }
From source file:com.toshiba.mwcloud.gs.hadoop.mapreduce.examples.GSWordCount.java
License:Apache License
/** * <div lang="ja">/*from w w w .java 2s .com*/ * WordCount?MapReduce??? * @param args * @return ???0????????1 * @throws Exception ?????? * </div><div lang="en"> * Run a MapReduce job of WordCount. * @param args command argument * @return 0 for normal termination of the job and 1 otherwise * @throws Exception processing failed. * </div> */ public int run(String[] args) throws Exception { GSConf gsConf = new GSConf(); gsConf.parseArg(args); Configuration conf = getConf(); gsConf.setup(conf); Job job = Job.getInstance(conf, APP_NAME); job.setJarByClass(GSWordCount.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(GSRowWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(GSRowInputFormat.class); job.setOutputFormatClass(GSRowOutputFormat.class); int res = job.waitForCompletion(true) ? 0 : 1; if (res == 0) { printResult(gsConf); } return res; }
From source file:com.trexinhca.TrexinHCATest.java
License:Apache License
public static void main(String[] args) throws Exception { ks = KieServices.Factory.get();// w w w. ja va2s.c o m kContainer = ks.getKieClasspathContainer(); ksession = TrexinHCATest.kContainer.newKieSession("MapReduceKS"); Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: TrexinHCATest <in> [<in>...] <out>"); System.exit(2); } Job job = Job.getInstance(conf); job.setJobName("HCATest"); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(TrexinHCAReducer.class); job.setReducerClass(TrexinHCAReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(TextOutputFormat.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); job.setJarByClass(TrexinHCATest.class); job.waitForCompletion(true); }
From source file:com.twitter.algebra.matrix.multiply.ABOuterHDFSBroadcastOfA.java
License:Apache License
/**
 * Configures and runs the multiplication job of {@link ABOuterHDFSBroadcastOfA};
 * refer to the class documentation for further details.
 *
 * <p>The in-memory matrix location and its dimensions are written into
 * {@code conf} before the job is created. Input and output paths are qualified
 * against the file system of the input path.
 *
 * @param conf
 *          the initial configuration (modified by this call)
 * @param inMemMatrixDir
 *          location of the matrix that is kept in memory (must be small enough
 *          to fit into memory)
 * @param matrixInputPath
 *          path to the matrix read as job input, in
 *          {@link SequenceFileInputFormat}
 * @param matrixOutputPath
 *          path to which the product will be written
 * @param inMemMatrixNumRows
 *          number of rows of the in-memory matrix
 * @param inMemMatrixNumCols
 *          number of columns of the in-memory matrix
 * @throws IOException
 *           if the job cannot be set up, or if it runs and fails
 * @throws InterruptedException
 *           if waiting for the job is interrupted
 * @throws ClassNotFoundException
 *           if a job class cannot be resolved
 */
public void run(Configuration conf, String inMemMatrixDir, Path matrixInputPath, Path matrixOutputPath,
        int inMemMatrixNumRows, int inMemMatrixNumCols)
        throws IOException, InterruptedException, ClassNotFoundException {
    // Describe the in-memory matrix to the tasks.
    conf.set(MATRIXINMEMORY, inMemMatrixDir);
    conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(ABOuterHDFSBroadcastOfA.class);
    job.setJobName(ABOuterHDFSBroadcastOfA.class.getSimpleName());

    // Qualify both paths against the input path's file system.
    final FileSystem fileSystem = FileSystem.get(matrixInputPath.toUri(), conf);
    final Path qualifiedInput = fileSystem.makeQualified(matrixInputPath);
    final Path qualifiedOutput = fileSystem.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, qualifiedOutput);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, inMemMatrixNumRows);

    // The same reducer class serves as combiner and reducer.
    job.setCombinerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);
    job.setReducerClass(AtBOuterStaticMapsideJoinJob.MyReducer.class);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}
From source file:com.twitter.algebra.matrix.multiply.AtBOuterStaticMapsideJoinJob.java
License:Apache License
public void run(Configuration conf, Path atPath, Path bPath, Path outPath, int outCardinality) throws IOException, InterruptedException, ClassNotFoundException { conf.setInt(OUT_CARD, outCardinality); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJobName(AtBOuterStaticMapsideJoinJob.class.getSimpleName()); job.setJarByClass(AtBOuterStaticMapsideJoinJob.class); FileSystem fs = FileSystem.get(atPath.toUri(), conf); atPath = fs.makeQualified(atPath);//from w w w .ja v a 2 s .c o m bPath = fs.makeQualified(bPath); job.setInputFormatClass(CompositeInputFormat.class); //mapside join expression job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, atPath, bPath)); job.setOutputFormatClass(MatrixOutputFormat.class); outPath = fs.makeQualified(outPath); FileOutputFormat.setOutputPath(job, outPath); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setCombinerClass(MyReducer.class); int numReducers = conf.getInt("algebra.reduceslots.multiply", 10); job.setNumReduceTasks(numReducers); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed"); }