List of usage examples for org.apache.hadoop.mapred JobConf JobConf
public JobConf(Configuration conf, Class exampleClass)
From source file:averageprocessingtimesbytype.AverageProcessingTimesByType.java
public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, AverageProcessingTimesByType.class); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("AverageProcessingTimesByType"); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // job.set("key.value.separator.in.input.line", ""); JobClient.runJob(job);//from ww w . jav a 2 s .co m return 0; }
From source file:babel.prep.PrepStep.java
License:Apache License
/**
 * Creates the step from the given configuration.
 *
 * <p>The configuration is wrapped in a {@code JobConf} bound to
 * {@code PrepStep} and passed to the superclass constructor; the file system
 * handle is then obtained from the configuration returned by {@code getConf()}.
 *
 * @param conf configuration to wrap
 * @throws IOException declared by this constructor; presumably raised by the
 *                     file system lookup
 */
public PrepStep(Configuration conf) throws IOException {
    super(new JobConf(conf, PrepStep.class));
    this.m_fs = FileSystem.get(this.getConf());
}
From source file:boa.datagen.SeqSort.java
License:Apache License
/** * The main driver for sort program./*from w ww .j ava 2s .c om*/ * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker. */ @Override public int run(String[] args) throws Exception { System.out.println(inPath); JobConf jobConf = new JobConf(getConf(), SeqSort.class); jobConf.setJobName("sorter"); jobConf.setMapperClass(IdentityMapper.class); jobConf.setReducerClass(IdentityReducer.class); JobClient client = new JobClient(jobConf); ClusterStatus cluster = client.getClusterStatus(); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); String sort_reduces = jobConf.get("test.sort.reduces_per_host"); if (sort_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces); } // Set user-supplied (possibly default) job configs jobConf.setNumReduceTasks(num_reduces); jobConf.setInputFormat(SequenceFileInputFormat.class); jobConf.setOutputFormat(SequenceFileOutputFormat.class); jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(BytesWritable.class); SequenceFileOutputFormat.setCompressOutput(jobConf, true); SequenceFileOutputFormat.setOutputCompressorClass(jobConf, SnappyCodec.class); SequenceFileOutputFormat.setOutputCompressionType(jobConf, CompressionType.BLOCK); // Make sure there are exactly 2 parameters left. FileInputFormat.setInputPaths(jobConf, inPath); FileOutputFormat.setOutputPath(jobConf, new Path(outPath)); System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf) + " with " + num_reduces + " reduces."); Date startTime = new Date(); System.out.println("Job started: " + startTime); jobResult = JobClient.runJob(jobConf); Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return 0; }
From source file:Business.DataJoin.java
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, DataJoin.class); final File f = new File(MapReduceOne.class.getProtectionDomain().getCodeSource().getLocation().getPath()); String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/inFiles/"; String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outFiles/OutputOne"; //use the arguments instead if provided. if (args.length > 1) { inFiles = args[1];//ww w . ja va 2 s .com outFiles = args[2]; } Path in = new Path(inFiles); Path out = new Path(outFiles); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("Data Join"); job.setMapperClass(MapClass.class); job.setReducerClass(ReduceClass.class); job.setInputFormat(TextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(TaggedWritable.class); job.set("mapred.textoutputformat.separator", ","); JobClient.runJob(job); return 0; }
From source file:ca.etsmtl.lasi.hbasewikipedialoader.HBaseWikipediaLoader.java
License:Apache License
/** * Sets up the actual job.//from w w w. ja v a2 s.c om * * @param conf * The current configuration. * @param args * The command line parameters. * @return The newly created job. * @throws IOException * When setting up the job fails. */ public static JobConf createSubmittableJob(HBaseConfiguration conf, String[] args) throws IOException { JobConf jobConf = new JobConf(conf, HBaseWikipediaLoader.class); jobConf.setJobName(NAME); // Stream stuff jobConf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader"); jobConf.set("stream.recordreader.begin", "<page>"); jobConf.set("stream.recordreader.end", "</page>"); jobConf.setSpeculativeExecution(false); jobConf.setMapOutputKeyClass(ImmutableBytesWritable.class); jobConf.setMapOutputValueClass(BatchUpdate.class); jobConf.setMapperClass(Map.class); jobConf.setNumReduceTasks(0); jobConf.setInputFormat(StreamInputFormat.class); jobConf.setOutputFormat(TableOutputFormat.class); jobConf.set(TableOutputFormat.OUTPUT_TABLE, TABLE); jobConf.setOutputKeyClass(ImmutableBytesWritable.class); jobConf.setOutputValueClass(BatchUpdate.class); StreamInputFormat.setInputPaths(jobConf, new Path(args[0])); FileOutputFormat.setOutputPath(jobConf, new Path("/tmp/" + NAME + "-" + System.currentTimeMillis())); return jobConf; }
From source file:ca.etsmtl.logti.log792.mti830.RowCounter.java
License:Apache License
/** * @param args/*from ww w . j a va 2 s . c o m*/ * @return the JobConf * @throws IOException */ @SuppressWarnings({ "unused", "deprecation" }) public JobConf createSubmittableJob(String[] args) throws IOException { JobConf c = new JobConf(getConf(), RowCounter.class); c.setJobName(NAME); // Columns are space delimited StringBuilder sb = new StringBuilder(); final int columnoffset = 2; for (int i = columnoffset; i < args.length; i++) { if (i > columnoffset) { sb.append(" "); } sb.append(args[i]); } // Second argument is the table name. TableMapReduceUtil.initTableMapJob(args[1], sb.toString(), this.getClass(), ImmutableBytesWritable.class, RowResult.class, c); c.setReducerClass(IdentityReducer.class); // First arg is the output directory. FileOutputFormat.setOutputPath(c, new Path(args[0])); return c; }
From source file:cl.borrego.store.hadoop.SimpleClient.java
License:Open Source License
public void main() { // Create a new JobConf JobConf job = new JobConf(new Configuration(), SimpleClient.class); // Specify various job-specific parameters job.setJobName("myjob"); // Submit the job, then poll for progress until the job is complete try {// ww w.j a va2 s. c om JobClient.runJob(job); } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } }
From source file:clusteringblocks.ClusteringBlocks.java
public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, ClusteringBlocks.class); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("ClusteringBlocks"); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // job.set("key.value.separator.in.input.line", ""); JobClient.runJob(job);//w ww .j a v a 2 s . c om return 0; }
From source file:cn.edu.xmu.dm.mapreduce.MultiFileWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { printUsage();//from w w w.ja v a2 s. c om return 1; } JobConf job = new JobConf(getConf(), MultiFileWordCount.class); job.setJobName("MultiFileWordCount"); // set the InputFormat of the job to our InputFormat job.setInputFormat(MyInputFormat.class); // the keys are words (strings) job.setOutputKeyClass(Text.class); // the values are counts (ints) job.setOutputValueClass(LongWritable.class); // use the defined mapper job.setMapperClass(MapClass.class); // use the WordCount Reducer job.setCombinerClass(LongSumReducer.class); job.setReducerClass(LongSumReducer.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); JobClient.runJob(job); return 0; }
From source file:cn.edu.xmu.dm.mapreduce.Sort.java
License:Apache License
/** * The main driver for sort program. Invoke this method to submit the * map/reduce job.// www. ja v a 2 s . c om * * @throws IOException * When there is communication problems with the job tracker. */ public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "Sorter"); job.setJarByClass(Sort.class); JobConf jobConf = new JobConf(getConf(), Sort.class); jobConf.setJobName("sorter"); jobConf.setMapperClass(IdentityMapper.class); jobConf.setReducerClass(IdentityReducer.class); JobClient client = new JobClient(jobConf); ClusterStatus cluster = client.getClusterStatus(); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); String sort_reduces = jobConf.get("test.sort.reduces_per_host"); if (sort_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces); } Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class; Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; Class<? extends WritableComparable> outputKeyClass = BytesWritable.class; Class<? 
extends Writable> outputValueClass = BytesWritable.class; List<String> otherArgs = new ArrayList<String>(); InputSampler.Sampler<K, V> sampler = null; for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { jobConf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { num_reduces = Integer.parseInt(args[++i]); } else if ("-inFormat".equals(args[i])) { inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class); } else if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else if ("-outKey".equals(args[i])) { outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class); } else if ("-outValue".equals(args[i])) { outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class); } else if ("-totalOrder".equals(args[i])) { double pcnt = Double.parseDouble(args[++i]); int numSamples = Integer.parseInt(args[++i]); int maxSplits = Integer.parseInt(args[++i]); if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE; sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits); } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Set user-supplied (possibly default) job configs jobConf.setNumReduceTasks(num_reduces); jobConf.setInputFormat(inputFormatClass); jobConf.setOutputFormat(outputFormatClass); jobConf.setOutputKeyClass(outputKeyClass); jobConf.setOutputValueClass(outputValueClass); // Make sure there are exactly 2 parameters left. 
if (otherArgs.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2."); return printUsage(); } FileInputFormat.setInputPaths(jobConf, otherArgs.get(0)); FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1))); if (sampler != null) { System.out.println("Sampling input to effect total-order sort..."); jobConf.setPartitionerClass(TotalOrderPartitioner.class); Path inputDir = FileInputFormat.getInputPaths(jobConf)[0]; inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf)); Path partitionFile = new Path(inputDir, "_sortPartitioning"); TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile); InputSampler.<K, V>writePartitionFile(jobConf, sampler); URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning"); DistributedCache.addCacheFile(partitionUri, jobConf); DistributedCache.createSymlink(jobConf); } System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf) + " with " + num_reduces + " reduces."); Date startTime = new Date(); System.out.println("Job started: " + startTime); jobResult = JobClient.runJob(jobConf); Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return 0; }