List of usage examples for org.apache.hadoop.mapreduce Job setInputFormatClass
public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException
From source file:com.gsinnovations.howdah.Driver.java
License:Apache License
public static void job(Path input, Path output, int numReduceTasks) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); Job job = new Job(conf); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(TikaMapper.class); //job.setCombinerClass(KMeansCombiner.class); //job.setReducerClass(KMeansReducer.class); job.setNumReduceTasks(numReduceTasks); FileInputFormat.addInputPath(job, input); FileOutputFormat.setOutputPath(job, output); job.setJarByClass(Driver.class); HadoopUtil.overwriteOutput(output);//from ww w. ja v a2s . c o m job.waitForCompletion(true); }
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
/**
 * Runs the "CSMR Pairs Job": reads sequence files from {@code in} and writes sequence
 * files to {@code <out>/CSMRPairs} using {@code CSMRMapper} and {@code CSMRReducer}.
 *
 * <p>As a side effect, {@code out} is stored in the {@code path} field (declared outside
 * this method). The call blocks until the job finishes; the success flag returned by
 * {@code waitForCompletion} is not inspected.
 *
 * @param in input location of the job
 * @param out root output location; the job writes to its {@code CSMRPairs} child
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws InterruptedException if the wait for completion is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void generatePairs(String in, String out)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Remember the output root in the shared field before deriving paths from it.
  path = out;

  final Path pairsInput = new Path(in);
  final Path pairsOutput = new Path(path + "/CSMRPairs");

  final Job pairsJob = new Job(new Configuration());
  pairsJob.setJobName("CSMR Pairs Job");
  pairsJob.setJarByClass(CSMRBase.class);

  // Input and output are both sequence files.
  pairsJob.setInputFormatClass(SequenceFileInputFormat.class);
  pairsJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  FileInputFormat.addInputPath(pairsJob, pairsInput);
  FileOutputFormat.setOutputPath(pairsJob, pairsOutput);

  pairsJob.setMapperClass(CSMRMapper.class);
  pairsJob.setReducerClass(CSMRReducer.class);

  // Intermediate (map output) types differ from the final (reduce output) types.
  pairsJob.setMapOutputKeyClass(IntWritable.class);
  pairsJob.setMapOutputValueClass(DocumentWritable.class);
  pairsJob.setOutputKeyClass(Text.class);
  pairsJob.setOutputValueClass(VectorArrayWritable.class);

  pairsJob.waitForCompletion(true);
}
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
/**
 * Runs the "CSMR Cosine Similarity Job" and terminates the JVM with the job's status.
 *
 * <p>The job reads {@code <path>/CSMRPairs/part-r-00000} as a sequence file, where
 * {@code path} is a field declared outside this method, and writes its results to
 * {@code <path>/Results} using {@code CosineSimilarityReducer}.
 *
 * <p>The process exits with status {@code 0} when the job succeeds and {@code 1} when it
 * fails. The previous code had the two values swapped, so a successful run was reported
 * to the shell as a failure and vice versa.
 *
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws InterruptedException if the wait for completion is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void StartCSMR() throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJobName("CSMR Cosine Similarity Job");
  job.setJarByClass(CSMRBase.class);

  // NOTE(review): only the first reducer output file is read; this presumably assumes
  // the pairs job ran with a single reducer - confirm.
  FileInputFormat.addInputPath(job, new Path(path + "/CSMRPairs/part-r-00000"));
  FileOutputFormat.setOutputPath(job, new Path(path + "/Results"));

  // Presumably the base Hadoop Mapper, which passes records through unchanged - confirm
  // against the file's imports.
  job.setMapperClass(Mapper.class);
  job.setReducerClass(CosineSimilarityReducer.class);

  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(VectorArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  // waitForCompletion returns true on success; map that to the conventional exit code 0.
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hhscyber.nl.tweets.hbase2.Hbase2.java
@Override public int run(String[] args) throws Exception { Job client = new Job(getConf(), "hbasetest"); client.setSpeculativeExecution(false); client.setMaxMapAttempts(2);//from ww w .j a v a2 s . c o m client.setJarByClass(Hbase2.class); client.setOutputKeyClass(Text.class); client.setOutputValueClass(Text.class); client.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(client, new Path("input/1441737001"));//test one folder TextOutputFormat.setOutputPath(client, new Path("output4")); client.setMapperClass(Hbase2Mapper.class); client.setReducerClass(Hbase2Reducer.class); try { client.waitForCompletion(true); } catch (IOException | InterruptedException | ClassNotFoundException e) { System.out.println(e); } return 0; }
From source file:com.hhscyber.nl.tweets.hbasefill.HbaseFill.java
/** * @param args the command line arguments * @throws java.io.IOException//w w w. j a va2 s . c om */ public static void main(String[] args) throws IOException { conHbase = HBaseConfiguration.create(); Job client = new Job(conHbase); // new configuration client.setJarByClass(HbaseFill.class); client.setOutputKeyClass(Text.class); client.setOutputValueClass(IntWritable.class); client.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(client, new Path("input/" + test));//test one folder TextOutputFormat.setOutputPath(client, new Path("output3")); client.setMapperClass(HbaseFillMapper.class); try { client.submit(); } catch (IOException | InterruptedException | ClassNotFoundException e) { e.printStackTrace(); } }
From source file:com.hhscyber.nl.tweets.processtweets.ProcessTweets.java
/** * @param args the command line arguments *///from w w w.ja v a 2 s. c om public static void main(String[] args) throws IOException { Job client = new Job(new Configuration()); client.setJarByClass(ProcessTweets.class); client.setOutputKeyClass(Text.class); client.setOutputValueClass(IntWritable.class); client.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(client, new Path("input_concat"));// TextOutputFormat.setOutputPath(client, new Path("output2")); client.setMapperClass(ProcessTweetsMapper.class); client.setReducerClass(ProcessTweetsReducer.class); client.setCombinerClass(ProcessTweetsReducer.class); try { client.submit(); } catch (Exception e) { e.printStackTrace(); } }
From source file:com.hhscyber.nl.tweets.svm.train.Train.java
/**
 * Runs the "SVMTrainer" job: reads text input from {@code svmclass}, processes it with
 * {@code TrainMapper} and {@code TrainReducer} using a single reduce task, and writes
 * text output to {@code trainer}.
 *
 * <p>Any existing {@code trainer} path is deleted recursively before the job starts.
 * Job failures are reported on standard error; they were previously swallowed by an
 * empty catch block, so a failed run was indistinguishable from a successful one.
 *
 * @param args the command line arguments, passed to the {@code Conf} constructor
 * @throws java.io.IOException if the file system cannot be accessed or the job cannot be
 *     created or configured
 */
public static void main(String[] args) throws IOException {
  Conf conf = new Conf(args, "");

  // Remove output left over from a previous run.
  FileSystem hdfs = FileSystem.get(conf);
  hdfs.delete(new Path("trainer"), true);

  Job client = new HBJob(conf, "SVMTrainer");
  client.setJarByClass(Train.class);

  client.setMapOutputKeyClass(Text.class);
  client.setMapOutputValueClass(Text.class);

  client.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(client, new Path("svmclass"));

  client.setNumReduceTasks(1);
  client.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(client, new Path("trainer"));

  client.setMapperClass(TrainMapper.class);
  client.setReducerClass(TrainReducer.class);

  try {
    if (!client.waitForCompletion(true)) {
      System.err.println("SVMTrainer job did not complete successfully");
    }
  } catch (InterruptedException e) {
    // Restore the interrupt flag; the failure is reported but main still returns normally.
    Thread.currentThread().interrupt();
    System.err.println("SVMTrainer job was interrupted");
    e.printStackTrace();
  } catch (IOException | ClassNotFoundException e) {
    System.err.println("SVMTrainer job failed");
    e.printStackTrace();
  }
}
From source file:com.hn.cluster.hadoop.mrs.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception { // ?hadoop?/* www. ja v a 2s . c om*/ Configuration conf = new Configuration(); // ? Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySort.class); // Mapper job.setMapperClass(MapClass.class); // Reducer job.setReducerClass(Reduce.class); // job.setPartitionerClass(FirstPartitioner.class); // job.setGroupingComparatorClass(FirstGroupingComparator.class); // map Key job.setMapOutputKeyClass(IntPair.class); // mapValue job.setMapOutputValueClass(IntWritable.class); // rduceKeyTextOutputFormatClassTextOutputFormat job.setOutputKeyClass(Text.class); // rduceValue job.setOutputValueClass(IntWritable.class); /** * ?????splites???RecordReder * ??RecordReder?keyvalue * Map<LongWritable, Text> * Mapmap<LongWritable, Text>Mapmap * ?List<IntPair, IntWritable> * map?job.setPartitionerClassList?reducer */ job.setInputFormatClass(TextInputFormat.class); // ??RecordWriter? job.setOutputFormatClass(TextOutputFormat.class); // hdfs FileInputFormat.addInputPath(job, new Path("hdfs://192.1168.1.12:9000/input/input/soso.txt")); // hdfs FileOutputFormat.setOutputPath(job, new Path("hdfs://192.1168.1.12:9000/output/sort/")); // ??job System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.hortonworks.mapreduce.URLCount.java
License:Apache License
/**
 * Configures and runs the "URLCount" job.
 *
 * <p>Input lines are read with {@code KeyValueTextInputFormat}, using a single space as
 * the key/value separator. The mapper emits {@code Text}/{@code IntWritable} pairs and
 * the reducer emits {@code IntWritable}/{@code Text} pairs, written as text.
 *
 * @param args {@code args[0]} is the input path and {@code args[1]} the output path
 * @return {@code 0} if the job succeeded, {@code -1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
  // The separator must be set on the configuration before the job is created from it.
  final Configuration conf = this.getConf();
  conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");

  final Job urlCount = Job.getInstance(conf, "URLCount");
  urlCount.setJarByClass(getClass());

  urlCount.setInputFormatClass(KeyValueTextInputFormat.class);
  urlCount.setOutputFormatClass(TextOutputFormat.class);

  urlCount.setMapperClass(URLCountM.class);
  urlCount.setReducerClass(URLCountR.class);

  // Key and value types are swapped between the map output and the final output.
  urlCount.setMapOutputKeyClass(Text.class);
  urlCount.setMapOutputValueClass(IntWritable.class);
  urlCount.setOutputKeyClass(IntWritable.class);
  urlCount.setOutputValueClass(Text.class);

  FileInputFormat.addInputPath(urlCount, new Path(args[0]));
  FileOutputFormat.setOutputPath(urlCount, new Path(args[1]));

  final boolean succeeded = urlCount.waitForCompletion(true);
  if (succeeded) {
    return 0;
  }
  return -1;
}
From source file:com.hortonworks.pso.data.generator.mapreduce.DataGenTool.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = Job.getInstance(getConf()); // new Job(conf, this.getClass().getCanonicalName()); // Configuration conf = getConf(); int mappers = 2; String output = null;// w w w. j a va2 s .c om String config = null; long count = 100; List<String> otherArgs = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-mappers".equals(args[i])) { mappers = Integer.parseInt(args[++i]); otherArgs.add("-Dmapreduce.job.maps=" + Integer.toString(mappers)); } else if ("-output".equals(args[i])) { output = args[++i]; } else if ("-json.cfg".equals(args[i])) { config = args[++i]; } else if ("-count".equals(args[i])) { count = Long.parseLong(args[++i]); } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } job.getConfiguration().set("json.cfg", config); String[] altArgs = new String[otherArgs.size()]; otherArgs.toArray(altArgs); GenericOptionsParser gop = new GenericOptionsParser(job.getConfiguration(), altArgs); DataGenInputFormat.setNumberOfRows(job, count); job.setJarByClass(DataGenTool.class); Path output_path = new Path(output); if (output_path.getFileSystem(getConf()).exists(output_path)) { throw new IOException("Output directory " + output_path + " already exists."); } FileOutputFormat.setOutputPath(job, output_path); job.setMapperClass(DataGenMapper.class); // Map Only Job job.setNumReduceTasks(0); // job.setReducerClass(RerateReducer.class); job.setInputFormatClass(DataGenInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(Text.class); return 
job.waitForCompletion(true) ? 0 : 1; }