List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass
public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException
From source file:com.alectenharmsel.research.MoabLogSearch.java
License:Apache License
/**
 * Configures and runs the "MoabLogSearch" MapReduce job.
 *
 * <p>The job reads from the path in {@code args[0]}, writes to the path in
 * {@code args[1]}, and uses {@code MoabLogSearchMapper} / {@code MoabLogSearchReducer}
 * with {@code LongWritable} output keys and {@code Text} output values.
 *
 * @param args exactly two elements: the input path and the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MoabLogSearch <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "MoabLogSearch");
    job.setJarByClass(MoabLogSearch.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MoabLogSearchMapper.class);
    job.setReducerClass(MoabLogSearchReducer.class);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Blocks until the job finishes; 'true' turns on progress reporting.
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.alectenharmsel.research.SrcTok.java
License:Apache License
/**
 * Configures and runs the "SrcTok" MapReduce job.
 *
 * <p>The job reads from the path in {@code args[0]} and writes to the path in
 * {@code args[1]}. The map phase emits {@code Text} keys with {@code LongWritable}
 * values; the final output uses {@code Text} for both keys and values.
 *
 * @param args exactly two elements: the input path and the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        // The usage line names this tool (it previously named a different program).
        System.err.println("Usage: SrcTok <input> <output>");
        System.exit(-1);
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(SrcTok.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(SrcTokMapper.class);
    job.setReducerClass(SrcTokReducer.class);

    // Intermediate (map output) types differ from the final output types,
    // so both pairs are declared explicitly.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.alexholmes.hadooputils.combine.avro.mapreduce.CombineAvroKeyValueInputFormatTest.java
License:Apache License
@Test public void testKeyValueInput() throws ClassNotFoundException, IOException, InterruptedException { // Create a test input file. File inputFile = createInputFile(); // Configure the job input. Job job = new Job(); FileInputFormat.setInputPaths(job, new Path(inputFile.getAbsolutePath())); job.setInputFormatClass(CombineAvroKeyValueInputFormat.class); AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.INT)); AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.STRING)); // Configure a mapper. job.setMapperClass(IndexMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); // Configure a reducer. job.setReducerClass(IndexReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(AvroValue.class); AvroJob.setOutputValueSchema(job, Schema.createArray(Schema.create(Schema.Type.INT))); // Configure the output format. job.setOutputFormatClass(AvroKeyValueOutputFormat.class); Path outputPath = new Path(mTempDir.getRoot().getPath(), "out-index"); FileOutputFormat.setOutputPath(job, outputPath); // Run the job. assertTrue(job.waitForCompletion(true)); // Verify that the output Avro container file as the expected data. File avroFile = new File(outputPath.toString(), "part-r-00000.avro"); DatumReader<GenericRecord> datumReader = new SpecificDatumReader<GenericRecord>(AvroKeyValue .getSchema(Schema.create(Schema.Type.STRING), Schema.createArray(Schema.create(Schema.Type.INT)))); DataFileReader<GenericRecord> avroFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader); assertTrue(avroFileReader.hasNext()); AvroKeyValue<CharSequence, List<Integer>> appleRecord = new AvroKeyValue<CharSequence, List<Integer>>( avroFileReader.next());//w w w . j av a 2 s. 
co m assertNotNull(appleRecord.get()); assertEquals("apple", appleRecord.getKey().toString()); List<Integer> appleDocs = appleRecord.getValue(); assertEquals(3, appleDocs.size()); assertTrue(appleDocs.contains(1)); assertTrue(appleDocs.contains(2)); assertTrue(appleDocs.contains(3)); assertTrue(avroFileReader.hasNext()); AvroKeyValue<CharSequence, List<Integer>> bananaRecord = new AvroKeyValue<CharSequence, List<Integer>>( avroFileReader.next()); assertNotNull(bananaRecord.get()); assertEquals("banana", bananaRecord.getKey().toString()); List<Integer> bananaDocs = bananaRecord.getValue(); assertEquals(2, bananaDocs.size()); assertTrue(bananaDocs.contains(1)); assertTrue(bananaDocs.contains(2)); assertTrue(avroFileReader.hasNext()); AvroKeyValue<CharSequence, List<Integer>> carrotRecord = new AvroKeyValue<CharSequence, List<Integer>>( avroFileReader.next()); assertEquals("carrot", carrotRecord.getKey().toString()); List<Integer> carrotDocs = carrotRecord.getValue(); assertEquals(1, carrotDocs.size()); assertTrue(carrotDocs.contains(1)); assertFalse(avroFileReader.hasNext()); avroFileReader.close(); }
From source file:com.alexholmes.json.mapreduce.ExampleJob.java
License:Apache License
/** * The MapReduce driver - setup and launch the job. * * @param args the command-line arguments * @return the process exit code//from w ww . j av a 2 s . co m * @throws Exception if something goes wrong */ public int run(final String[] args) throws Exception { String input = args[0]; String output = args[1]; Configuration conf = super.getConf(); writeInput(conf, new Path(input)); Job job = new Job(conf); job.setJarByClass(ExampleJob.class); job.setMapperClass(Map.class); job.setNumReduceTasks(0); Path outputPath = new Path(output); FileInputFormat.setInputPaths(job, input); FileOutputFormat.setOutputPath(job, outputPath); // use the JSON input format job.setInputFormatClass(MultiLineJsonInputFormat.class); // specify the JSON attribute name which is used to determine which // JSON elements are supplied to the mapper MultiLineJsonInputFormat.setInputJsonMember(job, "colorName"); if (job.waitForCompletion(true)) { return 0; } return 1; }
From source file:com.aliyun.emr.example.WordCount.java
License:Apache License
/**
 * Entry point of the word-count job.
 *
 * <p>After generic Hadoop options are stripped, the remaining arguments must be
 * {@code <in> <out>} optionally accompanied by {@code -skip skipPatternFile}.
 * The skip file is added to the job's cache files and the
 * {@code wordcount.skip.patterns} flag is set. Input, output and skip-file
 * locations are all passed through {@code EMapReduceOSSUtil.buildOSSCompleteUri}.
 *
 * <p>The process exits with status 2 on a usage error, 0 when the job succeeds
 * and 1 when it fails.
 *
 * @param args command-line arguments
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    final String usage = "Usage: wordcount <in> <out> [-skip skipPatternFile]";

    Configuration conf = new Configuration();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
    String[] remainingArgs = optionParser.getRemainingArgs();
    if (!(remainingArgs.length == 2 || remainingArgs.length == 4)) {
        System.err.println(usage);
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Split the arguments into the "-skip <file>" option and positional paths.
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < remainingArgs.length; ++i) {
        if ("-skip".equals(remainingArgs[i])) {
            if (i + 1 >= remainingArgs.length) {
                // "-skip" is the last argument, so no pattern file follows it.
                System.err.println(usage);
                System.exit(2);
            }
            job.addCacheFile(new Path(EMapReduceOSSUtil.buildOSSCompleteUri(remainingArgs[++i], conf)).toUri());
            job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
        } else {
            otherArgs.add(remainingArgs[i]);
        }
    }

    if (otherArgs.size() < 2) {
        // The argument count was acceptable, but "-skip" consumed the slots
        // needed for <in> and <out>.
        System.err.println(usage);
        System.exit(2);
    }

    FileInputFormat.addInputPath(job, new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(0), conf)));
    FileOutputFormat.setOutputPath(job, new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(1), conf)));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.aliyun.openservices.tablestore.hadoop.RowCounter.java
License:Apache License
public static void main(String[] args) throws Exception { if (!parseArgs(args)) { printUsage();//from w w w. j av a2 s .c om System.exit(1); } if (endpoint == null || accessKeyId == null || accessKeySecret == null || table == null || outputPath == null) { printUsage(); System.exit(1); } Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "row count"); job.setJarByClass(RowCounter.class); job.setMapperClass(RowCounterMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setInputFormatClass(TableStoreInputFormat.class); TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken); TableStore.setEndpoint(job, endpoint, instance); TableStoreInputFormat.addCriteria(job, fetchCriteria()); FileOutputFormat.setOutputPath(job, new Path(outputPath)); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.aliyun.openservices.tablestore.hadoop.TableStoreOutputFormatExample.java
License:Apache License
public static void main(String[] args) throws Exception { if (!parseArgs(args)) { printUsage();//from www.j a v a 2s . c o m System.exit(1); } if (endpoint == null || accessKeyId == null || accessKeySecret == null || inputTable == null || outputTable == null) { printUsage(); System.exit(1); } Configuration conf = new Configuration(); Job job = Job.getInstance(conf, TableStoreOutputFormatExample.class.getName()); job.setMapperClass(OwnerMapper.class); job.setReducerClass(IntoTableReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(MapWritable.class); job.setInputFormatClass(TableStoreInputFormat.class); job.setOutputFormatClass(TableStoreOutputFormat.class); TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken); TableStore.setEndpoint(job, endpoint, instance); TableStoreInputFormat.addCriteria(job, fetchCriteria()); TableStoreOutputFormat.setOutputTable(job, outputTable); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.ambiata.ivory.operation.hadoop.MultipleInputs.java
License:Apache License
/** * Add a {@link Path} with a custom {@link InputFormat} and * {@link Mapper} to the list of inputs for the map-reduce job. * * @param job The {@link Job}/* w w w. j a v a2s .c o m*/ * @param path {@link Path} to be added to the list of inputs for the job * @param inputFormatClass {@link InputFormat} class to use for this path * @param mapperClass {@link Mapper} class to use for this path */ @SuppressWarnings("unchecked") public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass) { addInputPath(job, path, inputFormatClass); Configuration conf = job.getConfiguration(); /* WAS not encoded */ String mapperMapping = encode(path.toString() + ";" + mapperClass.getName()); String mappers = conf.get(DIR_MAPPERS); conf.set(DIR_MAPPERS, mappers == null ? mapperMapping : mappers + "," + mapperMapping); job.setMapperClass(DelegatingMapper.class); }
From source file:com.antbrains.crf.hadoop.CalcFeatureWeights.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 3 && otherArgs.length != 4) { System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]"); System.exit(-1);/* w w w . j av a 2 s .co m*/ } int startStep = 1; if (otherArgs.length == 4) { startStep = Integer.valueOf(otherArgs[otherArgs.length - 1]); } FileSystem fs = FileSystem.get(conf); if (startStep <= 1) { System.out.println("calc"); fs.delete(new Path(otherArgs[1]), true); Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName()); job.setNumReduceTasks(1); job.setJarByClass(CalcFeatureWeights.class); job.setMapperClass(CalcFeatureMapper.class); job.setReducerClass(CalcFeatureReducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(MyKey.class); job.setOutputKeyClass(MyKey.class); job.setOutputValueClass(MyValue.class); FileInputFormat.setInputPaths(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); boolean res = job.waitForCompletion(true); if (!res) { System.err.println("step1 failed"); return; } } if (startStep <= 2) // sort { fs.delete(new Path(otherArgs[2]), true); System.out.println("sort"); Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName()); job.setNumReduceTasks(1); job.setJarByClass(CalcFeatureWeights.class); job.setMapperClass(IdentityMapper.class); job.setReducerClass(IdentityReducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapOutputKeyClass(MyKey.class); job.setMapOutputValueClass(MyValue.class); job.setOutputKeyClass(MyKey.class); job.setOutputValueClass(MyValue.class); FileInputFormat.setInputPaths(job, new Path(otherArgs[1])); 
FileOutputFormat.setOutputPath(job, new Path(otherArgs[2])); boolean res = job.waitForCompletion(true); if (!res) { System.err.println("step2 failed"); return; } } }
From source file:com.antbrains.crf.hadoop.FeatureCounter.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 3) { System.err.println("Usage: wordcount <in> <out> <templatefile>"); System.exit(2);// w ww . j av a 2 s .c o m } String[] templates = SgdCrf.readTemplates(otherArgs[2]).toArray(new String[0]); conf.set("templates", strArr2Str(templates)); Job job = new Job(conf, FeatureCounter.class.getSimpleName()); job.setJarByClass(FeatureCounter.class); job.setMapperClass(CounterMapper.class); job.setCombinerClass(SumReducer.class); job.setReducerClass(SumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }