List of usage examples for org.apache.hadoop.mapreduce.Job.setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException
From source file:com.talis.labs.pagerank.mapreduce.CheckingData.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: CheckingData <input path> <output path>"); return -1; }//from w ww . j ava 2 s . c om FileSystem.get(getConf()).delete(new Path(args[1]), true); Job job = new Job(getConf(), "CheckingData"); job.setJarByClass(getClass()); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(CheckingDataMapper.class); job.setReducerClass(CheckingDataReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.talis.labs.pagerank.mapreduce.CountPages.java
License:Apache License
/**
 * Configures and runs the "CountPages" MapReduce job with a single reduce task.
 *
 * <p>Exactly two arguments are required. The output path is deleted recursively before the
 * job is configured. {@code CountPagesReducer} is registered both as combiner and as reducer.
 *
 * @param args {@code <input path> <output path>}
 * @return -1 when the argument count is wrong, 0 when the job reports success, 1 otherwise
 * @throws Exception propagated from the file system and job calls
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CountPages <input path> <output path>");
        return -1;
    }

    // Remove any previous output (recursively) before setting up the job.
    FileSystem fileSystem = FileSystem.get(getConf());
    Path outputPath = new Path(args[1]);
    fileSystem.delete(outputPath, true);

    Job job = new Job(getConf(), "CountPages");
    job.setJarByClass(getClass());

    // Input and output locations.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, outputPath);

    // Processing classes: the reducer doubles as the combiner.
    job.setMapperClass(CountPagesMapper.class);
    job.setCombinerClass(CountPagesReducer.class);
    job.setReducerClass(CountPagesReducer.class);

    // Intermediate (map) and final key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setNumReduceTasks(1);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.talis.labs.pagerank.mapreduce.DanglingPages.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: DanglingPages <input path> <output path>"); return -1; }//w w w . j a v a2 s.c om FileSystem.get(getConf()).delete(new Path(args[1]), true); Job job = new Job(getConf(), "DanglingPages"); job.setJarByClass(getClass()); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(DanglingPagesMapper.class); job.setCombinerClass(DanglingPagesReducer.class); job.setReducerClass(DanglingPagesReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(DoubleWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); job.setNumReduceTasks(1); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.talis.labs.pagerank.mapreduce.InitializePageRanks.java
License:Apache License
/**
 * Configures and runs the "InitializePageRanks" MapReduce job.
 *
 * <p>Exactly three arguments are required. The third argument is stored in the configuration
 * under {@code pagerank.count}. The output path is deleted recursively before the job is
 * configured. Only a mapper and the map output types are set explicitly.
 *
 * @param args {@code <input path> <output path> <number of pages>}
 * @return -1 when the argument count is wrong, 0 when the job reports success, 1 otherwise
 * @throws Exception propagated from the file system and job calls
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: InitializePageRanks <input path> <output path> <number of pages>");
        return -1;
    }

    // The page count must be in the configuration before the job is created from it.
    Configuration configuration = getConf();
    configuration.set("pagerank.count", args[2]);

    // Remove any previous output (recursively) before setting up the job.
    FileSystem fileSystem = FileSystem.get(configuration);
    Path outputPath = new Path(args[1]);
    fileSystem.delete(outputPath, true);

    Job job = new Job(configuration, "InitializePageRanks");
    job.setJarByClass(getClass());

    // Input and output locations.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, outputPath);

    // Mapper and its output types; no reducer class is set here.
    job.setMapperClass(InitializePageRanksMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.talis.labs.pagerank.mapreduce.SortPageRanks.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: SortPageRanks <input path> <output path>"); return -1; }/* ww w .j av a2 s. co m*/ FileSystem.get(getConf()).delete(new Path(args[1]), true); Job job = new Job(getConf(), "SortPageRanks"); job.setJarByClass(getClass()); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(SortPageRanksMapper.class); job.setReducerClass(Reducer.class); // i.e. identity reducer job.setSortComparatorClass(DoubleWritableDecreasingComparator.class); job.setMapOutputKeyClass(DoubleWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(1); // TODO: inefficient, use InputSampler with v0.20.x return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.talis.labs.pagerank.mapreduce.UpdatePageRanks.java
License:Apache License
/**
 * Configures and runs the "UpdatePageRanks" MapReduce job.
 *
 * <p>Exactly four arguments are required. The third and fourth arguments are stored in the
 * configuration under {@code pagerank.count} and {@code pagerank.dangling} respectively. The
 * output path is deleted recursively before the job is configured.
 *
 * @param args {@code <input path> <output path> <number of pages> <dangling pages contribution>}
 * @return -1 when the argument count is wrong, 0 when the job reports success, 1 otherwise
 * @throws Exception propagated from the file system and job calls
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println(
                "Usage: UpdatePageRanks <input path> <output path> <number of pages> <dangling pages contribution>");
        return -1;
    }

    // Both settings must be in the configuration before the job is created from it.
    Configuration configuration = getConf();
    configuration.set("pagerank.count", args[2]);
    configuration.set("pagerank.dangling", args[3]);

    // Remove any previous output (recursively) before setting up the job.
    FileSystem fileSystem = FileSystem.get(configuration);
    Path outputPath = new Path(args[1]);
    fileSystem.delete(outputPath, true);

    Job job = new Job(configuration, "UpdatePageRanks");
    job.setJarByClass(getClass());

    // Input and output locations.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, outputPath);

    // Processing classes.
    job.setMapperClass(UpdatePageRanksMapper.class);
    job.setReducerClass(UpdatePageRanksReducer.class);

    // Intermediate (map) and final key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java
License:Apache License
/** * This method returns a Job instance out of a {@link MSCR} entity. It puts the Class of * the {@link PlumeWorkflow} argument and the MSCR id in the hadoop configuration. * /*from www .ja v a 2 s.co m*/ * @param mscr The MSCR to convert * @param workflow The workflow whose class will be instantiated by hadoop mappers/reducers * @param outputPath The output path of the MapRed job * @return A hadoop-executable MapRed Job * * @throws IOException */ static Job getMapRed(final MSCR mscr, PlumeWorkflow workFlow, String workFlowOutputPath, String outputPath) throws IOException { Configuration conf = new Configuration(); conf.set(WORKFLOW_NAME, workFlow.getClass().getName()); conf.setInt(MSCR_ID, mscr.getId()); conf.set(TEMP_OUTPUT_PATH, workFlowOutputPath); Job job = new Job(conf, "MSCR"); // TODO deprecation job.setMapOutputKeyClass(PlumeObject.class); job.setMapOutputValueClass(PlumeObject.class); job.setJarByClass(MapRedExecutor.class); /** * Define multiple inputs */ for (PCollection<?> input : mscr.getInputs()) { if (!(input instanceof LazyCollection)) { throw new IllegalArgumentException("Can't create MapRed from MSCR whose inputs are not LazyTable"); } LazyCollection<Text> l = (LazyCollection<Text>) input; if (!(l.isMaterialized() && l.getFile() != null)) { // Collections have plume ID only if they are intermediate results - TODO better naming for this if (l.getPlumeId().length() < 1) { throw new IllegalArgumentException( "Can't create MapRed from MSCR inputs that are not materialized to a file"); } } PCollectionType<?> rType = l.getType(); Class<? 
extends InputFormat> format = SequenceFileInputFormat.class; if (rType instanceof PTableType) { PTableType<?, ?> tType = (PTableType<?, ?>) rType; if (tType.valueType() instanceof StringType && tType.keyType() instanceof StringType) { format = KeyValueTextInputFormat.class; } MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class); } else { if (rType.elementType() instanceof StringType) { format = TextInputFormat.class; } MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class); } } /** * Define multiple outputs */ FileOutputFormat.setOutputPath(job, new Path(outputPath)); for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) { PCollectionType<?> rType = ((LazyCollection<?>) mscr.getOutputChannels().get(entry.getKey()).output) .getType(); if (rType instanceof PTableType) { PTableType<?, ?> tType = (PTableType<?, ?>) rType; Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class; if (tType.keyType() instanceof StringType && tType.valueType() instanceof StringType) { outputFormat = TextOutputFormat.class; } MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat, getHadoopType(tType.keyType()), getHadoopType(tType.valueType())); } else { Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class; if (rType.elementType() instanceof StringType) { outputFormat = TextOutputFormat.class; } MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat, NullWritable.class, getHadoopType(rType.elementType())); } } /** * Define Reducer & Combiner */ job.setCombinerClass(MSCRCombiner.class); job.setReducerClass(MSCRReducer.class); job.setNumReduceTasks(1); return job; }
From source file:com.telefonica.iot.tidoop.mrlib.jobs.Filter.java
License:Open Source License
@Override public int run(String[] args) throws Exception { // check the number of arguments, show the usage if it is wrong if (args.length != 3) { showUsage();//www . j a va2s . co m return -1; } // if // get the arguments String input = args[0]; String output = args[1]; String regex = args[2]; // create and configure a MapReduce job Configuration conf = this.getConf(); conf.set(Constants.PARAM_REGEX, regex); Job job = Job.getInstance(conf, "tidoop-mr-lib-filter"); job.setNumReduceTasks(1); job.setJarByClass(Filter.class); job.setMapperClass(LineFilter.class); job.setCombinerClass(LinesCombiner.class); job.setReducerClass(LinesJoiner.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); // run the MapReduce job return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.telefonica.iot.tidoop.mrlib.jobs.MapOnly.java
License:Open Source License
@Override public int run(String[] args) throws Exception { // check the number of arguments, show the usage if it is wrong if (args.length != 3) { showUsage();//from w ww . j ava 2 s .c om return -1; } // if // get the arguments String input = args[0]; String output = args[1]; String mapFunction = args[2]; // create and configure a MapReduce job Configuration conf = this.getConf(); conf.set(Constants.PARAM_FUNCTION, mapFunction); Job job = Job.getInstance(conf, "tidoop-mr-lib-maponly"); job.setNumReduceTasks(0); job.setJarByClass(MapOnly.class); job.setMapperClass(CustomMapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); // run the MapReduce job return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.teradata.compaction.mapreduce.MergeParquetFilesMR.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "MergeParquet"); if (args.length != 2) { System.err.println("Usage: java -jar MergeParquetFilesMR path_to_input_folder path_to_output_folder "); System.exit(0);//from w w w . j a v a 2 s . co m } final Path inputPath = new Path(args[0]); final Path out = new Path(args[1]); Schema schemaParquetFile = getBaseSchema(inputPath, conf); job.setJarByClass(MergeParquetFilesMR.class); job.setMapperClass(SampleParquetMapper.class); job.setReducerClass(SampleParquetReducer.class); job.setInputFormatClass(AvroParquetInputFormat.class); job.setOutputFormatClass(AvroParquetOutputFormat.class); job.setMapOutputKeyClass(NullWritable.class); AvroJob.setMapOutputValueSchema(job, schemaParquetFile); AvroParquetOutputFormat.setSchema(job, schemaParquetFile); FileInputFormat.addInputPath(job, inputPath); AvroParquetOutputFormat.setOutputPath(job, out); job.setNumReduceTasks(1); System.exit(job.waitForCompletion(true) ? 0 : 1); }