List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass
public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
private void configureStageInput(Job job, VariableTable variables) { List<StageInput> inputList = new ArrayList<>(); for (StageInput input : getStageInputs()) { Class<? extends Mapper<?, ?, ?, ?>> mapperClass = input.getMapperClass(); String pathString = input.getPathString(); Class<? extends InputFormat<?, ?>> formatClass = input.getFormatClass(); String expanded = variables.parse(pathString); Map<String, String> attributes = input.getAttributes(); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Input: path={0}, format={1}, mapper={2}, attributes={3}", //$NON-NLS-1$ expanded, formatClass.getName(), mapperClass.getName(), attributes)); }/*from w w w . j av a 2s . c om*/ inputList.add(new StageInput(expanded, formatClass, mapperClass, attributes)); } StageInputDriver.set(job, inputList); job.setInputFormatClass(StageInputFormat.class); job.setMapperClass(StageInputMapper.class); }
From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License:Apache License
private void updateMerge() throws IOException, InterruptedException { Job job = newJob(); List<StageInput> inputList = new ArrayList<>(); inputList.add(new StageInput(storage.getHeadContents("*").toString(), TemporaryInputFormat.class, MergeJoinBaseMapper.class)); inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class, MergeJoinPatchMapper.class)); StageInputDriver.set(job, inputList); job.setInputFormatClass(StageInputFormat.class); job.setMapperClass(StageInputMapper.class); job.setMapOutputKeyClass(PatchApplyKey.class); job.setMapOutputValueClass(modelClass); // combiner may have no effect in normal cases job.setReducerClass(MergeJoinReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(modelClass); job.setPartitionerClass(PatchApplyKey.Partitioner.class); job.setSortComparatorClass(PatchApplyKey.SortComparator.class); job.setGroupingComparatorClass(PatchApplyKey.GroupComparator.class); TemporaryOutputFormat.setOutputPath(job, getNextDirectory()); job.setOutputFormatClass(TemporaryOutputFormat.class); job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class); LOG.info(MessageFormat.format("applying patch (merge join): {0} / {1} -> {2}", storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); try {//from w ww . j ava2 s . c o m boolean succeed = job.waitForCompletion(true); LOG.info(MessageFormat.format("applied patch (merge join): succeed={0}, {1} / {2} -> {3}", succeed, storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); if (succeed == false) { throw new IOException(MessageFormat.format("failed to apply patch (merge join): {0} / {1} -> {2}", storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); } } catch (ClassNotFoundException e) { throw new IOException(e); } putMeta(); }
From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License:Apache License
private void updateTable() throws IOException, InterruptedException { Job job = newJob(); List<StageInput> inputList = new ArrayList<>(); inputList.add(new StageInput(storage.getHeadContents("*").toString(), TemporaryInputFormat.class, TableJoinBaseMapper.class)); inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class, TableJoinPatchMapper.class)); StageInputDriver.set(job, inputList); StageResourceDriver.add(job, storage.getPatchContents("*").toString(), TableJoinBaseMapper.RESOURCE_KEY); job.setInputFormatClass(StageInputFormat.class); job.setMapperClass(StageInputMapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(modelClass); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(modelClass); TemporaryOutputFormat.setOutputPath(job, getNextDirectory()); job.setOutputFormatClass(TemporaryOutputFormat.class); job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class); job.setNumReduceTasks(0);/* ww w .ja v a 2 s .co m*/ LOG.info(MessageFormat.format("applying patch (table join): {0} / {1} -> {2}", storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); try { boolean succeed = job.waitForCompletion(true); LOG.info(MessageFormat.format("applied patch (table join): succeed={0}, {1} / {2} -> {3}", succeed, storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); if (succeed == false) { throw new IOException(MessageFormat.format("failed to apply patch (table join): {0} / {1} -> {2}", storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); } } catch (ClassNotFoundException e) { throw new IOException(e); } putMeta(); }
From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License:Apache License
private void create() throws InterruptedException, IOException { Job job = newJob(); List<StageInput> inputList = new ArrayList<>(); inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class, CreateCacheMapper.class)); StageInputDriver.set(job, inputList); job.setInputFormatClass(StageInputFormat.class); job.setMapperClass(StageInputMapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(modelClass); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(modelClass); TemporaryOutputFormat.setOutputPath(job, getNextDirectory()); job.setOutputFormatClass(TemporaryOutputFormat.class); job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class); job.setNumReduceTasks(0);/*from w ww .jav a2s . c o m*/ LOG.info(MessageFormat.format("applying patch (no join): {0} / (empty) -> {2}", storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); try { boolean succeed = job.waitForCompletion(true); LOG.info(MessageFormat.format("applied patch (no join): succeed={0}, {1} / (empty) -> {3}", succeed, storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); if (succeed == false) { throw new IOException(MessageFormat.format("failed to apply patch (no join): {0} / (empty) -> {2}", storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents())); } } catch (ClassNotFoundException e) { throw new IOException(e); } putMeta(); }
From source file:com.asp.tranlog.ImportTsv.java
License:Apache License
/** * Sets up the actual job.//from w w w . j a v a 2s.co m * * @param conf * The current configuration. * @param args * The command line parameters. * @return The newly created job. * @throws IOException * When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException, ClassNotFoundException { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. String actualSeparator = conf.get(SEPARATOR_CONF_KEY); if (actualSeparator != null) { conf.set(SEPARATOR_CONF_KEY, new String(Base64.encodeBytes(actualSeparator.getBytes()))); } // See if a non-default Mapper was set String mapperClassName = conf.get(MAPPER_CONF_KEY); Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER; String tableName = args[0]; Path inputDir = new Path(args[1]); Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(mapperClass); FileInputFormat.setInputPaths(job, inputDir); String inputCodec = conf.get(INPUT_LZO_KEY); if (inputCodec == null) { FileInputFormat.setMaxInputSplitSize(job, 67108864l); // max split // size = // 64m job.setInputFormatClass(TextInputFormat.class); } else { if (inputCodec.equalsIgnoreCase("lzo")) job.setInputFormatClass(LzoTextInputFormat.class); else { usage("not supported compression codec!"); System.exit(-1); } } job.setMapperClass(mapperClass); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); if (hfileOutPath != null) { HTable table = new HTable(conf, tableName); job.setReducerClass(PutSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); HFileOutputFormat.configureIncrementalLoad(job, table); } else { // No reducers. Just write straight to table. Call // initTableReducerJob // to set up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); } TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /* * Guava used by TsvParser */); return job; }
From source file:com.avira.couchdoop.demo.BenchmarkUpdater.java
License:Apache License
public Job configureJob(Configuration conf, String input) throws IOException { conf.setInt("mapreduce.map.failures.maxpercent", 5); conf.setInt("mapred.max.map.failures.percent", 5); conf.setInt("mapred.max.tracker.failures", 20); Job job = Job.getInstance(conf); job.setJarByClass(BenchmarkUpdater.class); // User classpath takes precedence in favor of Hadoop classpath. // This is because the Couchbase client requires a newer version of // org.apache.httpcomponents:httpcore. job.setUserClassesTakesPrecedence(true); // Input// ww w . j a v a2 s . co m FileInputFormat.setInputPaths(job, input); // Mapper job.setMapperClass(BenchmarkUpdateMapper.class); job.setMapOutputKeyClass(String.class); job.setMapOutputValueClass(CouchbaseAction.class); // Reducer job.setNumReduceTasks(0); // Output job.setOutputFormatClass(CouchbaseOutputFormat.class); job.setMapOutputKeyClass(String.class); job.setMapOutputValueClass(CouchbaseAction.class); return job; }
From source file:com.avira.couchdoop.demo.ExportDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: <input_path>"); return 1; }/*from www . ja v a 2 s . c o m*/ String input = args[0]; Job job = Job.getInstance(getConf()); job.setJarByClass(ExportDriver.class); // User classpath takes precedence in favor of Hadoop classpath. // This is because the Couchbase client requires a newer version of // org.apache.httpcomponents:httpcore. // job.setUserClassesTakesPrecedence(true); // Input FileInputFormat.setInputPaths(job, input); // Mapper job.setMapperClass(ExportMapper.class); // Reducer job.setNumReduceTasks(0); // Output job.setOutputKeyClass(String.class); job.setOutputValueClass(CouchbaseAction.class); job.setOutputFormatClass(CouchbaseOutputFormat.class); if (!job.waitForCompletion(true)) { return 2; } return 0; }
From source file:com.avira.couchdoop.demo.ImportDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: <output_path>"); return 1; }// w w w .j a va 2 s .c o m String output = args[0]; Job job = Job.getInstance(getConf()); job.setJarByClass(this.getClass()); // User classpath takes precedence in favor of Hadoop classpath. // This is because the Couchbase client requires a newer version of // org.apache.httpcomponents:httpcore. job.setUserClassesTakesPrecedence(true); // Input job.setInputFormatClass(CouchbaseViewInputFormat.class); // Mapper job.setMapperClass(ImportMapper.class); // Reducer job.setNumReduceTasks(0); // Output job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(output)); if (!job.waitForCompletion(true)) { return 2; } return 0; }
From source file:com.avira.couchdoop.jobs.CouchbaseExporter.java
License:Apache License
public Job configureJob(Configuration conf, String input) throws IOException { conf.setInt("mapreduce.map.failures.maxpercent", 5); conf.setInt("mapred.max.map.failures.percent", 5); conf.setInt("mapred.max.tracker.failures", 20); Job job = Job.getInstance(conf); job.setJarByClass(CouchbaseExporter.class); // Input//ww w .ja v a2 s . c om FileInputFormat.setInputPaths(job, input); // Mapper job.setMapperClass(CsvToCouchbaseMapper.class); job.setMapOutputKeyClass(String.class); job.setMapOutputValueClass(CouchbaseAction.class); // Reducer job.setNumReduceTasks(0); // Output job.setOutputFormatClass(CouchbaseOutputFormat.class); job.setOutputKeyClass(String.class); job.setOutputValueClass(CouchbaseAction.class); return job; }
From source file:com.avira.couchdoop.jobs.CouchbaseViewImporter.java
License:Apache License
public Job configureJob(Configuration conf, String output) throws IOException { conf.setInt("mapreduce.map.failures.maxpercent", 5); conf.setInt("mapred.max.map.failures.percent", 5); conf.setInt("mapred.max.tracker.failures", 20); Job job = Job.getInstance(conf); job.setJarByClass(CouchbaseViewImporter.class); // Input//from w ww . ja va2 s. c om job.setInputFormatClass(CouchbaseViewInputFormat.class); // Mapper job.setMapperClass(CouchbaseViewToFileMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); // Reducer job.setNumReduceTasks(0); // Output job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(output)); return job; }