List of usage examples for org.apache.hadoop.mapreduce.Job#getOutputValueClass
public Class<?> getOutputValueClass()
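Before the examples: getOutputValueClass() simply reads back the value class registered on the job's Configuration via setOutputValueClass() (in stock Hadoop it falls back to Text when nothing has been set). A minimal sketch of the round trip, assuming a standard Hadoop 2.x MapReduce client on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class OutputValueClassDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "demo");

        // Declare the reducer's output types; getOutputValueClass() simply
        // reads this setting back from the underlying Configuration.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        Class<?> valueClass = job.getOutputValueClass(); // IntWritable.class
        System.out.println("output value class: " + valueClass.getName());
    }
}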
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
/**
 * Sets the configurations used for outputs.
 */
private void setOutputsIfNeeded(Job job) {
    Map<String, OutputFormatProvider> outputFormatProviders = context.getOutputFormatProviders();
    LOG.debug("Using as output for MapReduce Job: {}", outputFormatProviders.keySet());

    if (outputFormatProviders.isEmpty()) {
        // user is not going through our APIs to add output; leave the job's output format to user
        return;
    } else if (outputFormatProviders.size() == 1) {
        // If only one output is configured through the context, then set it as the root OutputFormat
        Map.Entry<String, OutputFormatProvider> next = outputFormatProviders.entrySet().iterator().next();
        OutputFormatProvider outputFormatProvider = next.getValue();
        ConfigurationUtil.setAll(outputFormatProvider.getOutputFormatConfiguration(), job.getConfiguration());
        job.getConfiguration().set(Job.OUTPUT_FORMAT_CLASS_ATTR, outputFormatProvider.getOutputFormatClassName());
        return;
    }

    // multiple output formats configured via the context. We should use a RecordWriter that doesn't support writing
    // as the root output format in this case to disallow writing directly on the context
    MultipleOutputsMainOutputWrapper.setRootOutputFormat(job, UnsupportedOutputFormat.class.getName(),
            new HashMap<String, String>());
    job.setOutputFormatClass(MultipleOutputsMainOutputWrapper.class);

    for (Map.Entry<String, OutputFormatProvider> entry : outputFormatProviders.entrySet()) {
        String outputName = entry.getKey();
        OutputFormatProvider outputFormatProvider = entry.getValue();

        String outputFormatClassName = outputFormatProvider.getOutputFormatClassName();
        if (outputFormatClassName == null) {
            throw new IllegalArgumentException("Output '" + outputName + "' provided null as the output format");
        }

        Map<String, String> outputConfig = outputFormatProvider.getOutputFormatConfiguration();
        MultipleOutputs.addNamedOutput(job, outputName, outputFormatClassName,
                job.getOutputKeyClass(), job.getOutputValueClass(), outputConfig);
    }
}
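The detail worth noting above is the fallback: each named output inherits the job-level job.getOutputKeyClass() and job.getOutputValueClass() as its key/value types. A minimal sketch of the same pattern against the stock Hadoop MultipleOutputs (which, unlike CDAP's variant shown above, takes no per-output configuration map); the output names here are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class NamedOutputsSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "named-outputs");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Each named output falls back to the job-level key/value classes,
        // mirroring the CDAP code above.
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class,
                job.getOutputKeyClass(), job.getOutputValueClass());
        MultipleOutputs.addNamedOutput(job, "seq", SequenceFileOutputFormat.class,
                job.getOutputKeyClass(), job.getOutputValueClass());
    }
}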
From source file:com.baynote.kafka.hadoop.KafkaJobBuilderTest.java
License:Apache License
@Test
public void testConfigureWholeJob() throws Exception {
    // base configuration
    builder.setZkConnect("localhost:2181");
    builder.addQueueInput("queue_name", "group_name", MockMapper.class);
    builder.setTextFileOutputFormat("/a/hdfs/path");

    // extended configuration
    builder.setJobName("job_name");
    builder.setMapOutputKeyClass(Text.class);
    builder.setMapOutputValueClass(BytesWritable.class);
    builder.setReducerClass(MockReducer.class);
    builder.setTaskMemorySettings("-Xmx2048m");
    builder.setNumReduceTasks(100);
    builder.setParitioner(MockPartitioner.class);
    builder.setKafkaFetchSizeBytes(1024);

    Job job = builder.configureJob(conf);

    assertEquals("job_name", job.getJobName());
    assertEquals(Text.class, job.getMapOutputKeyClass());
    assertEquals(BytesWritable.class, job.getMapOutputValueClass());
    assertEquals(MockReducer.class, job.getReducerClass());
    assertEquals(MockMapper.class, job.getMapperClass());
    assertEquals("-Xmx2048m", job.getConfiguration().get("mapred.child.java.opts"));
    assertEquals(100, job.getNumReduceTasks());
    assertEquals(MockPartitioner.class, job.getPartitionerClass());
    assertEquals(1024, KafkaInputFormat.getKafkaFetchSizeBytes(job.getConfiguration()));
    assertEquals(TextOutputFormat.class, job.getOutputFormatClass());
    assertEquals(KafkaInputFormat.class, job.getInputFormatClass());
    assertEquals("file:/a/hdfs/path", TextOutputFormat.getOutputPath(job).toString());

    builder.setJobName(null);
    builder.setSequenceFileOutputFormat();
    builder.setUseLazyOutput();
    builder.addQueueInput("queue_name_2", "group_name_2", MockMapper.class);

    job = builder.configureJob(conf);

    assertEquals(LazyOutputFormat.class, job.getOutputFormatClass());
    assertEquals(MultipleKafkaInputFormat.class, job.getInputFormatClass());
    assertEquals(DelegatingMapper.class, job.getMapperClass());
    assertEquals(BytesWritable.class, job.getOutputKeyClass());
    assertEquals(BytesWritable.class, job.getOutputValueClass());
    assertNotNull(SequenceFileOutputFormat.getOutputPath(job));
    assertNotNull(job.getJobName());

    // use s3
    builder.useS3("my_aws_key", "s3cr3t", "my-bucket");
    builder.setTextFileOutputFormat("/a/hdfs/path");

    job = builder.configureJob(conf);

    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3n.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3n.awsSecretAccessKey"));
    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3.awsSecretAccessKey"));
}
From source file:com.cloudera.castagna.logparser.Utils.java
License:Apache License
public static void log(Job job, Logger log) throws ClassNotFoundException {
    log.debug("{} -> {} ({}, {}) -> {}#{} ({}, {}) -> {}",
            new Object[] { job.getInputFormatClass().getSimpleName(), job.getMapperClass().getSimpleName(),
                    job.getMapOutputKeyClass().getSimpleName(), job.getMapOutputValueClass().getSimpleName(),
                    job.getReducerClass().getSimpleName(), job.getNumReduceTasks(),
                    job.getOutputKeyClass().getSimpleName(), job.getOutputValueClass().getSimpleName(),
                    job.getOutputFormatClass().getSimpleName() });

    Path[] inputs = FileInputFormat.getInputPaths(job);
    Path output = FileOutputFormat.getOutputPath(job);
    log.debug("input: {}", inputs[0]);
    log.debug("output: {}", output);
}
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.NamedOutputAnnotationHandler.java
License:Apache License
@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {
    NamedOutput namedOut = (NamedOutput) annotation;
    KeyValue kv = namedOut.type();

    // If this is a MultipleOutputs member we're annotating, see if we can't
    // get the key/value from the parameters if there are any.
    Pair<Type, Type> kvTypePair = getGenericTypeParams(target);

    Class<?> keyClass = kv.key();
    if (keyClass == void.class) {
        if (kvTypePair != null) {
            keyClass = (Class<?>) kvTypePair.getKey();
        } else {
            // fall back on job output key class
            keyClass = job.getOutputKeyClass();
        }
    }

    Class<?> valueClass = kv.value();
    if (valueClass == void.class) {
        if (kvTypePair != null) {
            valueClass = (Class<?>) kvTypePair.getValue();
        } else {
            // fall back on job output value class
            valueClass = job.getOutputValueClass();
        }
    }

    String[] names = getNames(namedOut);
    for (String name : names) {
        name = (String) evaluateExpression(name);
        if (!configured.contains(name)) {
            MultipleOutputs.addNamedOutput(job, name, namedOut.format(), keyClass, valueClass);
            MultipleOutputs.setCountersEnabled(job, namedOut.countersEnabled());
            configured.add(name);
        }
    }
}
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");
    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("this jar requires at least 2 parameters: " + job.getJar() + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    // When you are not running any reducer:
    // job.setNumReduceTasks(0);
    logger.info("number of reduce tasks is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}
From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java
License:Apache License
@Test
public void testGatherToHFile() throws Exception {
    final FijiMapReduceJob gatherJob = FijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable.getURI())
            .withGatherer(GatherToHFile.class)
            .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(mTable.getURI(),
                    getLocalTestPath("hfile"), 10))
            .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(GatherToHFile.class.getName(), conf.get(FijiConfKeys.FIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(FijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}
From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java
License:Apache License
@Test
public void testGatherReducerToHFile() throws Exception {
    final FijiMapReduceJob gatherJob = FijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable.getURI())
            .withGatherer(SimpleGatherer.class)
            .withReducer(ReducerToHFile.class)
            .withOutput(MapReduceJobOutputs.newHFileMapReduceJobOutput(mTable.getURI(),
                    getLocalTestPath("hfile"), 10))
            .build();

    final Job job = gatherJob.getHadoopJob();
    final Configuration conf = job.getConfiguration();
    assertEquals(SimpleGatherer.class.getName(), conf.get(FijiConfKeys.FIJI_GATHERER_CLASS));
    assertEquals(null, job.getCombinerClass());
    assertEquals(ReducerToHFile.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(SequenceFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, job.getOutputKeyClass());
    assertEquals(NullWritable.class, job.getOutputValueClass());
}
From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java
License:Apache License
/**
 * Add the dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, com.google.common.collect.ImmutableSet.class,
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
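A hypothetical driver-side sketch of calling the helper above. Since addDependencyJars(Job) reads the configured classes back through getters such as getOutputValueClass() and getPartitionerClass(), it should be invoked only after the job is fully configured; the identity Mapper/Reducer classes below are illustrative stand-ins:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import com.splicemachine.mrio.api.SpliceTableMapReduceUtil;

public class DependencyJarsSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "splice-demo");
        job.setJarByClass(DependencyJarsSketch.class);
        job.setMapperClass(Mapper.class);    // identity mapper, illustrative
        job.setReducerClass(Reducer.class);  // identity reducer, illustrative
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Ships the jars containing the classes configured above to the
        // cluster via the DistributedCache.
        SpliceTableMapReduceUtil.addDependencyJars(job);
        // ... set input/output paths, then submit
    }
}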
From source file:com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java
License:Apache License
/**
 * Parameters for bulk loader specified through the config file:
 *
 * - prefix for the row keys
 * - range start
 * - range end (inclusive)
 * - num splits (or number of partitions)
 * - user
 * - password
 * - table
 *
 * For the accepted default options
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Util.printArgs("run", args, System.err);
    printKeyValues(conf, ARG_KEYS, System.err);

    if (args.length > 1 || (args.length == 1 && "-help".compareToIgnoreCase(args[0]) == 0)) {
        System.err.println("Usage: " + this.getClass().getName()
                + " input_path [generic options] [input_paths...] output_path");
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }

    // Time run
    long startTime = System.currentTimeMillis();
    String workdir;

    if (args.length == 1) {
        /* override workdir in the config if it is specified in the
         * command line */
        conf.set(ARG_KEY_OUTDIR, args[0]);
        workdir = args[0];
    }

    workdir = conf.get(ARG_KEY_OUTDIR);
    if (workdir == null) {
        System.err.println("No output directory specified");
        return 1;
    }

    /* Initialize job, check parameters and decide which mapper to use */
    Job job = new Job(conf, conf.get(ARG_KEY_JOBNAME, "YCSB KV data generator"));

    /* these settings are the same (i.e., fixed) independent of the
     * parameters */
    job.setJarByClass(this.getClass());
    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);

    /* these settings should depend on the type of output file */
    job.setOutputFormatClass(HFileOutputFormat.class);
    /* not sure the next two are needed */
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    this.createInputFile(job, workdir);

    HFileOutputFormat.setOutputPath(job, new Path(workdir + "/files"));

    /* depending on whether the keys need to be sorted and hashed, then
     * decide which mapper and reducer to use */
    boolean hashKeys = conf.getBoolean(ARG_KEY_HASH_KEYS, false);
    boolean sortKeys = conf.getBoolean(ARG_KEY_SORTKEYS, true);

    /* get splits file name: side-effect -> this may generate a splits file */
    String splitsfile = this.getSplitsFile(job, workdir);

    if (sortKeys && hashKeys) { /* do a full map reduce job */
        job.setMapperClass(RowGeneratorMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setPartitionerClass(RangePartitioner.class);

        if (splitsfile == null) {
            /* Auto generate the splits file either from:
             * - the input key ranges
             * - from the current table splits */
            throw new InvalidInputException("No splits specified");
        }

        /* Set splits file */
        RangePartitioner.setSplitFile(job, splitsfile);

        /* Add reducer (based on mapper code) */
        job.setReducerClass(RowGeneratorReduce.class);

        /* the number of reducers is dependent on the number of
         * partitions */
        int numReduce = conf.getInt(ARG_KEY_NUMREDUCE, 1);
        job.setNumReduceTasks(numReduce);
    } else { /* perform a map only job */
        job.setMapperClass(RowGeneratorMapOnly.class);
        /* map output key and value types are the same as
         * for the job */
        job.setMapOutputKeyClass(job.getOutputKeyClass());
        job.setMapOutputValueClass(job.getOutputValueClass());
        job.setNumReduceTasks(0);
    }

    job.waitForCompletion(true);
    // JobClient.runJob(conf);

    SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss.SSS z");
    SimpleDateFormat ddf = new SimpleDateFormat("HH:mm:ss.SSS");
    ddf.setTimeZone(TimeZone.getTimeZone("UTC"));
    long endTime = System.currentTimeMillis();

    System.out.println("Start time (ms): " + df.format(new Date(startTime)) + " -- " + startTime);
    System.out.println("End time (ms): " + df.format(new Date(endTime)) + " -- " + endTime);
    System.out.println("Elapsed time (ms): " + ddf.format(endTime - startTime) + " -- " + (endTime - startTime));
    return 0;
}
From source file:gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil.java
License:Open Source License
/**
 * Add the HBase dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, job.getMapOutputKeyClass(), job.getMapOutputValueClass(),
                job.getInputFormatClass(), job.getOutputKeyClass(), job.getOutputValueClass(),
                job.getOutputFormatClass(), job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}