List of usage examples for org.apache.hadoop.mapreduce.Job#getMapOutputValueClass()
public Class<?> getMapOutputValueClass()
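The method returns the value class the job will use for map outputs; if no map output value class has been set explicitly, it falls back to the job's output value class (Text by default). Below is a minimal, self-contained sketch of that behavior in isolation; the class name, job name, and the IntWritable choice are illustrative only, not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;

public class MapOutputValueClassSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "sketch");

        // With nothing configured, getMapOutputValueClass() falls back to the
        // job's output value class, which defaults to Text.
        System.out.println("default map output value class: " + job.getMapOutputValueClass());

        // After an explicit map output value class is set, the getter reflects it.
        // Callers (e.g. the HFileOutputFormat examples below) use this to pick a
        // reducer or serialization that matches the map output value type.
        job.setMapOutputValueClass(IntWritable.class);
        if (IntWritable.class.equals(job.getMapOutputValueClass())) {
            System.out.println("map output values will be IntWritable");
        }
    }
}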
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java
License:Apache License
static void configureIncrementalLoad(Job job, HTable table, Class<? extends OutputFormat<?, ?>> cls)
        throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"),
            MutationSerialization.class.getName(), ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions "
            + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License:Apache License
public static void configureIncrementalLoad(Job job, HTable table, Class<? extends OutputFormat<?, ?>> cls)
        throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormatBase.class);

    // Based on the configured map output class, set the correct reducer to
    // properly sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"),
            MutationSerialization.class.getName(), ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions "
            + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    // TableMapReduceUtil.addDependencyJars(job);
    // TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}
From source file:com.savy3.nonequijoin.MapOutputSampler.java
License:Apache License
/**
 * Driver for the InputSampler MapReduce job.
 */
public static void runMap(Job job, Path sampleInputPath)
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {
    LOG.info("Running a MapReduce Job on Sample Input File" + sampleInputPath.toString());

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.job.ubertask.enable", true);
    conf.set("numSamples", "" + (job.getNumReduceTasks() - 1));

    // The sample job reuses the original job's mapper and map output key/value classes.
    Job sampleJob = new Job(conf);
    sampleJob.setMapperClass(job.getMapperClass());
    sampleJob.setReducerClass(SampleKeyReducer.class);
    sampleJob.setJarByClass(job.getMapperClass());
    sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setMapOutputValueClass(job.getMapOutputValueClass());
    sampleJob.setOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setOutputValueClass(NullWritable.class);
    sampleJob.setInputFormatClass(SequenceFileInputFormat.class);
    sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath);

    FileSystem fs = FileSystem.get(conf);
    Path out = new Path(sampleInputPath.getParent(), "mapOut");
    fs.delete(out, true);
    SequenceFileOutputFormat.setOutputPath(sampleJob, out);

    sampleJob.waitForCompletion(true);
    LOG.info("Sample MapReduce Job Output File" + out.toString());

    // Replace the sample input directory with the single reducer output file.
    Path partFile = new Path(out, "part-r-00000");
    Path tmpFile = new Path("/_tmp");
    fs.delete(tmpFile, true);
    fs.rename(partFile, tmpFile);
    fs.delete(sampleInputPath.getParent(), true);
    fs.rename(new Path("/_tmp"), sampleInputPath.getParent());
    LOG.info("Sample partitioning file copied to location " + sampleInputPath.getParent().toString());
}
From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java
License:Apache License
/**
 * Add the dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, com.google.common.collect.ImmutableSet.class,
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file:gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil.java
License:Open Source License
/**
 * Add the HBase dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(), job.getInputFormatClass(), job.getOutputKeyClass(),
                job.getOutputValueClass(), job.getOutputFormatClass(), job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file:org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java
License:Apache License
/**
 * Add the Blur dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass(), DocumentVisibility.class);
        addAllJarsInBlurLib(job.getConfiguration());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file:org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java
License:Apache License
/**
 * Add the Kudu dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    addKuduDependencyJars(job.getConfiguration());
    try {
        addDependencyJars(job.getConfiguration(),
                // when making changes here, consider also mapred.TableMapReduceUtil
                // pull job classes
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file:org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java
License:Apache License
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator,
        Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"),
            MutationSerialization.class.getName(), ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions "
            + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
From source file:org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java
License:Apache License
/**
 * Add the HBase dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, com.google.common.collect.ImmutableSet.class,
                org.apache.hadoop.hbase.util.Bytes.class, // one class from hbase.jar
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file:org.kiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures the job with any Avro reader or writer schemas specified by the mapper class.
 *
 * <p>If the job's mapper class uses AvroKey as the job's input key class, it should
 * have implemented the AvroKeyReader interface to specify the reader schema for the
 * input key. Likewise, if it uses AvroValue as the job's input value class, it should
 * have implemented the AvroValueReader interface.</p>
 *
 * <p>If the job's mapper class uses AvroKey as the output key class, it should
 * have implemented the AvroKeyWriter interface to specify the writer schema for the
 * output key. Likewise, if it uses AvroValue as the output value class, it should have
 * implemented the AvroValueWriter interface.</p>
 *
 * <p>This method makes sure those interfaces were implemented correctly, uses them to
 * fetch the reader/writer schemas as necessary, and sets them in the Job configuration
 * so the Avro input format and serialization framework can access them.</p>
 *
 * @param job The job to configure.
 * @param mapper The Kiji mapper the job is configured to run.
 * @throws IOException If the Avro schemas cannot be configured.
 */
protected void configureAvro(Job job, KijiMapper<?, ?, ?, ?> mapper) throws IOException {
    // If the user has specified particular reader schemas for the records of the input,
    // put it in the job configuration.
    Schema inputKeyReaderSchema = AvroMapReduce.getAvroKeyReaderSchema(mapper);
    if (null != inputKeyReaderSchema) {
        LOG.info("Setting reader schema for the map input key to: " + inputKeyReaderSchema);
        AvroJob.setInputKeySchema(job, inputKeyReaderSchema);
    }

    Schema inputValueReaderSchema = AvroMapReduce.getAvroValueReaderSchema(mapper);
    if (null != inputValueReaderSchema) {
        LOG.info("Setting reader schema for the map input value to: " + inputValueReaderSchema);
        AvroJob.setInputValueSchema(job, inputValueReaderSchema);
    }

    // Set the output writer schemas in the job configuration (if specified).
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(mapper);
    if (null != outputKeyWriterSchema) {
        if (!AvroKey.class.isAssignableFrom(job.getMapOutputKeyClass())) {
            throw new JobConfigurationException(mapper.getClass().getName()
                    + ".getAvroKeyWriterSchema() returned a non-null Schema"
                    + " but the output key class was not AvroKey.");
        }
        LOG.info("Setting avro serialization for map output key schema: " + outputKeyWriterSchema);
        AvroJob.setMapOutputKeySchema(job, outputKeyWriterSchema);
    }

    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(mapper);
    if (null != outputValueWriterSchema) {
        if (!AvroValue.class.isAssignableFrom(job.getMapOutputValueClass())) {
            throw new JobConfigurationException(mapper.getClass().getName()
                    + ".getAvroValueWriterSchema() returned a non-null Schema"
                    + " but the output value class was not AvroValue.");
        }
        LOG.info("Setting avro serialization for map output value schema: " + outputValueWriterSchema);
        AvroJob.setMapOutputValueSchema(job, outputValueWriterSchema);
    }
}