List of usage examples for org.apache.hadoop.mapred.JobConf.getOutputKeyClass()
public Class<?> getOutputKeyClass()
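A minimal standalone sketch, not taken from any of the source files below (the class name is illustrative, and the LongWritable default mentioned in the comments is an assumption about the mapred API), showing what getOutputKeyClass() returns before and after the corresponding setter is called:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class OutputKeyClassSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();

        // Before any setter is called, getOutputKeyClass() falls back to a default
        // (LongWritable in the mapred API).
        System.out.println("default output key class: " + jobConf.getOutputKeyClass().getName());

        // After the job is configured, the getter returns whatever was set.
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(IntWritable.class);
        System.out.println("configured output key class: " + jobConf.getOutputKeyClass().getName());
        System.out.println("configured output value class: " + jobConf.getOutputValueClass().getName());
    }
}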
From source file: com.mellanox.hadoop.mapred.UdaPlugin.java
License: Apache License
public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0"); // default 0 means ignoring this parameter and use instead -Xmx and mapred.job.shuffle.input.buffer.percent
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;

    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal parameter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }

        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in Bytes - pass the raw value we got from xml file (with only conversion to bytes)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in Bytes - passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); // "true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    // checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            // saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
        }
    }
    // sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}
From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor.java
License: Apache License
public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory classFactory) {
    String outputKeyClassName = null;
    String outputValueClassName = null;

    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        outputKeyClassName = context.getOutputKeyClass().getName();
        outputValueClassName = context.getOutputValueClass().getName();
    } else {
        outputKeyClassName = conf.getOutputKeyClass().getName();
        outputValueClassName = conf.getOutputValueClass().getName();
    }

    RecordDescriptor recordDescriptor = null;
    try {
        if (classFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(outputKeyClassName),
                    (Class<? extends Writable>) Class.forName(outputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) classFactory.loadClass(outputKeyClassName),
                    (Class<? extends Writable>) classFactory.loadClass(outputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
    return recordDescriptor;
}
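As a minimal sketch of the old-vs-new API split the method above branches on (assuming a Hadoop 2.x classpath; the class name is illustrative and not from the Hyracks sources), the same output key class setting is read from the mapred JobConf in the old API and through a mapreduce Job/JobContext in the new one:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;

public class OldVsNewApiSketch {
    public static void main(String[] args) throws Exception {
        // Old (mapred) API: the output key class is read from the JobConf itself.
        JobConf jobConf = new JobConf();
        jobConf.setOutputKeyClass(Text.class);
        System.out.println("mapred: " + jobConf.getOutputKeyClass().getName());

        // New (mapreduce) API: the same setting is read through the Job/JobContext.
        Job job = Job.getInstance();
        job.setOutputKeyClass(Text.class);
        System.out.println("mapreduce: " + job.getOutputKeyClass().getName());
    }
}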
From source file: org.apache.blur.spark.Consumer.java
License: Apache License
private void run() {

    String checkpointDirectory = "hdfs://10.252.5.113:9000/user/hadoop/spark";

    // number of partitions for the Kafka topic
    int _partitionCount = 5;

    List<JavaDStream<MessageAndMetadata>> streamsList = new ArrayList<JavaDStream<MessageAndMetadata>>(
            _partitionCount);
    JavaDStream<MessageAndMetadata> unionStreams;

    SparkConf conf = new SparkConf().setAppName("KafkaReceiver").set("spark.streaming.blockInterval", "200");

    // Path to the Blur libraries. Can be copied to each node of the Spark cluster.
    conf.set("spark.executor.extraClassPath", "/home/apache-blur-0.2.4/lib/*");

    // Use KryoSerializer for BlurMutate and Text.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(3000));

    /*
     * Receive the Kafka stream. Create an individual receiver for each
     * topic partition.
     */
    for (int i = 0; i < _partitionCount; i++) {
        streamsList.add(ssc.receiverStream(new KafkaReceiver(_props, i)));
    }

    /*
     * Union all the streams if there is more than 1 stream.
     */
    if (streamsList.size() > 1) {
        unionStreams = ssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the 1 stream
        unionStreams = streamsList.get(0);
    }

    /*
     * Generate the JavaPairDStream.
     */
    JavaPairDStream<Text, BlurMutate> pairDStream = unionStreams
            .mapToPair(new PairFunction<MessageAndMetadata, Text, BlurMutate>() {

                private static final long serialVersionUID = 443235214978L;

                public Tuple2<Text, BlurMutate> call(MessageAndMetadata mmeta) {

                    /*
                     * Create the BlurMutate from MessageAndMetadata.
                     */
                    String message = new String(mmeta.getPayload());
                    String keyStr = DigestUtils.shaHex(message);
                    Text key = new Text((keyStr).getBytes());
                    BlurMutate mutate = new BlurMutate(BlurMutate.MUTATE_TYPE.REPLACE, keyStr, keyStr, "family");
                    mutate.addColumn("message", message);

                    return new Tuple2<Text, BlurMutate>(key, mutate);
                }
            });

    pairDStream.foreachRDD(new Function2<JavaPairRDD<Text, BlurMutate>, Time, Void>() {

        private static final long serialVersionUID = 88875777435L;

        @Override
        public Void call(JavaPairRDD<Text, BlurMutate> rdd, Time time) throws Exception {

            /*
             * Blur table details.
             */
            TableDescriptor tableDescriptor = new TableDescriptor();
            String tableUri = new Path("hdfs://10.252.5.113:9000/blur/tables/nrt").toString();
            tableDescriptor.tableUri = tableUri;
            tableDescriptor.cluster = "pearson";
            tableDescriptor.name = "nrt";
            tableDescriptor.shardCount = 9;
            Configuration conf = new Configuration();

            /*
             * Partition the RDD to match the Blur table shard count. Uses a
             * custom Partitioner to channel each BlurMutate to the correct
             * shard.
             */
            final JavaPairRDD<Text, BlurMutate> pRdd = rdd
                    .partitionBy(new BlurSparkPartitioner(tableDescriptor.shardCount))
                    .persist(StorageLevel.MEMORY_ONLY_2());

            /*
             * Blur-specific configuration.
             */
            BlurOutputFormat.setIndexLocally(conf, false);
            BlurOutputFormat.setOptimizeInFlight(conf, false);
            conf.setClass("mapreduce.reduce.class", DefaultBlurReducer.class, Reducer.class);
            conf.setClass("mapreduce.outputformat.class", BlurOutputFormat.class, OutputFormat.class);
            conf.setClass("mapreduce.partitioner.class", BlurPartitioner.class, Partitioner.class);
            conf.set("mapred.output.committer.class", BlurOutputCommitter.class.getName());
            conf.setInt("blur.output.max.document.buffer.size", 10000);
            BlurOutputFormat.setTableDescriptor(conf, tableDescriptor);

            JobConf jobConf = new JobConf(conf);
            jobConf.setNumReduceTasks(tableDescriptor.shardCount);
            jobConf.setOutputKeyClass(Text.class);
            jobConf.setOutputValueClass(BlurMutate.class);

            BlurMapReduceUtil.addAllJarsInBlurLib(conf);
            BlurMapReduceUtil.addDependencyJars(conf, org.apache.zookeeper.ZooKeeper.class,
                    org.apache.lucene.codecs.lucene42.Lucene42Codec.class, jobConf.getOutputKeyClass(),
                    jobConf.getOutputValueClass());

            /*
             * Write the RDD to the Blur table.
             */
            if (pRdd.count() > 0)
                pRdd.saveAsNewAPIHadoopFile(tableUri, Text.class, BlurMutate.class, BlurOutputFormat.class,
                        jobConf);

            return null;
        }
    });

    // ssc.checkpoint(checkpointDirectory);
    ssc.start();
    ssc.awaitTermination();
}
From source file: org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java
License: Apache License
@SuppressWarnings("unchecked") @Override//from www . j a v a 2 s. c o m public void configure(JobConf job) { super.configure(job); debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$ transMapXml = job.get("transformation-map-xml"); transCombinerXml = job.get("transformation-combiner-xml"); transReduceXml = job.get("transformation-reduce-xml"); mapInputStepName = job.get("transformation-map-input-stepname"); mapOutputStepName = job.get("transformation-map-output-stepname"); combinerInputStepName = job.get("transformation-combiner-input-stepname"); combinerOutputStepName = job.get("transformation-combiner-output-stepname"); combineSingleThreaded = isCombinerSingleThreaded(job); reduceInputStepName = job.get("transformation-reduce-input-stepname"); reduceOutputStepName = job.get("transformation-reduce-output-stepname"); reduceSingleThreaded = isReducerSingleThreaded(job); String xmlVariableSpace = job.get("variableSpace"); if (!Const.isEmpty(xmlVariableSpace)) { setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job. The contents: "); // deserialize from xml to variable space XStream xStream = new XStream(); if (xStream != null) { setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: "); variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace); for (String variableName : variableSpace.listVariables()) { if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(variableName, variableSpace.getVariable(variableName)); } } } } else { setDebugStatus( "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration."); variableSpace = new Variables(); } // Check for environment variables in the userDefined variables Iterator<Entry<String, String>> iter = job.iterator(); while (iter.hasNext()) { Entry<String, String> entry = iter.next(); if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) { System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()), entry.getValue()); } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(entry.getKey(), entry.getValue()); } } MRUtil.passInformationToTransformation(variableSpace, job); switch (mrOperation) { case Combine: outClassK = (Class<K>) job.getMapOutputKeyClass(); outClassV = (Class<V>) job.getMapOutputValueClass(); break; case Reduce: outClassK = (Class<K>) job.getOutputKeyClass(); outClassV = (Class<V>) job.getOutputValueClass(); break; default: throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation); } if (debug) { System.out.println("Job configuration>"); System.out.println("Output key class: " + outClassK.getName()); System.out.println("Output value class: " + outClassV.getName()); } // set the log level to what the level of the job is String stringLogLevel = job.get("logLevel"); if (!Const.isEmpty(stringLogLevel)) { logLevel = LogLevel.valueOf(stringLogLevel); setDebugStatus("Log level set to " + stringLogLevel); } else { System.out.println( "Could not retrieve the log level from the job configuration. logLevel will not be set."); } createTrans(job); }
From source file: org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License: Open Source License
@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();
    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}
From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java
License: Apache License
@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();
    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}
From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java
License: Apache License
@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();
    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}
From source file: tap.core.ReducerBridge.java
License: Apache License
@Override
public void configure(JobConf conf) {
    super.configure(conf);
    isTextOutput = conf.getOutputFormat() instanceof TextOutputFormat;
    isProtoOutput = conf.getOutputFormat() instanceof TapfileOutputFormat;

    if (isProtoOutput) {
        try {
            mapOutClass = Class.forName(conf.get(Phase.MAP_OUT_CLASS));
            reduceOutClass = Class.forName(conf.get(Phase.REDUCE_OUT_CLASS));
            if (mapOutClass != reduceOutClass) {
                reduceOutKeyChanges = true;
                String groupBy = conf.get(Phase.GROUP_BY);
                String sortBy = conf.get(Phase.SORT_BY);
                reduceOutSchema = ReflectUtils.getSchema(ObjectFactory.newInstance(reduceOutClass));
                extractor = ReflectionKeyExtractor.getReflectionKeyExtractorForReduceOutKey(reduceOutSchema,
                        groupBy, sortBy);
            }
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    multiOutputPrefix = conf.get(Phase.MULTIPLE_OUTPUT_PREFIX);
    if (multiOutputPrefix == null)
        multiOutputPrefix = "out";

    MultipleOutputs.addMultiNamedOutput(conf, multiOutputPrefix, conf.getOutputFormat().getClass(),
            conf.getOutputKeyClass(), conf.getOutputValueClass());

    this.multiOutput = new MultipleOutputs(conf);
}
From source file: voldemort.store.readonly.mr.serialization.JsonSequenceFileOutputFormat.java
License: Apache License
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {

    // Shamelessly copy in hadoop code to allow us to set the metadata with
    // our schema

    // get the path of the temporary output file
    Path file = FileOutputFormat.getTaskOutputPath(job, name);

    FileSystem fs = file.getFileSystem(job);
    CompressionType compressionType = CompressionType.BLOCK;
    // find the right codec
    Class<?> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);

    // set the schema metadata
    /* begin jays code */
    SequenceFile.Metadata meta = new SequenceFile.Metadata();
    meta.set(new Text("key.schema"), new Text(getSchema("reducer.output.key.schema", job)));
    meta.set(new Text("value.schema"), new Text(getSchema("reducer.output.value.schema", job)));

    final SequenceFile.Writer out = SequenceFile.createWriter(fs, job, file, job.getOutputKeyClass(),
            job.getOutputValueClass(), compressionType, codec, progress, meta);
    /* end jays code */

    return new RecordWriter<BytesWritable, BytesWritable>() {

        public void write(BytesWritable key, BytesWritable value) throws IOException {
            out.append(key, value);
        }

        public void close(Reporter reporter) throws IOException {
            out.close();
        }
    };
}