List of usage examples for org.apache.hadoop.mapred JobConf getMapOutputKeyClass
public Class<?> getMapOutputKeyClass()
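A minimal usage sketch before the real-world sources below (the class choices are illustrative, not taken from any of them): getMapOutputKeyClass() returns the key class registered with setMapOutputKeyClass(); if no map output key class was ever set, it falls back to the job's final output key class.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class GetMapOutputKeyClassSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No map output key class set yet: the getter falls back to the job's output key class.
        conf.setOutputKeyClass(LongWritable.class);
        System.out.println(conf.getMapOutputKeyClass()); // class org.apache.hadoop.io.LongWritable

        // An explicit intermediate (map output) key class takes precedence.
        conf.setMapOutputKeyClass(Text.class);
        System.out.println(conf.getMapOutputKeyClass()); // class org.apache.hadoop.io.Text
    }
}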
From source file:org.apache.tez.mapreduce.hadoop.MRHelpers.java
License:Apache License
/**
 * Pulls in specific keys from the base configuration, if they are not set at
 * the stage level. An explicit list of keys is copied over (not all), which
 * require translation to tez keys.
 */
private static void setStageKeysFromBaseConf(Configuration conf, Configuration baseConf, String stage) {
    // Don't clobber explicit tez config.
    JobConf jobConf = null;
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS) == null) {
        // If this is set, but the comparator is not set, and their types differ -
        // the job will break.
        if (conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS) == null) {
            // Pull this in from the baseConf
            // Create jobConf only if required.
            jobConf = new JobConf(baseConf);
            conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, jobConf.getMapOutputKeyClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_KEY_CLASS + " for stage: " + stage
                    + " based on job level configuration. Value: " + conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS));
            }
        }
    }
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS) == null) {
        if (conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS) == null) {
            if (jobConf == null) {
                // Create jobConf if not already created
                jobConf = new JobConf(baseConf);
            }
            conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, jobConf.getMapOutputValueClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_VALUE_CLASS + " for stage: " + stage
                    + " based on job level configuration. Value: " + conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS));
            }
        }
    }
}
From source file:org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java
License:Apache License
@SuppressWarnings("unchecked") @Override//ww w .java 2 s. co m public void configure(JobConf job) { super.configure(job); debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$ transMapXml = job.get("transformation-map-xml"); transCombinerXml = job.get("transformation-combiner-xml"); transReduceXml = job.get("transformation-reduce-xml"); mapInputStepName = job.get("transformation-map-input-stepname"); mapOutputStepName = job.get("transformation-map-output-stepname"); combinerInputStepName = job.get("transformation-combiner-input-stepname"); combinerOutputStepName = job.get("transformation-combiner-output-stepname"); combineSingleThreaded = isCombinerSingleThreaded(job); reduceInputStepName = job.get("transformation-reduce-input-stepname"); reduceOutputStepName = job.get("transformation-reduce-output-stepname"); reduceSingleThreaded = isReducerSingleThreaded(job); String xmlVariableSpace = job.get("variableSpace"); if (!Const.isEmpty(xmlVariableSpace)) { setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job. The contents: "); // deserialize from xml to variable space XStream xStream = new XStream(); if (xStream != null) { setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: "); variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace); for (String variableName : variableSpace.listVariables()) { if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(variableName, variableSpace.getVariable(variableName)); } } } } else { setDebugStatus( "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration."); variableSpace = new Variables(); } // Check for environment variables in the userDefined variables Iterator<Entry<String, String>> iter = job.iterator(); while (iter.hasNext()) { Entry<String, String> entry = iter.next(); if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) { System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()), entry.getValue()); } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(entry.getKey(), entry.getValue()); } } MRUtil.passInformationToTransformation(variableSpace, job); switch (mrOperation) { case Combine: outClassK = (Class<K>) job.getMapOutputKeyClass(); outClassV = (Class<V>) job.getMapOutputValueClass(); break; case Reduce: outClassK = (Class<K>) job.getOutputKeyClass(); outClassV = (Class<V>) job.getOutputValueClass(); break; default: throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation); } if (debug) { System.out.println("Job configuration>"); System.out.println("Output key class: " + outClassK.getName()); System.out.println("Output value class: " + outClassV.getName()); } // set the log level to what the level of the job is String stringLogLevel = job.get("logLevel"); if (!Const.isEmpty(stringLogLevel)) { logLevel = LogLevel.valueOf(stringLogLevel); setDebugStatus("Log level set to " + stringLogLevel); } else { System.out.println( "Could not retrieve the log level from the job configuration. logLevel will not be set."); } createTrans(job); }
From source file:org.pentaho.hadoop.mapreduce.PentahoMapRunnable.java
License:Apache License
public void configure(JobConf job) {
    pluginWaitTimeout = TimeUnit.MINUTES.toMillis(5);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    String xmlVariableSpace = job.get("variableSpace");

    outClassK = (Class<K2>) job.getMapOutputKeyClass();
    outClassV = (Class<V2>) job.getMapOutputValueClass();

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapRunnable(): variableSpace was retrieved from the job. The contents: ");
        setDebugStatus(xmlVariableSpace);
        // deserialize from xml to variable space
        XStream xStream = new XStream();
        setDebugStatus("PentahoMapRunnable(): Setting classes variableSpace property.: ");
        variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);
        for (String variableName : variableSpace.listVariables()) {
            if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                System.setProperty(variableName, variableSpace.getVariable(variableName));
            }
            if (KETTLE_PMR_PLUGIN_TIMEOUT.equals(variableName)) {
                try {
                    pluginWaitTimeout = Long.parseLong(variableSpace.getVariable(variableName));
                } catch (Exception e) {
                    System.out.println("Unable to parse plugin wait timeout, defaulting to 5 minutes");
                }
            }
        }
    } else {
        setDebugStatus("PentahoMapRunnable(): The PDI Job's variable space was not sent.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()), entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    setDebugStatus("Job configuration");
    setDebugStatus("Output key class: " + outClassK.getName());
    setDebugStatus("Output value class: " + outClassV.getName());

    // set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println("Could not retrieve the log level from the job configuration. logLevel will not be set.");
    }

    long deadline = 0;
    boolean first = true;
    while (true) {
        createTrans(job);
        if (first) {
            deadline = pluginWaitTimeout + System.currentTimeMillis();
            System.out.println(PentahoMapRunnable.class + ": Trans creation checking starting now " + new Date().toString());
            first = false;
        }
        List<MissingTrans> missingTranses = new ArrayList<MissingTrans>();
        for (StepMeta stepMeta : trans.getTransMeta().getSteps()) {
            StepMetaInterface stepMetaInterface = stepMeta.getStepMetaInterface();
            if (stepMetaInterface instanceof MissingTrans) {
                MissingTrans missingTrans = (MissingTrans) stepMetaInterface;
                System.out.println(MissingTrans.class + "{stepName: " + missingTrans.getStepName()
                    + ", missingPluginId: " + missingTrans.getMissingPluginId() + "}");
                missingTranses.add(missingTrans);
            }
        }
        if (missingTranses.size() == 0) {
            System.out.println(PentahoMapRunnable.class + ": Done waiting on plugins now " + new Date().toString());
            break;
        } else {
            if (System.currentTimeMillis() > deadline) {
                StringBuilder stringBuilder = new StringBuilder("Failed to initialize plugins: ");
                for (MissingTrans missingTrans : missingTranses) {
                    stringBuilder.append(missingTrans.getMissingPluginId());
                    stringBuilder.append(" on step ").append(missingTrans.getStepName());
                    stringBuilder.append(", ");
                }
                stringBuilder.setLength(stringBuilder.length() - 2);
                throw new RuntimeException(stringBuilder.toString());
            } else {
                try {
                    Thread.sleep(Math.min(100, deadline - System.currentTimeMillis()));
                } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    }
}
From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License:Open Source License
@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
        "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    // Final output classes deliberately differ from the map output classes above.
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();
    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java
License:Apache License
@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
        "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    // Final output classes deliberately differ from the map output classes above.
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();
    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java
License:Apache License
@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
        "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    // Final output classes deliberately differ from the map output classes above.
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();
    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}
From source file:sg.edu.astar.dsi.mergespill.App.java
public synchronized static void doProcess(String directory, int spillNumber)
        throws IOException, InterruptedException {
    // TODO code application logic here
    System.out.println("directory: " + directory);
    System.out.println("numberOfSpill: " + spillNumber);

    //SETUP
    JobConf job = new JobConf();
    //job.setMapOutputKeyClass(Text.class);
    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    //Class<Text> keyClass = (Class<Text>)job.getMapOutputKeyClass();
    Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass();
    Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();

    FileSystem rfs;
    CompressionCodec codec = null;
    Counters.Counter spilledRecordsCounter = null;
    rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw();

    while (!new File(directory).isDirectory()) {
        sleep(5000);
    }

    if (new File(directory).isDirectory()) {
        ArrayList<Path> spillFile = new ArrayList();
        ArrayList<Path> spillFileIndex = new ArrayList();

        App myApp;
        myApp = new App();
        myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber);

        ArrayList<SpillRecord> indexCacheList = new ArrayList<>();
        int numSpills = 0;

        Iterator itrSpillFileIndex = spillFileIndex.iterator();
        while (itrSpillFileIndex.hasNext()) {
            numSpills++;
            Path temp = (Path) itrSpillFileIndex.next();
            System.out.println(temp);
            SpillRecord sr = new SpillRecord(temp, job);
            indexCacheList.add(sr);

            System.out.println("indexFile partition size: " + sr.size());
            long startOffset = 0;
            for (int i = 0; i < sr.size(); i++) { //sr.size is the number of partitions
                IndexRecord ir = sr.getIndex(i);
                System.out.println("index[" + i + "] rawLength = " + ir.rawLength);
                System.out.println("index[" + i + "] partLength = " + ir.partLength);
                System.out.println("index[" + i + "] startOffset= " + ir.startOffset);
                startOffset = ir.startOffset;
            }
            System.out.println("========================================");
        }
        System.out.println("Number of spills: " + numSpills);

        //FinalOutputFile
        Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE");
        FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
        System.out.println("GOT HERE 1");
        Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index");

        //ONE PARTITION ONLY
        List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills);
        for (int i = 0; i < numSpills; i++) {
            IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0);
            Path temp = spillFileIndex.get(i);
            String temp1 = temp.toString();
            String temp2 = temp1.substring(0, temp1.length() - 6);
            //System.out.println(temp2);
            //System.out.println(new Path(temp2).getParent());
            //File myFile = new File(temp2);
            //System.out.println(myFile.getPath());

            Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(temp2),
                theIndexRecord.startOffset, theIndexRecord.partLength, codec, true);
            segmentList.add(i, s);
        }
        System.out.println("GOT HERE 2");

        RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4,
            new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null,
            spilledRecordsCounter, null, TaskType.MAP);
        System.out.println("GOT HERE 3");

        //write merged output to disk
        long segmentStart = finalOut.getPos();
        FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut);
        Writer<TextDsi, IntWritable> writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut,
            TextDsi.class, IntWritable.class, codec, spilledRecordsCounter);
        System.out.println("GOT HERE 4");

        Merger.writeFile(kvIter, writer, null, job);
        writer.close();
        finalOut.close();
        System.out.println("GOT HERE 5");

        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(1);
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        System.out.println("rec.startOffset: " + rec.startOffset);
        System.out.println("rec.rawLength : " + rec.rawLength);
        System.out.println("rec.partLength : " + rec.partLength);
        spillRec.putIndex(rec, 0);
        spillRec.writeToFile(finalIndexFile, job);
        System.out.println("GOT HERE 6");
    } else {
        System.out.println("argument is not a directory! : " + directory);
    }
}
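Because getMapOutputKeyClass() is declared to return a raw Class&lt;?&gt;, callers that need a typed Class reference, as the merge code above does for its Segment and Writer generics, have to cast the result. A minimal sketch of that unchecked cast, assuming the classes were set beforehand (class choices here are illustrative):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class TypedKeyClassSketch {
    @SuppressWarnings("unchecked")
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // The getters return Class<?>; the casts are unchecked but safe here
        // because the classes were set explicitly a few lines above.
        Class<Text> keyClass = (Class<Text>) job.getMapOutputKeyClass();
        Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();
        System.out.println(keyClass.getName() + " / " + valClass.getName());
    }
}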