List of usage examples for org.apache.hadoop.mapred JobConf getMapOutputValueClass
public Class<?> getMapOutputValueClass()
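The method returns the value class configured for the map output records; when no map-specific class has been set, it falls back to the job's output value class. Before the project examples below, here is a minimal stand-alone sketch of that behavior (our own illustration, not taken from any of the listed source files; the class name MapOutputValueClassExample is hypothetical):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class MapOutputValueClassExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No explicit map output value class: the call falls back to the
        // job's output value class.
        conf.setOutputValueClass(Text.class);
        System.out.println(conf.getMapOutputValueClass().getName()); // org.apache.hadoop.io.Text

        // An explicitly set map output value class takes precedence.
        conf.setMapOutputValueClass(IntWritable.class);
        System.out.println(conf.getMapOutputValueClass().getName()); // org.apache.hadoop.io.IntWritable
    }
}

The examples that follow show the same call used to build sort operators, partitioners, and mapper configuration in real projects.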
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
public static InMemorySortOperatorDescriptor getInMemorySorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    InMemorySortOperatorDescriptor inMemorySortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    inMemorySortOp = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return inMemorySortOp;
}
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
public static ExternalSortOperatorDescriptor getExternalSorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    ExternalSortOperatorDescriptor externalSortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    externalSortOp = new ExternalSortOperatorDescriptor(spec,
            conf.getInt(HYRACKS_EX_SORT_FRAME_LIMIT, DEFAULT_EX_SORT_FRAME_LIMIT), new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return externalSortOp;
}
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
public static MToNPartitioningConnectorDescriptor getMtoNHashPartitioningConnector(JobConf conf,
        IConnectorDescriptorRegistry spec) {
    Class mapOutputKeyClass = conf.getMapOutputKeyClass();
    Class mapOutputValueClass = conf.getMapOutputValueClass();
    MToNPartitioningConnectorDescriptor connectorDescriptor = null;
    ITuplePartitionComputerFactory factory = null;
    conf.getMapOutputKeyClass();
    if (conf.getPartitionerClass() != null
            && !conf.getPartitionerClass().getName().startsWith("org.apache.hadoop")) {
        Class<? extends Partitioner> partitioner = conf.getPartitionerClass();
        factory = new HadoopPartitionerTuplePartitionComputerFactory(partitioner,
                DatatypeHelper.createSerializerDeserializer(mapOutputKeyClass),
                DatatypeHelper.createSerializerDeserializer(mapOutputValueClass));
    } else {
        RecordDescriptor recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(mapOutputKeyClass,
                mapOutputValueClass);
        ISerializerDeserializer mapOutputKeySerializerDerserializer = DatatypeHelper
                .createSerializerDeserializer(mapOutputKeyClass);
        factory = new HadoopHashTuplePartitionComputerFactory(mapOutputKeySerializerDerserializer);
    }
    connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec, factory);
    return connectorDescriptor;
}
From source file:org.apache.sysml.runtime.matrix.mapred.GMRMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);
    mapperID = job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID);
    dimsUnknownFilePrefix = job.get("dims.unknown.file.prefix");
    _filterEmptyInputBlocks = allowsFilterEmptyInputBlocks();

    //assign the temporary variables
    try {
        // System.out.println(valueClass.getName());
        // System.out.println(MatrixCell.class.getName());
        if (job.getMapOutputValueClass().equals(TaggedMatrixPackedCell.class))
            taggedValueBuffer = TaggedMatrixValue.createObject(MatrixPackedCell.class);
        else
            taggedValueBuffer = TaggedMatrixValue.createObject(valueClass);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    //decide whether it is a map-only job
    mapOnlyJob = (job.getNumReduceTasks() <= 0);
    if (!mapOnlyJob)
        return;

    //get the indexes of the final output matrices
    resultIndexes = MRJobConfiguration.getResultIndexes(job);
    resultDimsUnknown = MRJobConfiguration.getResultDimsUnknown(job);

    //initialize SystemML Counters (defined in MRJobConfiguration)
    resultsNonZeros = new long[resultIndexes.length];
    resultsMaxRowDims = new long[resultIndexes.length];
    resultsMaxColDims = new long[resultIndexes.length];

    tagMapping = new HashMap<>();
    for (int i = 0; i < resultIndexes.length; i++) {
        byte output = resultIndexes[i];
        ArrayList<Integer> vec = tagMapping.get(output);
        if (vec == null) {
            vec = new ArrayList<>();
            tagMapping.put(output, vec);
        }
        vec.add(i);
    }

    //for a map-only job, get the map output converters
    collectFinalMultipleOutputs = MRJobConfiguration.getMultipleConvertedOutputs(job);
}
From source file:org.apache.tez.mapreduce.hadoop.MRHelpers.java
License:Apache License
/**
 * Pulls in specific keys from the base configuration, if they are not set at
 * the stage level. An explicit list of keys is copied over (not all), which
 * require translation to tez keys.
 */
private static void setStageKeysFromBaseConf(Configuration conf, Configuration baseConf, String stage) {
    // Don't clobber explicit tez config.
    JobConf jobConf = null;
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS) == null) {
        // If this is set, but the comparator is not set, and their types differ -
        // the job will break.
        if (conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS) == null) {
            // Pull this in from the baseConf
            // Create jobConf only if required.
            jobConf = new JobConf(baseConf);
            conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, jobConf.getMapOutputKeyClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_KEY_CLASS + " for stage: " + stage
                        + " based on job level configuration. Value: "
                        + conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS));
            }
        }
    }
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS) == null) {
        if (conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS) == null) {
            if (jobConf == null) {
                // Create jobConf if not already created
                jobConf = new JobConf(baseConf);
            }
            conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, jobConf.getMapOutputValueClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_VALUE_CLASS + " for stage: " + stage
                        + " based on job level configuration. Value: "
                        + conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS));
            }
        }
    }
}
From source file:org.dkpro.bigdata.hadoop.DkproMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        // create an output writable of the appropriate type
        outValue = (CASWritable) job.getMapOutputValueClass().newInstance();
        docLanguage = job.get("dkpro.document.language");
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.inputName = job.get("mapred.input.dir");
        this.taskId = job.get("mapred.task.id");
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);

        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        if (job.getBoolean("dkpro.output.onedirpertask", true)) {
            this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        } else {
            this.working_dir = new Path("uima_output");
        }
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }
        }
        Map<String, String> variableValues = new HashMap<String, String>();
        variableValues.put("\\$dir", this.results_dir.toString());
        variableValues.put("\\$input", this.inputName);
        variableValues.put("\\$taskid", this.taskId);
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);
        if (cacheFiles != null) {
            for (Path cacheFile : cacheFiles) {
                variableValues.put("^\\$cache/" + cacheFile.getName(), cacheFile.toUri().getPath());
            }
        }
        for (final Entry<String, URL> resource : this.resourceURIs.entrySet()) {
            variableValues.put("\\$" + resource, resource.getValue().toString());
        }
        AnalysisEngineUtil.replaceVariables(engineDescription, variableValues);
        this.engine = createEngine(engineDescription);
    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
From source file:org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java
License:Apache License
@SuppressWarnings("unchecked") @Override/*from w w w .ja v a 2 s . co m*/ public void configure(JobConf job) { super.configure(job); debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$ transMapXml = job.get("transformation-map-xml"); transCombinerXml = job.get("transformation-combiner-xml"); transReduceXml = job.get("transformation-reduce-xml"); mapInputStepName = job.get("transformation-map-input-stepname"); mapOutputStepName = job.get("transformation-map-output-stepname"); combinerInputStepName = job.get("transformation-combiner-input-stepname"); combinerOutputStepName = job.get("transformation-combiner-output-stepname"); combineSingleThreaded = isCombinerSingleThreaded(job); reduceInputStepName = job.get("transformation-reduce-input-stepname"); reduceOutputStepName = job.get("transformation-reduce-output-stepname"); reduceSingleThreaded = isReducerSingleThreaded(job); String xmlVariableSpace = job.get("variableSpace"); if (!Const.isEmpty(xmlVariableSpace)) { setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job. The contents: "); // deserialize from xml to variable space XStream xStream = new XStream(); if (xStream != null) { setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: "); variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace); for (String variableName : variableSpace.listVariables()) { if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(variableName, variableSpace.getVariable(variableName)); } } } } else { setDebugStatus( "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration."); variableSpace = new Variables(); } // Check for environment variables in the userDefined variables Iterator<Entry<String, String>> iter = job.iterator(); while (iter.hasNext()) { Entry<String, String> entry = iter.next(); if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) { System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()), entry.getValue()); } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(entry.getKey(), entry.getValue()); } } MRUtil.passInformationToTransformation(variableSpace, job); switch (mrOperation) { case Combine: outClassK = (Class<K>) job.getMapOutputKeyClass(); outClassV = (Class<V>) job.getMapOutputValueClass(); break; case Reduce: outClassK = (Class<K>) job.getOutputKeyClass(); outClassV = (Class<V>) job.getOutputValueClass(); break; default: throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation); } if (debug) { System.out.println("Job configuration>"); System.out.println("Output key class: " + outClassK.getName()); System.out.println("Output value class: " + outClassV.getName()); } // set the log level to what the level of the job is String stringLogLevel = job.get("logLevel"); if (!Const.isEmpty(stringLogLevel)) { logLevel = LogLevel.valueOf(stringLogLevel); setDebugStatus("Log level set to " + stringLogLevel); } else { System.out.println( "Could not retrieve the log level from the job configuration. logLevel will not be set."); } createTrans(job); }
From source file:org.pentaho.hadoop.mapreduce.PentahoMapRunnable.java
License:Apache License
public void configure(JobConf job) {
    pluginWaitTimeout = TimeUnit.MINUTES.toMillis(5);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    String xmlVariableSpace = job.get("variableSpace");

    outClassK = (Class<K2>) job.getMapOutputKeyClass();
    outClassV = (Class<V2>) job.getMapOutputValueClass();

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapRunnable(): variableSpace was retrieved from the job. The contents: ");
        setDebugStatus(xmlVariableSpace);

        // deserialize from xml to variable space
        XStream xStream = new XStream();

        setDebugStatus("PentahoMapRunnable(): Setting classes variableSpace property.: ");
        variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

        for (String variableName : variableSpace.listVariables()) {
            if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                System.setProperty(variableName, variableSpace.getVariable(variableName));
            }
            if (KETTLE_PMR_PLUGIN_TIMEOUT.equals(variableName)) {
                try {
                    pluginWaitTimeout = Long.parseLong(variableSpace.getVariable(variableName));
                } catch (Exception e) {
                    System.out.println("Unable to parse plugin wait timeout, defaulting to 5 minutes");
                }
            }
        }
    } else {
        setDebugStatus("PentahoMapRunnable(): The PDI Job's variable space was not sent.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                    entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    setDebugStatus("Job configuration");
    setDebugStatus("Output key class: " + outClassK.getName());
    setDebugStatus("Output value class: " + outClassV.getName());

    // set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println(
                "Could not retrieve the log level from the job configuration. logLevel will not be set.");
    }

    long deadline = 0;
    boolean first = true;
    while (true) {
        createTrans(job);
        if (first) {
            deadline = pluginWaitTimeout + System.currentTimeMillis();
            System.out.println(PentahoMapRunnable.class + ": Trans creation checking starting now "
                    + new Date().toString());
            first = false;
        }
        List<MissingTrans> missingTranses = new ArrayList<MissingTrans>();
        for (StepMeta stepMeta : trans.getTransMeta().getSteps()) {
            StepMetaInterface stepMetaInterface = stepMeta.getStepMetaInterface();
            if (stepMetaInterface instanceof MissingTrans) {
                MissingTrans missingTrans = (MissingTrans) stepMetaInterface;
                System.out.println(MissingTrans.class + "{stepName: " + missingTrans.getStepName()
                        + ", missingPluginId: " + missingTrans.getMissingPluginId() + "}");
                missingTranses.add(missingTrans);
            }
        }
        if (missingTranses.size() == 0) {
            System.out.println(
                    PentahoMapRunnable.class + ": Done waiting on plugins now " + new Date().toString());
            break;
        } else {
            if (System.currentTimeMillis() > deadline) {
                StringBuilder stringBuilder = new StringBuilder("Failed to initialize plugins: ");
                for (MissingTrans missingTrans : missingTranses) {
                    stringBuilder.append(missingTrans.getMissingPluginId());
                    stringBuilder.append(" on step ").append(missingTrans.getStepName());
                    stringBuilder.append(", ");
                }
                stringBuilder.setLength(stringBuilder.length() - 2);
                throw new RuntimeException(stringBuilder.toString());
            } else {
                try {
                    Thread.sleep(Math.min(100, deadline - System.currentTimeMillis()));
                } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    }
}
From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License:Open Source License
@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputValueClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}