List of usage examples for org.apache.hadoop.mapred JobConf getMapOutputValueClass
public Class<?> getMapOutputValueClass()
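The method returns the value class configured for the job's map output data; if no map output value class has been set, it falls back to the job's (final) output value class. Before the examples below, here is a minimal sketch of the pattern most of them share: read the configured class in configure() and instantiate a reusable buffer object from it. The class and field names (ExampleMapOutputSetup, valueBuffer) are illustrative only and do not come from any of the source files listed.

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.util.ReflectionUtils;

// Illustrative sketch: cache the map output value class and a reusable instance of it.
public class ExampleMapOutputSetup implements JobConfigurable {
    private Class<?> valueClass;
    private Writable valueBuffer;

    @Override
    public void configure(JobConf job) {
        // Defaults to the job output value class if no map output value class was set.
        valueClass = job.getMapOutputValueClass();
        // Assumes the configured class is a Writable with a no-arg constructor.
        valueBuffer = (Writable) ReflectionUtils.newInstance(valueClass, job);
    }
}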
From source file:com.ibm.bi.dml.runtime.matrix.mapred.GMRMapper.java
License:Open Source License
public void configure(JobConf job) {
    super.configure(job);
    mapperID = job.get("mapred.task.id");
    dimsUnknownFilePrefix = job.get("dims.unknown.file.prefix");
    _filterEmptyInputBlocks = allowsFilterEmptyInputBlocks();

    // assign the temporary variables
    try {
        if (job.getMapOutputValueClass().equals(TaggedMatrixPackedCell.class))
            taggedValueBuffer = TaggedMatrixValue.createObject(MatrixPackedCell.class);
        else
            taggedValueBuffer = TaggedMatrixValue.createObject(valueClass);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // decide whether it is a map-only job
    mapOnlyJob = (job.getNumReduceTasks() <= 0);
    if (!mapOnlyJob)
        return;

    // get the indexes of the final output matrices
    resultIndexes = MRJobConfiguration.getResultIndexes(job);
    resultDimsUnknown = MRJobConfiguration.getResultDimsUnknown(job);

    // initialize SystemML counters (defined in MRJobConfiguration)
    resultsNonZeros = new long[resultIndexes.length];
    resultsMaxRowDims = new long[resultIndexes.length];
    resultsMaxColDims = new long[resultIndexes.length];

    tagMapping = new HashMap<Byte, ArrayList<Integer>>();
    for (int i = 0; i < resultIndexes.length; i++) {
        byte output = resultIndexes[i];
        ArrayList<Integer> vec = tagMapping.get(output);
        if (vec == null) {
            vec = new ArrayList<Integer>();
            tagMapping.put(output, vec);
        }
        vec.add(i);
    }

    // for a map-only job, get the map output converters
    collectFinalMultipleOutputs = MRJobConfiguration.getMultipleConvertedOutputs(job);
}
From source file:com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressCombiner.java
License:Open Source License
@Override
public void configure(JobConf job) {
    super.configure(job);
    this.valClass = job.getMapOutputValueClass();
}
From source file:com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMapper.java
License:Open Source License
@SuppressWarnings("unchecked") @Override/*from w w w . j av a2s .c om*/ public void configure(JobConf job) { super.configure(job); this.keyClass = job.getMapOutputKeyClass(); this.valClass = job.getMapOutputValueClass(); numprocs = job.getInt("numProcs", 1); overpartition = job.getInt("overpartition", 1); String ingressMethod = job.get("ingress"); if (ingressMethod.equals("greedy")) { this.ingress = new GreedyIngress<VidType>(numprocs); } else { this.ingress = new RandomIngress<VidType>(numprocs); } try { this.graphparser = (GraphParser) Class.forName(job.get("GraphParser")).newInstance(); this.vidparser = (FieldParser) Class.forName(job.get("VidParser")).newInstance(); this.vdataparser = (FieldParser) Class.forName(job.get("VdataParser")).newInstance(); this.edataparser = (FieldParser) Class.forName(job.get("EdataParser")).newInstance(); this.mapKey = (KeyType) keyClass.newInstance(); this.mapValue = (ValueType) valClass.newInstance(); } catch (InstantiationException e) { e.printStackTrace(); } catch (IllegalAccessException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } }
From source file:com.intel.hadoop.graphbuilder.preprocess.mapreduce.CreateGraphMapper.java
License:Open Source License
@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        this.tokenizer = (GraphTokenizer) Class.forName(job.get("GraphTokenizer")).newInstance();
        tokenizer.configure(job);
        this.valClass = job.getMapOutputValueClass();
        mapVal = (VertexEdgeUnionType) valClass.newInstance();
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
}
From source file:com.intel.hadoop.graphbuilder.preprocess.mapreduce.CreateGraphReducer.java
License:Open Source License
@Override
public void configure(JobConf job) {
    super.configure(job);
    this.valClass = job.getMapOutputValueClass();
    this.noBidir = job.getBoolean("noBidir", false);
    try {
        if (job.get("EdgeFunc") != null) {
            this.EdgeFunc = (Functional) Class.forName(job.get("EdgeFunc")).newInstance();
            this.EdgeFunc.configure(job);
        }
        if (job.get("VertexFunc") != null) {
            this.VertexFunc = (Functional) Class.forName(job.get("VertexFunc")).newInstance();
            this.VertexFunc.configure(job);
        }
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.intel.hadoop.graphbuilder.preprocess.mapreduce.EdgeTransformMapper.java
License:Open Source License
@Override
public void configure(JobConf job) {
    super.configure(job);
    this.reduceEndPoint = job.getBoolean("reduceEndPoint", EdgeTransformMR.SOURCE);
    try {
        this.graphparser = (GraphParser) Class.forName(job.get("GraphParser")).newInstance();
        this.vidparser = (FieldParser) Class.forName(job.get("VidParser")).newInstance();
        this.edataparser = (FieldParser) Class.forName(job.get("EdataParser")).newInstance();
        this.valClass = job.getMapOutputValueClass();
        val = (PairListType) valClass.newInstance();
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
public ReducerWrapperMapred(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    // Clone the JobConf so the temporary settings do not pollute other tasks
    JobConf jobConf = new JobConf((Configuration) invocationParameters.getConfiguration());
    LOG.info("Starting reducer:" + HadoopInvocationParameters.dumpConfiguration(jobConf));
    JobID jobID = (JobID) invocationParameters.getJobId();
    this.hadoopPartition = hadoopPartition;
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            jobConf);
    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition));
    updateJobConf(jobConf, taskAttemptID, region);
    context = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);
    reducer = (org.apache.hadoop.mapred.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobConf.getReducerClass(), jobConf);
    reducer.configure(jobConf);
    OutputFormat outputFormat = jobConf.getOutputFormat();
    FileSystem fs = FileSystem.get(jobConf);
    recordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat.getRecordWriter(fs,
            jobConf, getOutputName(hadoopPartition), Reporter.NULL);
    committer = jobConf.getOutputCommitter();

    // Create a task object so it can handle file format initialization.
    // ReduceTask is package-private in Hadoop 1.x, so we have to go through reflection.
    try {
        Class reduceTask = Class.forName("org.apache.hadoop.mapred.ReduceTask");
        Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class, TaskAttemptID.class,
                int.class, int.class, int.class);
        reduceTaskConstructor.setAccessible(true);
        Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, hadoopPartition, 0, 0);
        task.setConf(jobConf);
        task.initialize(jobConf, jobID, Reporter.NULL, false);
    } catch (Exception e) {
        throw new IOException("Cannot initialize ReduceTask", e);
    }
    committer.setupTask(context);

    Class<INKEY> keyClass = (Class<INKEY>) jobConf.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobConf.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, jobConf) > 0, firstKeySerializer,
            valueSerializer, invocationParameters.getSerializationMode(), secondKeySerializer, keyClass,
            valueClass, sort, HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, jobConf),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, jobConf),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, jobConf));
    transport = DataGridChunkedCollectionReader.getGridReader(params);

    outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
        @Override
        public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
            recordWriter.write(outkey, outvalue);
        }
    };
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.DkproMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        // create an output writable of the appropriate type
        outValue = (CASWritable) job.getMapOutputValueClass().newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);
        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }
        }
        replaceRecursively(engineDescription);
        this.engine = createEngine(engineDescription);
    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License:Apache License
public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory hadoopClassFactory) {
    RecordDescriptor recordDescriptor = null;
    String mapOutputKeyClassName = conf.getMapOutputKeyClass().getName();
    String mapOutputValueClassName = conf.getMapOutputValueClass().getName();
    try {
        if (hadoopClassFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(mapOutputKeyClassName),
                    (Class<? extends Writable>) Class.forName(mapOutputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) hadoopClassFactory.loadClass(mapOutputKeyClassName),
                    (Class<? extends Writable>) hadoopClassFactory.loadClass(mapOutputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return recordDescriptor;
}