Usage examples for org.apache.hadoop.mapred.JobConf.getMapOutputKeyClass()
public Class<?> getMapOutputKeyClass()
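Before the examples, a minimal self-contained sketch of the accessor's behavior (the class name MapOutputKeyClassExample is illustrative, not from any of the sources below): when no map-specific key class has been set, getMapOutputKeyClass() falls back to the job's (final) output key class.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class MapOutputKeyClassExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No map-specific key class set yet: the getter falls back to the
        // job's output key class.
        conf.setOutputKeyClass(Text.class);
        System.out.println(conf.getMapOutputKeyClass()); // class org.apache.hadoop.io.Text

        // An explicitly set map output key class takes precedence; this is the
        // value the examples below read back for sorting, partitioning, and
        // serialization of intermediate map output.
        conf.setMapOutputKeyClass(IntWritable.class);
        System.out.println(conf.getMapOutputKeyClass()); // class org.apache.hadoop.io.IntWritable
    }
}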
From source file:com.alexholmes.hadooputils.sort.SortInputSampler.java
License:Apache License
public static <K, V> void writePartitionFile(JobConf job, Sampler<K, V> sampler) throws IOException {
    Configuration conf = job;
    // Use the input format defined in the job, not the one provided by the
    // parent class's writePartitionFile() method, which is a plain
    // TextInputFormat by default.
    final InputFormat inf = job.getInputFormat();
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}
From source file:com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMapper.java
License:Open Source License
@SuppressWarnings("unchecked")
@Override
public void configure(JobConf job) {
    super.configure(job);
    this.keyClass = job.getMapOutputKeyClass();
    this.valClass = job.getMapOutputValueClass();
    numprocs = job.getInt("numProcs", 1);
    overpartition = job.getInt("overpartition", 1);
    String ingressMethod = job.get("ingress");
    if (ingressMethod.equals("greedy")) {
        this.ingress = new GreedyIngress<VidType>(numprocs);
    } else {
        this.ingress = new RandomIngress<VidType>(numprocs);
    }
    try {
        this.graphparser = (GraphParser) Class.forName(job.get("GraphParser")).newInstance();
        this.vidparser = (FieldParser) Class.forName(job.get("VidParser")).newInstance();
        this.vdataparser = (FieldParser) Class.forName(job.get("VdataParser")).newInstance();
        this.edataparser = (FieldParser) Class.forName(job.get("EdataParser")).newInstance();
        // Reusable key/value instances of the configured map output classes.
        this.mapKey = (KeyType) keyClass.newInstance();
        this.mapValue = (ValueType) valClass.newInstance();
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
public ReducerWrapperMapred(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    // Clone the JobConf, so the temporary settings do not pollute other tasks.
    JobConf jobConf = new JobConf((Configuration) invocationParameters.getConfiguration());

    LOG.info("Starting reducer:" + HadoopInvocationParameters.dumpConfiguration(jobConf));

    JobID jobID = (JobID) invocationParameters.getJobId();
    this.hadoopPartition = hadoopPartition;
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            jobConf);

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition));

    updateJobConf(jobConf, taskAttemptID, region);

    context = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);

    reducer = (org.apache.hadoop.mapred.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobConf.getReducerClass(), jobConf);

    reducer.configure(jobConf);

    OutputFormat outputFormat = jobConf.getOutputFormat();

    FileSystem fs = FileSystem.get(jobConf);
    recordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat.getRecordWriter(fs,
            jobConf, getOutputName(hadoopPartition), Reporter.NULL);

    committer = jobConf.getOutputCommitter();

    // Create the task object so it can handle file format initialization.
    // ReduceTask is not public in Hadoop 1.x, so we have to go through reflection.
    try {
        Class reduceTask = Class.forName("org.apache.hadoop.mapred.ReduceTask");
        Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class,
                TaskAttemptID.class, int.class, int.class, int.class);
        reduceTaskConstructor.setAccessible(true);
        Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, hadoopPartition, 0, 0);
        task.setConf(jobConf);
        task.initialize(jobConf, jobID, Reporter.NULL, false);
    } catch (Exception e) {
        throw new IOException("Cannot initialize ReduceTask", e);
    }

    committer.setupTask(context);

    Class<INKEY> keyClass = (Class<INKEY>) jobConf.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobConf.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, jobConf) > 0, firstKeySerializer,
            valueSerializer, invocationParameters.getSerializationMode(), secondKeySerializer, keyClass,
            valueClass, sort, HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, jobConf),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, jobConf),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, jobConf));
    transport = DataGridChunkedCollectionReader.getGridReader(params);

    outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
        @Override
        public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
            recordWriter.write(outkey, outvalue);
        }
    };
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License:Apache License
public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory hadoopClassFactory) {
    RecordDescriptor recordDescriptor = null;
    String mapOutputKeyClassName = conf.getMapOutputKeyClass().getName();
    String mapOutputValueClassName = conf.getMapOutputValueClass().getName();
    try {
        if (hadoopClassFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(mapOutputKeyClassName),
                    (Class<? extends Writable>) Class.forName(mapOutputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) hadoopClassFactory.loadClass(mapOutputKeyClassName),
                    (Class<? extends Writable>) hadoopClassFactory.loadClass(mapOutputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return recordDescriptor;
}
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
public static InMemorySortOperatorDescriptor getInMemorySorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    InMemorySortOperatorDescriptor inMemorySortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    inMemorySortOp = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return inMemorySortOp;
}
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
public static ExternalSortOperatorDescriptor getExternalSorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    ExternalSortOperatorDescriptor externalSortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    externalSortOp = new ExternalSortOperatorDescriptor(spec,
            conf.getInt(HYRACKS_EX_SORT_FRAME_LIMIT, DEFAULT_EX_SORT_FRAME_LIMIT), new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return externalSortOp;
}
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
public static MToNPartitioningConnectorDescriptor getMtoNHashPartitioningConnector(JobConf conf,
        IConnectorDescriptorRegistry spec) {

    Class mapOutputKeyClass = conf.getMapOutputKeyClass();
    Class mapOutputValueClass = conf.getMapOutputValueClass();

    MToNPartitioningConnectorDescriptor connectorDescriptor = null;
    ITuplePartitionComputerFactory factory = null;

    if (conf.getPartitionerClass() != null
            && !conf.getPartitionerClass().getName().startsWith("org.apache.hadoop")) {
        Class<? extends Partitioner> partitioner = conf.getPartitionerClass();
        factory = new HadoopPartitionerTuplePartitionComputerFactory(partitioner,
                DatatypeHelper.createSerializerDeserializer(mapOutputKeyClass),
                DatatypeHelper.createSerializerDeserializer(mapOutputValueClass));
    } else {
        RecordDescriptor recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(mapOutputKeyClass,
                mapOutputValueClass);
        ISerializerDeserializer mapOutputKeySerializerDerserializer = DatatypeHelper
                .createSerializerDeserializer(mapOutputKeyClass);
        factory = new HadoopHashTuplePartitionComputerFactory(mapOutputKeySerializerDerserializer);
    }
    connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec, factory);
    return connectorDescriptor;
}
From source file:hamr.core.general.group.GeneralGroupComparator.java
License:Open Source License
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    if (key1 == null) {
        Configuration conf = getConf();
        JobConf jcon = new JobConf(conf);
        try {
            key1 = jcon.getMapOutputKeyClass().asSubclass(WritableComparable.class).newInstance();
            key2 = jcon.getMapOutputKeyClass().asSubclass(WritableComparable.class).newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            e.printStackTrace();
        }
    }
    try {
        buffer.reset(b1, s1, l1); // parse key1
        key1.readFields(buffer);

        buffer.reset(b2, s2, l2); // parse key2
        key2.readFields(buffer);

        buffer.reset(null, 0, 0); // clean up reference
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return compare(key1, key2); // compare them
}
From source file:org.apache.crunch.lib.sort.ReverseWritableComparator.java
License:Apache License
@SuppressWarnings("unchecked")
@Override
public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf != null) {
        JobConf jobConf = new JobConf(conf);
        comparator = WritableComparator
                .get(jobConf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java
License:Apache License
/**
 * Try initializing partially raw comparator for job.
 *
 * @param conf Configuration.
 */
private void initializePartiallyRawComparator(JobConf conf) {
    String clsName = conf.get(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(), null);

    if (clsName == null) {
        Class keyCls = conf.getMapOutputKeyClass();

        while (keyCls != null) {
            clsName = PARTIAL_COMPARATORS.get(keyCls.getName());

            if (clsName != null) {
                conf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(), clsName);

                break;
            }

            keyCls = keyCls.getSuperclass();
        }
    }
}