Example usage for org.apache.hadoop.mapred JobConf getMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapred JobConf getMapOutputKeyClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapred.JobConf.getMapOutputKeyClass().

Prototype

public Class<?> getMapOutputKeyClass() 

Document

Get the key class for the map output data.
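
For orientation, here is a minimal sketch of the call itself. The helper class and method below are hypothetical (they are not taken from the projects listed under Usage) and assume the configured key class is a Writable; ReflectionUtils is used so the new instance is also configured if the class happens to be Configurable.

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

public class MapOutputKeyClassExample {

    // Hypothetical helper: instantiate an empty, reusable key object of
    // whatever class the map phase emits. getMapOutputKeyClass() falls back
    // to the job's output key class when setMapOutputKeyClass() has not been
    // called explicitly.
    public static Writable newMapOutputKey(JobConf conf) {
        Class<?> keyClass = conf.getMapOutputKeyClass();
        return (Writable) ReflectionUtils.newInstance(keyClass, conf);
    }
}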

Usage

From source file:com.alexholmes.hadooputils.sort.SortInputSampler.java

License:Apache License

public static <K, V> void writePartitionFile(JobConf job, Sampler<K, V> sampler) throws IOException {
    Configuration conf = job;
    // Use the input format defined in the job, NOT the one provided by the
    // parent class's writePartitionFile() method, which by default would be a
    // plain TextInputFormat.
    final InputFormat inf = job.getInputFormat();
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}

From source file:com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMapper.java

License:Open Source License

@SuppressWarnings("unchecked")
@Override
public void configure(JobConf job) {
    super.configure(job);
    this.keyClass = job.getMapOutputKeyClass();
    this.valClass = job.getMapOutputValueClass();
    numprocs = job.getInt("numProcs", 1);
    overpartition = job.getInt("overpartition", 1);

    String ingressMethod = job.get("ingress");
    if (ingressMethod.equals("greedy")) {
        this.ingress = new GreedyIngress<VidType>(numprocs);
    } else {
        this.ingress = new RandomIngress<VidType>(numprocs);
    }

    try {
        this.graphparser = (GraphParser) Class.forName(job.get("GraphParser")).newInstance();
        this.vidparser = (FieldParser) Class.forName(job.get("VidParser")).newInstance();
        this.vdataparser = (FieldParser) Class.forName(job.get("VdataParser")).newInstance();
        this.edataparser = (FieldParser) Class.forName(job.get("EdataParser")).newInstance();
        this.mapKey = (KeyType) keyClass.newInstance();
        this.mapValue = (ValueType) valClass.newInstance();
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java

License:Apache License

public ReducerWrapperMapred(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    JobConf jobConf = new JobConf((Configuration) invocationParameters.getConfiguration()); //Clone JobConf, so the temporary settings do not pollute other tasks

    LOG.info("Starting reducer:" + HadoopInvocationParameters.dumpConfiguration(jobConf));

    JobID jobID = (JobID) invocationParameters.getJobId();
    this.hadoopPartition = hadoopPartition;
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            jobConf);

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition));

    updateJobConf(jobConf, taskAttemptID, region);

    context = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);

    reducer = (org.apache.hadoop.mapred.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobConf.getReducerClass(), jobConf);

    reducer.configure(jobConf);

    OutputFormat outputFormat = jobConf.getOutputFormat();

    FileSystem fs = FileSystem.get(jobConf);
    recordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat.getRecordWriter(fs,
            jobConf, getOutputName(hadoopPartition), Reporter.NULL);

    committer = jobConf.getOutputCommitter();

    // Create a task object so it can handle file format initialization.
    // The ReduceTask class is private in Hadoop 1.x, so we have to go through reflection.
    try {
        Class reduceTask = Class.forName("org.apache.hadoop.mapred.ReduceTask");
        Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class, TaskAttemptID.class,
                int.class, int.class, int.class);
        reduceTaskConstructor.setAccessible(true);
        Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, hadoopPartition, 0, 0);
        task.setConf(jobConf);
        task.initialize(jobConf, jobID, Reporter.NULL, false);
    } catch (Exception e) {
        throw new IOException("Cannot initialize ReduceTask", e);
    }

    committer.setupTask(context);

    Class<INKEY> keyClass = (Class<INKEY>) jobConf.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobConf.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, jobConf) > 0, firstKeySerializer,
            valueSerializer, invocationParameters.getSerializationMode(), secondKeySerializer, keyClass,
            valueClass, sort, HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, jobConf),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, jobConf),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, jobConf));
    transport = DataGridChunkedCollectionReader.getGridReader(params);
    outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
        @Override
        public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
            recordWriter.write(outkey, outvalue);
        }
    };
}

From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java

License:Apache License

public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory hadoopClassFactory) {
    RecordDescriptor recordDescriptor = null;
    String mapOutputKeyClassName = conf.getMapOutputKeyClass().getName();
    String mapOutputValueClassName = conf.getMapOutputValueClass().getName();
    try {
        if (hadoopClassFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(mapOutputKeyClassName),
                    (Class<? extends Writable>) Class.forName(mapOutputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) hadoopClassFactory.loadClass(mapOutputKeyClassName),
                    (Class<? extends Writable>) hadoopClassFactory.loadClass(mapOutputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return recordDescriptor;
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

public static InMemorySortOperatorDescriptor getInMemorySorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    InMemorySortOperatorDescriptor inMemorySortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    inMemorySortOp = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return inMemorySortOp;
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

public static ExternalSortOperatorDescriptor getExternalSorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    ExternalSortOperatorDescriptor externalSortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    externalSortOp = new ExternalSortOperatorDescriptor(spec,
            conf.getInt(HYRACKS_EX_SORT_FRAME_LIMIT, DEFAULT_EX_SORT_FRAME_LIMIT), new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return externalSortOp;
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

public static MToNPartitioningConnectorDescriptor getMtoNHashPartitioningConnector(JobConf conf,
        IConnectorDescriptorRegistry spec) {

    Class mapOutputKeyClass = conf.getMapOutputKeyClass();
    Class mapOutputValueClass = conf.getMapOutputValueClass();

    MToNPartitioningConnectorDescriptor connectorDescriptor = null;
    ITuplePartitionComputerFactory factory = null;
    conf.getMapOutputKeyClass();
    if (conf.getPartitionerClass() != null
            && !conf.getPartitionerClass().getName().startsWith("org.apache.hadoop")) {
        Class<? extends Partitioner> partitioner = conf.getPartitionerClass();
        factory = new HadoopPartitionerTuplePartitionComputerFactory(partitioner,
                DatatypeHelper.createSerializerDeserializer(mapOutputKeyClass),
                DatatypeHelper.createSerializerDeserializer(mapOutputValueClass));
    } else {
        RecordDescriptor recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(mapOutputKeyClass,
                mapOutputValueClass);
        ISerializerDeserializer mapOutputKeySerializerDerserializer = DatatypeHelper
                .createSerializerDeserializer(mapOutputKeyClass);
        factory = new HadoopHashTuplePartitionComputerFactory(mapOutputKeySerializerDerserializer);
    }
    connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec, factory);
    return connectorDescriptor;
}

From source file:hamr.core.general.group.GeneralGroupComparator.java

License:Open Source License

public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    if (key1 == null) {
        Configuration conf = getConf();
        JobConf jcon = new JobConf(conf);
        try {
            key1 = jcon.getMapOutputKeyClass().asSubclass(WritableComparable.class).newInstance();
            key2 = jcon.getMapOutputKeyClass().asSubclass(WritableComparable.class).newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            e.printStackTrace();
        }
    }
    try {
        buffer.reset(b1, s1, l1); // parse key1
        key1.readFields(buffer);

        buffer.reset(b2, s2, l2); // parse key2
        key2.readFields(buffer);

        buffer.reset(null, 0, 0); // clean up reference
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    return compare(key1, key2); // compare them
}

From source file:org.apache.crunch.lib.sort.ReverseWritableComparator.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf != null) {
        JobConf jobConf = new JobConf(conf);
        comparator = WritableComparator
                .get(jobConf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java

License:Apache License

/**
 * Try to initialize a partially raw comparator for the job.
 *
 * @param conf Configuration.
 */
private void initializePartiallyRawComparator(JobConf conf) {
    String clsName = conf.get(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(), null);

    if (clsName == null) {
        Class keyCls = conf.getMapOutputKeyClass();

        while (keyCls != null) {
            clsName = PARTIAL_COMPARATORS.get(keyCls.getName());

            if (clsName != null) {
                conf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(), clsName);

                break;
            }

            keyCls = keyCls.getSuperclass();
        }
    }
}