Example usage for org.apache.hadoop.mapred JobConf getMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapred JobConf getMapOutputKeyClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapred.JobConf.getMapOutputKeyClass().

Prototype

public Class<?> getMapOutputKeyClass() 

Document

Get the key class for the map output data.
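
For orientation, here is a minimal sketch of the call itself. The helper class and method below are hypothetical (they are not taken from the projects listed under Usage) and assume the configured key class is a Writable; ReflectionUtils is used so the new instance is also configured if the class happens to be Configurable.

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

public class MapOutputKeyClassExample {

    // Hypothetical helper: instantiate an empty, reusable key object of
    // whatever class the map phase emits. getMapOutputKeyClass() falls back
    // to the job's output key class when setMapOutputKeyClass() has not been
    // called explicitly.
    public static Writable newMapOutputKey(JobConf conf) {
        Class<?> keyClass = conf.getMapOutputKeyClass();
        return (Writable) ReflectionUtils.newInstance(keyClass, conf);
    }
}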

Usage

From source file:com.alexholmes.hadooputils.sort.SortInputSampler.java

License:Apache License

public static <K, V> void writePartitionFile(JobConf job, Sampler<K, V> sampler) throws IOException {
    Configuration conf = job;
    // Use the input format defined in the job, NOT the one provided by the
    // parent class's writePartitionFile() method, which by default would be a
    // plain TextInputFormat.
    final InputFormat inf = job.getInputFormat();
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}

From source file:com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMapper.java

License:Open Source License

@SuppressWarnings("unchecked")
@Override
public void configure(JobConf job) {
    super.configure(job);
    this.keyClass = job.getMapOutputKeyClass();
    this.valClass = job.getMapOutputValueClass();
    numprocs = job.getInt("numProcs", 1);
    overpartition = job.getInt("overpartition", 1);

    String ingressMethod = job.get("ingress");
    if (ingressMethod.equals("greedy")) {
        this.ingress = new GreedyIngress<VidType>(numprocs);
    } else {
        this.ingress = new RandomIngress<VidType>(numprocs);
    }

    try {
        this.graphparser = (GraphParser) Class.forName(job.get("GraphParser")).newInstance();
        this.vidparser = (FieldParser) Class.forName(job.get("VidParser")).newInstance();
        this.vdataparser = (FieldParser) Class.forName(job.get("VdataParser")).newInstance();
        this.edataparser = (FieldParser) Class.forName(job.get("EdataParser")).newInstance();
        this.mapKey = (KeyType) keyClass.newInstance();
        this.mapValue = (ValueType) valClass.newInstance();
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java

License:Apache License

public ReducerWrapperMapred(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    JobConf jobConf = new JobConf((Configuration) invocationParameters.getConfiguration()); //Clone JobConf, so the temporary settings do not pollute other tasks

    LOG.info("Starting reducer:" + HadoopInvocationParameters.dumpConfiguration(jobConf));

    JobID jobID = (JobID) invocationParameters.getJobId();
    this.hadoopPartition = hadoopPartition;
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            jobConf);

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition));

    updateJobConf(jobConf, taskAttemptID, region);

    context = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);

    reducer = (org.apache.hadoop.mapred.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobConf.getReducerClass(), jobConf);

    reducer.configure(jobConf);

    OutputFormat outputFormat = jobConf.getOutputFormat();

    FileSystem fs = FileSystem.get(jobConf);
    recordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat.getRecordWriter(fs,
            jobConf, getOutputName(hadoopPartition), Reporter.NULL);

    committer = jobConf.getOutputCommitter();

    // Create a task object so it can handle file format initialization.
    // The ReduceTask class is private in Hadoop 1.x, so we have to go through reflection.
    try {
        Class reduceTask = Class.forName("org.apache.hadoop.mapred.ReduceTask");
        Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class, TaskAttemptID.class,
                int.class, int.class, int.class);
        reduceTaskConstructor.setAccessible(true);
        Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, hadoopPartition, 0, 0);
        task.setConf(jobConf);
        task.initialize(jobConf, jobID, Reporter.NULL, false);
    } catch (Exception e) {
        throw new IOException("Cannot initialize ReduceTask", e);
    }

    committer.setupTask(context);

    Class<INKEY> keyClass = (Class<INKEY>) jobConf.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobConf.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region,
            appId, HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, jobConf) > 0, firstKeySerializer,
            valueSerializer, invocationParameters.getSerializationMode(), secondKeySerializer, keyClass,
            valueClass, sort, HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, jobConf),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, jobConf),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, jobConf));
    transport = DataGridChunkedCollectionReader.getGridReader(params);
    outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
        @Override
        public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
            recordWriter.write(outkey, outvalue);
        }
    };
}

From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java

License:Apache License

public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory hadoopClassFactory) {
    RecordDescriptor recordDescriptor = null;
    String mapOutputKeyClassName = conf.getMapOutputKeyClass().getName();
    String mapOutputValueClassName = conf.getMapOutputValueClass().getName();
    try {
        if (hadoopClassFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(mapOutputKeyClassName),
                    (Class<? extends Writable>) Class.forName(mapOutputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) hadoopClassFactory.loadClass(mapOutputKeyClassName),
                    (Class<? extends Writable>) hadoopClassFactory.loadClass(mapOutputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return recordDescriptor;
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

public static InMemorySortOperatorDescriptor getInMemorySorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    InMemorySortOperatorDescriptor inMemorySortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    inMemorySortOp = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return inMemorySortOp;
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

public static ExternalSortOperatorDescriptor getExternalSorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    ExternalSortOperatorDescriptor externalSortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    externalSortOp = new ExternalSortOperatorDescriptor(spec,
            conf.getInt(HYRACKS_EX_SORT_FRAME_LIMIT, DEFAULT_EX_SORT_FRAME_LIMIT), new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return externalSortOp;
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

public static MToNPartitioningConnectorDescriptor getMtoNHashPartitioningConnector(JobConf conf,
        IConnectorDescriptorRegistry spec) {

    Class mapOutputKeyClass = conf.getMapOutputKeyClass();
    Class mapOutputValueClass = conf.getMapOutputValueClass();

    MToNPartitioningConnectorDescriptor connectorDescriptor = null;
    ITuplePartitionComputerFactory factory = null;
    conf.getMapOutputKeyClass();
    if (conf.getPartitionerClass() != null
            && !conf.getPartitionerClass().getName().startsWith("org.apache.hadoop")) {
        Class<? extends Partitioner> partitioner = conf.getPartitionerClass();
        factory = new HadoopPartitionerTuplePartitionComputerFactory(partitioner,
                DatatypeHelper.createSerializerDeserializer(mapOutputKeyClass),
                DatatypeHelper.createSerializerDeserializer(mapOutputValueClass));
    } else {
        RecordDescriptor recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(mapOutputKeyClass,
                mapOutputValueClass);
        ISerializerDeserializer mapOutputKeySerializerDerserializer = DatatypeHelper
                .createSerializerDeserializer(mapOutputKeyClass);
        factory = new HadoopHashTuplePartitionComputerFactory(mapOutputKeySerializerDerserializer);
    }
    connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec, factory);
    return connectorDescriptor;
}

From source file:hamr.core.general.group.GeneralGroupComparator.java

License:Open Source License

public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    if (key1 == null) {
        Configuration conf = getConf();
        JobConf jcon = new JobConf(conf);
        try {
            key1 = jcon.getMapOutputKeyClass().asSubclass(WritableComparable.class).newInstance();
            key2 = jcon.getMapOutputKeyClass().asSubclass(WritableComparable.class).newInstance();
        } catch (InstantiationException | IllegalAccessException e) {
            e.printStackTrace();
        }
    }
    try {
        buffer.reset(b1, s1, l1); // parse key1
        key1.readFields(buffer);

        buffer.reset(b2, s2, l2); // parse key2
        key2.readFields(buffer);

        buffer.reset(null, 0, 0); // clean up reference
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    return compare(key1, key2); // compare them
}

From source file:org.apache.crunch.lib.sort.ReverseWritableComparator.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf != null) {
        JobConf jobConf = new JobConf(conf);
        comparator = WritableComparator
                .get(jobConf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java

License:Apache License

/**
 * Try to initialize a partially raw comparator for the job.
 *
 * @param conf Configuration.
 */
private void initializePartiallyRawComparator(JobConf conf) {
    String clsName = conf.get(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(), null);

    if (clsName == null) {
        Class keyCls = conf.getMapOutputKeyClass();

        while (keyCls != null) {
            clsName = PARTIAL_COMPARATORS.get(keyCls.getName());

            if (clsName != null) {
                conf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(), clsName);

                break;
            }

            keyCls = keyCls.getSuperclass();
        }
    }
}