Example usage for org.apache.hadoop.mapred JobConf setClass

List of usage examples for org.apache.hadoop.mapred JobConf setClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf#setClass.

Prototype

public void setClass(String name, Class<?> theClass, Class<?> xface) 

Source Link

Document

Set the value of the name property to the name of theClass, which must implement the given interface xface.
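
For illustration, here is a minimal sketch of the set/get round trip: a class registered with setClass is read back with the matching getClass call under the same interface constraint and instantiated via ReflectionUtils. The Compressor interface, the GzipCompressor implementation, and the "my.compressor.class" key are assumptions invented for this sketch; they do not appear in the examples below.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

public class SetClassRoundTrip {

    // Hypothetical plug-in interface and default implementation.
    interface Compressor {
        byte[] compress(byte[] data);
    }

    static class GzipCompressor implements Compressor {
        public byte[] compress(byte[] data) {
            return data; // placeholder body, sufficient for the sketch
        }
    }

    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Store the implementation class name under the given key.
        // setClass rejects (with a RuntimeException) a class that does not
        // implement the supplied interface.
        conf.setClass("my.compressor.class", GzipCompressor.class, Compressor.class);

        // Read the class back under the same interface constraint and
        // instantiate it reflectively.
        Class<? extends Compressor> cls =
                conf.getClass("my.compressor.class", GzipCompressor.class, Compressor.class);
        Compressor compressor = ReflectionUtils.newInstance(cls, conf);
        System.out.println("Using " + compressor.getClass().getName());
    }
}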

Usage

From source file: org.sf.xrime.algorithms.statistics.VertexEdgeDoubleCounter.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    try {
        JobConf conf = new JobConf(context, AverageVertexDegree.class);
        conf.setJobName("AverageDegree");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(DoubleWritable.class);
        conf.setMapperClass(MapClass.class);
        conf.setCombinerClass(ReduceClass.class);
        conf.setReducerClass(ReduceClass.class);
        // The format of input data is generated with WritableSerialization.
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
        conf.setNumMapTasks(getMapperNum());
        // Only one reducer is permitted, or the return value will be wrong.
        conf.setNumReduceTasks(1);

        // Set the possible CounterFilter class
        if (doubleCounterFilterClass != null) {
            conf.setClass(counterFilterKey, doubleCounterFilterClass, DoubleCounterFilter.class);
        }

        this.runningJob = JobClient.runJob(conf);

        FileSystem fs = getDestination().getPath().getFileSystem(conf);

        Path dataPath = new Path(getDestination().getPath().toString() + "/part-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);

        Writable key = ReflectionUtils.newInstance(reader.getKeyClass().asSubclass(WritableComparable.class),
                conf);
        Writable value = ReflectionUtils.newInstance(reader.getValueClass().asSubclass(Writable.class), conf);

        while (reader.next(key, value)) {
            addCounter(((Text) key).toString(), ((DoubleWritable) value).get());
        }

        reader.close();
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    } catch (IllegalAccessException e) {
        throw new ProcessorExecutionException(e);
    }
}
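
On the task side, the class registered above is typically read back with the matching getClass call and instantiated reflectively. The lines below are a sketch only, assuming an old-API mapper or reducer in the same job and the same counterFilterKey string; they are not part of the original source.

// Sketch: recover the DoubleCounterFilter registered with setClass above.
// counterFilterKey is assumed to hold the same key string used in execute();
// the null default means "no filter configured".
public void configure(JobConf job) {
    Class<? extends DoubleCounterFilter> filterClass =
            job.getClass(counterFilterKey, null, DoubleCounterFilter.class);
    if (filterClass != null) {
        DoubleCounterFilter filter = ReflectionUtils.newInstance(filterClass, job);
        // ... apply the filter when emitting counters ...
    }
}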

From source file: org.sf.xrime.algorithms.transform.vertex.AdjVertex2AdjSetVertexTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, Vertex2LabeledTransformer.class);
    jobConf.setJobName("Vertex2Labelled");

    jobConf.setMapperClass(AdjVertex2AdjSetVertexMapper.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(ObjectWritable.class);
    jobConf.setNumReduceTasks(reducerNum);
    jobConf.setReducerClass(AdjVertex2AdjSetVertexReducer.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(AdjSetVertex.class);
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setClass(edgeFilterKey, edgeFilter, EdgeFilter.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.Vertex2LabeledTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, Vertex2LabeledTransformer.class);
    jobConf.setJobName("Vertex2Labelled");

    jobConf.setMapperClass(Vertex2LabeledMapper.class);
    jobConf.setNumReduceTasks(0);
    jobConf.setOutputKeyClass(Text.class);
    if (this.theClass == null) {
        throw new ProcessorExecutionException("Need to specify the output value class.");
    }
    jobConf.setOutputValueClass(this.theClass);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Vertex.class);
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    if (theLabelAdderClass != null) {
        jobConf.setClass(labelFactoryKey, theLabelAdderClass, LabelAdder.class);
    }
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.slc.sli.aggregation.mapreduce.map.ConfigurableMapReduceJob.java

License: Apache License

@SuppressWarnings("rawtypes")
protected static JobConf finalizeConfig(JobConf jobConf, ConfigSections s) throws IOException {

    Class<? extends Mapper> mapperClass = JobConfiguration.mapper.getMapClass(mapper);
    Class<? extends Reducer> reducerClass = JobConfiguration.function.getReduceClass(reduceFunction);
    Map<String, String> idFields = s.getMapper().getMapIdFields();

    // validate we have enough to continue
    boolean valid = true;
    if (mapperClass == null) {
        log.severe("Invalid map/reduce configuration detected : no mapper class specified.");
        valid = false;
    }
    if (idFields == null) {
        idFields = new HashMap<String, String>();
        log.severe("Invalid map/reduce configuration detected : no map id fields specified.");
        valid = false;
    }
    if (mapCollection == null) {
        log.severe("Invalid map/reduce configuration detected : no map collection specified.");
        valid = false;
    }
    if (mapQuery == null) {
        log.severe("Invalid map/reduce configuration detected : no map query specified.");
        valid = false;
    }
    if (mapFields == null) {
        log.severe("Invalid map/reduce configuration detected : no map input fields specified.");
        valid = false;
    }
    if (reducerClass == null) {
        log.severe("Invalid map/reduce configuration detected : no reducer class specified.");
        valid = false;
    }
    if (reduceCollection == null) {
        log.severe("Invalid map/reduce configuration detected : no reduce collection specified.");
        valid = false;
    }
    if (reduceField == null) {
        log.severe("Invalid map/reduce configuration detected : no reduce field specified.");
        valid = false;
    }

    if (!valid) {
        throw new IllegalArgumentException("Invalid mapper specified. Check log for details.");
    }

    jobConf.set("mapred.output.dir", String.format("%s-%s-%d", s.getMapper().getMapper(),
            s.getMetadata().getFunction(), System.currentTimeMillis()));

    jobConf.setJobName(s.getMetadata().getDescription() == null ? "M/R Job" : s.getMetadata().getDescription());

    // enable speculative execution. Multiple mapper tasks are created for the same split.
    // First one to finish wins; the remaining tasks are terminated.
    jobConf.setSpeculativeExecution(true);
    jobConf.setUseNewMapper(true);
    jobConf.setUseNewReducer(true);

    /**
     * TODO -- decide if this is required.
    String id = conf.get("@ID@");
    String tenantId = conf.get("@TENANT_ID@");
    for (Map.Entry<String, Object> entry : query.entrySet()) {
    Object value = entry.getValue();
    if (value instanceof String) {
        String s = (String) value;
        if (s.indexOf("@ID@") >= 0 && id != null) {
            s = s.replace("@ID@", id);
            query.put(entry.getKey(), s);
        }
        if (s.indexOf("@TENANT_ID@") >= 0 && tenantId != null) {
            s = s.replace("@TENANT_ID@", tenantId);
            query.put(entry.getKey(), s);
        }
    }
    }
            
    if (updateField.indexOf("@ID@") >= 0 && id != null) {
    updateField = updateField.replace("@ID@", id);
    }
    if (updateField.indexOf("@TENANT_ID@") >= 0 && tenantId != null) {
    updateField = updateField.replace("@TENANT_ID@", tenantId);
    }
    */

    MongoConfigUtil.setQuery(jobConf, new BasicDBObject(mapQuery));

    Map<String, Object> fullFields = new HashMap<String, Object>();
    for (String f : idFields.values()) {
        fullFields.put(f, 1);
    }
    fullFields.putAll(mapFields);

    MongoConfigUtil.setFields(jobConf, new BasicDBObject(fullFields));
    MongoConfigUtil.setInputKey(jobConf, idFields.get("id"));
    MongoConfigUtil.setInputURI(jobConf, "mongodb://" + MONGO_HOST + "/" + mapCollection);
    MongoConfigUtil.setMapperOutputKey(jobConf, TenantAndIdEmittableKey.class);
    MongoConfigUtil.setMapperOutputValue(jobConf, BSONWritable.class);
    MongoConfigUtil.setOutputKey(jobConf, TenantAndIdEmittableKey.class);
    MongoConfigUtil.setOutputValue(jobConf, BSONWritable.class);

    // TODO - this probably should be configurable
    MongoConfigUtil.setReadSplitsFromSecondary(jobConf, true);

    MongoConfigUtil.setSplitSize(jobConf, 32);

    jobConf.setClass("mapred.input.key.class", TenantAndIdEmittableKey.class, EmittableKey.class);
    jobConf.setClass("mapred.input.value.class", BSONWritable.class, Object.class);

    jobConf.setClass("mapred.output.key.class", TenantAndIdEmittableKey.class, EmittableKey.class);
    jobConf.setClass("mapred.output.value.class", BSONWritable.class, Object.class);

    jobConf.setClass("mapreduce.inputformat.class", MongoTenantAndIdInputFormat.class, MongoInputFormat.class);
    jobConf.setClass("mapreduce.outputformat.class", MongoAggFormatter.class, MongoOutputFormat.class);
    MongoConfigUtil.setInputFormat(jobConf, MongoTenantAndIdInputFormat.class);
    MongoConfigUtil.setOutputFormat(jobConf, MongoAggFormatter.class);

    /**
     * Configure how hadoop calculates splits.
     *
     * We enable input splits to avoid having the entire job executed on a single hadoop node.
     *
     * We enable shard chunk splitting to allow mongo to specify how to split the input.
     *
     * We disable read splits from shards because we want hadoop connecting to mongos, not
     * mongod directly. This avoids incorrect results in situations where data is in the process
     * of migration at the same time hadoop is trying to read it.
     *
     * TODO - determine if we also need to set the input split key pattern. This depends
     * on how well data is distributed by _id. Setting the key pattern gives finer grained
     * control over how splits are calculated.
     */
    MongoConfigUtil.setCreateInputSplits(jobConf, true);
    MongoConfigUtil.setShardChunkSplittingEnabled(jobConf, true);
    MongoConfigUtil.setReadSplitsFromShards(jobConf, false);

    MongoConfigUtil.setOutputURI(jobConf, "mongodb://" + MONGO_HOST + "/" + reduceCollection);

    jobConf.setJarByClass(JobConfiguration.class);

    MongoConfigUtil.setMapper(jobConf, mapperClass);
    jobConf.setClass(JobContext.MAP_CLASS_ATTR, mapperClass, Mapper.class);

    MongoConfigUtil.setReducer(jobConf, reducerClass);
    jobConf.setClass(JobContext.REDUCE_CLASS_ATTR, reducerClass, Reducer.class);

    // Set this relatively high to keep the total map execution time low.
    // Formula:  1.75 * (# nodes * max tasks)
    // TODO : replace this hardcoded value with one calculated from configuration information.
    jobConf.setNumReduceTasks(52);

    // Add the configuration itself to the JobConf.
    JobConfiguration.toHadoopConfiguration(s, jobConf);

    return jobConf;
}
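
Note that this last example uses setClass both for framework-level keys (such as JobContext.MAP_CLASS_ATTR) and for keys read back by the MongoDB connector. In every call the third argument lets the configuration reject a mismatched class up front; the lines below are a sketch of that behavior, reusing classes from the example above (setClass is expected to throw a RuntimeException when the class does not implement or extend the given type).

// Accepted, as in the example: MongoAggFormatter is registered against
// MongoOutputFormat, which it must extend.
jobConf.setClass("mapreduce.outputformat.class", MongoAggFormatter.class, MongoOutputFormat.class);

// Rejected at configuration time: String is not a MongoOutputFormat, so this
// call is expected to throw a RuntimeException rather than store the value.
// jobConf.setClass("mapreduce.outputformat.class", String.class, MongoOutputFormat.class);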