Example usage for org.apache.hadoop.mapred JobConf setClass

List of usage examples for org.apache.hadoop.mapred JobConf setClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf#setClass.

Prototype

public void setClass(String name, Class<?> theClass, Class<?> xface) 

Source Link

Document

Set the value of the name property to the name of theClass, which must implement the given interface xface.
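
For illustration, here is a minimal sketch of the set/get round trip: a class registered with setClass is read back with the matching getClass call under the same interface constraint and instantiated via ReflectionUtils. The Compressor interface, the GzipCompressor implementation, and the "my.compressor.class" key are assumptions invented for this sketch; they do not appear in the examples below.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

public class SetClassRoundTrip {

    // Hypothetical plug-in interface and default implementation.
    interface Compressor {
        byte[] compress(byte[] data);
    }

    static class GzipCompressor implements Compressor {
        public byte[] compress(byte[] data) {
            return data; // placeholder body, sufficient for the sketch
        }
    }

    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Store the implementation class name under the given key.
        // setClass rejects (with a RuntimeException) a class that does not
        // implement the supplied interface.
        conf.setClass("my.compressor.class", GzipCompressor.class, Compressor.class);

        // Read the class back under the same interface constraint and
        // instantiate it reflectively.
        Class<? extends Compressor> cls =
                conf.getClass("my.compressor.class", GzipCompressor.class, Compressor.class);
        Compressor compressor = ReflectionUtils.newInstance(cls, conf);
        System.out.println("Using " + compressor.getClass().getName());
    }
}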

Usage

From source file: org.sf.xrime.algorithms.statistics.VertexEdgeDoubleCounter.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    try {
        JobConf conf = new JobConf(context, AverageVertexDegree.class);
        conf.setJobName("AverageDegree");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(DoubleWritable.class);
        conf.setMapperClass(MapClass.class);
        conf.setCombinerClass(ReduceClass.class);
        conf.setReducerClass(ReduceClass.class);
        // The format of input data is generated with WritableSerialization.
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
        conf.setNumMapTasks(getMapperNum());
        // Only one reducer is permitted, or the return value will be wrong.
        conf.setNumReduceTasks(1);

        // Set the possible CounterFilter class
        if (doubleCounterFilterClass != null) {
            conf.setClass(counterFilterKey, doubleCounterFilterClass, DoubleCounterFilter.class);
        }

        this.runningJob = JobClient.runJob(conf);

        FileSystem fs = getDestination().getPath().getFileSystem(conf);

        Path dataPath = new Path(getDestination().getPath().toString() + "/part-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);

        Writable key = ReflectionUtils.newInstance(reader.getKeyClass().asSubclass(WritableComparable.class),
                conf);
        Writable value = ReflectionUtils.newInstance(reader.getValueClass().asSubclass(Writable.class), conf);

        while (reader.next(key, value)) {
            addCounter(((Text) key).toString(), ((DoubleWritable) value).get());
        }

        reader.close();
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    } catch (IllegalAccessException e) {
        throw new ProcessorExecutionException(e);
    }
}
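
On the task side, the class registered above is typically read back with the matching getClass call and instantiated reflectively. The lines below are a sketch only, assuming an old-API mapper or reducer in the same job and the same counterFilterKey string; they are not part of the original source.

// Sketch: recover the DoubleCounterFilter registered with setClass above.
// counterFilterKey is assumed to hold the same key string used in execute();
// the null default means "no filter configured".
public void configure(JobConf job) {
    Class<? extends DoubleCounterFilter> filterClass =
            job.getClass(counterFilterKey, null, DoubleCounterFilter.class);
    if (filterClass != null) {
        DoubleCounterFilter filter = ReflectionUtils.newInstance(filterClass, job);
        // ... apply the filter when emitting counters ...
    }
}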

From source file: org.sf.xrime.algorithms.transform.vertex.AdjVertex2AdjSetVertexTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, Vertex2LabeledTransformer.class);
    jobConf.setJobName("Vertex2Labelled");

    jobConf.setMapperClass(AdjVertex2AdjSetVertexMapper.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(ObjectWritable.class);
    jobConf.setNumReduceTasks(reducerNum);
    jobConf.setReducerClass(AdjVertex2AdjSetVertexReducer.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(AdjSetVertex.class);
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setClass(edgeFilterKey, edgeFilter, EdgeFilter.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.Vertex2LabeledTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, Vertex2LabeledTransformer.class);
    jobConf.setJobName("Vertex2Labelled");

    jobConf.setMapperClass(Vertex2LabeledMapper.class);
    jobConf.setNumReduceTasks(0);
    jobConf.setOutputKeyClass(Text.class);
    if (this.theClass == null) {
        throw new ProcessorExecutionException("Need to specify the output value class.");
    }
    jobConf.setOutputValueClass(this.theClass);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Vertex.class);
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    if (theLabelAdderClass != null) {
        jobConf.setClass(labelFactoryKey, theLabelAdderClass, LabelAdder.class);
    }
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.slc.sli.aggregation.mapreduce.map.ConfigurableMapReduceJob.java

License: Apache License

@SuppressWarnings("rawtypes")
protected static JobConf finalizeConfig(JobConf jobConf, ConfigSections s) throws IOException {

    Class<? extends Mapper> mapperClass = JobConfiguration.mapper.getMapClass(mapper);
    Class<? extends Reducer> reducerClass = JobConfiguration.function.getReduceClass(reduceFunction);
    Map<String, String> idFields = s.getMapper().getMapIdFields();

    // validate we have enough to continue
    boolean valid = true;
    if (mapperClass == null) {
        log.severe("Invalid map/reduce configuration detected : no mapper class specified.");
        valid = false;
    }
    if (idFields == null) {
        idFields = new HashMap<String, String>();
        log.severe("Invalid map/reduce configuration detected : no map id fields specified.");
        valid = false;
    }
    if (mapCollection == null) {
        log.severe("Invalid map/reduce configuration detected : no map collection specified.");
        valid = false;
    }
    if (mapQuery == null) {
        log.severe("Invalid map/reduce configuration detected : no map query specified.");
        valid = false;
    }
    if (mapFields == null) {
        log.severe("Invalid map/reduce configuration detected : no map input fields specified.");
        valid = false;
    }
    if (reducerClass == null) {
        log.severe("Invalid map/reduce configuration detected : no reducer class specified.");
        valid = false;
    }
    if (reduceCollection == null) {
        log.severe("Invalid map/reduce configuration detected : no reduce collection specified.");
        valid = false;
    }
    if (reduceField == null) {
        log.severe("Invalid map/reduce configuration detected : no reduce field specified.");
        valid = false;
    }

    if (!valid) {
        throw new IllegalArgumentException("Invalid mapper specified. Check log for details.");
    }

    jobConf.set("mapred.output.dir", String.format("%s-%s-%d", s.getMapper().getMapper(),
            s.getMetadata().getFunction(), System.currentTimeMillis()));

    jobConf.setJobName(s.getMetadata().getDescription() == null ? "M/R Job" : s.getMetadata().getDescription());

    // enable speculative execution. Multiple mapper tasks are created for the same split.
    // First one to finish wins; the remaining tasks are terminated.
    jobConf.setSpeculativeExecution(true);
    jobConf.setUseNewMapper(true);
    jobConf.setUseNewReducer(true);

    /**
     * TODO -- decide if this is required.
    String id = conf.get("@ID@");
    String tenantId = conf.get("@TENANT_ID@");
    for (Map.Entry<String, Object> entry : query.entrySet()) {
    Object value = entry.getValue();
    if (value instanceof String) {
        String s = (String) value;
        if (s.indexOf("@ID@") >= 0 && id != null) {
            s = s.replace("@ID@", id);
            query.put(entry.getKey(), s);
        }
        if (s.indexOf("@TENANT_ID@") >= 0 && tenantId != null) {
            s = s.replace("@TENANT_ID@", tenantId);
            query.put(entry.getKey(), s);
        }
    }
    }
            
    if (updateField.indexOf("@ID@") >= 0 && id != null) {
    updateField = updateField.replace("@ID@", id);
    }
    if (updateField.indexOf("@TENANT_ID@") >= 0 && tenantId != null) {
    updateField = updateField.replace("@TENANT_ID@", tenantId);
    }
    */

    MongoConfigUtil.setQuery(jobConf, new BasicDBObject(mapQuery));

    Map<String, Object> fullFields = new HashMap<String, Object>();
    for (String f : idFields.values()) {
        fullFields.put(f, 1);
    }
    fullFields.putAll(mapFields);

    MongoConfigUtil.setFields(jobConf, new BasicDBObject(fullFields));
    MongoConfigUtil.setInputKey(jobConf, idFields.get("id"));
    MongoConfigUtil.setInputURI(jobConf, "mongodb://" + MONGO_HOST + "/" + mapCollection);
    MongoConfigUtil.setMapperOutputKey(jobConf, TenantAndIdEmittableKey.class);
    MongoConfigUtil.setMapperOutputValue(jobConf, BSONWritable.class);
    MongoConfigUtil.setOutputKey(jobConf, TenantAndIdEmittableKey.class);
    MongoConfigUtil.setOutputValue(jobConf, BSONWritable.class);

    // TODO - this probably should be configurable
    MongoConfigUtil.setReadSplitsFromSecondary(jobConf, true);

    MongoConfigUtil.setSplitSize(jobConf, 32);

    jobConf.setClass("mapred.input.key.class", TenantAndIdEmittableKey.class, EmittableKey.class);
    jobConf.setClass("mapred.input.value.class", BSONWritable.class, Object.class);

    jobConf.setClass("mapred.output.key.class", TenantAndIdEmittableKey.class, EmittableKey.class);
    jobConf.setClass("mapred.output.value.class", BSONWritable.class, Object.class);

    jobConf.setClass("mapreduce.inputformat.class", MongoTenantAndIdInputFormat.class, MongoInputFormat.class);
    jobConf.setClass("mapreduce.outputformat.class", MongoAggFormatter.class, MongoOutputFormat.class);
    MongoConfigUtil.setInputFormat(jobConf, MongoTenantAndIdInputFormat.class);
    MongoConfigUtil.setOutputFormat(jobConf, MongoAggFormatter.class);

    /**
     * Configure how hadoop calculates splits.
     *
     * We enable input splits to avoid having the entire job executed on a single hadoop node.
     *
     * We enable shard chunk splitting to allow mongo to specify how to split the input.
     *
     * We disable read splits from shards because we want hadoop connecting to mongos, not
     * mongod directly. This avoids incorrect results in situations where data is in the process
     * of migration at the same time hadoop is trying to read it.
     *
     * TODO - determine if we also need to set the input split key pattern. This depends
     * on how well data is distributed by _id. Setting the key pattern gives finer grained
     * control over how splits are calculated.
     */
    MongoConfigUtil.setCreateInputSplits(jobConf, true);
    MongoConfigUtil.setShardChunkSplittingEnabled(jobConf, true);
    MongoConfigUtil.setReadSplitsFromShards(jobConf, false);

    MongoConfigUtil.setOutputURI(jobConf, "mongodb://" + MONGO_HOST + "/" + reduceCollection);

    jobConf.setJarByClass(JobConfiguration.class);

    MongoConfigUtil.setMapper(jobConf, mapperClass);
    jobConf.setClass(JobContext.MAP_CLASS_ATTR, mapperClass, Mapper.class);

    MongoConfigUtil.setReducer(jobConf, reducerClass);
    jobConf.setClass(JobContext.REDUCE_CLASS_ATTR, reducerClass, Reducer.class);

    // Set this relatively high to keep the total map execution time low.
    // Formula:  1.75 * (# nodes * max tasks)
    // TODO : replace this hardcoded value with one calculated from configuration information.
    jobConf.setNumReduceTasks(52);

    // Add the configuration itself to the JobConf.
    JobConfiguration.toHadoopConfiguration(s, jobConf);

    return jobConf;
}
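
Note that this last example uses setClass both for framework-level keys (such as JobContext.MAP_CLASS_ATTR) and for keys read back by the MongoDB connector. In every call the third argument lets the configuration reject a mismatched class up front; the lines below are a sketch of that behavior, reusing classes from the example above (setClass is expected to throw a RuntimeException when the class does not implement or extend the given type).

// Accepted, as in the example: MongoAggFormatter is registered against
// MongoOutputFormat, which it must extend.
jobConf.setClass("mapreduce.outputformat.class", MongoAggFormatter.class, MongoOutputFormat.class);

// Rejected at configuration time: String is not a MongoOutputFormat, so this
// call is expected to throw a RuntimeException rather than store the value.
// jobConf.setClass("mapreduce.outputformat.class", String.class, MongoOutputFormat.class);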