List of usage examples for org.apache.hadoop.mapred.JobConf.setLong
public void setLong(String name, long value)
Sets the value of the name property to a long.
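A minimal, self-contained sketch of the call in isolation; the property name "my.job.start.ms" and the class name SetLongExample are illustrative only and do not appear in the examples below. setLong stores the value under the given property name, and the matching getLong reads it back with a default.

import org.apache.hadoop.mapred.JobConf;

public class SetLongExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Illustrative property name: store a long-valued setting in the job configuration.
        conf.setLong("my.job.start.ms", System.currentTimeMillis());

        // Read it back with getLong, supplying a default in case the property is unset.
        long startMs = conf.getLong("my.job.start.ms", 0L);
        System.out.println("my.job.start.ms = " + startMs);
    }
}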
From source file: voldemort.store.readwrite.mr.HadoopRWStoreBuilder.java
License: Apache License
/**
 * Run the job
 */
public void build() {
    JobConf conf = new JobConf(config);
    conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
    conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
    conf.set("stores.xml",
             new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
    conf.setInt("vector.node.id", this.vectorNodeId);
    // Long-valued job parameters recorded with setLong
    conf.setLong("vector.node.version", this.vectorNodeVersion);
    conf.setLong("job.start.time.ms", System.currentTimeMillis());

    conf.setPartitionerClass(HadoopRWStoreBuilderPartitioner.class);
    conf.setInputFormat(inputFormatClass);
    conf.setMapperClass(mapperClass);
    conf.setMapOutputKeyClass(BytesWritable.class);
    conf.setMapOutputValueClass(BytesWritable.class);
    conf.setReducerClass(HadoopRWStoreBuilderReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(BytesWritable.class);
    conf.setOutputValueClass(BytesWritable.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setJarByClass(getClass());

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, tempPath);

    try {
        // delete the temp dir if it exists
        FileSystem tempFs = tempPath.getFileSystem(conf);
        tempFs.delete(tempPath, true);

        conf.setInt("num.chunks", reducersPerNode);
        int numReducers = cluster.getNumberOfNodes() * reducersPerNode;
        logger.info("Replication factor = " + storeDef.getReplicationFactor() + ", numNodes = "
                + cluster.getNumberOfNodes() + ", reducers per node = " + reducersPerNode
                + ", numReducers = " + numReducers);
        conf.setNumReduceTasks(numReducers);

        logger.info("Building RW store...");
        JobClient.runJob(conf);
    } catch (Exception e) {
        throw new VoldemortException(e);
    }
}
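The driver above only writes these long-valued properties; they are typically read back on the task side with the matching JobConf.getLong, for example in a mapper's configure(JobConf) hook of the old mapred API. The sketch below shows that counterpart; the class name StoreBuilderMapperSketch and the default values are hypothetical and not part of the Voldemort source.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

// Hypothetical task-side counterpart, not taken from the Voldemort code above.
public class StoreBuilderMapperSketch extends MapReduceBase {
    private long vectorNodeVersion;
    private long jobStartTimeMs;

    @Override
    public void configure(JobConf conf) {
        // Read back the properties the driver set with setLong; the defaults are placeholders.
        vectorNodeVersion = conf.getLong("vector.node.version", -1L);
        jobStartTimeMs = conf.getLong("job.start.time.ms", 0L);
    }
}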
From source file: wiki.hadoop.mapred.lib.input.StreamWikiDumpInputFormat.java
License: Apache License
/**
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    LOG.info("StreamWikiDumpInputFormat.getSplits job=" + job + " n=" + numSplits);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    FileStatus[] files = listStatus(job);

    // Save the number of input files for metrics/loadgen
    job.setLong("mapreduce.input.num.files", files.length);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    long minSize = job.getLong("mapred.min.split.size", 1);
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);

    for (FileStatus file : files) {
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        long blockSize = file.getBlockSize();
        long splitSize = computeSplitSize(goalSize, minSize, blockSize);
        LOG.info(String.format("goalsize=%d splitsize=%d blocksize=%d", goalSize, splitSize, blockSize));
        //System.err.println(String.format("goalsize=%d splitsize=%d blocksize=%d", goalSize, splitSize, blockSize));
        for (InputSplit x : getSplits(job, file, pageBeginPattern, splitSize)) {
            splits.add(x);
        }
    }
    System.err.println("splits=" + splits);
    return splits.toArray(new InputSplit[splits.size()]);
}
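The splitSize used above comes from computeSplitSize(goalSize, minSize, blockSize). In the classic org.apache.hadoop.mapred.FileInputFormat that method clamps the goal size between the configured minimum split size and the file's block size; the sketch below assumes that standard behavior, which this input format could in principle override.

// Split-size rule as in the classic FileInputFormat: clamp the goal size
// between the configured minimum split size and the block size.
protected long computeSplitSize(long goalSize, long minSize, long blockSize) {
    return Math.max(minSize, Math.min(goalSize, blockSize));
}

For example, with mapred.min.split.size left at its default of 1, a goal size of 256 MB and a block size of 128 MB yield 128 MB splits, so each split stays within a single block.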