List of usage examples for org.apache.hadoop.mapred.JobConf.setLong
public void setLong(String name, long value)
Sets the value of the name property to a long.
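A minimal, self-contained sketch of the call in isolation; the property name "my.job.start.ms" and the class name SetLongExample are illustrative only and do not appear in the examples below. setLong stores the value under the given property name, and the matching getLong reads it back with a default.

import org.apache.hadoop.mapred.JobConf;

public class SetLongExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Illustrative property name: store a long-valued setting in the job configuration.
        conf.setLong("my.job.start.ms", System.currentTimeMillis());

        // Read it back with getLong, supplying a default in case the property is unset.
        long startMs = conf.getLong("my.job.start.ms", 0L);
        System.out.println("my.job.start.ms = " + startMs);
    }
}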
From source file: voldemort.store.readwrite.mr.HadoopRWStoreBuilder.java
License: Apache License
/**
 * Run the job
 */
public void build() {
    JobConf conf = new JobConf(config);
    conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
    conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
    conf.set("stores.xml",
             new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
    conf.setInt("vector.node.id", this.vectorNodeId);
    // Long-valued job parameters recorded with setLong
    conf.setLong("vector.node.version", this.vectorNodeVersion);
    conf.setLong("job.start.time.ms", System.currentTimeMillis());

    conf.setPartitionerClass(HadoopRWStoreBuilderPartitioner.class);
    conf.setInputFormat(inputFormatClass);
    conf.setMapperClass(mapperClass);
    conf.setMapOutputKeyClass(BytesWritable.class);
    conf.setMapOutputValueClass(BytesWritable.class);
    conf.setReducerClass(HadoopRWStoreBuilderReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(BytesWritable.class);
    conf.setOutputValueClass(BytesWritable.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setJarByClass(getClass());

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, tempPath);

    try {
        // delete the temp dir if it exists
        FileSystem tempFs = tempPath.getFileSystem(conf);
        tempFs.delete(tempPath, true);

        conf.setInt("num.chunks", reducersPerNode);
        int numReducers = cluster.getNumberOfNodes() * reducersPerNode;
        logger.info("Replication factor = " + storeDef.getReplicationFactor() + ", numNodes = "
                + cluster.getNumberOfNodes() + ", reducers per node = " + reducersPerNode
                + ", numReducers = " + numReducers);
        conf.setNumReduceTasks(numReducers);

        logger.info("Building RW store...");
        JobClient.runJob(conf);
    } catch (Exception e) {
        throw new VoldemortException(e);
    }
}
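The driver above only writes these long-valued properties; they are typically read back on the task side with the matching JobConf.getLong, for example in a mapper's configure(JobConf) hook of the old mapred API. The sketch below shows that counterpart; the class name StoreBuilderMapperSketch and the default values are hypothetical and not part of the Voldemort source.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

// Hypothetical task-side counterpart, not taken from the Voldemort code above.
public class StoreBuilderMapperSketch extends MapReduceBase {
    private long vectorNodeVersion;
    private long jobStartTimeMs;

    @Override
    public void configure(JobConf conf) {
        // Read back the properties the driver set with setLong; the defaults are placeholders.
        vectorNodeVersion = conf.getLong("vector.node.version", -1L);
        jobStartTimeMs = conf.getLong("job.start.time.ms", 0L);
    }
}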
From source file: wiki.hadoop.mapred.lib.input.StreamWikiDumpInputFormat.java
License: Apache License
/**
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    LOG.info("StreamWikiDumpInputFormat.getSplits job=" + job + " n=" + numSplits);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    FileStatus[] files = listStatus(job);

    // Save the number of input files for metrics/loadgen
    job.setLong("mapreduce.input.num.files", files.length);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    long minSize = job.getLong("mapred.min.split.size", 1);
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);

    for (FileStatus file : files) {
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        long blockSize = file.getBlockSize();
        long splitSize = computeSplitSize(goalSize, minSize, blockSize);
        LOG.info(String.format("goalsize=%d splitsize=%d blocksize=%d", goalSize, splitSize, blockSize));
        //System.err.println(String.format("goalsize=%d splitsize=%d blocksize=%d", goalSize, splitSize, blockSize));
        for (InputSplit x : getSplits(job, file, pageBeginPattern, splitSize)) {
            splits.add(x);
        }
    }
    System.err.println("splits=" + splits);
    return splits.toArray(new InputSplit[splits.size()]);
}
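The splitSize used above comes from computeSplitSize(goalSize, minSize, blockSize). In the classic org.apache.hadoop.mapred.FileInputFormat that method clamps the goal size between the configured minimum split size and the file's block size; the sketch below assumes that standard behavior, which this input format could in principle override.

// Split-size rule as in the classic FileInputFormat: clamp the goal size
// between the configured minimum split size and the block size.
protected long computeSplitSize(long goalSize, long minSize, long blockSize) {
    return Math.max(minSize, Math.min(goalSize, blockSize));
}

For example, with mapred.min.split.size left at its default of 1, a goal size of 256 MB and a block size of 128 MB yield 128 MB splits, so each split stays within a single block.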