List of usage examples for org.apache.hadoop.mapred JobConf getLong
public long getLong(String name, long defaultValue)
Gets the value of the name property as a long; if no such property exists, defaultValue is returned.
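Before the collected examples, a minimal self-contained sketch of the getter/setter pairing; the property name my.example.threshold is a placeholder used only for illustration, not a Hadoop-defined key.
import org.apache.hadoop.mapred.JobConf;

public class GetLongExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // "my.example.threshold" is a hypothetical property, shown only for illustration.
        conf.setLong("my.example.threshold", 1024L);

        // Returns 1024 because the property was set above.
        long threshold = conf.getLong("my.example.threshold", 512L);

        // Returns the default (512) because this property was never set.
        long missing = conf.getLong("my.example.unset", 512L);

        System.out.println(threshold + " " + missing);
    }
}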
From source file:PageInputFormat.java
License:Apache License
private static long getMaxSplitSize(JobConf context) { return context.getLong(SPLIT_MAXSIZE, Long.MAX_VALUE); }
From source file:SleepJob.java
License:Apache License
public void configure(JobConf job) {
    this.mapSleepCount = job.getInt("sleep.job.map.sleep.count", mapSleepCount);
    this.reduceSleepCount = job.getInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    this.mapSleepDuration = job.getLong("sleep.job.map.sleep.time", 100) / mapSleepCount;
    this.reduceSleepDuration = job.getLong("sleep.job.reduce.sleep.time", 100) / reduceSleepCount;
}
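A hedged sketch (not from SleepJob.java) of the driver side that would populate the sleep properties read above; the values are illustrative only, and the counts should stay non-zero because configure() divides the sleep time by them.
import org.apache.hadoop.mapred.JobConf;

public class SleepJobDriverSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Illustrative values only; keep the counts non-zero because
        // configure() divides the sleep time by the count.
        job.setInt("sleep.job.map.sleep.count", 10);
        job.setLong("sleep.job.map.sleep.time", 1000L);
        job.setInt("sleep.job.reduce.sleep.count", 5);
        job.setLong("sleep.job.reduce.sleep.time", 500L);
    }
}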
From source file:DataJoinReducerBase.java
License:Apache License
public void configure(JobConf job) {
    super.configure(job);
    this.job = job;
    this.maxNumOfValuesPerGroup = job.getLong("datajoin.maxNumOfValuesPerGroup", 100);
}
From source file:SleepJobWithArray.java
License:Apache License
public void configure(JobConf job) {
    this.mapSleepCount = job.getInt("sleep.job.map.sleep.count", mapSleepCount);
    this.initBigArray = job.getBoolean("initBigArray", false);
    this.bigArraySize = job.getInt("bigArraySize", bigArraySize);
    this.reduceSleepCount = job.getInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    this.mapSleepDuration = job.getLong("sleep.job.map.sleep.time", 100) / mapSleepCount;
    this.reduceSleepDuration = job.getLong("sleep.job.reduce.sleep.time", 100) / reduceSleepCount;
}
From source file:ca.sparkera.adapters.mapred.MainframeVBInputFormat.java
License:Apache License
/**
 * Splits files returned by {@link #listStatus(JobConf)} when they're too big.
 */
@Override
@SuppressWarnings("deprecation")
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);
    for (FileStatus file : files) {
        // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(job.getLong("mapred.min.split.size", 1), minSplitSize);

    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job);
        FSDataInputStream fileIn;
        InputStream inputStream;
        fileIn = fs.open(path);
        inputStream = fileIn;
        filePosition = fileIn;
        long offset = 0;
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            long bytesRemaining = length;
            long splitSize = 0;
            while (offset < length) {
                splitSize = computeSplitSize(goalSize, minSize, blockSize, inputStream);
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
                offset = length - bytesRemaining;
            }
            if (bytesRemaining != 0) {
                throw new IOException("Partial record(length = " + bytesRemaining
                        + ") found at the end of file " + path);
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
        if (inputStream != null) {
            inputStream.close();
            inputStream = null;
        }
    }
    java.util.Date date = new java.util.Date();
    System.out.println((new Timestamp(date.getTime())) + ",\t Split = 100% Total Splits - "
            + (++splitCount) + "\t Total Records in VB file - " + totalRecords);
    LOG.debug("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
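For context, the stock FileInputFormat derives the split size from the goal size, the configured minimum (read via getLong("mapred.min.split.size", 1) as above), and the block size. The snippet below is a sketch of that standard rule, not the computeSplitSize overload used by MainframeVBInputFormat, which also takes an InputStream.
// A sketch of the classic FileInputFormat split-size rule, shown only to illustrate
// how the mapred.min.split.size value read with getLong bounds the split size from below.
static long standardComputeSplitSize(long goalSize, long minSize, long blockSize) {
    return Math.max(minSize, Math.min(goalSize, blockSize));
}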
From source file:cascading.flow.Flow.java
License:Open Source License
public static long getJobPollingInterval(JobConf jobConf) { return jobConf.getLong("cascading.flow.job.pollinginterval", 5000); }
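A hedged sketch (not from Flow.java) of overriding the polling interval that getJobPollingInterval() reads; the key string is the literal shown in the snippet above, and the 2-second value is illustrative only.
import org.apache.hadoop.mapred.JobConf;

public class PollingIntervalSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        // Poll job status every 2 seconds instead of the 5000 ms default
        // returned by getJobPollingInterval(). Illustrative value only.
        jobConf.setLong("cascading.flow.job.pollinginterval", 2000L);
    }
}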
From source file:cascading.flow.hadoop.planner.HadoopFlowStepJob.java
License:Open Source License
private static long getStoreInterval(JobConf jobConf) { return jobConf.getLong(STATS_STORE_INTERVAL, 60 * 1000); }
From source file:cascading.flow.hadoop.planner.HadoopFlowStepJob.java
License:Open Source License
public static long getJobPollingInterval(JobConf jobConf) { return jobConf.getLong(JOB_POLLING_INTERVAL, 5000); }
From source file:com.benchmark.mapred.RandomTextWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }

    JobConf job = new JobConf(getConf());
    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormat(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(Map.class);

    JobClient client = new JobClient(job);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
    long numBytesToWritePerMap = job.getLong("test.randomtextwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0");
        return -2;
    }
    long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes",
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite);
    }

    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.setOutputFormat(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

    job.setNumMapTasks(numMaps);
    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
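A small self-contained sketch (not from the source file) of the same defaulting pattern: getLong lets the total-bytes default be computed from the per-map value, so setting either property alone still yields a consistent map count. The property names follow the snippet above; the tracker count is an assumed constant standing in for ClusterStatus.getTaskTrackers().
import org.apache.hadoop.mapred.JobConf;

public class RandomWriteSizingSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        int taskTrackers = 4; // assumed cluster size, illustrative only

        int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
        long bytesPerMap = job.getLong("test.randomtextwrite.bytes_per_map", 1L * 1024 * 1024 * 1024);
        long totalBytes = job.getLong("test.randomtextwrite.total_bytes",
                numMapsPerHost * bytesPerMap * taskTrackers);

        // With nothing set, this prints 40: ten 1 GiB maps per node on the assumed 4-node cluster.
        System.out.println("maps = " + (totalBytes / bytesPerMap));
    }
}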
From source file:com.benchmark.mapred.RandomWriter.java
License:Apache License
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path outDir = new Path(args[0]);
    JobConf job = new JobConf(getConf());

    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    JobClient client = new JobClient(job);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
    long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
        return -2;
    }
    long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes",
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
    }

    job.setNumMapTasks(numMaps);
    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}