List of usage examples for org.apache.hadoop.mapred JobConf getFloat
public float getFloat(String name, float defaultValue)
name
property as a float
. From source file:edu.umn.cs.spatialHadoop.operations.Intersects.java
License:Open Source License
public static <S extends Shape> long intersects(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Intersects.class); LOG.info("Intersects journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {//from www. j a v a 2 s . c o m outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("Intersects"); job.setMapperClass(IntersectsMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(IntersectsReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:edu.umn.cs.spatialHadoop.operations.Overlaps.java
License:Open Source License
public static <S extends Shape> long overlaps(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Overlaps.class); LOG.info("Overlaps journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {//from w w w . j av a 2s . com outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("Overlaps"); job.setMapperClass(OverlapMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(OverlapReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:edu.umn.cs.spatialHadoop.operations.SJMR.java
License:Open Source License
public static <S extends Shape> long sjmr(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, SJMR.class); LOG.info("SJMR journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {/*www . ja v a2 s . c o m*/ outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("SJMR"); job.setMapperClass(SJMRMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(SJMRReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:edu.umn.cs.spatialHadoop.operations.Touches.java
License:Open Source License
public static <S extends Shape> long touches(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Touches.class); LOG.info("Touches journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {//from w w w . ja v a 2 s .c om outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("Touches"); job.setMapperClass(TouchesMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(TouchesReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:edu.umn.cs.spatialHadoop.operations.Within.java
License:Open Source License
public static <S extends Shape> long within(Path[] inFiles, Path userOutputPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Within.class); LOG.info("Within journey starts ...."); FileSystem inFs = inFiles[0].getFileSystem(job); Path outputPath = userOutputPath; if (outputPath == null) { FileSystem outFs = FileSystem.get(job); do {/*from ww w .ja v a 2 s. c om*/ outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000)); } while (outFs.exists(outputPath)); } FileSystem outFs = outputPath.getFileSystem(job); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setJobName("Within"); job.setMapperClass(WithinMap.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IndexedText.class); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setLong("mapred.min.split.size", Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(), inFs.getFileStatus(inFiles[1]).getBlockSize())); job.setReducerClass(WithinReduce.class); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); job.setInputFormat(ShapeLineInputFormat.class); if (job.getBoolean("output", true)) job.setOutputFormat(TextOutputFormat.class); else job.setOutputFormat(NullOutputFormat.class); ShapeLineInputFormat.setInputPaths(job, inFiles); // Calculate and set the dimensions of the grid to use in the map phase long total_size = 0; Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE); for (Path file : inFiles) { FileSystem fs = file.getFileSystem(params); Rectangle file_mbr = FileMBR.fileMBR(file, params); mbr.expand(file_mbr); total_size += FileUtil.getPathSize(fs, file); } // If the largest file is globally indexed, use its partitions total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f); int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20); int num_cells = (int) Math.max(1, total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath)); LOG.info("Number of cells is configured to be " + num_cells); OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive); OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce); OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly); GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_cells); OperationsParams.setShape(job, PartitionGrid, gridInfo); TextOutputFormat.setOutputPath(job, outputPath); if (OperationsParams.isLocal(job, inFiles)) { // Enforce local execution if explicitly set by user or for small files job.set("mapred.job.tracker", "local"); } // Start the job RunningJob runningJob = JobClient.runJob(job); Counters counters = runningJob.getCounters(); Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); final long resultCount = outputRecordCounter.getValue(); return resultCount; }
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static float getScalingFactor(JobConf conf) { float scalingFactor = conf.getFloat("mapred.myriad.dgen.scaling.factor", -1); if (scalingFactor <= 0) { throw new IllegalArgumentException("Bad `mapred.myriad.dgen.scaling.factor` parameter value"); }/* w w w . ja v a 2 s. co m*/ return scalingFactor; }
From source file:nl.tudelft.graphalytics.mapreducev2.evo.DirectedForestFireModelReducer.java
License:Apache License
@Override public void configure(JobConf conf) { this.isInit = conf.getBoolean(ForestFireModelUtils.IS_INIT, false); this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1); this.pRatio = conf.getFloat(ForestFireModelUtils.P_RATIO, 0); this.rRatio = conf.getFloat(ForestFireModelUtils.R_RATIO, 0); }
From source file:nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelReducer.java
License:Apache License
@Override public void configure(JobConf conf) { this.isInit = conf.getBoolean(ForestFireModelUtils.IS_INIT, false); this.isFinal = conf.getBoolean(ForestFireModelUtils.IS_FINAL, false); this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1); this.pRatio = conf.getFloat(ForestFireModelUtils.P_RATIO, 0); }
From source file:org.apache.nutch.scoring.webgraph.ScoreUpdater.java
License:Apache License
public void configure(JobConf conf) { this.conf = conf; clearScore = conf.getFloat("link.score.updater.clear.score", 0.0f); }
From source file:org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java
License:Apache License
@Test(timeout = 5000) public void verifyReduceKeyTranslation() { JobConf jobConf = new JobConf(); jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f); jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000l); jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000); jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f); jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f); jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f); jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true); jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f); MRHelpers.translateMRConfToTez(jobConf); assertEquals(0.4f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f), 0.01f);/*from w w w. j a va2 s . c om*/ assertEquals(20000l, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0)); assertEquals(2000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0)); assertEquals(0.55f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f); assertEquals(0.60f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0), 0.01f); assertEquals(0.22f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f); assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false)); assertEquals(0.33f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0), 0.01f); }