List of usage examples for org.apache.hadoop.mapred.JobConf.set(String name, String value)
public void set(String name, String value)
Sets the value of the name property.
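Before the per-project examples, here is a minimal sketch of the call itself. The property name and values are purely illustrative and not taken from any of the source files below; set(name, value) simply stores a string-valued property in the job configuration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // set(name, value) stores an arbitrary string-valued property in the job configuration
        conf.set("mapred.task.timeout", "600000");
        // get(name, defaultValue) reads it back; the second argument is returned if the property is unset
        String timeout = conf.get("mapred.task.timeout", "300000");
        System.out.println("mapred.task.timeout = " + timeout);
    }
}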
From source file:com.rapleaf.hank.hadoop.HadoopDomainCompactor.java
License:Apache License
public static void main(String[] args) throws IOException, InvalidConfigurationException {
    CommandLineChecker.check(args, new String[] { "domain name", "version to compact number",
            "mapred.task.timeout", "config path", "jobjar" }, HadoopDomainCompactor.class);
    String domainName = args[0];
    Integer versionToCompactNumber = Integer.valueOf(args[1]);
    Integer mapredTaskTimeout = Integer.valueOf(args[2]);
    CoordinatorConfigurator configurator = new YamlClientConfigurator(args[3]);
    String jobJar = args[4];
    DomainCompactorProperties properties = new DomainCompactorProperties(domainName, versionToCompactNumber,
            configurator);
    JobConf conf = new JobConf();
    conf.setJar(jobJar);
    conf.set("mapred.task.timeout", mapredTaskTimeout.toString());
    conf.setJobName(HadoopDomainCompactor.class.getSimpleName() + " Domain " + domainName + ", Version "
            + versionToCompactNumber);
    HadoopDomainCompactor compactor = new HadoopDomainCompactor(conf);
    LOG.info("Compacting Hank domain " + domainName + " version " + versionToCompactNumber
            + " with coordinator configuration " + configurator);
    compactor.buildHankDomain(properties, new IncrementalDomainVersionProperties.Base(
            "Version " + versionToCompactNumber + " compacted"));
}
From source file:com.ricemap.spateDB.core.SpatialSite.java
License:Apache License
public static void setCells(JobConf job, CellInfo[] cellsInfo) throws IOException {
    Path tempFile;
    FileSystem fs = FileSystem.get(job);
    do {
        tempFile = new Path(job.getJobName() + "_" + (int) (Math.random() * 1000000) + ".cells");
    } while (fs.exists(tempFile));
    FSDataOutputStream out = fs.create(tempFile);
    out.writeInt(cellsInfo.length);
    for (CellInfo cell : cellsInfo) {
        cell.write(out);
    }
    out.close();
    fs.deleteOnExit(tempFile);
    DistributedCache.addCacheFile(tempFile.toUri(), job);
    job.set(OUTPUT_CELLS, tempFile.getName());
    LOG.info("Partitioning file into " + cellsInfo.length + " cells");
}
From source file:com.ricemap.spateDB.operations.RangeQuery.java
License:Apache License
/**
 * Performs a range query using MapReduce
 *
 * @param fs
 * @param inputFile
 * @param queryRange
 * @param shape
 * @param output
 * @return
 * @throws IOException
 */
public static long rangeQueryMapReduce(FileSystem fs, Path inputFile, Path userOutputPath, Shape queryShape,
        Shape shape, boolean overwrite, boolean background, QueryInput query) throws IOException {
    JobConf job = new JobConf(FileMBR.class);
    FileSystem outFs = inputFile.getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(
                    inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    } else {
        if (outFs.exists(outputPath)) {
            if (overwrite) {
                outFs.delete(outputPath, true);
            } else {
                throw new RuntimeException("Output path already exists and -overwrite flag is not set");
            }
        }
    }

    job.setJobName("RangeQuery");
    job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    RangeFilter.setQueryRange(job, queryShape); // Set query range for filter

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(3);

    // Decide which map function to use depending on how blocks are indexed
    // And also which input format to use
    if (SpatialSite.isRTree(fs, inputFile)) {
        // RTree indexed file
        LOG.info("Searching an RTree indexed file");
        job.setInputFormat(RTreeInputFormat.class);
    } else {
        // A file with no local index
        LOG.info("Searching a non local-indexed file");
        job.setInputFormat(ShapeInputFormat.class);
    }

    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
    // if (gIndex != null && gIndex.isReplicated()){
    // job.setMapperClass(RangeQueryMap.class);

    Class<?> OutputKey = NullWritable.class;
    try {
        Class<?> c = shape.getClass();
        Field f = c.getDeclaredField(query.field);
        f.setAccessible(true);
        if (f.getType().equals(Integer.TYPE)) {
            OutputKey = IntWritable.class;
        } else if (f.getType().equals(Double.TYPE)) {
            OutputKey = DoubleWritable.class;
        } else if (f.getType().equals(Long.TYPE)) {
            OutputKey = LongWritable.class;
        }
    } catch (SecurityException e) {
        e.printStackTrace();
    } catch (NoSuchFieldException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    job.setMapOutputKeyClass(OutputKey);

    switch (query.type) {
    case Distinct:
        job.setMapperClass(DistinctQueryMap.class);
        job.setReducerClass(DistinctQueryReduce.class);
        job.setMapOutputValueClass(NullWritable.class);
        break;
    case Distribution:
        job.setMapperClass(DistributionQueryMap.class);
        job.setReducerClass(DistributionQueryReduce.class);
        job.setMapOutputValueClass(IntWritable.class);
        break;
    default:
        break;
    }
    // }
    // else
    // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

    // Set query range for the map function
    job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
    job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
    job.set(QUERY_FIELD, query.field);

    // Set shape class for the SpatialInputFormat
    SpatialSite.setShapeClass(job, shape.getClass());

    job.setOutputFormat(TextOutputFormat.class);
    ShapeInputFormat.setInputPaths(job, inputFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    if (!background) {
        RunningJob runningJob = JobClient.runJob(job);
        Counters counters = runningJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        final long resultCount = outputRecordCounter.getValue();
        // If outputPath not set by user, automatically delete it
        if (userOutputPath == null)
            outFs.delete(outputPath, true);
        return resultCount;
    } else {
        JobClient jc = new JobClient(job);
        lastRunningJob = jc.submitJob(job);
        return -1;
    }
}
From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java
License:Apache License
public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape,
        String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite)
        throws IOException {
    JobConf job = new JobConf(RandomSpatialGenerator.class);
    job.setJobName("Generator");
    FileSystem outFs = file.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(file)) {
        if (overwrite)
            outFs.delete(file, true);
        else
            throw new RuntimeException(
                    "Output file '" + file + "' already exists and overwrite flag is not set");
    }

    // Set generation parameters in job
    job.setLong(RandomShapeGenerator.GenerationSize, size);
    SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr);
    if (seed != 0)
        job.setLong(RandomShapeGenerator.GenerationSeed, seed);
    if (rectsize != 0)
        job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize);
    if (type != null)
        job.set(RandomShapeGenerator.GenerationType, type.toString());

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();

    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));
    SpatialSite.setShapeClass(job, shape.getClass());
    if (blocksize != 0) {
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize);
    }

    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
        FileSystem fs = file.getFileSystem(job);
        if (blocksize == 0) {
            blocksize = fs.getDefaultBlockSize(file);
        }
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }
    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path
    FileOutputFormat.setOutputPath(job, file);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobClient.runJob(job);

    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
            .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(file, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();

    // Plot an image for the partitions used in file
    Path imagePath = new Path(file, "_partitions.png");
    int imageSize = (int) (Math.sqrt(cells.length) * 300);
    Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false,
            false);
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapred.java
License:Apache License
/**
 * Update the job with details about the file split
 * @param job the job configuration to update
 * @param inputSplit the file split
 */
private void updateJobWithSplit(final JobConf job, Object inputSplit) {
    if (inputSplit instanceof FileSplit) {
        FileSplit fileSplit = (FileSplit) inputSplit;
        try {
            if (fileSplit.getPath() != null) {
                job.set("mapreduce.map.input.file", fileSplit.getPath().toString());
            }
        } catch (IllegalArgumentException e) {
            // Swallow this, it appears in Hive splits, which do not have the path encoded
            // (storage handler for NamedMap is an example).
        }
        job.setLong("mapreduce.map.input.start", fileSplit.getStart());
        job.setLong("mapreduce.map.input.length", fileSplit.getLength());
    }
    LOG.info("Processing split: " + inputSplit);
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
static void updateJobConf(JobConf jobConf, TaskAttemptID taskAttemptID, int partition) {
    //---------------------------------------------------------------------------------
    // Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 1.2.0,
    // licensed under Apache License, Version 2.0
    //---------------------------------------------------------------------------------
    jobConf.set("mapred.tip.id", taskAttemptID.getTaskID().toString());
    jobConf.set("mapred.task.id", taskAttemptID.toString());
    jobConf.setBoolean("mapred.task.is.map", false);
    jobConf.setInt("mapred.task.partition", partition);
    jobConf.set("mapred.job.id", taskAttemptID.getJobID().toString());

    //---------------------------------------------------------------------------------
    // Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 2.2.0,
    // licensed under Apache License, Version 2.0
    //---------------------------------------------------------------------------------
    jobConf.set(TASK_ID, taskAttemptID.getTaskID().toString());
    jobConf.set(TASK_ATTEMPT_ID, taskAttemptID.toString());
    jobConf.setBoolean(TASK_ISMAP, false);
    jobConf.setInt(TASK_PARTITION, partition);
    jobConf.set(ID, taskAttemptID.getJobID().toString());
    //----------------------------------------------------------------------------------
}
From source file:com.scaleoutsoftware.soss.hserver.hive.HServerHiveStorageHandler.java
License:Apache License
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    String mapName = tableDesc.getProperties().getProperty(MAP_NAME);
    String valueSerializer = tableDesc.getProperties().getProperty(VALUE_SERIALIZER);
    String valueType = tableDesc.getProperties().getProperty(VALUE_TYPE);
    jobConf.set(MAP_NAME, mapName);
    if (valueSerializer != null) {
        jobConf.set(VALUE_SERIALIZER, valueSerializer);
        if (valueType != null) {
            jobConf.set(VALUE_TYPE, valueType);
        }
    }
}
From source file:com.scaleunlimited.helpful.tools.AnalyzeEmail.java
License:Apache License
private static JobConf getDefaultJobConf() throws IOException {
    JobClient jobClient = new JobClient(new JobConf());
    ClusterStatus status = jobClient.getClusterStatus();
    int trackers = status.getTaskTrackers();

    JobConf conf = new JobConf();
    conf.setNumMapTasks(trackers * 10);
    conf.setNumReduceTasks(trackers * conf.getInt("mapred.tasktracker.reduce.tasks.maximum", 2));
    conf.setMapSpeculativeExecution(false);
    conf.setReduceSpeculativeExecution(false);
    conf.set("mapred.child.java.opts", "-server -Xmx512m -Xss128k");

    // Should match the value used for Xss above. Note no 'k' suffix for the ulimit command.
    // New support that one day will be in Hadoop.
    conf.set("mapred.child.ulimit.stack", "128");

    return conf;
}
From source file:com.splicemachine.orc.OrcTester.java
License:Open Source License
static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression,
        ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());
    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compression != NONE, createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
}
From source file:com.splicemachine.orc.OrcTester.java
License:Open Source License
private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec,
        ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compressionCodec != NONE, createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {
            });
}