List of usage examples for org.apache.hadoop.mapred.JobConf.setJarByClass
public void setJarByClass(Class cls)
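setJarByClass tells Hadoop which jar to ship with the job: the framework locates the jar on the classpath that contains the given class and submits it so the cluster nodes can load the job's classes. Before the project examples below, here is a minimal, self-contained sketch of where the call typically sits in a classic org.apache.hadoop.mapred driver; the driver class name and the command-line paths are hypothetical, not taken from any of the source files listed on this page.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class SetJarByClassExample {

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.setJobName("setJarByClass example");

        // Hadoop finds the jar containing this class and submits it with the job,
        // so the tasks running on the cluster can resolve the job's classes.
        conf.setJarByClass(SetJarByClassExample.class);

        // No mapper/reducer is set, so the identity mapper and reducer are used;
        // with the default TextInputFormat the records are LongWritable/Text.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Hypothetical input and output paths taken from the command line.
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}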
From source file:org.slc.sli.aggregation.mapreduce.map.ConfigurableMapReduceJob.java
License:Apache License
@SuppressWarnings("rawtypes") protected static JobConf finalizeConfig(JobConf jobConf, ConfigSections s) throws IOException { Class<? extends Mapper> mapperClass = JobConfiguration.mapper.getMapClass(mapper); Class<? extends Reducer> reducerClass = JobConfiguration.function.getReduceClass(reduceFunction); Map<String, String> idFields = s.getMapper().getMapIdFields(); // validate we have enough to continue boolean valid = true; if (mapperClass == null) { log.severe("Invalid map/reduce configuration detected : no mapper class specified."); valid = false;/*www . j a v a2s .co m*/ } if (idFields == null) { idFields = new HashMap<String, String>(); log.severe("Invalid map/reduce configuration detected : no map id fields specified."); valid = false; } if (mapCollection == null) { log.severe("Invalid map/reduce configuration detected : no map collection specified."); valid = false; } if (mapQuery == null) { log.severe("Invalid map/reduce configuration detected : no map query specified."); valid = false; } if (mapFields == null) { log.severe("Invalid map/reduce configuration detected : no map input fields specified."); valid = false; } if (reducerClass == null) { log.severe("Invalid map/reduce configuration detected : no reducer class specified."); valid = false; } if (reduceCollection == null) { log.severe("Invalid map/reduce configuration detected : no reduce collection specified."); valid = false; } if (reduceField == null) { log.severe("Invalid map/reduce configuration detected : no reduce field specified."); valid = false; } if (!valid) { throw new IllegalArgumentException("Invalid mapper specified. Check log for details."); } jobConf.set("mapred.output.dir", String.format("%s-%s-%d", s.getMapper().getMapper(), s.getMetadata().getFunction(), System.currentTimeMillis())); jobConf.setJobName(s.getMetadata().getDescription() == null ? "M/R Job" : s.getMetadata().getDescription()); // enable speculative execution. Multiple mapper tasks are created for the same split. // First one to finish wins; the remaining tasks are terminated. jobConf.setSpeculativeExecution(true); jobConf.setUseNewMapper(true); jobConf.setUseNewReducer(true); /** * TODO -- decide if this is required. 
String id = conf.get("@ID@"); String tenantId = conf.get("@TENANT_ID@"); for (Map.Entry<String, Object> entry : query.entrySet()) { Object value = entry.getValue(); if (value instanceof String) { String s = (String) value; if (s.indexOf("@ID@") >= 0 && id != null) { s = s.replace("@ID@", id); query.put(entry.getKey(), s); } if (s.indexOf("@TENANT_ID@") >= 0 && tenantId != null) { s = s.replace("@TENANT_ID@", tenantId); query.put(entry.getKey(), s); } } } if (updateField.indexOf("@ID@") >= 0 && id != null) { updateField = updateField.replace("@ID@", id); } if (updateField.indexOf("@TENANT_ID@") >= 0 && tenantId != null) { updateField = updateField.replace("@TENANT_ID@", tenantId); } */ MongoConfigUtil.setQuery(jobConf, new BasicDBObject(mapQuery)); Map<String, Object> fullFields = new HashMap<String, Object>(); for (String f : idFields.values()) { fullFields.put(f, 1); } fullFields.putAll(mapFields); MongoConfigUtil.setFields(jobConf, new BasicDBObject(fullFields)); MongoConfigUtil.setInputKey(jobConf, idFields.get("id")); MongoConfigUtil.setInputURI(jobConf, "mongodb://" + MONGO_HOST + "/" + mapCollection); MongoConfigUtil.setMapperOutputKey(jobConf, TenantAndIdEmittableKey.class); MongoConfigUtil.setMapperOutputValue(jobConf, BSONWritable.class); MongoConfigUtil.setOutputKey(jobConf, TenantAndIdEmittableKey.class); MongoConfigUtil.setOutputValue(jobConf, BSONWritable.class); // TODO - this probably should be configurable MongoConfigUtil.setReadSplitsFromSecondary(jobConf, true); MongoConfigUtil.setSplitSize(jobConf, 32); jobConf.setClass("mapred.input.key.class", TenantAndIdEmittableKey.class, EmittableKey.class); jobConf.setClass("mapred.input.value.class", BSONWritable.class, Object.class); jobConf.setClass("mapred.output.key.class", TenantAndIdEmittableKey.class, EmittableKey.class); jobConf.setClass("mapred.output.value.class", BSONWritable.class, Object.class); jobConf.setClass("mapreduce.inputformat.class", MongoTenantAndIdInputFormat.class, MongoInputFormat.class); jobConf.setClass("mapreduce.outputformat.class", MongoAggFormatter.class, MongoOutputFormat.class); MongoConfigUtil.setInputFormat(jobConf, MongoTenantAndIdInputFormat.class); MongoConfigUtil.setOutputFormat(jobConf, MongoAggFormatter.class); /** * Configure how hadoop calculates splits. * * We enable input splits to avoid having the entire job executed on a single hadoop node. * * We enable shard chunk splitting to allow mongo to specify how to split the input. * * We disable read splits from shards because we want hadoop connecting to mongos, not * mongod directly. This avoids incorrect results in situations where data is in the process * of migration at the same time hadoop is trying to read it. * * TODO - determine if we also need to set the input split key pattern. This depends * on how well data is distributed by _id. Setting the key pattern gives finer grained * control over how splits are calculated. 
*/ MongoConfigUtil.setCreateInputSplits(jobConf, true); MongoConfigUtil.setShardChunkSplittingEnabled(jobConf, true); MongoConfigUtil.setReadSplitsFromShards(jobConf, false); MongoConfigUtil.setOutputURI(jobConf, "mongodb://" + MONGO_HOST + "/" + reduceCollection); jobConf.setJarByClass(JobConfiguration.class); MongoConfigUtil.setMapper(jobConf, mapperClass); jobConf.setClass(JobContext.MAP_CLASS_ATTR, mapperClass, Mapper.class); MongoConfigUtil.setReducer(jobConf, reducerClass); jobConf.setClass(JobContext.REDUCE_CLASS_ATTR, reducerClass, Reducer.class); // Set this relatively high to keep the total map execution time low. // Formula: 1.75 * (# nodes * max tasks) // TODO : replace this hardcoded value with one calculated from configuration information. jobConf.setNumReduceTasks(52); // Add the configuration itself to the JobConf. JobConfiguration.toHadoopConfiguration(s, jobConf); return jobConf; }
From source file:org.smartfrog.services.hadoop.mapreduce.terasort.TeraGenJob.java
License:Apache License
@SuppressWarnings({ "ProhibitedExceptionDeclared" }) @Override//ww w . jav a2s . c o m public RunningJob runJob(String[] args) throws Exception { JobConf job = (JobConf) getConf(); setNumberOfRows(job, Long.parseLong(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraGen"); job.setJarByClass(TeraGenJob.class); job.setMapperClass(TeraGenMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(TeraGenRangeInputFormat.class); job.setOutputFormat(TeraOutputFormat.class); job.setBoolean(ClusterConstants.MAPRED_DISABLE_TOOL_WARNING, true); return JobClient.runJob(job); }
From source file:org.smartfrog.services.hadoop.mapreduce.terasort.TeraSortJob.java
License:Apache License
@SuppressWarnings("ProhibitedExceptionDeclared") @Override//from w w w . j av a 2 s .c o m public int run(String[] args) throws Exception { LOG.info("starting"); JobConf job = (JobConf) getConf(); Path inputDir = new Path(args[0]); inputDir = inputDir.makeQualified(inputDir.getFileSystem(job)); Path partitionFile = new Path(inputDir, TeraConstants.PARTITION_FILENAME); URI partitionUri = new URI(partitionFile.toString() + "#" + TeraConstants.PARTITION_FILENAME); TeraInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraSort"); job.setJarByClass(TeraSortJob.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(TeraInputFormat.class); job.setOutputFormat(TeraOutputFormat.class); job.setPartitionerClass(TotalOrderPartitioner.class); job.setBoolean(ClusterConstants.MAPRED_DISABLE_TOOL_WARNING, true); TeraInputFormat.writePartitionFile(job, partitionFile); DistributedCache.addCacheFile(partitionUri, job); DistributedCache.createSymlink(job); job.setInt("dfs.replication", 1); job.setInt("mapred.submit.replication", 1); TeraOutputFormat.setFinalSync(job, true); RunningJob runningJob = JobClient.runJob(job); LOG.info("done"); return 0; }
From source file:org.smartfrog.services.hadoop.mapreduce.terasort.TeraValidateJob.java
License:Apache License
public int run(String[] args) throws Exception {
    JobConf job = (JobConf) getConf();
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidateJob.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    job.setLong("mapred.min.split.size", Long.MAX_VALUE);
    job.setInputFormat(TeraInputFormat.class);
    JobClient.runJob(job);
    return 0;
}
From source file:org.weikey.terasort.TeraSort.java
License:Apache License
@SuppressWarnings("deprecation") public int run(String[] args) throws Exception { LOG.info("starting"); JobConf job = (JobConf) getConf(); SortConfig sortConfig = new SortConfig(job); // if (args.length >= 3) { // job.setNumReduceTasks(Integer.valueOf(args[2])); // if (args.length >= 4) { // sortConfig.setStartKey(Integer.valueOf(args[3])); // if (args.length >= 5) { // sortConfig.setFieldSeparator(args[4]); // }/*from w w w . ja v a 2 s . c om*/ // } // } Integer numMapTasks = null; Integer numReduceTasks = null; List<String> otherArgs = new ArrayList<String>(); boolean createLzopIndex = false; for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { job.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { job.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-f".equals(args[i]) || "--ignore-case".equals(args[i])) { sortConfig.setIgnoreCase(true); } else if ("-u".equals(args[i]) || "--unique".equals(args[i])) { sortConfig.setUnique(true); } else if ("-k".equals(args[i]) || "--key".equals(args[i])) { String[] parts = StringUtils.split(args[++i], ","); sortConfig.setStartKey(Integer.valueOf(parts[0])); if (parts.length > 1) { sortConfig.setEndKey(Integer.valueOf(parts[1])); } } else if ("-t".equals(args[i]) || "--field-separator".equals(args[i])) { sortConfig.setFieldSeparator(args[++i]); } else if ("--total-order".equals(args[i])) { double pcnt = Double.parseDouble(args[++i]); int numSamples = Integer.parseInt(args[++i]); int maxSplits = Integer.parseInt(args[++i]); if (0 >= maxSplits) { maxSplits = Integer.MAX_VALUE; } } else if ("--lzop-index".equals(args[i])) { createLzopIndex = true; } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Make sure there are exactly 2 parameters left. if (otherArgs.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2."); return printUsage(); } Path inputDir = new Path(args[0]); inputDir = inputDir.makeQualified(inputDir.getFileSystem(job)); Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME); URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME); TeraInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraSort"); job.setJarByClass(TeraSort.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(TeraInputFormat.class); job.setOutputFormat(TeraOutputFormat.class); job.setPartitionerClass(TotalOrderPartitioner.class); TeraInputFormat.writePartitionFile(job, partitionFile); DistributedCache.addCacheFile(partitionUri, job); DistributedCache.createSymlink(job); job.setInt("dfs.replication", 1); TeraOutputFormat.setFinalSync(job, true); JobClient.runJob(job); LOG.info("done"); return 0; }
From source file:scray.cassandra.hadoop.example.LineCounter.java
License:Apache License
public int run(String[] args) throws Exception {
    JobConf job = new JobConf(LineCounter.class);
    job.setJobName("Counting number of rows with CassandraVNodes InputFormat");
    job.setJarByClass(LineCounter.class);
    job.setReducerClass(ReducerToHDFS.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_DIR));
    job.setMapperClass(RowMapper.class);
    HashSet<String> hosts = new HashSet<String>();
    hosts.add(CASS_HOST);
    CassandraVNodeConfigHelper.setClusterName(job, "Test Cluster");
    CassandraVNodeConfigHelper.setDatacenter(job, DATA_CENTER);
    CassandraVNodeConfigHelper.setKeyspace(job, KEYSPACE);
    CassandraVNodeConfigHelper.setColumnFamily(job, COLUMN_FAMILY);
    CassandraVNodeConfigHelper.setNodes(job, hosts);
    job.setInputFormat((Class<InputFormat<Long, Row>>) (Object) CassandraVNodeInputFormat.class);
    JobClient.runJob(job);
    return 0;
}
From source file:source.TeraSort.java
License:Apache License
public int run(String[] args) throws Exception {
    LOG.info("starting");
    JobConf job = (JobConf) getConf();
    Path inputDir = new Path(args[0]);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
    URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(TeraInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TeraInputFormat.writePartitionFile(job, partitionFile);
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);
    job.setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    JobClient.runJob(job);
    LOG.info("done");
    return 0;
}
From source file:uk.bl.wa.hadoop.hosts.HostsReport.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), HostsReport.class);
    log.info("Adding logs...");
    String line;
    BufferedReader br = new BufferedReader(new FileReader(args[0]));
    while ((line = br.readLine()) != null) {
        log.info("Adding " + line);
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    conf.setJarByClass(HostsReport.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapperClass(HostsReportMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCombinerClass(HostsReportReducer.class);
    conf.setReducerClass(HostsReportReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);
    JobClient.runJob(conf);
    return 0;
}
From source file:voldemort.store.readonly.mr.HadoopStoreBuilder.java
License:Apache License
/**
 * Run the job
 */
public void build() {
    try {
        JobConf conf = new JobConf(config);
        conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
        conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
        conf.set("stores.xml",
                new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
        conf.setBoolean("save.keys", saveKeys);
        conf.setBoolean("reducer.per.bucket", reducerPerBucket);
        if (!isAvro) {
            conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
            conf.setMapperClass(mapperClass);
            conf.setMapOutputKeyClass(BytesWritable.class);
            conf.setMapOutputValueClass(BytesWritable.class);
            if (reducerPerBucket) {
                conf.setReducerClass(HadoopStoreBuilderReducerPerBucket.class);
            } else {
                conf.setReducerClass(HadoopStoreBuilderReducer.class);
            }
        }
        conf.setInputFormat(inputFormatClass);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setOutputKeyClass(BytesWritable.class);
        conf.setOutputValueClass(BytesWritable.class);
        conf.setJarByClass(getClass());
        conf.setReduceSpeculativeExecution(false);
        FileInputFormat.setInputPaths(conf, inputPath);
        conf.set("final.output.dir", outputDir.toString());
        conf.set("checksum.type", CheckSum.toString(checkSumType));
        FileOutputFormat.setOutputPath(conf, tempDir);

        FileSystem outputFs = outputDir.getFileSystem(conf);
        if (outputFs.exists(outputDir)) {
            throw new IOException("Final output directory already exists.");
        }

        // delete output dir if it already exists
        FileSystem tempFs = tempDir.getFileSystem(conf);
        tempFs.delete(tempDir, true);

        long size = sizeOfPath(tempFs, inputPath);
        logger.info("Data size = " + size + ", replication factor = " + storeDef.getReplicationFactor()
                + ", numNodes = " + cluster.getNumberOfNodes() + ", chunk size = " + chunkSizeBytes);

        // Derive "rough" number of chunks and reducers
        int numReducers;
        if (saveKeys) {
            if (this.numChunks == -1) {
                this.numChunks = Math.max((int) (storeDef.getReplicationFactor() * size
                        / cluster.getNumberOfPartitions() / storeDef.getReplicationFactor()
                        / chunkSizeBytes), 1);
            } else {
                logger.info("Overriding chunk size byte and taking num chunks (" + this.numChunks
                        + ") directly");
            }
            if (reducerPerBucket) {
                numReducers = cluster.getNumberOfPartitions() * storeDef.getReplicationFactor();
            } else {
                numReducers = cluster.getNumberOfPartitions() * storeDef.getReplicationFactor() * numChunks;
            }
        } else {
            if (this.numChunks == -1) {
                this.numChunks = Math.max((int) (storeDef.getReplicationFactor() * size
                        / cluster.getNumberOfPartitions() / chunkSizeBytes), 1);
            } else {
                logger.info("Overriding chunk size byte and taking num chunks (" + this.numChunks
                        + ") directly");
            }
            if (reducerPerBucket) {
                numReducers = cluster.getNumberOfPartitions();
            } else {
                numReducers = cluster.getNumberOfPartitions() * numChunks;
            }
        }
        conf.setInt("num.chunks", numChunks);
        conf.setNumReduceTasks(numReducers);

        if (isAvro) {
            conf.setPartitionerClass(AvroStoreBuilderPartitioner.class);
            // conf.setMapperClass(mapperClass);
            conf.setMapOutputKeyClass(ByteBuffer.class);
            conf.setMapOutputValueClass(ByteBuffer.class);
            conf.setInputFormat(inputFormatClass);
            conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
            conf.setOutputKeyClass(ByteBuffer.class);
            conf.setOutputValueClass(ByteBuffer.class);

            // AvroJob confs for the avro mapper
            AvroJob.setInputSchema(conf, Schema.parse(config.get("avro.rec.schema")));
            AvroJob.setOutputSchema(conf,
                    Pair.getPairSchema(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.BYTES)));
            AvroJob.setMapperClass(conf, mapperClass);
            if (reducerPerBucket) {
                conf.setReducerClass(AvroStoreBuilderReducerPerBucket.class);
            } else {
                conf.setReducerClass(AvroStoreBuilderReducer.class);
            }
        }

        logger.info("Number of chunks: " + numChunks + ", number of reducers: " + numReducers
                + ", save keys: " + saveKeys + ", reducerPerBucket: " + reducerPerBucket);
        logger.info("Building store...");
        RunningJob job = JobClient.runJob(conf);

        // Once the job has completed log the counter
        Counters counters = job.getCounters();
        if (saveKeys) {
            if (reducerPerBucket) {
                logger.info("Number of collisions in the job - "
                        + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
                logger.info("Maximum number of collisions for one entry - "
                        + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
            } else {
                logger.info("Number of collisions in the job - "
                        + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
                logger.info("Maximum number of collisions for one entry - "
                        + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
            }
        }

        // Do a CheckSumOfCheckSum - Similar to HDFS
        CheckSum checkSumGenerator = CheckSum.getInstance(this.checkSumType);
        if (!this.checkSumType.equals(CheckSumType.NONE) && checkSumGenerator == null) {
            throw new VoldemortException("Could not generate checksum digest for type " + this.checkSumType);
        }

        // Check if all folder exists and with format file
        for (Node node : cluster.getNodes()) {
            ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
            if (saveKeys) {
                metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
            } else {
                metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V1.getCode());
            }

            Path nodePath = new Path(outputDir.toString(), "node-" + node.getId());
            if (!outputFs.exists(nodePath)) {
                logger.info("No data generated for node " + node.getId() + ". Generating empty folder");
                outputFs.mkdirs(nodePath); // Create empty folder
                outputFs.setPermission(nodePath, new FsPermission(HADOOP_FILE_PERMISSION));
                logger.info("Setting permission to 755 for " + nodePath);
            }

            if (checkSumType != CheckSumType.NONE) {
                FileStatus[] storeFiles = outputFs.listStatus(nodePath, new PathFilter() {
                    public boolean accept(Path arg0) {
                        if (arg0.getName().endsWith("checksum") && !arg0.getName().startsWith(".")) {
                            return true;
                        }
                        return false;
                    }
                });

                if (storeFiles != null && storeFiles.length > 0) {
                    Arrays.sort(storeFiles, new IndexFileLastComparator());
                    FSDataInputStream input = null;
                    for (FileStatus file : storeFiles) {
                        try {
                            input = outputFs.open(file.getPath());
                            byte fileCheckSum[] = new byte[CheckSum.checkSumLength(this.checkSumType)];
                            input.read(fileCheckSum);
                            logger.debug("Checksum for file " + file.toString() + " - "
                                    + new String(Hex.encodeHex(fileCheckSum)));
                            checkSumGenerator.update(fileCheckSum);
                        } catch (Exception e) {
                            logger.error("Error while reading checksum file " + e.getMessage(), e);
                        } finally {
                            if (input != null)
                                input.close();
                        }
                        outputFs.delete(file.getPath(), false);
                    }

                    metadata.add(ReadOnlyStorageMetadata.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
                    String checkSum = new String(Hex.encodeHex(checkSumGenerator.getCheckSum()));
                    logger.info("Checksum for node " + node.getId() + " - " + checkSum);
                    metadata.add(ReadOnlyStorageMetadata.CHECKSUM, checkSum);
                }
            }

            // Write metadata
            Path metadataPath = new Path(nodePath, ".metadata");
            FSDataOutputStream metadataStream = outputFs.create(metadataPath);
            outputFs.setPermission(metadataPath, new FsPermission(HADOOP_FILE_PERMISSION));
            logger.info("Setting permission to 755 for " + metadataPath);
            metadataStream.write(metadata.toJsonString().getBytes());
            metadataStream.flush();
            metadataStream.close();
        }
    } catch (Exception e) {
        logger.error("Error in Store builder", e);
        throw new VoldemortException(e);
    }
}
From source file:voldemort.store.readwrite.mr.HadoopRWStoreBuilder.java
License:Apache License
/**
 * Run the job
 */
public void build() {
    JobConf conf = new JobConf(config);
    conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
    conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
    conf.set("stores.xml", new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
    conf.setInt("vector.node.id", this.vectorNodeId);
    conf.setLong("vector.node.version", this.vectorNodeVersion);
    conf.setLong("job.start.time.ms", System.currentTimeMillis());

    conf.setPartitionerClass(HadoopRWStoreBuilderPartitioner.class);
    conf.setInputFormat(inputFormatClass);
    conf.setMapperClass(mapperClass);
    conf.setMapOutputKeyClass(BytesWritable.class);
    conf.setMapOutputValueClass(BytesWritable.class);
    conf.setReducerClass(HadoopRWStoreBuilderReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(BytesWritable.class);
    conf.setOutputValueClass(BytesWritable.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setJarByClass(getClass());

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, tempPath);

    try {
        // delete the temp dir if it exists
        FileSystem tempFs = tempPath.getFileSystem(conf);
        tempFs.delete(tempPath, true);

        conf.setInt("num.chunks", reducersPerNode);
        int numReducers = cluster.getNumberOfNodes() * reducersPerNode;
        logger.info("Replication factor = " + storeDef.getReplicationFactor() + ", numNodes = "
                + cluster.getNumberOfNodes() + ", reducers per node = " + reducersPerNode
                + ", numReducers = " + numReducers);
        conf.setNumReduceTasks(numReducers);

        logger.info("Building RW store...");
        JobClient.runJob(conf);
    } catch (Exception e) {
        throw new VoldemortException(e);
    }
}