List of usage examples for org.apache.hadoop.mapreduce.Job.getConfiguration()
public Configuration getConfiguration()
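Before the per-project examples below, here is a minimal, self-contained sketch of the pattern they all share: obtain the job's live Configuration with getConfiguration() and set properties on it before submission. The class name and property keys here are illustrative only and are not taken from any of the projects listed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "getConfiguration example");
        job.setJarByClass(GetConfigurationSketch.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // getConfiguration() returns the Configuration backing this job;
        // properties set here are visible to tasks via context.getConfiguration().
        job.getConfiguration().set("example.custom.property", "some-value");   // illustrative key
        job.getConfiguration().setLong("example.total.bytes", 1024L * 1024L);  // illustrative key

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}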
From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java
License:Apache License
public void testUniformSizeDistCp() throws Exception {
    try {
        clearState();
        Path sourcePath = new Path(SOURCE_PATH).makeQualified(cluster.getFileSystem());
        List<Path> sources = new ArrayList<Path>();
        sources.add(sourcePath);

        Path targetPath = new Path(TARGET_PATH).makeQualified(cluster.getFileSystem());
        DistCpOptions options = new DistCpOptions(sources, targetPath);
        options.setOutPutDirectory(counterOutputPath);
        options.setAtomicCommit(true);
        options.setBlocking(false);

        Job job = new DistCp(configuration, options).execute();
        Path workDir = CopyOutputFormat.getWorkingDirectory(job);
        Path finalDir = CopyOutputFormat.getCommitDirectory(job);

        while (!job.isComplete()) {
            if (cluster.getFileSystem().exists(workDir)) {
                break;
            }
        }
        job.waitForCompletion(true);

        Assert.assertFalse(cluster.getFileSystem().exists(workDir));
        Assert.assertTrue(cluster.getFileSystem().exists(finalDir));
        Assert.assertFalse(cluster.getFileSystem()
                .exists(new Path(job.getConfiguration().get(DistCpConstants.CONF_LABEL_META_FOLDER))));
        verifyResults();
    } catch (Exception e) {
        LOG.error("Exception encountered", e);
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
protected Job createJob(Path inputPath, long totalSize) throws IOException {
    String jobName = getName();
    Configuration conf = currentCluster.getHadoopConf();
    conf.set(ConduitConstants.AUDIT_ENABLED_KEY, System.getProperty(ConduitConstants.AUDIT_ENABLED_KEY));
    Job job = new Job(conf);
    job.setJobName(jobName);

    // DistributedCache.addFileToClassPath(inputFormatJarDestPath, job.getConfiguration());
    job.getConfiguration().set("tmpjars",
            inputFormatJarDestPath.toString() + "," + auditUtilJarDestPath.toString());
    LOG.debug("Adding file [" + inputFormatJarDestPath + "] to distributed cache");

    job.setInputFormatClass(UniformSizeInputFormat.class);
    Class<? extends Mapper<Text, FileStatus, NullWritable, Text>> mapperClass = getMapperClass();
    job.setJarByClass(mapperClass);
    job.setMapperClass(mapperClass);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    // setting identity reducer
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, tmpCounterOutputPath);

    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
    job.getConfiguration().set(LOCALSTREAM_TMP_PATH, tmpPath.toString());
    job.getConfiguration().set(SRC_FS_DEFAULT_NAME_KEY, srcCluster.getHadoopConf().get(FS_DEFAULT_NAME_KEY));

    // set configurations needed for UniformSizeInputFormat
    int numMaps = getNumMapsForJob(totalSize);
    job.getConfiguration().setInt(DistCpConstants.CONF_LABEL_NUM_MAPS, numMaps);
    job.getConfiguration().setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalSize);
    job.getConfiguration().set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, inputPath.toString());
    LOG.info("Expected number of maps [" + numMaps + "] Total data size [" + totalSize + "]");

    return job;
}
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
private void testClusterName(String configName, String currentClusterName) throws Exception {
    ConduitConfigParser parser = new ConduitConfigParser(configName);
    ConduitConfig config = parser.getConfig();
    Set<String> streamsToProcess = new HashSet<String>();
    streamsToProcess.addAll(config.getSourceStreams().keySet());
    Set<String> clustersToProcess = new HashSet<String>();
    Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
    Cluster currentCluster = null;

    for (SourceStream sStream : config.getSourceStreams().values()) {
        for (String cluster : sStream.getSourceClusters()) {
            clustersToProcess.add(cluster);
        }
    }
    if (currentClusterName != null) {
        currentCluster = config.getClusters().get(currentClusterName);
    }
    for (String clusterName : clustersToProcess) {
        Cluster cluster = config.getClusters().get(clusterName);
        cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
        TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster,
                new NullCheckPointProvider(), streamsToProcess);
        services.add(service);
    }

    for (TestLocalStreamService service : services) {
        FileSystem fs = service.getFileSystem();
        service.preExecute();
        if (currentClusterName != null)
            Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName);

        // creating a job with empty input path
        Path tmpJobInputPath = new Path("/tmp/job/input/path");
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);

        Job testJobConf = service.createJob(tmpJobInputPath, 1000);
        testJobConf.waitForCompletion(true);

        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        Path outputCounterPath = new Path(new Path(service.getCluster().getTmpPath(), service.getName()),
                "counters");
        FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus fileSt : statuses) {
            Scanner scanner = new Scanner(fs.open(fileSt.getPath()));
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    Assert.assertEquals(4, tmp.length);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    numberOfCountersPerFile++;
                    sumOfCounterValues += numOfMsgs;
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name =" + counterNameValue
                            + "..skipping the line", e);
                }
            }
        }
        // Should have 2 counters for each file
        Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile);
        // sum of all counter values should be equal to total number of messages
        Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues);

        Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY),
                service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        if (currentCluster == null)
            Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                    testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));

        service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
    }
}
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
private Job createJob(Path inputPath) throws IOException {
    String jobName = "localstream";
    Configuration conf = cluster.getHadoopConf();
    Job job = new Job(conf);
    job.setJobName(jobName);

    KeyValueTextInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setJarByClass(CopyMapper.class);
    job.setMapperClass(CopyMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
    job.getConfiguration().set("localstream.tmp.path", tmpPath.toString());

    return job;
}
From source file:com.intel.hadoop.hbase.dot.KEY.java
License:Apache License
private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass,
        String mrTableName) throws IOException, ClassNotFoundException, InterruptedException {

    this.conf.set(KEY.INPUT_TABLE, mrTableName);
    Job job = new Job(this.conf);
    job.setJobName("Generate Data for [" + mrTableName + "]");
    job.setJarByClass(GenerateTestTable.class);

    job.setInputFormatClass(inputFormatClass);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp", "tempout");
    fs.delete(path, true);

    FileOutputFormat.setOutputPath(job, path);

    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);

    TableMapReduceUtil.addDependencyJars(job);
    // Add a Class from the hbase.jar so it gets registered too.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class);

    TableMapReduceUtil.initCredentials(job);
    job.waitForCompletion(true);
}
From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
    if (actualSeparator != null) {
        conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(mapperClass);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(mapperClass);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(PutSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}
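A driver would typically call this factory method and then submit the returned job. The sketch below is illustrative only and is not taken from the source file; it assumes the surrounding class provides createSubmittableJob as shown above and that args[0] and args[1] carry the table name and input directory.

// Hypothetical driver usage (illustrative only):
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = createSubmittableJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}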
From source file:com.intel.hibench.DFSIOWriter.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    startTime = System.currentTimeMillis();
    benchData.put(new Put(ONE, ONE, startTime));

    Job job = context.getHadoopJob();
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(Generator.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    String sizeStr = context.getRuntimeArguments().get("size");
    if (sizeStr != null) {
        LOG.info("size we get in config is : " + sizeStr);
        long totalBytes = Long.valueOf(sizeStr) * 1024 * 1024;
        job.getConfiguration().setLong(BENCH_SIZE, totalBytes);
        benchData.put(new Put(ONE, THREE, totalBytes));
    }
}
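The value written through job.getConfiguration() above is read back on the task side. This is a minimal sketch of that read, assuming BENCH_SIZE is the same constant used in beforeSubmit(); the mapper name and generic types are hypothetical and may differ from the real Generator class.

// Hypothetical mapper fragment showing the task-side read of the value set above.
public static class SizeAwareMapper extends Mapper<Text, Text, Text, Text> {
    private long totalBytes;

    @Override
    protected void setup(Context context) {
        // Same BENCH_SIZE key that beforeSubmit() wrote via job.getConfiguration();
        // the default of 0L is arbitrary. map() would use totalBytes to bound output.
        totalBytes = context.getConfiguration().getLong(BENCH_SIZE, 0L);
    }
}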
From source file:com.jbw.jobcontrol.Patent.java
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Job job1 = Job.getInstance(conf);
    job1.setJobName("test");
    job1.setJarByClass(Patent.class);

    ChainMapper.addMapper(job1, InverseMapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            conf);
    ChainMapper.addMapper(job1, CountMapper.class, Text.class, Text.class, Text.class, IntWritable.class,
            conf);
    job1.setReducerClass(IntSumReducer.class);

    Job job2 = Job.getInstance();

    ControlledJob cjob1 = new ControlledJob(job1.getConfiguration());
    ControlledJob cjob2 = new ControlledJob(job2.getConfiguration());
    cjob2.addDependingJob(cjob1);

    JobControl jc = new JobControl("process job");
    jc.addJob(cjob1);
    jc.addJob(cjob2);

    Thread t = new Thread(jc);
    t.start();
    while (true) {
        for (ControlledJob j : jc.getRunningJobList()) {
            break;
        }
        break;
    }
    return 0;
}
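Note that the loop at the end of run() exits immediately, so the method returns before the controlled jobs finish. A more conventional pattern (illustrative only, not from the source file) is to poll the JobControl until all jobs are done and then stop its thread:

// Illustrative alternative to the loop above: wait for both controlled jobs to finish.
while (!jc.allFinished()) {
    Thread.sleep(1000);
}
jc.stop();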
From source file:com.junz.hadoop.custom.SytsLogInputFormat.java
License:Apache License
public static void setStartId(Job job, long id) {
    job.getConfiguration().setLong(START_ID_PROPERTY, id);
}
From source file:com.junz.hadoop.custom.SytsLogInputFormat.java
License:Apache License
public static void setNumberOfIds(Job job, long number) {
    job.getConfiguration().setLong(NUMBER_LOG_PROPERTY, number);
}
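These static setters write job-scoped properties through getConfiguration(); the consuming side of such a custom InputFormat usually reads the same keys back from a Configuration. The getters below are a hypothetical counterpart, not taken from the source file; only the property constants mirror the setters above.

// Hypothetical counterpart getters that the InputFormat could use at split time.
public static long getStartId(Configuration conf) {
    return conf.getLong(START_ID_PROPERTY, 0L);
}

public static long getNumberOfIds(Configuration conf) {
    return conf.getLong(NUMBER_LOG_PROPERTY, 0L);
}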