List of usage examples for org.apache.hadoop.mapred JobConf setJar
public void setJar(String jar)
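A minimal sketch of the call itself (the jar path below is hypothetical): setJar points the job at the jar that contains its mapper/reducer classes so the framework can ship it to the cluster, and the value can be a plain path or, as in several of the examples below, the external form of a file: URL.

// Minimal sketch with a hypothetical jar location; not taken from the examples below.
public static JobConf configureJobJar() throws IOException {
    JobConf conf = new JobConf();
    conf.setJobName("example");
    File jobJar = new File("./build/example-job.jar"); // hypothetical path to the job jar
    conf.setJar(jobJar.toURI().toURL().toExternalForm());
    return conf;
}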
From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License:Open Source License
public static JobConf createJobConf(String mapperTransformationFile, String combinerTransformationFile,
        String reducerTransformationFile, String hostname, String hdfsPort, String trackerPort)
        throws IOException, KettleException {
    JobConf conf = new JobConf();
    conf.setJobName("wordcount");
    KettleEnvironment.init();

    // Register Map/Reduce Input and Map/Reduce Output plugin steps
    PluginMainClassType mainClassTypesAnnotation = StepPluginType.class.getAnnotation(PluginMainClassType.class);

    Map<Class<?>, String> inputClassMap = new HashMap<Class<?>, String>();
    inputClassMap.put(mainClassTypesAnnotation.value(), HadoopEnterMeta.class.getName());
    PluginInterface inputStepPlugin = new Plugin(new String[] { "HadoopEnterPlugin" }, StepPluginType.class,
            mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Input",
            "Enter a Hadoop Mapper or Reducer transformation", "MRI.png", false, false, inputClassMap,
            new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, inputStepPlugin);

    Map<Class<?>, String> outputClassMap = new HashMap<Class<?>, String>();
    outputClassMap.put(mainClassTypesAnnotation.value(), HadoopExitMeta.class.getName());
    PluginInterface outputStepPlugin = new Plugin(new String[] { "HadoopExitPlugin" }, StepPluginType.class,
            mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Output",
            "Exit a Hadoop Mapper or Reducer transformation", "MRO.png", false, false, outputClassMap,
            new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, outputStepPlugin);

    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = null;
    TransConfiguration transConfig = null;

    if (mapperTransformationFile != null) {
        conf.setMapRunnerClass(PentahoMapRunnable.class);
        transMeta = new TransMeta(mapperTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-map-xml", transConfig.getXML());
        conf.set("transformation-map-input-stepname", "Injector");
        conf.set("transformation-map-output-stepname", "Output");
    }

    if (combinerTransformationFile != null) {
        conf.setCombinerClass(GenericTransCombiner.class);
        transMeta = new TransMeta(combinerTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-combiner-xml", transConfig.getXML());
        conf.set("transformation-combiner-input-stepname", "Injector");
        conf.set("transformation-combiner-output-stepname", "Output");
    }

    if (reducerTransformationFile != null) {
        conf.setReducerClass((Class<? extends Reducer>) GenericTransReduce.class);
        transMeta = new TransMeta(reducerTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-reduce-xml", transConfig.getXML());
        conf.set("transformation-reduce-input-stepname", "Injector");
        conf.set("transformation-reduce-output-stepname", "Output");
    }

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path("/"));
    FileOutputFormat.setOutputPath(conf, new Path("/"));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));
    return conf;
}
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java
License:Apache License
public static JobConf createJobConf(String mapperTransformationFile, String combinerTransformationFile,
        String reducerTransformationFile, String hostname, String hdfsPort, String trackerPort)
        throws IOException, KettleException {
    JobConf conf = new JobConf();
    conf.setJobName("wordcount");
    KettleEnvironment.init();

    // Register Map/Reduce Input and Map/Reduce Output plugin steps
    PluginMainClassType mainClassTypesAnnotation = StepPluginType.class.getAnnotation(PluginMainClassType.class);

    Map<Class<?>, String> inputClassMap = new HashMap<Class<?>, String>();
    inputClassMap.put(mainClassTypesAnnotation.value(), HadoopEnterMeta.class.getName());
    PluginInterface inputStepPlugin = new Plugin(new String[] { "HadoopEnterPlugin" }, StepPluginType.class,
            mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Input",
            "Enter a Hadoop Mapper or Reducer transformation", "MRI.png", false, false, inputClassMap,
            new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, inputStepPlugin);

    Map<Class<?>, String> outputClassMap = new HashMap<Class<?>, String>();
    outputClassMap.put(mainClassTypesAnnotation.value(), HadoopExitMeta.class.getName());
    PluginInterface outputStepPlugin = new Plugin(new String[] { "HadoopExitPlugin" }, StepPluginType.class,
            mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Output",
            "Exit a Hadoop Mapper or Reducer transformation", "MRO.png", false, false, outputClassMap,
            new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, outputStepPlugin);

    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = null;
    TransConfiguration transConfig = null;

    if (mapperTransformationFile != null) {
        conf.setMapRunnerClass(PentahoMapRunnable.class);
        transMeta = new TransMeta(mapperTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-map-xml", transConfig.getXML());
        conf.set("transformation-map-input-stepname", "Injector");
        conf.set("transformation-map-output-stepname", "Output");
    }

    if (combinerTransformationFile != null) {
        conf.setCombinerClass(GenericTransCombiner.class);
        transMeta = new TransMeta(combinerTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-combiner-xml", transConfig.getXML());
        conf.set("transformation-combiner-input-stepname", "Injector");
        conf.set("transformation-combiner-output-stepname", "Output");
    }

    if (reducerTransformationFile != null) {
        conf.setReducerClass(GenericTransReduce.class);
        transMeta = new TransMeta(reducerTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-reduce-xml", transConfig.getXML());
        conf.set("transformation-reduce-input-stepname", "Injector");
        conf.set("transformation-reduce-output-stepname", "Output");
    }

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path("/"));
    FileOutputFormat.setOutputPath(conf, new Path("/"));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));
    return conf;
}
From source file:org.pentaho.hadoop.mapreduce.test.TransMapReduceJobTestFIXME.java
License:Open Source License
@Test
public void submitJob() throws Exception {
    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");
    KettleEnvironment.init();

    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = new TransMeta("./test-res/wordcount-mapper.ktr");
    TransConfiguration transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-map-xml", transConfig.getXML());

    transMeta = new TransMeta("./test-res/wordcount-reducer.ktr");
    transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-reduce-xml", transConfig.getXML());

    conf.set("transformation-map-input-stepname", "Injector");
    conf.set("transformation-map-output-stepname", "Output");
    conf.set("transformation-reduce-input-stepname", "Injector");
    conf.set("transformation-reduce-output-stepname", "Output");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });
    conf.setMapperClass((Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransMap"));
    conf.setCombinerClass((Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));
    conf.setReducerClass((Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName());
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals("Bye\t4\nGood\t2\nGoodbye\t1\nHadoop\t2\nHello\t5\nThis\t1\nWorld\t5\nand\t1\ncounting\t1\nextra\t1\nfor\t1\nis\t1\nsome\t1\ntext\t1\nwords\t1\n",
            output);
}
From source file:org.smartfrog.services.hadoop.benchmark.citerank.CiteRankTool.java
License:Open Source License
/**
 * Create a configuration bound to this class, with various options set up
 *
 * @return the job
 */
protected JobConf createConfiguration() {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName(getName());
    conf.setJar(getJarName());
    conf.setNumMapTasks(NUM_MAP_TASKS);
    conf.setInt("dfs.replication", NUM_REPLICAS);
    return conf;
}
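The example above supplies the jar name explicitly via setJar(getJarName()). When the driver class itself is packaged inside the job jar, an alternative is to let Hadoop locate the jar, either through the JobConf(Configuration, Class) constructor already used here or through setJarByClass. A hedged sketch, where MyDriver is a hypothetical driver class loaded from a jar on the classpath:

// Sketch only: let JobConf find the jar containing the (hypothetical) MyDriver class
// instead of hard-coding a jar name.
JobConf conf = new JobConf(new Configuration(), MyDriver.class); // records MyDriver's jar if one can be found
conf.setJarByClass(MyDriver.class); // explicit equivalent of what the constructor above does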
From source file:org.springframework.data.hadoop.mapreduce.JobFactoryBean.java
License:Apache License
@SuppressWarnings("rawtypes") public void afterPropertiesSet() throws Exception { final Configuration cfg = ConfigurationUtils.createFrom(configuration, properties); buildGenericOptions(cfg);/* w w w. ja va2 s .c om*/ if (StringUtils.hasText(user)) { UserGroupInformation ugi = UserGroupInformation.createProxyUser(user, UserGroupInformation.getLoginUser()); ugi.doAs(new PrivilegedExceptionAction<Void>() { @Override public Void run() throws Exception { job = new Job(cfg); return null; } }); } else { job = new Job(cfg); } ClassLoader loader = (beanClassLoader != null ? beanClassLoader : org.springframework.util.ClassUtils.getDefaultClassLoader()); if (jar != null) { JobConf conf = (JobConf) job.getConfiguration(); conf.setJar(jar.getURI().toString()); loader = ExecutionUtils.createParentLastClassLoader(jar, beanClassLoader, cfg); conf.setClassLoader(loader); } // set first to enable auto-detection of K/V to skip the key/value types to be specified if (mapper != null) { Class<? extends Mapper> mapperClass = resolveClass(mapper, loader, Mapper.class); job.setMapperClass(mapperClass); configureMapperTypesIfPossible(job, mapperClass); } if (reducer != null) { Class<? extends Reducer> reducerClass = resolveClass(reducer, loader, Reducer.class); job.setReducerClass(reducerClass); configureReducerTypesIfPossible(job, reducerClass); } if (StringUtils.hasText(name)) { job.setJobName(name); } if (combiner != null) { job.setCombinerClass(resolveClass(combiner, loader, Reducer.class)); } if (groupingComparator != null) { job.setGroupingComparatorClass(resolveClass(groupingComparator, loader, RawComparator.class)); } if (inputFormat != null) { job.setInputFormatClass(resolveClass(inputFormat, loader, InputFormat.class)); } if (mapKey != null) { job.setMapOutputKeyClass(resolveClass(mapKey, loader, Object.class)); } if (mapValue != null) { job.setMapOutputValueClass(resolveClass(mapValue, loader, Object.class)); } if (numReduceTasks != null) { job.setNumReduceTasks(numReduceTasks); } if (key != null) { job.setOutputKeyClass(resolveClass(key, loader, Object.class)); } if (value != null) { job.setOutputValueClass(resolveClass(value, loader, Object.class)); } if (outputFormat != null) { job.setOutputFormatClass(resolveClass(outputFormat, loader, OutputFormat.class)); } if (partitioner != null) { job.setPartitionerClass(resolveClass(partitioner, loader, Partitioner.class)); } if (sortComparator != null) { job.setSortComparatorClass(resolveClass(sortComparator, loader, RawComparator.class)); } if (StringUtils.hasText(workingDir)) { job.setWorkingDirectory(new Path(workingDir)); } if (jarClass != null) { job.setJarByClass(jarClass); } if (!CollectionUtils.isEmpty(inputPaths)) { for (String path : inputPaths) { FileInputFormat.addInputPath(job, new Path(path)); } } if (StringUtils.hasText(outputPath)) { FileOutputFormat.setOutputPath(job, new Path(outputPath)); } if (compressOutput != null) { FileOutputFormat.setCompressOutput(job, compressOutput); } if (codecClass != null) { FileOutputFormat.setOutputCompressorClass(job, resolveClass(codecClass, loader, CompressionCodec.class)); } processJob(job); }
From source file:voldemort.store.readonly.mr.azkaban.AbstractHadoopJob.java
License:Apache License
public static void setClassLoaderAndJar(JobConf conf, Class jobClass) {
    conf.setClassLoader(Thread.currentThread().getContextClassLoader());
    String jar = HadoopUtils.findContainingJar(jobClass, Thread.currentThread().getContextClassLoader());
    if (jar != null) {
        conf.setJar(jar);
    }
}
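HadoopUtils.findContainingJar itself is not shown above, but helpers of this kind are commonly implemented along the same lines as Hadoop's own jar lookup: resolve the class file as a classpath resource and, when the resource is served from a jar: URL, strip the URL back down to the jar's file path. A rough sketch of that idea (an assumption about the technique, not the Voldemort implementation; it needs java.io.IOException, java.net.URL, java.net.URLDecoder and java.util.Enumeration):

// Hedged sketch of a findContainingJar-style lookup; not the actual HadoopUtils code.
static String findContainingJar(Class<?> clazz, ClassLoader loader) throws IOException {
    String resource = clazz.getName().replace('.', '/') + ".class";
    for (Enumeration<URL> urls = loader.getResources(resource); urls.hasMoreElements();) {
        URL url = urls.nextElement();
        if ("jar".equals(url.getProtocol())) {
            String path = url.getPath();                 // e.g. file:/path/to/job.jar!/pkg/Foo.class
            path = path.substring(0, path.indexOf('!')); // keep only the jar portion
            return URLDecoder.decode(path, "UTF-8").replaceFirst("^file:", "");
        }
    }
    return null; // class was not loaded from a jar
}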