List of usage examples for org.apache.hadoop.mapred.JobConf.setCombinerClass
public void setCombinerClass(Class<? extends Reducer> theClass)
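A combiner set with setCombinerClass runs on the map output before it is shuffled to the reducers, so it must accept and emit the same key/value types as the map output. Because the framework may apply the combiner zero, one, or several times, the operation it performs should be associative and commutative, which is why most of the examples below simply reuse their reducer class as the combiner. A minimal sketch of the pattern with the old org.apache.hadoop.mapred API follows; WordCountDriver, WordCountMapper, WordCountReducer, and the input/output paths are illustrative placeholders, not names taken from the sources below.

// Minimal sketch of the setCombinerClass pattern (placeholder names, not from the examples below).
JobConf conf = new JobConf(WordCountDriver.class);   // hypothetical driver class
conf.setJobName("wordcount-sketch");

conf.setOutputKeyClass(Text.class);                  // map and reduce output key type
conf.setOutputValueClass(IntWritable.class);         // map and reduce output value type

conf.setMapperClass(WordCountMapper.class);          // hypothetical mapper: emits <Text, IntWritable>
conf.setCombinerClass(WordCountReducer.class);       // pre-aggregates map output locally before the shuffle
conf.setReducerClass(WordCountReducer.class);        // reused as reducer; safe because summing is associative

conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);

FileInputFormat.setInputPaths(conf, new Path("/tmp/wordcount/input"));    // placeholder paths
FileOutputFormat.setOutputPath(conf, new Path("/tmp/wordcount/output"));

JobClient.runJob(conf);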
From source file:org.asayler.WikiTitleCount.java
License:Apache License
/**
 * The main driver for the wikititlecount map/reduce program.
 * Invoke this method to submit the map/reduce job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WikiTitleCount.class);
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();

    int num_maps = 1;
    int num_reducers = 1;

    conf.setJobName("wikititlecount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    /** Set default number of mappers */
    num_maps = (int) (cluster.getMaxMapTasks());

    /** Set default number of reducers */
    num_reducers = (int) (cluster.getMaxReduceTasks() * 0.9);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            other_args.add(args[i]);
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    /* Set mappers and reducers */
    conf.setNumMapTasks(num_maps);
    conf.setNumReduceTasks(num_reducers);

    JobClient.runJob(conf);
    return 0;
}
From source file:org.hadoop.tdg.MaxTemperatureDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("Max temperature");

    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MaxTemperatureMapper.class);
    conf.setCombinerClass(MaxTemperatureReducer.class);
    conf.setReducerClass(MaxTemperatureReducer.class);

    JobClient.runJob(conf);
    return 0;
}
From source file:org.mitre.bio.mapred.TotalSequenceLength.java
License:Open Source License
/**
 * Init the job with the given parameters and run it.
 *
 * @param jobConf the hadoop job configuration
 * @param input   input {@link SequenceFile} path
 * @param output  output path (this will contain ONE part with the length)
 * @return zero if successful
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String input, String output, boolean cleanLogs) throws Exception {
    JobConf conf = new JobConf(jobConf, TotalSequenceLength.class);
    conf.setJobName("TotalSequenceLength");

    // We can only handle one reducer
    if (conf.getNumReduceTasks() != 1) {
        conf.setNumReduceTasks(1);
        LOG.info("Setting number of reducers to ONE!");
    }

    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(SequenceMapClass.class);
    conf.setOutputKeyClass(IntWritable.class);   // map output key class
    conf.setOutputValueClass(IntWritable.class); // map output value class

    conf.setCombinerClass(LengthReduceClass.class);
    conf.setReducerClass(LengthReduceClass.class);
    FileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);

    if (cleanLogs) {
        LOG.info("removing log directory");
        Path path = new Path(output, "_logs");
        FileSystem fs = path.getFileSystem(jobConf);
        fs.delete(path, true);
    }
    return 0;
}
From source file:org.mitre.ccv.mapred.CalculateKmerCounts.java
License:Open Source License
/**
 * Start up a map-reduce job with the given parameters.
 *
 * <P>Setting the system property "kmer.count.parent.fast.map" will result in this using a
 * {@link java.util.Map} to speed up the output of kmers at the expense of memory.
 *
 * @param jobConf
 * @param start starting window size
 * @param end   ending window size
 * @param input
 * @param output
 * @return
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, int start, int end, String input, String output) throws Exception {
    JobConf conf = new JobConf(jobConf, CalculateKmerCounts.class);
    conf.setJobName("CalculateKmerCounts");

    if (start <= 2)
        throw new IllegalArgumentException("Value of 'start' argument must be larger than 2");

    // Save our window size so that the tasks have access to them
    conf.set(START, Integer.toString(start));
    conf.set(END, Integer.toString(end));
    //conf.set(FAST_MAP, fastMap ? "Y" : "N");

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(KmerCountMap.class);
    conf.setOutputKeyClass(Text.class);                // map output key class
    conf.setOutputValueClass(KmerCountWritable.class); // map output value class

    // Set up combiner/reducer
    conf.setCombinerClass(KmerCountReducer.class);
    conf.setReducerClass(KmerCountReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);
    return 0;
}
From source file:org.mitre.ccv.mapred.CalculateKmerRevisedRelativeEntropy.java
License:Open Source License
/**
 * Start up a map-reduce job with the given parameters.
 *
 * @param jobConf
 * @param globalInput
 * @param cvInput
 * @param output
 * @param cleanLogs
 * @return
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String globalInput, String cvInput, String output, boolean cleanLogs)
        throws Exception {
    JobConf conf = new JobConf(jobConf, CalculateKmerRevisedRelativeEntropy.class);
    conf.setJobName("CalculateKmerRevisedRelativeEntropy");

    /*
     * Set up paths
     */
    String ts = FileUtils.getSimpleDate();
    String cvOutput = output + "_" + ts + COMPOSITION_VECTORS_KMER_POSTFIX;

    /* commaSeparatedPaths */
    String mergedInput = cvOutput + "," + globalInput;

    /* merged output */
    String mergedOutput = output + "_" + ts + MERGED_KMER_POSTFIX;

    /*
     * First, map all the CompositionVector's k-mers to Text as keys and
     * local k-mer/value pairs (KmerPiValuePairWritables) as values.
     */
    JobConf subConf = new JobConf(conf);
    subConf.setJobName("CalculateKmerRevisedRelativeEntropy-CompositionVectors");

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(subConf, cvInput);
    subConf.setInputFormat(SequenceFileInputFormat.class);
    subConf.setMapperClass(CompositionVectorMap.class);
    subConf.setOutputKeyClass(Text.class);                       // job output key class
    subConf.setOutputValueClass(StringDoublePairWritable.class); // job output value class

    // Uses default reducer (IdentityReducer)
    subConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(subConf, new Path(cvOutput));

    LOG.info("Converting CompositionVectors to k-mer/pi-value pairs.");
    JobClient.runJob(subConf);

    /*
     * Second, map (merge) all the k-mer/pi-value pairs together in an
     * array of values (KmerPiValueArrayWritables).
     */
    subConf = new JobConf(conf);
    subConf.setJobName("CalculateKmerRevisedRelativeEntropy-Merging");

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(subConf, mergedInput);
    subConf.setInputFormat(SequenceFileInputFormat.class);
    subConf.setMapperClass(MergeMap.class);
    subConf.setOutputKeyClass(Text.class);
    subConf.setOutputValueClass(KmerPiValueArrayWritable.class);

    // Set up combiner/reducer
    subConf.setCombinerClass(MergeReducer.class);
    subConf.setReducerClass(MergeReducer.class);
    subConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(subConf, new Path(mergedOutput));

    LOG.info("Merging k-mers/pi-values from CompositionVectors and all sequences (global)");
    JobClient.runJob(subConf);

    /*
     * Third, calculate entropies (map-reduce)
     */
    subConf = new JobConf(conf);
    subConf.setJobName("CalculateKmerRevisedRelativeEntropy-RRE");

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(subConf, mergedOutput);
    subConf.setInputFormat(SequenceFileInputFormat.class);
    subConf.setMapperClass(EntropyMap.class);
    subConf.setOutputKeyClass(Text.class);
    subConf.setOutputValueClass(KmerEntropyPairWritable.class);

    // Set up combiner and reducer
    subConf.setCombinerClass(EntropyCombiner.class);
    subConf.setReducerClass(EntropyReducer.class);

    if (conf.getBoolean(TEXT_OUTPUT, false)) {
        FileOutputFormat.setOutputPath(subConf, new Path(output));
    } else {
        subConf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(subConf, new Path(output));
    }

    LOG.info("Calculating entropies");
    JobClient.runJob(subConf);

    /*
     * Remove tmp directories
     */
    Path tmp = new Path(cvOutput);
    FileSystem fs = tmp.getFileSystem(conf);
    fs.delete(tmp, true);

    tmp = new Path(mergedOutput);
    fs.delete(tmp, true);

    return 0;
}
From source file:org.mitre.ccv.mapred.InvertKmerProbabilities.java
License:Open Source License
/**
 * Start up the job with the given parameters.
 *
 * @param jobConf   The {@link JobConf} to use
 * @param input     path to the {@link SequenceFile}s
 * @param output    path to save the output
 * @param cleanLogs if <code>true</code> remove the logs
 * @return
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String input, String output, boolean cleanLogs) throws Exception {
    JobConf conf = new JobConf(jobConf, InvertKmerProbabilities.class);
    conf.setJobName("InvertKmerFrequencies");

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(InverterMapper.class);
    conf.setOutputKeyClass(Text.class);                         // final output key class
    conf.setOutputValueClass(KmerProbabilityMapWritable.class); // final output value class

    // Set up combiner/reducer
    conf.setCombinerClass(InverterReducer.class);
    conf.setReducerClass(InverterReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);

    if (cleanLogs) {
        LOG.info("removing log directory");
        Path path = new Path(output, "_logs");
        FileSystem fs = path.getFileSystem(jobConf);
        fs.delete(path, true);
    }
    return 0;
}
From source file:org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException {
    result.setNrErrors(0);

    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$

    String hadoopDistro = System.getProperty("hadoop.distribution.name", hadoopDistribution);
    hadoopDistro = environmentSubstitute(hadoopDistro);
    if (Const.isEmpty(hadoopDistro)) {
        hadoopDistro = "generic";
    }

    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToOpenLogFile", logFileName, //$NON-NLS-1$
                e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        URL resolvedJarUrl = null;
        String jarUrlS = environmentSubstitute(jarUrl);
        if (jarUrlS.indexOf("://") == -1) {
            // default to file://
            File jarFile = new File(jarUrlS);
            resolvedJarUrl = jarFile.toURI().toURL();
        } else {
            resolvedJarUrl = new URL(jarUrlS);
        }

        final String cmdLineArgsS = environmentSubstitute(cmdLineArgs);

        if (log.isDetailed())
            logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.ResolvedJar",
                    resolvedJarUrl.toExternalForm()));

        if (isSimple) {
            /*
            final AtomicInteger taskCount = new AtomicInteger(0);
            final AtomicInteger successCount = new AtomicInteger(0);
            final AtomicInteger failedCount = new AtomicInteger(0);
            */

            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.SimpleMode"));

            List<Class<?>> classesWithMains = JarUtility
                    .getClassesInJarWithMain(resolvedJarUrl.toExternalForm(), getClass().getClassLoader());
            for (final Class<?> clazz : classesWithMains) {
                Runnable r = new Runnable() {
                    public void run() {
                        try {
                            final ClassLoader cl = Thread.currentThread().getContextClassLoader();
                            try {
                                // taskCount.incrementAndGet();
                                Thread.currentThread().setContextClassLoader(clazz.getClassLoader());
                                Method mainMethod = clazz.getMethod("main", new Class[] { String[].class });
                                Object[] args = (cmdLineArgsS != null)
                                        ? new Object[] { cmdLineArgsS.split(" ") }
                                        : new Object[0];
                                mainMethod.invoke(null, args);
                            } finally {
                                Thread.currentThread().setContextClassLoader(cl);
                                // successCount.incrementAndGet();
                                // taskCount.decrementAndGet();
                            }
                        } catch (Throwable ignored) {
                            // skip, try the next one
                            // logError(ignored.getMessage());
                            // failedCount.incrementAndGet();
                            ignored.printStackTrace();
                        }
                    }
                };
                Thread t = new Thread(r);
                t.start();
            }

            // uncomment to implement blocking
            /*
            if (blocking) {
                while (taskCount.get() > 0 && !parentJob.isStopped()) {
                    Thread.sleep(1000);
                }

                if (!parentJob.isStopped()) {
                    result.setResult(successCount.get() > 0);
                    result.setNrErrors((successCount.get() > 0) ? 0 : 1);
                } else {
                    // we can't really know at this stage if
                    // the hadoop job will finish successfully
                    // because we have to stop now
                    result.setResult(true); // look on the bright side of life :-)...
                    result.setNrErrors(0);
                }
            } else {
            */

            // non-blocking - just set success equal to no failures arising
            // from invocation
            // result.setResult(failedCount.get() == 0);
            // result.setNrErrors(failedCount.get());
            result.setResult(true);
            result.setNrErrors(0);
            /* } */
        } else {
            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.AdvancedMode"));

            URL[] urls = new URL[] { resolvedJarUrl };
            URLClassLoader loader = new URLClassLoader(urls, getClass().getClassLoader());

            JobConf conf = new JobConf();
            String hadoopJobNameS = environmentSubstitute(hadoopJobName);
            conf.setJobName(hadoopJobNameS);

            String outputKeyClassS = environmentSubstitute(outputKeyClass);
            conf.setOutputKeyClass(loader.loadClass(outputKeyClassS));
            String outputValueClassS = environmentSubstitute(outputValueClass);
            conf.setOutputValueClass(loader.loadClass(outputValueClassS));

            if (mapperClass != null) {
                String mapperClassS = environmentSubstitute(mapperClass);
                Class<? extends Mapper> mapper = (Class<? extends Mapper>) loader.loadClass(mapperClassS);
                conf.setMapperClass(mapper);
            }
            if (combinerClass != null) {
                String combinerClassS = environmentSubstitute(combinerClass);
                Class<? extends Reducer> combiner = (Class<? extends Reducer>) loader.loadClass(combinerClassS);
                conf.setCombinerClass(combiner);
            }
            if (reducerClass != null) {
                String reducerClassS = environmentSubstitute(reducerClass);
                Class<? extends Reducer> reducer = (Class<? extends Reducer>) loader.loadClass(reducerClassS);
                conf.setReducerClass(reducer);
            }

            if (inputFormatClass != null) {
                String inputFormatClassS = environmentSubstitute(inputFormatClass);
                Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) loader
                        .loadClass(inputFormatClassS);
                conf.setInputFormat(inputFormat);
            }
            if (outputFormatClass != null) {
                String outputFormatClassS = environmentSubstitute(outputFormatClass);
                Class<? extends OutputFormat> outputFormat = (Class<? extends OutputFormat>) loader
                        .loadClass(outputFormatClassS);
                conf.setOutputFormat(outputFormat);
            }

            String hdfsHostnameS = environmentSubstitute(hdfsHostname);
            String hdfsPortS = environmentSubstitute(hdfsPort);
            String jobTrackerHostnameS = environmentSubstitute(jobTrackerHostname);
            String jobTrackerPortS = environmentSubstitute(jobTrackerPort);

            // See if we can auto detect the distribution first
            HadoopConfigurer configurer = HadoopConfigurerFactory.locateConfigurer();

            if (configurer == null) {
                // go with what has been selected by the user
                configurer = HadoopConfigurerFactory.getConfigurer(hadoopDistro);

                // if the user-specified distribution is detectable, make sure it is still
                // the current distribution!
                if (configurer != null && configurer.isDetectable()) {
                    if (!configurer.isAvailable()) {
                        throw new KettleException(BaseMessages.getString(PKG,
                                "JobEntryHadoopJobExecutor.Error.DistroNoLongerPresent",
                                configurer.distributionName()));
                    }
                }
            }
            if (configurer == null) {
                throw new KettleException(BaseMessages.getString(PKG,
                        "JobEntryHadoopJobExecutor.Error.UnknownHadoopDistribution", hadoopDistro));
            }
            logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.Message.DistroConfigMessage",
                    configurer.distributionName()));

            List<String> configMessages = new ArrayList<String>();
            configurer.configure(hdfsHostnameS, hdfsPortS, jobTrackerHostnameS, jobTrackerPortS, conf,
                    configMessages);
            for (String m : configMessages) {
                logBasic(m);
            }

            String inputPathS = environmentSubstitute(inputPath);
            String[] inputPathParts = inputPathS.split(",");
            List<Path> paths = new ArrayList<Path>();
            for (String path : inputPathParts) {
                paths.add(new Path(configurer.getFilesystemURL() + path));
            }
            Path[] finalPaths = paths.toArray(new Path[paths.size()]);

            //FileInputFormat.setInputPaths(conf, new Path(configurer.getFilesystemURL() + inputPathS));
            FileInputFormat.setInputPaths(conf, finalPaths);
            String outputPathS = environmentSubstitute(outputPath);
            FileOutputFormat.setOutputPath(conf, new Path(configurer.getFilesystemURL() + outputPathS));

            // process user defined values
            for (UserDefinedItem item : userDefined) {
                if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
                        && !"".equals(item.getValue())) {
                    String nameS = environmentSubstitute(item.getName());
                    String valueS = environmentSubstitute(item.getValue());
                    conf.set(nameS, valueS);
                }
            }

            String workingDirectoryS = environmentSubstitute(workingDirectory);
            conf.setWorkingDirectory(new Path(configurer.getFilesystemURL() + workingDirectoryS));
            conf.setJar(jarUrl);

            String numMapTasksS = environmentSubstitute(numMapTasks);
            String numReduceTasksS = environmentSubstitute(numReduceTasks);
            int numM = 1;
            try {
                numM = Integer.parseInt(numMapTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of map tasks '" + numMapTasksS + "'. Setting num"
                        + "map tasks to 1");
            }
            int numR = 1;
            try {
                numR = Integer.parseInt(numReduceTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of reduce tasks '" + numReduceTasksS + "'. Setting num"
                        + "reduce tasks to 1");
            }

            conf.setNumMapTasks(numM);
            conf.setNumReduceTasks(numR);

            JobClient jobClient = new JobClient(conf);
            RunningJob runningJob = jobClient.submitJob(conf);

            String loggingIntervalS = environmentSubstitute(loggingInterval);
            int logIntv = 60;
            try {
                logIntv = Integer.parseInt(loggingIntervalS);
            } catch (NumberFormatException e) {
                logError("Can't parse logging interval '" + loggingIntervalS + "'. Setting "
                        + "logging interval to 60");
            }

            if (blocking) {
                try {
                    int taskCompletionEventIndex = 0;
                    while (!parentJob.isStopped() && !runningJob.isComplete()) {
                        if (logIntv >= 1) {
                            printJobStatus(runningJob);
                            taskCompletionEventIndex = logTaskMessages(runningJob, taskCompletionEventIndex);
                            Thread.sleep(logIntv * 1000);
                        } else {
                            Thread.sleep(60000);
                        }
                    }

                    if (parentJob.isStopped() && !runningJob.isComplete()) {
                        // We must stop the job running on Hadoop
                        runningJob.killJob();
                        // Indicate this job entry did not complete
                        result.setResult(false);
                    }

                    printJobStatus(runningJob);
                    // Log any messages we may have missed while polling
                    logTaskMessages(runningJob, taskCompletionEventIndex);
                } catch (InterruptedException ie) {
                    logError(ie.getMessage(), ie);
                }

                // Entry is successful if the MR job is successful overall
                result.setResult(runningJob.isSuccessful());
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License:Open Source License
public static JobConf createJobConf(String mapperTransformationFile, String combinerTransformationFile,
        String reducerTransformationFile, String hostname, String hdfsPort, String trackerPort)
        throws IOException, KettleException {
    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    KettleEnvironment.init();

    // Register Map/Reduce Input and Map/Reduce Output plugin steps
    PluginMainClassType mainClassTypesAnnotation = StepPluginType.class
            .getAnnotation(PluginMainClassType.class);

    Map<Class<?>, String> inputClassMap = new HashMap<Class<?>, String>();
    inputClassMap.put(mainClassTypesAnnotation.value(), HadoopEnterMeta.class.getName());
    PluginInterface inputStepPlugin = new Plugin(new String[] { "HadoopEnterPlugin" }, StepPluginType.class,
            mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Input",
            "Enter a Hadoop Mapper or Reducer transformation", "MRI.png", false, false, inputClassMap,
            new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, inputStepPlugin);

    Map<Class<?>, String> outputClassMap = new HashMap<Class<?>, String>();
    outputClassMap.put(mainClassTypesAnnotation.value(), HadoopExitMeta.class.getName());
    PluginInterface outputStepPlugin = new Plugin(new String[] { "HadoopExitPlugin" }, StepPluginType.class,
            mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Output",
            "Exit a Hadoop Mapper or Reducer transformation", "MRO.png", false, false, outputClassMap,
            new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, outputStepPlugin);

    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = null;
    TransConfiguration transConfig = null;

    if (mapperTransformationFile != null) {
        conf.setMapRunnerClass(PentahoMapRunnable.class);
        transMeta = new TransMeta(mapperTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-map-xml", transConfig.getXML());
        conf.set("transformation-map-input-stepname", "Injector");
        conf.set("transformation-map-output-stepname", "Output");
    }

    if (combinerTransformationFile != null) {
        conf.setCombinerClass(GenericTransCombiner.class);
        transMeta = new TransMeta(combinerTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-combiner-xml", transConfig.getXML());
        conf.set("transformation-combiner-input-stepname", "Injector");
        conf.set("transformation-combiner-output-stepname", "Output");
    }

    if (reducerTransformationFile != null) {
        conf.setReducerClass((Class<? extends Reducer>) GenericTransReduce.class);
        transMeta = new TransMeta(reducerTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-reduce-xml", transConfig.getXML());
        conf.set("transformation-reduce-input-stepname", "Injector");
        conf.set("transformation-reduce-output-stepname", "Output");
    }

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path("/"));
    FileOutputFormat.setOutputPath(conf, new Path("/"));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    return conf;
}
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java
License:Apache License
public static JobConf createJobConf(String mapperTransformationFile, String combinerTransformationFile,
        String reducerTransformationFile, String hostname, String hdfsPort, String trackerPort)
        throws IOException, KettleException {
    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    KettleEnvironment.init();

    // Register Map/Reduce Input and Map/Reduce Output plugin steps
    PluginMainClassType mainClassTypesAnnotation = StepPluginType.class
            .getAnnotation(PluginMainClassType.class);

    Map<Class<?>, String> inputClassMap = new HashMap<Class<?>, String>();
    inputClassMap.put(mainClassTypesAnnotation.value(), HadoopEnterMeta.class.getName());
    PluginInterface inputStepPlugin = new Plugin(new String[] { "HadoopEnterPlugin" }, StepPluginType.class,
            mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Input",
            "Enter a Hadoop Mapper or Reducer transformation", "MRI.png", false, false, inputClassMap,
            new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, inputStepPlugin);

    Map<Class<?>, String> outputClassMap = new HashMap<Class<?>, String>();
    outputClassMap.put(mainClassTypesAnnotation.value(), HadoopExitMeta.class.getName());
    PluginInterface outputStepPlugin = new Plugin(new String[] { "HadoopExitPlugin" }, StepPluginType.class,
            mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Output",
            "Exit a Hadoop Mapper or Reducer transformation", "MRO.png", false, false, outputClassMap,
            new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, outputStepPlugin);

    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = null;
    TransConfiguration transConfig = null;

    if (mapperTransformationFile != null) {
        conf.setMapRunnerClass(PentahoMapRunnable.class);
        transMeta = new TransMeta(mapperTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-map-xml", transConfig.getXML());
        conf.set("transformation-map-input-stepname", "Injector");
        conf.set("transformation-map-output-stepname", "Output");
    }

    if (combinerTransformationFile != null) {
        conf.setCombinerClass(GenericTransCombiner.class);
        transMeta = new TransMeta(combinerTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-combiner-xml", transConfig.getXML());
        conf.set("transformation-combiner-input-stepname", "Injector");
        conf.set("transformation-combiner-output-stepname", "Output");
    }

    if (reducerTransformationFile != null) {
        conf.setReducerClass(GenericTransReduce.class);
        transMeta = new TransMeta(reducerTransformationFile);
        transConfig = new TransConfiguration(transMeta, transExecConfig);
        conf.set("transformation-reduce-xml", transConfig.getXML());
        conf.set("transformation-reduce-input-stepname", "Injector");
        conf.set("transformation-reduce-output-stepname", "Output");
    }

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path("/"));
    FileOutputFormat.setOutputPath(conf, new Path("/"));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    return conf;
}
From source file:org.pentaho.hadoop.mapreduce.test.TestSubmitMapReduceJob.java
License:Open Source License
@Test
public void submitJob() throws Exception {
    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./test-res/pentaho-mapreduce-sample.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Map"));
    conf.setCombinerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));
    conf.setReducerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJarByClass(loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount"));
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName() + "");
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals("Bye\t1\nGoodbye\t1\nHadoop\t2\nHello\t2\nWorld\t2\n", output);
}