List of usage examples for org.apache.hadoop.mapred JobConf setOutputValueClass
public void setOutputValueClass(Class<?> theClass)
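Before the per-project listings, here is a minimal, self-contained sketch (not taken from any of the source files below) of where setOutputValueClass typically sits in an old-API (org.apache.hadoop.mapred) driver. The class name OutputValueClassSketch, the TokenMapper/SumReducer classes, and the argument-derived paths are illustrative placeholders, not part of any listed project:

    import java.io.IOException;
    import java.util.Iterator;
    import java.util.StringTokenizer;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;

    public class OutputValueClassSketch {

        // Hypothetical mapper: emits (word, 1) for every token in a line.
        public static class TokenMapper extends MapReduceBase
                implements Mapper<LongWritable, Text, Text, IntWritable> {
            private static final IntWritable ONE = new IntWritable(1);
            private final Text word = new Text();

            public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
                StringTokenizer tokens = new StringTokenizer(value.toString());
                while (tokens.hasMoreTokens()) {
                    word.set(tokens.nextToken());
                    output.collect(word, ONE);
                }
            }
        }

        // Hypothetical reducer: sums the counts for each word.
        public static class SumReducer extends MapReduceBase
                implements Reducer<Text, IntWritable, Text, IntWritable> {
            public void reduce(Text key, Iterator<IntWritable> values,
                    OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
                int sum = 0;
                while (values.hasNext()) {
                    sum += values.next().get();
                }
                output.collect(key, new IntWritable(sum));
            }
        }

        public static void main(String[] args) throws Exception {
            JobConf conf = new JobConf(OutputValueClassSketch.class);
            conf.setJobName("output-value-class-sketch");

            // Declares the key/value types the job writes to its OutputFormat.
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(IntWritable.class);

            conf.setMapperClass(TokenMapper.class);
            conf.setReducerClass(SumReducer.class);

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            FileInputFormat.setInputPaths(conf, new Path(args[0]));   // input dir (placeholder)
            FileOutputFormat.setOutputPath(conf, new Path(args[1]));  // output dir (placeholder)

            JobClient.runJob(conf);
        }
    }

Because no separate map-output classes are set here, the mapper is expected to emit the same Text/IntWritable pair declared above; setMapOutputKeyClass/setMapOutputValueClass (as in the WordCountTest example below) override the types for the intermediate stage only.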
From source file:invertedIndex.startJob.java
    public static void start(String[] args) {
        try {
            JobConf conf = new JobConf(WordCount.class);
            conf.setJobName("wordcount");

            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(IntWritable.class);

            conf.setMapperClass(lineIndexMapper.class);
            conf.setCombinerClass(lineIndexReducer.class);
            conf.setReducerClass(lineIndexReducer.class);

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            FileInputFormat.setInputPaths(conf, new Path(args[1]));
            Path outputDir = new Path(args[2]);

            // Remove any stale output directory before the job runs.
            FileSystem fs = FileSystem.get(conf);
            fs.delete(outputDir, true);

            FileOutputFormat.setOutputPath(conf, outputDir);

            JobClient.runJob(conf);
        } catch (Exception Exp) {
            Exp.printStackTrace();
        }
    }
From source file:io.aos.t4f.hadoop.mapred.WordCountMapReduceTest.java
License:Apache License
    /**
     * The main driver for the word count map/reduce program.
     * Invoke this method to submit the map/reduce job.
     *
     * @throws IOException when there are communication problems with the job tracker.
     */
    public int run(String[] args) throws Exception {
        JobConf conf = new JobConf(getConf(), WordCountMapReduceTest.class);
        conf.setJobName("wordcount");

        // The keys are words (strings), the values are counts (ints).
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(WordCountMapper.class);
        conf.setCombinerClass(WordCountReducer.class);
        conf.setReducerClass(WordCountReducer.class);

        List<String> other_args = new ArrayList<String>();
        for (int i = 0; i < args.length; ++i) {
            try {
                if ("-m".equals(args[i])) {
                    conf.setNumMapTasks(Integer.parseInt(args[++i]));
                } else if ("-r".equals(args[i])) {
                    conf.setNumReduceTasks(Integer.parseInt(args[++i]));
                } else {
                    other_args.add(args[i]);
                }
            } catch (NumberFormatException except) {
                System.out.println("ERROR: Integer expected instead of " + args[i]);
                return printUsage();
            } catch (ArrayIndexOutOfBoundsException except) {
                System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
                return printUsage();
            }
        }

        // Make sure there are exactly 2 parameters left.
        if (other_args.size() != 2) {
            System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
            return printUsage();
        }

        FileInputFormat.setInputPaths(conf, other_args.get(0));
        FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

        JobClient.runJob(conf);
        return 0;
    }
From source file:io.aos.t4f.hadoop.mapred.WordCountTest.java
License:Apache License
    private JobConf createJobConf() {
        JobConf conf = mrCluster.createJobConf();
        conf.setJobName("wordcount test");

        conf.setMapperClass(WordCountMapper.class);
        conf.setReducerClass(WordCountReducer.class);

        conf.setInputFormat(TextInputFormat.class);

        // Intermediate (map output) and final (job output) key/value types are the same here.
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        return conf;
    }
From source file:io.dstream.tez.TezDAGBuilder.java
License:Apache License
    private JobConf buildJobConf(Class<? extends Writable> keyClass, Class<? extends Writable> valueClass) {
        JobConf jobConf = new JobConf(this.tezClient.getTezConfiguration());
        jobConf.setOutputKeyClass(keyClass);
        jobConf.setOutputValueClass(valueClass);
        return jobConf;
    }
From source file:io.fluo.stress.trie.Generate.java
License:Apache License
    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 4) {
            log.error("Usage: " + this.getClass().getSimpleName()
                    + " <numMappers> <numbersPerMapper> <max> <output dir>");
            System.exit(-1);
        }
        int numMappers = Integer.parseInt(args[0]);
        int numPerMapper = Integer.parseInt(args[1]);
        long max = Long.parseLong(args[2]);
        Path out = new Path(args[3]);

        Preconditions.checkArgument(numMappers > 0, "numMappers <= 0");
        Preconditions.checkArgument(numPerMapper > 0, "numPerMapper <= 0");
        Preconditions.checkArgument(max > 0, "max <= 0");

        JobConf job = new JobConf(getConf());
        job.setJobName(this.getClass().getName());
        job.setJarByClass(Generate.class);

        job.setInt(TRIE_GEN_NUM_PER_MAPPER_PROP, numPerMapper);
        job.setInt(TRIE_GEN_NUM_MAPPERS_PROP, numMappers);
        job.setLong(TRIE_GEN_MAX_PROP, max);

        job.setInputFormat(RandomLongInputFormat.class);
        job.setNumReduceTasks(0);

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, out);

        RunningJob runningJob = JobClient.runJob(job);
        runningJob.waitForCompletion();
        return runningJob.isSuccessful() ? 0 : -1;
    }
From source file:io.fluo.stress.trie.NumberIngest.java
License:Apache License
    public static void main(String[] args) throws IOException, ConfigurationException {
        // Parse arguments
        if (args.length != 4) {
            log.error("Usage: NumberIngest <numMappers> <numbersPerMapper> <nodeSize> <fluoProps>");
            System.exit(-1);
        }
        int numMappers = Integer.parseInt(args[0]);
        int numPerMapper = Integer.parseInt(args[1]);
        int nodeSize = Integer.parseInt(args[2]);
        String fluoPropsPath = args[3];

        String hadoopPrefix = System.getenv("HADOOP_PREFIX");
        if (hadoopPrefix == null) {
            hadoopPrefix = System.getenv("HADOOP_HOME");
            if (hadoopPrefix == null) {
                log.error("HADOOP_PREFIX or HADOOP_HOME needs to be set!");
                System.exit(-1);
            }
        }

        // create test name
        String testId = String.format("test-%d", (new Date().getTime() / 1000));
        String testDir = "/trie-stress/" + testId;
        setupHdfs(hadoopPrefix, testDir, numMappers, numPerMapper);

        JobConf ingestConf = new JobConf(NumberIngest.class);
        ingestConf.setJobName("NumberIngest");

        FluoConfiguration config = new FluoConfiguration(new File(fluoPropsPath));
        loadConfig(ingestConf, ConfigurationConverter.getProperties(config));
        ingestConf.setInt(TRIE_NODE_SIZE_PROP, nodeSize);

        ingestConf.setOutputKeyClass(LongWritable.class);
        ingestConf.setOutputValueClass(IntWritable.class);
        ingestConf.setMapperClass(NumberIngest.IngestMapper.class);
        ingestConf.setReducerClass(NumberIngest.UniqueReducer.class);

        FileInputFormat.setInputPaths(ingestConf, new Path(testDir + "/input/"));
        FileOutputFormat.setOutputPath(ingestConf, new Path(testDir + "/unique/"));

        RunningJob ingestJob = JobClient.runJob(ingestConf);
        ingestJob.waitForCompletion();

        if (ingestJob.isSuccessful()) {
            JobConf countConf = new JobConf(NumberIngest.class);
            countConf.setJobName("NumberCount");

            countConf.setOutputKeyClass(Text.class);
            countConf.setOutputValueClass(LongWritable.class);
            countConf.setMapperClass(NumberIngest.CountMapper.class);
            countConf.setReducerClass(NumberIngest.CountReducer.class);

            FileInputFormat.setInputPaths(countConf, new Path(testDir + "/unique/"));
            FileOutputFormat.setOutputPath(countConf, new Path(testDir + "/output/"));

            RunningJob countJob = JobClient.runJob(countConf);
            countJob.waitForCompletion();
            if (countJob.isSuccessful()) {
                log.info("Ingest and count jobs were successful");
                log.info("Output can be viewed @ " + testDir);
                System.exit(0);
            } else {
                log.error("Count job failed for " + testId);
            }
        } else {
            log.error("Ingest job failed. Skipping count job for " + testId);
        }
        System.exit(-1);
    }
From source file:io.hops.erasure_coding.MapReduceEncoder.java
License:Apache License
    /**
     * Create a new job conf based on the configuration passed in.
     *
     * @param conf
     * @return
     */
    private static JobConf createJobConf(Configuration conf) {
        JobConf jobconf = new JobConf(conf, MapReduceEncoder.class);
        jobName = NAME + " " + dateForm.format(new Date(BaseEncodingManager.now()));
        jobconf.setUser(BaseEncodingManager.JOBUSER);
        jobconf.setJobName(jobName);
        jobconf.setMapSpeculativeExecution(false);
        RaidUtils.parseAndSetOptions(jobconf, SCHEDULER_OPTION_LABEL);

        jobconf.setJarByClass(MapReduceEncoder.class);
        jobconf.setInputFormat(DistRaidInputFormat.class);
        jobconf.setOutputKeyClass(Text.class);
        jobconf.setOutputValueClass(Text.class);

        jobconf.setMapperClass(DistRaidMapper.class);
        jobconf.setNumReduceTasks(0);
        return jobconf;
    }
From source file:it.isislab.sof.core.engine.hadoop.sshclient.utils.simulation.executor.SOF.java
License:Apache License
    public static void main(String[] args) {
        /*
         * Example (one-shot mode):
         * aids /home/michele/Scrivania/aids netlogo /home/michele/Scrivania/aids/aids.nlogo
         * /home/michele/Scrivania/aids/input.tmp /home/michele/Scrivania/aids/output
         * /home/michele/Scrivania/aids/output.xml false pepp ciao
         */

        if (args.length < 9 || args.length == 11 || args.length == 12 || args.length >= 15) {
            System.out.println("Usage:");
            System.out.println("java -jar SCUD.jar " + "<simulation_name> " + "<simulation_path_home> "
                    + "<simulation_type[mason |netlogo |generic]>" + "<simulation_generic_interpreter_path>"
                    + "<simulation_program_path> " + "<simulation_mapper_input_path> "
                    + "<simulation_mapper_output_path> " + "<simulation_output_domain_xmlfile> "
                    + "<simulation_input_path> " + "<<simulation_rating_path>>" + "<oneshot[one|loop]> "
                    + "<author_name> " + "<simulation_description> " + "<path_interpreter_evaluate_file> "
                    + "<evaluate_file_path>");
            System.exit(-1);
        }

        Configuration conf = null;
        JobConf job = null;
        String AUTHOR = null; /* author name */
        String SIMULATION_NAME = null; /* simulation name */
        String SIMULATION_HOME = null; /* simulation path */
        String SIM_TYPE = null; /* mason, netlogo, generic */
        String SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH = null;
        String SIM_EXECUTABLE_SIMULATION_PROGRAM = null; /* executable program *.jar | *.nlogo */
        String SIM_EXECUTION_INPUT_DATA_MAPPER = null; /* input.data path */
        String SIM_EXECUTION_OUTPUT_MAPPER = null; /* output loop(i) path */
        String SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = null; /* path of domain file */
        String SIM_EXECUTION_INPUT_XML = null; /* execution input path */
        boolean ISLOOP = false; /* false[one] | true[loop] */
        // String DESCRIPTION = null; /* simulation description */
        String INTERPRETER_REMOTE_PATH_EVALUATION = null; /* remote interpreter path for the evaluation program */
        String EXECUTABLE_RATING_FILE = null; /* path of rating file */
        String SIM_RATING_PATH = null;

        /*
         * Example (loop mode):
         * aids /home/michele/Scrivania/aids netlogo /home/michele/Scrivania/aids/aids.nlogo
         * /home/michele/Scrivania/aids/input.tmp /home/michele/Scrivania/aids/output
         * /home/michele/Scrivania/aids/domain.xml /home/michele/Scrivania/aids/input loop pepp ciao
         * /usr/bin/python /home/michele/Scrivania/aids/evaluate.py
         */
        if (args.length == 13) {
            SIMULATION_NAME = args[0];
            SIMULATION_HOME = args[1];
            SIM_TYPE = args[2];
            SIM_EXECUTABLE_SIMULATION_PROGRAM = args[3];
            SIM_EXECUTION_INPUT_DATA_MAPPER = args[4];
            SIM_EXECUTION_OUTPUT_MAPPER = args[5];
            SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[6];
            SIM_EXECUTION_INPUT_XML = args[7];
            SIM_RATING_PATH = args[8];
            ISLOOP = Boolean.parseBoolean(args[9]);
            AUTHOR = args[10];
            // DESCRIPTION = args[11];
            INTERPRETER_REMOTE_PATH_EVALUATION = args[11];
            EXECUTABLE_RATING_FILE = args[12];
        } else if (args.length == 9) {
            SIMULATION_NAME = args[0];
            SIMULATION_HOME = args[1];
            SIM_TYPE = args[2];
            SIM_EXECUTABLE_SIMULATION_PROGRAM = args[3];
            SIM_EXECUTION_INPUT_DATA_MAPPER = args[4];
            SIM_EXECUTION_OUTPUT_MAPPER = args[5];
            SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[6];
            ISLOOP = Boolean.parseBoolean(args[7]);
            AUTHOR = args[8];
            // DESCRIPTION = args[9];
        } else if (args.length == 14) {
            SIMULATION_NAME = args[0];
            SIMULATION_HOME = args[1];
            SIM_TYPE = args[2];
            SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH = args[3];
            SIM_EXECUTABLE_SIMULATION_PROGRAM = args[4];
            SIM_EXECUTION_INPUT_DATA_MAPPER = args[5];
            SIM_EXECUTION_OUTPUT_MAPPER = args[6];
            SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[7];
            SIM_EXECUTION_INPUT_XML = args[8];
            SIM_RATING_PATH = args[9];
            ISLOOP = Boolean.parseBoolean(args[10]);
            AUTHOR = args[11];
            // DESCRIPTION = args[12];
            INTERPRETER_REMOTE_PATH_EVALUATION = args[12];
            EXECUTABLE_RATING_FILE = args[13];
        } else if (args.length == 10) {
            SIMULATION_NAME = args[0];
            SIMULATION_HOME = args[1];
            SIM_TYPE = args[2];
            SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH = args[3];
            SIM_EXECUTABLE_SIMULATION_PROGRAM = args[4];
            SIM_EXECUTION_INPUT_DATA_MAPPER = args[5];
            SIM_EXECUTION_OUTPUT_MAPPER = args[6];
            SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[7];
            ISLOOP = Boolean.parseBoolean(args[8]);
            AUTHOR = args[9];
            // DESCRIPTION = args[10];
        }

        if (!(SIM_TYPE.equalsIgnoreCase("mason") || SIM_TYPE.equalsIgnoreCase("netlogo")
                || SIM_TYPE.equalsIgnoreCase("generic"))) {
            System.exit(-2);
        }

        conf = new Configuration();
        job = new JobConf(conf, SOF.class);
        job.setJobName(SIMULATION_NAME);

        job.set("simulation.home", SIMULATION_HOME);
        job.set("simulation.name", SIMULATION_NAME);
        job.set("simulation.type", SIM_TYPE);

        if (SIM_TYPE.equalsIgnoreCase("generic")) {
            job.set("simulation.interpreter.genericsim", SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH);
        }

        job.set("simulation.program.simulation", SIM_EXECUTABLE_SIMULATION_PROGRAM);
        job.set("simulation.executable.input", SIM_EXECUTION_INPUT_DATA_MAPPER);
        job.set("simulation.executable.output", SIM_EXECUTION_OUTPUT_MAPPER);
        job.setBoolean("simulation.executable.mode", ISLOOP);
        job.set("simulation.executable.author", AUTHOR);
        // job.set("simulation.executable.description", DESCRIPTION);
        job.set("simulation.description.output.domain", SIM_DESCRIPTION_OUTPUT_XML_DOMAIN);

        /*
         * Generates the .tmp file; comment out this line. Local testing only.
         */
        // XmlToText.convertXmlFileToFileText(conf, "/home/lizard87/Desktop/mason_test/input.xml");
        // XmlToText.convertXmlFileToFileText(conf, "/home/lizard87/Desktop/input.xml");
        // XmlToText.convertXmlFileToFileText(conf, "/home/lizard87/Desktop/aids/input.xml");

        if (ISLOOP) {
            job.set("simulation.description.input", SIM_EXECUTION_INPUT_XML);
            job.set("simulation.program.rating", EXECUTABLE_RATING_FILE);
            // job.set("simulation.interpreter.selection", INTERPRETER_REMOTE_PATH_SELECTION);
            job.set("simulation.interpreter.rating", INTERPRETER_REMOTE_PATH_EVALUATION);
            job.set("simulation.executable.loop.rating", SIM_RATING_PATH);
        }

        FileInputFormat.addInputPath(job, new Path(SIM_EXECUTION_INPUT_DATA_MAPPER)); /* input directory */
        FileOutputFormat.setOutputPath(job, new Path(SIM_EXECUTION_OUTPUT_MAPPER));

        if (SIM_TYPE.equalsIgnoreCase("mason")) {
            job.setMapperClass(SOFMapperMason.class);
            job.setReducerClass(SOFReducerMason.class);
        } else if (SIM_TYPE.equalsIgnoreCase("netlogo")) {
            job.setMapperClass(SOFMapperNetLogo.class);
            job.setReducerClass(SOFReducerNetLogo.class);
        } else if (SIM_TYPE.equalsIgnoreCase("generic")) {
            job.setMapperClass(SOFMapperGeneric.class);
            job.setReducerClass(SOFReducerGeneric.class);
        }

        job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
        job.setOutputValueClass(org.apache.hadoop.io.Text.class);

        JobClient jobc;
        try {
            jobc = new JobClient(job);
            System.out.println(jobc + " " + job);
            // runJob() blocks until the job completes; exit 0 once it has finished successfully.
            RunningJob runjob = JobClient.runJob(job);
            if (runjob.isSuccessful()) {
                System.exit(0);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
From source file:Iterator.SpeciesIterDriver2.java
@SuppressWarnings("deprecation") public static void main(String[] args) { int iterationCount = 0; while (iterationCount <= 20) { System.out.println("Running Iteration - " + iterationCount); JobClient client = new JobClient(); JobConf conf = new JobConf(SpeciesIterDriver2.class); conf.setJobName("Species Iter - " + iterationCount); // This property is set to generate 5 reducer tasks conf.setNumReduceTasks(5);// ww w. j ava 2 s.co m conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //output-iterator-0 contains the input data FileInputFormat.setInputPaths(conf, new Path("output-iterator-" + iterationCount)); iterationCount++; FileOutputFormat.setOutputPath(conf, new Path("output-iterator-" + iterationCount)); conf.setMapperClass(SpeciesIterMapper2.class); conf.setReducerClass(SpeciesIterReducer2.class); conf.setCombinerClass(SpeciesIterReducer2.class); client.setConf(conf); try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } } }
From source file:ivory.core.index.MergeGlobalStatsAcrossIndexSegments.java
License:Apache License
    public int runTool() throws Exception {
        JobConf conf = new JobConf(getConf(), MergeGlobalStatsAcrossIndexSegments.class);
        FileSystem fs = FileSystem.get(conf);

        String collectionName = conf.get("Ivory.CollectionName");
        String indexPaths = conf.get("Ivory.IndexPaths");
        String dataOutputPath = conf.get("Ivory.DataOutputPath");
        int dfThreshold = conf.getInt("Ivory.DfThreshold", 0);

        // first, compute size of global term space
        Path tmpPaths = new Path("/tmp/index-paths.txt");
        FSDataOutputStream out = fs.create(tmpPaths, true);
        for (String s : indexPaths.split(",")) {
            out.write(new String(s + "\n").getBytes());
        }
        out.close();

        LOG.info("Job: ComputeNumberOfTermsAcrossIndexSegments");
        conf.setJobName("ComputeNumberOfTermsAcrossIndexSegments:" + collectionName);

        FileInputFormat.addInputPath(conf, tmpPaths);

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.set("mapred.child.java.opts", "-Xmx2048m");
        conf.setInputFormat(NLineInputFormat.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(PairOfIntLong.class);
        conf.setOutputFormat(NullOutputFormat.class);
        conf.setMapperClass(MyMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        long startTime = System.currentTimeMillis();
        RunningJob job = JobClient.runJob(conf);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        Counters counters = job.getCounters();
        long totalNumTerms = counters
                .findCounter("org.apache.hadoop.mapred.Task$Counter", 6, "REDUCE_INPUT_GROUPS").getCounter();

        LOG.info("total number of terms in global dictionary = " + totalNumTerms);

        // now build the dictionary
        fs.delete(new Path(dataOutputPath), true);

        conf = new JobConf(getConf(), MergeGlobalStatsAcrossIndexSegments.class);

        LOG.info("Job: MergeGlobalStatsAcrossIndexSegments");
        conf.setJobName("MergeGlobalStatsAcrossIndexSegments:" + collectionName);

        FileInputFormat.addInputPath(conf, tmpPaths);

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.set("mapred.child.java.opts", "-Xmx2048m");
        conf.setInputFormat(NLineInputFormat.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(PairOfIntLong.class);
        conf.setOutputFormat(NullOutputFormat.class);
        conf.setMapperClass(MyMapper.class);
        conf.setReducerClass(MyReducer.class);
        conf.setLong("Ivory.IndexNumberOfTerms", totalNumTerms);

        startTime = System.currentTimeMillis();
        job = JobClient.runJob(conf);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        // compute number of docs, collection length, average doc length
        long collectionLength = 0;
        int docCount = 0;
        for (String index : indexPaths.split(",")) {
            LOG.info("reading stats for " + index);

            RetrievalEnvironment env = new RetrievalEnvironment(index, fs);

            long l = env.readCollectionLength();
            int n = env.readCollectionDocumentCount();

            LOG.info(" - CollectionLength: " + l);
            LOG.info(" - CollectionDocumentCount: " + n);

            collectionLength += l;
            docCount += n;
        }

        float avgdl = (float) collectionLength / docCount;

        LOG.info("all index segments: ");
        LOG.info(" - CollectionLength: " + collectionLength);
        LOG.info(" - CollectionDocumentCount: " + docCount);
        LOG.info(" - AverageDocumentLength: " + avgdl);

        RetrievalEnvironment env = new RetrievalEnvironment(dataOutputPath, fs);
        env.writeCollectionAverageDocumentLength(avgdl);
        env.writeCollectionLength(collectionLength);
        env.writeCollectionDocumentCount(docCount);

        return 0;
    }