List of usage examples for org.apache.hadoop.mapred JobConf setNumMapTasks
public void setNumMapTasks(int n)
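For orientation before the examples below, here is a minimal, self-contained sketch of calling setNumMapTasks when configuring a job with the old org.apache.hadoop.mapred API. The class name and the input/output paths taken from args are illustrative only, not drawn from the examples. Note that for most InputFormats the value is only a hint: the actual number of map tasks is determined by the number of input splits, whereas setNumReduceTasks is honored exactly.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;

public class SetNumMapTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetNumMapTasksExample.class);
        conf.setJobName("setNumMapTasks-example");

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setMapperClass(IdentityMapper.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Hint the framework to use 4 map tasks; the InputFormat may still
        // choose a different number based on the input splits.
        conf.setNumMapTasks(4);
        conf.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));  // hypothetical input path
        FileOutputFormat.setOutputPath(conf, new Path(args[1])); // hypothetical output path

        JobClient.runJob(conf);
    }
}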
From source file:org.cloudata.core.testjob.tera.TeraJob.java
License:Apache License
public void runJob(String tableName, int numOfTablets, int dataLength, int totalGb, String keyOutputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableInfo = new TableSchema(tableName, "Test");
        tableInfo.addColumn(new ColumnInfo("Col1"));
        tableInfo.addColumn(new ColumnInfo("Col2", TableSchema.CACHE_TYPE));
        tableInfo.addColumn(new ColumnInfo("Col3"));
        CTable.createTable(nconf, tableInfo);
    }
    jobConf.setJobName("TeraOnlineJob" + "(" + new Date() + ")");

    long rowsPerTask = ((((long) totalGb) * 1024L * 1024L * 1024L) / ((long) dataLength)) / (long) numOfTablets;

    jobConf.setInt("teraJob.dataLength", dataLength);
    jobConf.setLong("teraJob.rowsPerTask", rowsPerTask);
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    FileOutputFormat.setOutputPath(jobConf, new Path(keyOutputPath));

    //<MAP>
    jobConf.setMapperClass(TeraOnlineMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.setNumMapTasks(numOfTablets);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        // Run job
        JobClient.runJob(jobConf);
    } finally {
        // delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.elasticsearch.hadoop.integration.mr.AbstractExtraMRTests.java
License:Apache License
@Parameters
public static Collection<Object[]> configs() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();
    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);

    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(TestUtils.gibberishDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(TestUtils.gibberishJson(conf)));

    return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
From source file:org.elasticsearch.hadoop.integration.mr.AbstractMROldApiSaveTest.java
License:Apache License
@Parameters
public static Collection<Object[]> configs() {
    JobConf conf = HdpBootstrap.hadoopConfig();
    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);

    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(TestUtils.sampleArtistsDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(TestUtils.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
From source file:org.gbif.ocurrence.index.solr.ConfTester.java
License:Apache License
public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) {
    JobConf job = new JobConf(getConf(), ConfTester.class);
    job.setNumMapTasks(numMapper);
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(ConfTester.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(ConfTester.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setInputFormat(SleepInputFormat.class);
    job.setPartitionerClass(ConfTester.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Sleep job");
    FileInputFormat.addInputPath(job, new Path("ignored"));
    job.setLong("sleep.job.map.sleep.time", mapSleepTime);
    job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
    job.setInt("sleep.job.map.sleep.count", mapSleepCount);
    job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    return job;
}
From source file:org.hbasene.index.create.mapred.BuildTableIndex.java
License:Apache License
/**
 * @param conf
 * @param numMapTasks
 * @param numReduceTasks
 * @param indexDir
 * @param tableName
 * @param columnNames
 * @return JobConf
 */
public JobConf createJob(Configuration conf, int numMapTasks, int numReduceTasks, String indexDir,
        String tableName, String columnNames) {
    JobConf jobConf = new JobConf(conf, BuildTableIndex.class);
    jobConf.setJobName("build index for table " + tableName);
    jobConf.setNumMapTasks(numMapTasks);
    // number of indexes to partition into
    jobConf.setNumReduceTasks(numReduceTasks);

    // use identity map (a waste, but just as an example)
    IdentityTableMap.initJob(tableName, columnNames, IdentityTableMap.class, jobConf);

    // use IndexTableReduce to build a Lucene index
    jobConf.setReducerClass(IndexTableReduce.class);
    FileOutputFormat.setOutputPath(jobConf, new Path(indexDir));
    jobConf.setOutputFormat(IndexOutputFormat.class);
    jobConf.setJarByClass(BuildTableIndex.class);
    return jobConf;
}
From source file:org.hxx.hadoop.GeneratorHbase.java
License:Apache License
private RunningJob generateJob(String table, Path segment, int reduceCnt, long topN, boolean filter,
        boolean norm, boolean force) throws IOException {
    LOG.info("Generator: segment=" + segment);

    JobConf job = new NutchJob(getConf());
    // job.setJarByClass(GeneratorHbase.class);
    job.setJobName("generate:" + table + " "
            + (new SimpleDateFormat("HH:mm:ss")).format(System.currentTimeMillis()) + " path=" + segment);
    // job.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 300000);

    if (reduceCnt == -1) {
        reduceCnt = job.getNumMapTasks(); // a partition per fetch task
    }
    if ("local".equals(job.get("mapred.job.tracker")) && reduceCnt != 1) {
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        reduceCnt = 1;
    }

    // job.setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
    job.setLong(GENERATOR_TOP_N, topN);
    job.setBoolean(GENERATOR_FILTER, filter);
    job.setBoolean(GENERATOR_NORMALISE, norm);
    job.set(GENERATL_TABLE, table);
    job.setInt(GENERATL_REDUCECNT, reduceCnt);
    job.setInt("partition.url.seed", new Random().nextInt());

    job.setInputFormat(CodeInputFormat.class);
    job.setNumMapTasks(1);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(GenerateMark.class);
    job.setNumReduceTasks(reduceCnt);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setOutputKeyComparatorClass(HashComparator.class);

    Path output = new Path(segment, CrawlDatum.GENERATE_DIR_NAME);
    FileOutputFormat.setOutputPath(job, output);

    RunningJob r = JobClient.runJob(job);
    return r;
}
From source file:org.hxx.hadoop.GeneratorRedHbase.java
License:Apache License
private RunningJob generateJob(String table, Path segment, int numLists, long topN, long curTime,
        boolean filter, boolean norm, boolean force) throws IOException {
    LOG.info("Generator: segment=" + segment);

    JobConf job = new NutchJob(getConf());
    job.setJarByClass(GeneratorRedHbase.class);
    job.setJobName("generate: from " + table + " "
            + (new SimpleDateFormat("MMdd HH:mm:ss")).format(System.currentTimeMillis()));
    // job.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 300000);

    if (numLists == -1) {
        numLists = job.getNumMapTasks(); // a partition per fetch task
    }
    if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) { // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }

    // job.setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
    job.setLong(GENERATOR_TOP_N, topN);
    job.setBoolean(GENERATOR_FILTER, filter);
    job.setBoolean(GENERATOR_NORMALISE, norm);
    job.set(GENERATL_TABLE, table);
    job.setInt(GENERATL_REDUCENUM, numLists);
    job.setInt("partition.url.seed", new Random().nextInt());

    job.setInputFormat(CodeInputFormat.class);
    job.setNumMapTasks(1);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(GenerateMark.class);
    job.setNumReduceTasks(numLists);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setOutputKeyComparatorClass(HashComparator.class);

    Path output = new Path(segment, CrawlDatum.GENERATE_DIR_NAME);
    FileOutputFormat.setOutputPath(job, output);

    RunningJob r = null;
    try {
        r = JobClient.runJob(job);
    } catch (IOException e) {
        throw e;
    }
    return r;
}
From source file:org.mitre.bio.mapred.Fasta2SequenceFile.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    boolean cleanLogs = false;

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-c".equals(args[i])) {
                cleanLogs = true;
            } else if ("-n".equals(args[i])) {
                conf.setInt(HEADER_FORMAT, Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    return initJob(conf, other_args.get(0), other_args.get(1), cleanLogs);
}
From source file:org.mitre.bio.mapred.TotalSequenceLength.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    boolean cleanLogs = false;

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-c".equals(args[i])) {
                cleanLogs = true;
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    int res = initJob(conf, other_args.get(0), other_args.get(1), cleanLogs);
    int cnt = this.getCount(conf, other_args.get(1));
    System.out.printf("Total length of sequences is %d\n", cnt);
    return res;
}
From source file:org.mitre.ccv.mapred.CalculateCompositionVectors.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    int start = CalculateKmerCounts.DEFAULT_START;
    int end = CalculateKmerCounts.DEFAULT_END;
    boolean cleanLogs = false;

    // @TODO: use commons getopts
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-s".equals(args[i])) {
                start = Integer.parseInt(args[++i]);
            } else if ("-e".equals(args[i])) {
                end = Integer.parseInt(args[++i]);
            } else if ("-c".equals(args[i])) {
                cleanLogs = true;
            } else if ("-libjars".equals(args[i])) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf));
                URL[] libjars = FileUtils.getLibJars(conf);
                if (libjars != null && libjars.length > 0) {
                    // Add libjars to client/tasks classpath
                    conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
                    // Adds libjars to our classpath
                    Thread.currentThread().setContextClassLoader(
                            new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    return initJob(conf, start, end, other_args.get(0), other_args.get(1), cleanLogs);
}