List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks
public void setNumReduceTasks(int n)
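setNumReduceTasks(int n) tells the framework how many reduce tasks to run for the job: map output is partitioned n ways and each reducer writes one output file. Setting n to 0 makes the job map-only, so mapper output goes straight through the output format with no shuffle or sort. Below is a minimal, self-contained sketch of both modes (the driver class name, job name, and argument paths are illustrative, not drawn from the source files that follow):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class NumReduceTasksDemo {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(NumReduceTasksDemo.class);
        conf.setJobName("setNumReduceTasks demo");

        // Identity map/reduce: records pass through unchanged.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(LongWritable.class); // keys from the default TextInputFormat
        conf.setOutputValueClass(Text.class);

        // Request 4 reduce tasks: map output is partitioned 4 ways and the
        // job writes one sorted part file per reducer.
        conf.setNumReduceTasks(4);
        // conf.setNumReduceTasks(0) would make this a map-only job instead:
        // no shuffle or sort, mappers write directly through the output format.

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}

The requested count also interacts with the runtime: several of the generator examples below check mapred.job.tracker and force exactly one partition when the job runs under the local job runner.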
From source file: org.elasticsearch.hadoop.integration.mr.AbstractMROldApiSaveTest.java
License: Apache License

@Parameters
public static Collection<Object[]> configs() {
    JobConf conf = HdpBootstrap.hadoopConfig();
    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);

    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(TestUtils.sampleArtistsDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(TestUtils.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
From source file: org.elasticsearch.hadoop.integration.mr.AbstractMROldApiSearchTest.java
License: Apache License

private JobConf createJobConf() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();
    conf.setInputFormat(EsInputFormat.class);
    conf.setOutputFormat(PrintStreamOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    boolean type = random.nextBoolean();
    Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
    conf.setOutputValueClass(mapType);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.set(ConfigurationOptions.ES_QUERY, query);
    conf.setNumReduceTasks(0);

    conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata));
    conf.set(ConfigurationOptions.ES_READ_METADATA_VERSION, String.valueOf(true));
    conf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson));

    QueryTestParams.provisionQueries(conf);
    FileInputFormat.setInputPaths(conf, new Path(TestUtils.sampleArtistsDat()));
    return conf;
}
From source file: org.gbif.ocurrence.index.solr.ConfTester.java
License: Apache License

public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) {
    JobConf job = new JobConf(getConf(), ConfTester.class);
    job.setNumMapTasks(numMapper);
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(ConfTester.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(ConfTester.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setInputFormat(SleepInputFormat.class);
    job.setPartitionerClass(ConfTester.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Sleep job");
    FileInputFormat.addInputPath(job, new Path("ignored"));
    job.setLong("sleep.job.map.sleep.time", mapSleepTime);
    job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
    job.setInt("sleep.job.map.sleep.count", mapSleepCount);
    job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    return job;
}
From source file: org.hbasene.index.create.mapred.BuildTableIndex.java
License: Apache License

/**
 * @param conf
 * @param numMapTasks
 * @param numReduceTasks
 * @param indexDir
 * @param tableName
 * @param columnNames
 * @return JobConf
 */
public JobConf createJob(Configuration conf, int numMapTasks, int numReduceTasks,
        String indexDir, String tableName, String columnNames) {
    JobConf jobConf = new JobConf(conf, BuildTableIndex.class);
    jobConf.setJobName("build index for table " + tableName);
    jobConf.setNumMapTasks(numMapTasks);
    // number of indexes to partition into
    jobConf.setNumReduceTasks(numReduceTasks);

    // use identity map (a waste, but just as an example)
    IdentityTableMap.initJob(tableName, columnNames, IdentityTableMap.class, jobConf);

    // use IndexTableReduce to build a Lucene index
    jobConf.setReducerClass(IndexTableReduce.class);
    FileOutputFormat.setOutputPath(jobConf, new Path(indexDir));
    jobConf.setOutputFormat(IndexOutputFormat.class);
    jobConf.setJarByClass(BuildTableIndex.class);
    return jobConf;
}
From source file: org.hxx.hadoop.GeneratorHbase.java
License: Apache License

private RunningJob generateJob(String table, Path segment, long topN, int reduceCnt,
        boolean filter, boolean norm, boolean force) throws IOException {
    LOG.info("Generator: from table=" + table + " segment=" + segment);

    JobConf job = new NutchJob(getConf());
    // job.setJarByClass(GeneratorHbase.class);
    job.setJobName("generate:" + table + " "
            + (new SimpleDateFormat("HH:mm:ss")).format(System.currentTimeMillis()) + " path=" + segment);

    if (reduceCnt == -1) {
        reduceCnt = job.getNumMapTasks(); // a partition per fetch task
    }
    if ("local".equals(job.get("mapred.job.tracker")) && reduceCnt != 1) {
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        reduceCnt = 1;
    }

    // job.setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
    job.setLong(GENERATOR_TOP_N, topN);
    job.setBoolean(GENERATOR_FILTER, filter);
    job.setBoolean(GENERATOR_NORMALISE, norm);
    job.set(GENERATL_TABLE, table);
    job.setInt(GENERATL_REDUCECNT, reduceCnt);
    job.setInt("partition.url.seed", new Random().nextInt());

    job.setInputFormat(TableTopInputFormat.class);
    job.setMapperClass(GenerateMark.class);
    job.setPartitionerClass(GenerateMark.class);
    job.setNumReduceTasks(reduceCnt);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setOutputKeyComparatorClass(HashComparator.class);
    Path output = new Path(segment, CrawlDatum.GENERATE_DIR_NAME);
    FileOutputFormat.setOutputPath(job, output);

    RunningJob r = JobClient.runJob(job);
    return r;
}
From source file: org.hxx.hadoop.GeneratorHbase.java
License: Apache License

private RunningJob generateJob(String table, Path segment, int reduceCnt, long topN,
        boolean filter, boolean norm, boolean force) throws IOException {
    LOG.info("Generator: segment=" + segment);

    JobConf job = new NutchJob(getConf());
    // job.setJarByClass(GeneratorHbase.class);
    job.setJobName("generate:" + table + " "
            + (new SimpleDateFormat("HH:mm:ss")).format(System.currentTimeMillis()) + " path=" + segment);
    // job.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 300000);

    if (reduceCnt == -1) {
        reduceCnt = job.getNumMapTasks(); // a partition per fetch task
    }
    if ("local".equals(job.get("mapred.job.tracker")) && reduceCnt != 1) {
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        reduceCnt = 1;
    }

    // job.setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
    job.setLong(GENERATOR_TOP_N, topN);
    job.setBoolean(GENERATOR_FILTER, filter);
    job.setBoolean(GENERATOR_NORMALISE, norm);
    job.set(GENERATL_TABLE, table);
    job.setInt(GENERATL_REDUCECNT, reduceCnt);
    job.setInt("partition.url.seed", new Random().nextInt());

    job.setInputFormat(CodeInputFormat.class);
    job.setNumMapTasks(1);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(GenerateMark.class);
    job.setNumReduceTasks(reduceCnt);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setOutputKeyComparatorClass(HashComparator.class);
    Path output = new Path(segment, CrawlDatum.GENERATE_DIR_NAME);
    FileOutputFormat.setOutputPath(job, output);

    RunningJob r = JobClient.runJob(job);
    return r;
}
From source file: org.hxx.hadoop.GeneratorMapHbase.java
License: Apache License

private RunningJob generateJob(String table, Path segment, int numLists, long topN, long curTime,
        boolean filter, boolean norm, boolean force) throws IOException {
    LOG.info("Generator: segment: " + segment);

    JobConf job = new NutchJob(getConf());
    job.setJarByClass(GeneratorMapHbase.class);
    job.setJobName("generate: from " + table + " "
            + (new SimpleDateFormat("yyyyMMdd HH:mm:ss")).format(System.currentTimeMillis()));
    // job.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 300000);

    if (numLists == -1) {
        numLists = job.getNumMapTasks(); // a partition per fetch task
    }
    numLists = 4; // TODO
    if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) {
        // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }

    // job.setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
    job.setLong(GENERATOR_TOP_N, topN);
    job.setBoolean(GENERATOR_FILTER, filter);
    job.setBoolean(GENERATOR_NORMALISE, norm);
    job.set(GENERATL_TABLE, table);
    job.setInt(GENERATL_REDUCENUM, numLists);

    job.setInputFormat(TableTopInputFormat.class);
    job.setMapperClass(GenerateMark.class);
    job.setPartitionerClass(URLCountPartitioner.class);
    job.setNumReduceTasks(numLists);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setOutputKeyComparatorClass(HashComparator.class);
    Path output = new Path(segment, CrawlDatum.GENERATE_DIR_NAME);
    FileOutputFormat.setOutputPath(job, output);

    RunningJob r = null;
    try {
        r = JobClient.runJob(job);
    } catch (IOException e) {
        throw e;
    }
    return r;
}
From source file: org.hxx.hadoop.GeneratorRedHbase.java
License: Apache License

private RunningJob generateJob(String table, Path segment, int numLists, long topN, long curTime,
        boolean filter, boolean norm, boolean force) throws IOException {
    LOG.info("Generator: segment=" + segment);

    JobConf job = new NutchJob(getConf());
    job.setJarByClass(GeneratorRedHbase.class);
    job.setJobName("generate: from " + table + " "
            + (new SimpleDateFormat("MMdd HH:mm:ss")).format(System.currentTimeMillis()));
    // job.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 300000);

    if (numLists == -1) {
        numLists = job.getNumMapTasks(); // a partition per fetch task
    }
    if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) {
        // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }

    // job.setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
    job.setLong(GENERATOR_TOP_N, topN);
    job.setBoolean(GENERATOR_FILTER, filter);
    job.setBoolean(GENERATOR_NORMALISE, norm);
    job.set(GENERATL_TABLE, table);
    job.setInt(GENERATL_REDUCENUM, numLists);
    job.setInt("partition.url.seed", new Random().nextInt());

    job.setInputFormat(CodeInputFormat.class);
    job.setNumMapTasks(1);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(GenerateMark.class);
    job.setNumReduceTasks(numLists);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setOutputKeyComparatorClass(HashComparator.class);
    Path output = new Path(segment, CrawlDatum.GENERATE_DIR_NAME);
    FileOutputFormat.setOutputPath(job, output);

    RunningJob r = null;
    try {
        r = JobClient.runJob(job);
    } catch (IOException e) {
        throw e;
    }
    return r;
}
From source file: org.locationtech.geomesa.jobs.interop.mapred.FeatureCountJob.java
License: Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FeatureCountJob.class);
    conf.setJobName("simple feature count");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(GeoMesaInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, new Path("/tmp/myjob"));

    java.util.Map<String, String> params = new HashMap<String, String>();
    params.put("instanceId", "myinstance");
    params.put("zookeepers", "zoo1,zoo2,zoo3");
    params.put("user", "myuser");
    params.put("password", "mypassword");
    params.put("tableName", "mycatalog");

    Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));
    GeoMesaInputFormat.configure(conf, params, query);

    JobClient.runJob(conf);
}
From source file: org.locationtech.geomesa.jobs.interop.mapred.FeatureWriterJob.java
License: Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FeatureCountJob.class);
    conf.setJobName("simple feature writing");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(ScalaSimpleFeature.class);

    conf.setMapperClass(MyMapper.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(GeoMesaInputFormat.class);
    conf.setOutputFormat(GeoMesaOutputFormat.class);

    Map<String, String> params = new HashMap<String, String>();
    params.put("instanceId", "myinstance");
    params.put("zookeepers", "zoo1,zoo2,zoo3");
    params.put("user", "myuser");
    params.put("password", "mypassword");
    params.put("tableName", "mycatalog");

    Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));
    GeoMesaInputFormat.configure(conf, params, query);

    Map<String, String> outParams = new HashMap<String, String>();
    outParams.put("instanceId", "myinstance");
    outParams.put("zookeepers", "zoo1,zoo2,zoo3");
    outParams.put("user", "myuser");
    outParams.put("password", "mypassword");
    outParams.put("tableName", "mycatalog_2");
    GeoMesaOutputFormat.configureDataStore(conf, outParams);

    JobClient.runJob(conf);
}