List of usage examples for org.apache.hadoop.mapred JobConf setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass)
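setMapOutputValueClass sets the value class for the intermediate map output records. It only needs to be called when the map output value type differs from the job's final (reduce) output value type; otherwise the class set via setOutputValueClass is used as the default. Below is a minimal sketch of that pattern, not taken from the examples that follow; MyJob, MyMapper, and MyReducer are hypothetical placeholder classes.

public static void main(String[] args) throws Exception {
    // Hypothetical job: MyMapper emits <Text, IntWritable>, MyReducer emits <Text, LongWritable>.
    JobConf conf = new JobConf(MyJob.class);
    conf.setJobName("map-output-value-class sketch");

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Final (reduce) output types.
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);

    // Intermediate map output types; required here because the map output value
    // class (IntWritable) differs from the final output value class (LongWritable).
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}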
From source file:org.locationtech.geomesa.jobs.interop.mapred.FeatureWriterJob.java
License:Open Source License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FeatureCountJob.class);
    conf.setJobName("simple feature writing");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(ScalaSimpleFeature.class);
    conf.setMapperClass(MyMapper.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(GeoMesaInputFormat.class);
    conf.setOutputFormat(GeoMesaOutputFormat.class);

    Map<String, String> params = new HashMap<String, String>();
    params.put("instanceId", "myinstance");
    params.put("zookeepers", "zoo1,zoo2,zoo3");
    params.put("user", "myuser");
    params.put("password", "mypassword");
    params.put("tableName", "mycatalog");

    Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));
    GeoMesaInputFormat.configure(conf, params, query);

    Map<String, String> outParams = new HashMap<String, String>();
    outParams.put("instanceId", "myinstance");
    outParams.put("zookeepers", "zoo1,zoo2,zoo3");
    outParams.put("user", "myuser");
    outParams.put("password", "mypassword");
    outParams.put("tableName", "mycatalog_2");

    GeoMesaOutputFormat.configureDataStore(conf, outParams);

    JobClient.runJob(conf);
}
From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java
License:Open Source License
public int initJob(JobConf jobConf, String input, String output) throws Exception {
    JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);

    /*
     * Need to get all of the sample names/labels
     */
    JobConf cacheConf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);
    cacheConf.setJobName("CacheNorm2MapReduce");
    cacheConf.setNumReduceTasks(1); // Want ONE part file

    // Set up IdentityMapper
    SequenceFileInputFormat.setInputPaths(cacheConf, new Path(input));
    cacheConf.setInputFormat(SequenceFileInputFormat.class);
    cacheConf.setMapperClass(Norm2Mapper.class);
    cacheConf.setOutputKeyClass(StringDoublePairWritable.class);
    cacheConf.setOutputValueClass(SparseVectorWritable.class);

    // Set up IdentityReducer
    cacheConf.setReducerClass(IdentityReducer.class);
    cacheConf.setOutputFormat(SequenceFileOutputFormat.class);
    cacheConf.setNumReduceTasks(1);

    Path sfPath = FileUtils.createRemoteTempPath(fs, qInputPath.getParent());
    LOG.info(String.format("Generating feature vector SequenceFile path %s", sfPath.toString()));
    SequenceFileOutputFormat.setOutputPath(cacheConf, sfPath);
    JobClient.runJob(cacheConf);

    Path cachePath = new Path(sfPath.toString() + Path.SEPARATOR + "part-00000");

    // need to know the size (the reducer might be able to send this back via the Reporter,
    // but how do we grab that info?)
    StringDoublePairWritable key = new StringDoublePairWritable();
    int size = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, cachePath, conf);
    boolean hasNext = reader.next(key);
    while (hasNext) {
        size += 1;
        hasNext = reader.next(key);
    }
    try {
        reader.close();
    } catch (IOException ioe) {
        // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
        LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
    }

    //LOG.info(String.format("Caching model file %s", qInputPath.toString()));
    URI listURI = new URI(fs.makeQualified(cachePath).toString());
    DistributedCache.addCacheFile(listURI, conf);
    LOG.info(String.format("SequenceFile cache path %s (%s) with %d labels", listURI.toString(),
            cachePath.getName(), size));
    conf.set(CACHE_PATH, cachePath.getName());
    conf.setInt(DISTANCE_MATRIX_SIZE, size);

    /*
     * Main MapReduce Task of generating dot products
     */
    LOG.info("Generating distances");
    JobConf distanceConf = new JobConf(conf, CalculateCosineDistanceMatrix.class);
    distanceConf.setJobName("DistanceMapReduce");

    // Set up distance mapper
    SequenceFileInputFormat.setInputPaths(distanceConf, new Path(input));
    distanceConf.setInputFormat(SequenceFileInputFormat.class);
    distanceConf.setMapperClass(DistanceMap.class);
    distanceConf.setMapOutputKeyClass(Text.class);
    distanceConf.setMapOutputValueClass(SparseVectorWritable.class);

    // Set up reducer to merge lower-triangle results into a single dense distance vector
    distanceConf.setReducerClass(DistanceReducer.class);
    distanceConf.setOutputKeyClass(Text.class);
    distanceConf.setOutputValueClass(DenseVectorWritable.class);
    distanceConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(distanceConf, new Path(output));
    JobClient.runJob(distanceConf);

    return 0;
}
From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License:Open Source License
@Test public void testCombinerOutputClasses() throws IOException, KettleException { JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr", "./test-res/wordcount-reducer.ktr"); jobConf.setMapOutputKeyClass(Text.class); jobConf.setMapOutputValueClass(IntWritable.class); jobConf.setOutputValueClass(NullWritable.class); jobConf.setOutputValueClass(LongWritable.class); GenericTransCombiner combiner = new GenericTransCombiner(); combiner.configure(jobConf);// ww w . ja va 2s.co m assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK()); assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV()); }
From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License:Open Source License
@Test public void testReducerOutputClasses() throws IOException, KettleException { JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr", "./test-res/wordcount-reducer.ktr"); jobConf.setMapOutputKeyClass(Text.class); jobConf.setMapOutputValueClass(IntWritable.class); jobConf.setOutputValueClass(NullWritable.class); jobConf.setOutputValueClass(LongWritable.class); GenericTransReduce reducer = new GenericTransReduce(); reducer.configure(jobConf);// w ww .jav a2s . c om assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK()); assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV()); }
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java
License:Apache License
@Test public void testCombinerOutputClasses() throws IOException, KettleException { JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr", "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr"); jobConf.setMapOutputKeyClass(Text.class); jobConf.setMapOutputValueClass(IntWritable.class); jobConf.setOutputValueClass(NullWritable.class); jobConf.setOutputValueClass(LongWritable.class); GenericTransCombiner combiner = new GenericTransCombiner(); combiner.configure(jobConf);/*w ww.j av a 2s .c o m*/ assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK()); assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV()); }
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java
License:Apache License
@Test public void testReducerOutputClasses() throws IOException, KettleException { JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr", "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr"); jobConf.setMapOutputKeyClass(Text.class); jobConf.setMapOutputValueClass(IntWritable.class); jobConf.setOutputValueClass(NullWritable.class); jobConf.setOutputValueClass(LongWritable.class); GenericTransReduce reducer = new GenericTransReduce(); reducer.configure(jobConf);//from w ww. ja va2 s . co m assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK()); assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV()); }
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java
License:Apache License
@Test public void testCombinerOutputClasses() throws IOException, KettleException { JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr", "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr"); jobConf.setMapOutputKeyClass(Text.class); jobConf.setMapOutputValueClass(IntWritable.class); jobConf.setOutputValueClass(NullWritable.class); jobConf.setOutputValueClass(LongWritable.class); GenericTransCombiner combiner = new GenericTransCombiner(); combiner.configure(jobConf);/*from w w w .ja va 2s . co m*/ assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK()); assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV()); }
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java
License:Apache License
@Test public void testReducerOutputClasses() throws IOException, KettleException { JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr", "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr"); jobConf.setMapOutputKeyClass(Text.class); jobConf.setMapOutputValueClass(IntWritable.class); jobConf.setOutputValueClass(NullWritable.class); jobConf.setOutputValueClass(LongWritable.class); GenericTransReduce reducer = new GenericTransReduce(); reducer.configure(jobConf);/* w ww . j ava 2 s . c om*/ assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK()); assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV()); }
From source file:org.pentaho.hadoop.sample.wordcount.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { String hdfsHost = "localhost:9000"; String jobTrackerHost = "localhost:9001"; String fsPrefix = "hdfs"; String dirInput = "/wordcount/input"; String dirOutput = "/wordcount/output"; if (args.length == 1 && (args[0].equals("--help") || args[0].equals("-h") || args[0].equals("/?"))) { System.out.println("Usage: WordCount <options>"); System.out.println();// w ww . j a va 2 s.c o m System.out.println("Options:"); System.out.println(); System.out.println("--input=DIR The directory containing the input files for the"); System.out.println(" WordCount Hadoop job"); System.out.println("--output=DIR The directory where the results of the WordCount"); System.out.println(" Hadoop job will be stored"); System.out.println("--hdfsHost=HOST The host<:port> of the HDFS service"); System.out.println(" e.g.- localhost:9000"); System.out.println("--jobTrackerHost=HOST The host<:port> of the job tracker service"); System.out.println(" e.g.- localhost:9001"); System.out.println("--fsPrefix=PREFIX The prefix to use for for the filesystem"); System.out.println(" e.g.- hdfs"); System.out.println(); System.out.println(); System.out.println("If an option is not provided through the command prompt the following defaults"); System.out.println("will be used:"); System.out.println("--input='/wordcount/input'"); System.out.println("--output='/wordcount/output'"); System.out.println("--hdfsHost=localhost:9000"); System.out.println("--jobTrackerHost=localhost:9001"); System.out.println("--fsPrefix=hdfs"); } else { if (args.length > 0) { for (String arg : args) { if (arg.startsWith("--input=")) { dirInput = WordCount.getArgValue(arg); } else if (arg.startsWith("--output=")) { dirOutput = WordCount.getArgValue(arg); } else if (arg.startsWith("--hdfsHost=")) { hdfsHost = WordCount.getArgValue(arg); } else if (arg.startsWith("--jobTrackerHost=")) { jobTrackerHost = WordCount.getArgValue(arg); } else if (arg.startsWith("--fsPrefix=")) { fsPrefix = WordCount.getArgValue(arg); } } } JobConf conf = new JobConf(WordCount.class); conf.setJobName("WordCount"); String hdfsBaseUrl = fsPrefix + "://" + hdfsHost; conf.set("fs.default.name", hdfsBaseUrl + "/"); if (jobTrackerHost != null && jobTrackerHost.length() > 0) { conf.set("mapred.job.tracker", jobTrackerHost); } FileInputFormat.setInputPaths(conf, new Path[] { new Path(hdfsBaseUrl + dirInput) }); FileOutputFormat.setOutputPath(conf, new Path(hdfsBaseUrl + dirOutput)); conf.setMapperClass(WordCountMapper.class); conf.setReducerClass(WordCountReducer.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); JobClient.runJob(conf); } }
From source file:org.pooledtimeseries.MeanChiSquareDistanceCalculation.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf(baseConf, MeanChiSquareDistanceCalculation.class);
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());
    conf.setJobName("mean_chi_square_calculation");
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name- " + conf.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(DoubleWritable.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, args[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, args[0]);
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    JobClient.runJob(conf);
}