List of usage examples for org.apache.hadoop.mapreduce.MapReduceTestUtil.readOutput
public static String readOutput(Path outDir, Configuration conf) throws IOException
From source file: io.vitess.hadoop.MapReduceIT.java
License: Apache License
/** * Run a mapper only MR job and verify all the rows in the source table were outputted into HDFS. *//*w ww. ja v a 2s . c om*/ public void testDumpTableToHDFS() throws Exception { // Configurations for the job, output from mapper as Text Configuration conf = createJobConf(); Job job = Job.getInstance(conf); job.setJobName("table"); job.setJarByClass(VitessInputFormat.class); job.setMapperClass(TableMapper.class); VitessInputFormat.setInput(job, "localhost:" + testEnv.getPort(), testEnv.getKeyspace(), "select id, name, age from vtgate_test", ImmutableList.<String>of(), 4 /* splitCount */, 0 /* numRowsPerQueryPart */, Algorithm.EQUAL_SPLITS, TestUtil.getRpcClientFactory().getClass()); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(RowWritable.class); job.setOutputFormatClass(TextOutputFormat.class); job.setNumReduceTasks(0); Path outDir = new Path(testEnv.getTestOutputPath(), "mrvitess/output"); FileSystem fs = FileSystem.get(conf); if (fs.exists(outDir)) { fs.delete(outDir, true); } FileOutputFormat.setOutputPath(job, outDir); job.waitForCompletion(true); assertTrue(job.isSuccessful()); String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n"); // there should be one line per row in the source table assertEquals(NUM_ROWS, outputLines.length); Set<Long> actualAges = new HashSet<>(); Set<String> actualNames = new HashSet<>(); // Parse and verify we've gotten all the ages and rows. Gson gson = new Gson(); for (String line : outputLines) { String[] parts = line.split("\t"); actualAges.add(Long.valueOf(parts[0])); // Rows are written as JSON since this is TextOutputFormat. 
String rowJson = parts[1]; Type mapType = new TypeToken<Map<String, String>>() { }.getType(); @SuppressWarnings("unchecked") Map<String, String> map = (Map<String, String>) gson.fromJson(rowJson, mapType); actualNames.add(map.get("name")); } Set<Long> expectedAges = new HashSet<>(); Set<String> expectedNames = new HashSet<>(); for (long i = 1; i <= NUM_ROWS; i++) { // Generate values that match TestUtil.insertRows(). expectedAges.add(i % 10); expectedNames.add("name_" + i); } assertEquals(expectedAges.size(), actualAges.size()); assertTrue(actualAges.containsAll(expectedAges)); assertEquals(NUM_ROWS, actualNames.size()); assertTrue(actualNames.containsAll(expectedNames)); }
From source file: io.vitess.hadoop.MapReduceIT.java
License: Apache License
/** * Map all rows and aggregate by age at the reducer. *///from w w w . jav a2 s . co m public void testReducerAggregateRows() throws Exception { Configuration conf = createJobConf(); Job job = Job.getInstance(conf); job.setJobName("table"); job.setJarByClass(VitessInputFormat.class); job.setMapperClass(TableMapper.class); VitessInputFormat.setInput(job, "localhost:" + testEnv.getPort(), testEnv.getKeyspace(), "select id, name, age from vtgate_test", ImmutableList.<String>of(), 1 /* splitCount */, 0 /* numRowsPerQueryPart */, Algorithm.EQUAL_SPLITS, TestUtil.getRpcClientFactory().getClass()); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(RowWritable.class); job.setReducerClass(CountReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(TextOutputFormat.class); Path outDir = new Path(testEnv.getTestOutputPath(), "mrvitess/output"); FileSystem fs = FileSystem.get(conf); if (fs.exists(outDir)) { fs.delete(outDir, true); } FileOutputFormat.setOutputPath(job, outDir); job.waitForCompletion(true); assertTrue(job.isSuccessful()); String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n"); // There should be 10 different ages, because age = i % 10. assertEquals(10, outputLines.length); // All rows should be accounted for. int totalRowsReduced = 0; for (String line : outputLines) { totalRowsReduced += Integer.parseInt(line); } assertEquals(NUM_ROWS, totalRowsReduced); }