Example usage for org.apache.hadoop.mapreduce MapReduceTestUtil readOutput

Introduction

This page shows example usage of org.apache.hadoop.mapreduce.MapReduceTestUtil.readOutput.

Prototype

public static String readOutput(Path outDir, Configuration conf) throws IOException 
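
MapReduceTestUtil ships with Hadoop's MapReduce test utilities (typically the hadoop-mapreduce-client-jobclient test jar). readOutput reads the part files a completed job wrote under outDir and returns their concatenated contents as a single String. A minimal sketch of calling it after a job finishes; the output path below is a placeholder, not a path used by the examples that follow:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MapReduceTestUtil;

public class ReadOutputSketch {
    // Read a finished job's text output back as one String and split it into lines.
    // "/tmp/job-output" is a placeholder output directory.
    public static String[] readLines(Configuration conf) throws IOException {
        Path outDir = new Path("/tmp/job-output");
        String output = MapReduceTestUtil.readOutput(outDir, conf);
        return output.split("\n");
    }
}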

Usage

From source file:io.vitess.hadoop.MapReduceIT.java

License: Apache License

/**
 * Run a mapper-only MR job and verify that all rows in the source table were written to HDFS.
 */
public void testDumpTableToHDFS() throws Exception {
    // Configurations for the job, output from mapper as Text
    Configuration conf = createJobConf();
    Job job = Job.getInstance(conf);
    job.setJobName("table");
    job.setJarByClass(VitessInputFormat.class);
    job.setMapperClass(TableMapper.class);
    VitessInputFormat.setInput(job, "localhost:" + testEnv.getPort(), testEnv.getKeyspace(),
            "select id, name, age from vtgate_test", ImmutableList.<String>of(), 4 /* splitCount */,
            0 /* numRowsPerQueryPart */, Algorithm.EQUAL_SPLITS, TestUtil.getRpcClientFactory().getClass());
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(RowWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);

    Path outDir = new Path(testEnv.getTestOutputPath(), "mrvitess/output");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    FileOutputFormat.setOutputPath(job, outDir);

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n");
    // there should be one line per row in the source table
    assertEquals(NUM_ROWS, outputLines.length);
    Set<Long> actualAges = new HashSet<>();
    Set<String> actualNames = new HashSet<>();

    // Parse and verify we've gotten all the ages and rows.
    Gson gson = new Gson();
    for (String line : outputLines) {
        String[] parts = line.split("\t");
        actualAges.add(Long.valueOf(parts[0]));

        // Rows are written as JSON since this is TextOutputFormat.
        String rowJson = parts[1];
        Type mapType = new TypeToken<Map<String, String>>() {
        }.getType();
        @SuppressWarnings("unchecked")
        Map<String, String> map = (Map<String, String>) gson.fromJson(rowJson, mapType);
        actualNames.add(map.get("name"));
    }

    Set<Long> expectedAges = new HashSet<>();
    Set<String> expectedNames = new HashSet<>();
    for (long i = 1; i <= NUM_ROWS; i++) {
        // Generate values that match TestUtil.insertRows().
        expectedAges.add(i % 10);
        expectedNames.add("name_" + i);
    }
    assertEquals(expectedAges.size(), actualAges.size());
    assertTrue(actualAges.containsAll(expectedAges));
    assertEquals(NUM_ROWS, actualNames.size());
    assertTrue(actualNames.containsAll(expectedNames));
}

From source file:io.vitess.hadoop.MapReduceIT.java

License: Apache License

/**
 * Map all rows and aggregate by age at the reducer.
 */
public void testReducerAggregateRows() throws Exception {
    Configuration conf = createJobConf();

    Job job = Job.getInstance(conf);
    job.setJobName("table");
    job.setJarByClass(VitessInputFormat.class);
    job.setMapperClass(TableMapper.class);
    VitessInputFormat.setInput(job, "localhost:" + testEnv.getPort(), testEnv.getKeyspace(),
            "select id, name, age from vtgate_test", ImmutableList.<String>of(), 1 /* splitCount */,
            0 /* numRowsPerQueryPart */, Algorithm.EQUAL_SPLITS, TestUtil.getRpcClientFactory().getClass());

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(RowWritable.class);

    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path outDir = new Path(testEnv.getTestOutputPath(), "mrvitess/output");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    FileOutputFormat.setOutputPath(job, outDir);

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n");
    // There should be 10 different ages, because age = i % 10.
    assertEquals(10, outputLines.length);
    // All rows should be accounted for.
    int totalRowsReduced = 0;
    for (String line : outputLines) {
        totalRowsReduced += Integer.parseInt(line);
    }
    assertEquals(NUM_ROWS, totalRowsReduced);
}
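
The CountReducer referenced above is not shown on this page. A minimal sketch of a reducer with the same shape, counting the rows received for each age key and emitting only the count per group (the class name and exact semantics are assumptions, not the actual io.vitess.hadoop implementation):

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

// RowWritable is assumed to come from the same io.vitess.hadoop package used above.
import io.vitess.hadoop.RowWritable;

public class RowCountReducer extends Reducer<IntWritable, RowWritable, NullWritable, IntWritable> {
    @Override
    protected void reduce(IntWritable age, Iterable<RowWritable> rows, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        for (RowWritable ignored : rows) {
            count++;
        }
        // With a NullWritable key, TextOutputFormat writes one line per age group
        // containing only the count, which is what the test above parses.
        context.write(NullWritable.get(), new IntWritable(count));
    }
}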