Example usage for org.apache.hadoop.mapreduce.task JobContextImpl JobContextImpl

List of usage examples for org.apache.hadoop.mapreduce.task JobContextImpl JobContextImpl

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce.task JobContextImpl JobContextImpl.

Prototype

public JobContextImpl(Configuration conf, JobID jobId) 

Source Link

Usage

From source file:org.apache.gora.shims.hadoop2.HadoopShim2.java

License:Apache License

/**
 * {@inheritDoc}//from www .  j a  va 2  s .c o m
 * 
 * Use the Hadoop 2.x way of creating a {@link JobContext} object.
 */
public JobContext createJobContext(Configuration configuration) {
    return new JobContextImpl(configuration, null);
}

From source file:org.apache.hcatalog.shims.HCatHadoopShims23.java

License:Apache License

@Override
public JobContext createJobContext(Configuration conf, JobID jobId) {
    return new JobContextImpl(conf instanceof JobConf ? new JobConf(conf) : conf, jobId);
}

From source file:org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests.java

License:Apache License

/**
 * Runs a test with a single input/* w  ww.j  a  v  a2 s .  com*/
 * 
 * @param config
 *            Configuration
 * @param input
 *            Input
 * @param expectedTuples
 *            Expected tuples
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testSingleInput(Configuration config, File input, int expectedSplits, int expectedTuples)
        throws IOException, InterruptedException {
    // Set up fake job
    InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
    Job job = Job.getInstance(config);
    job.setInputFormatClass(inputFormat.getClass());
    this.addInputPath(input, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertEquals(1, FileInputFormat.getInputPaths(context).length);
    NLineInputFormat.setNumLinesPerSplit(job, LARGE_SIZE);

    // Check splits
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(expectedSplits, splits.size());

    // Check tuples
    for (InputSplit split : splits) {
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID());
        RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        this.checkTuples(reader, expectedTuples);
    }
}

From source file:org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests.java

License:Apache License

/**
 * Runs a multiple input test/*from  ww w .  j  av  a2s .c  om*/
 * 
 * @param inputs
 *            Inputs
 * @param expectedSplits
 *            Number of splits expected
 * @param expectedTuples
 *            Number of tuples expected
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testMultipleInputs(File[] inputs, int expectedSplits, int expectedTuples)
        throws IOException, InterruptedException {
    // Prepare configuration and inputs
    Configuration config = this.prepareConfiguration();

    // Set up fake job
    InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
    Job job = Job.getInstance(config);
    job.setInputFormatClass(inputFormat.getClass());
    for (File input : inputs) {
        this.addInputPath(input, job.getConfiguration(), job);
    }
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length);
    NLineInputFormat.setNumLinesPerSplit(job, expectedTuples);

    // Check splits
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(expectedSplits, splits.size());

    // Check tuples
    int count = 0;
    for (InputSplit split : splits) {
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID());
        RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        count += this.countTuples(reader);
    }
    Assert.assertEquals(expectedTuples, count);
}

From source file:org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests.java

License:Apache License

protected final void testSplitInputs(Configuration config, File[] inputs, int expectedSplits,
        int expectedTuples) throws IOException, InterruptedException {
    // Set up fake job
    InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
    Job job = Job.getInstance(config);//from  www . j  a  va  2  s  .c  o m
    job.setInputFormatClass(inputFormat.getClass());
    for (File input : inputs) {
        this.addInputPath(input, job.getConfiguration(), job);
    }
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length);

    // Check splits
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(expectedSplits, splits.size());

    // Check tuples
    int count = 0;
    for (InputSplit split : splits) {
        // Validate split
        Assert.assertTrue(this.isValidSplit(split, config));

        // Read split
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID());
        RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        count += this.countTuples(reader);
    }
    Assert.assertEquals(expectedTuples, count);
}

From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java

License:Apache License

/**
 * Test that starts with two blank nodes with the same identity in a single
 * file, splits them over two files and checks that we can workaround
 * JENA-820 successfully by setting the/*from ww w .java 2s.  c  om*/
 * {@link RdfIOConstants#GLOBAL_BNODE_IDENTITY} flag for our subsequent job
 * 
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public final void blank_node_divergence_01() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_divergence", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_divergence", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Two mentions of the same blank node in the same file
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(a, tuples);

        // Set up fake job which will process the file as a single split
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - each triple goes to a separate
            // output file
            // This is how we force multiple files to be produced
            int taskID = 1;
            while (reader.nextKeyValue()) {
                // Prepare the output writing
                OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
                TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                        createAttemptID(1, ++taskID, 1));
                RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
                writer.close(outputTaskContext);
            }
        }

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // As described in JENA-820 at this point the blank nodes are
        // consistent, however when we read them from different files they
        // by default get treated as different nodes and so the blank nodes
        // diverge which is incorrect and undesirable behaviour in
        // multi-stage pipelines
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));

        // Enabling this flag works around the JENA-820 issue
        job.getConfiguration().setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, true);
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Expect to end up with a single blank node
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes should not have diverged
        Assert.assertEquals(1, nodes.size());

    } finally {
        a.delete();
        deleteDirectory(intermediateOutputDir);
    }
}

From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java

License:Apache License

/**
 * Test that starts with two blank nodes with the same identity in a single
 * file, splits them over two files and shows that they diverge in the
 * subsequent job when the JENA-820 workaround is not enabled
 * //from  w  w w .  j a  va2 s .c  o m
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_divergence_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_divergence", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_divergence", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Two mentions of the same blank node in the same file
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(a, tuples);

        // Set up fake job which will process the file as a single split
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - each triple goes to a separate
            // output file
            // This is how we force multiple files to be produced
            int taskID = 1;
            while (reader.nextKeyValue()) {
                // Prepare the output writing
                OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
                TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                        createAttemptID(1, ++taskID, 1));
                RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
                writer.close(outputTaskContext);
            }
        }

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // As described in JENA-820 at this point the blank nodes are
        // consistent, however when we read them from different files they
        // by default get treated as different nodes and so the blank nodes
        // diverge which is incorrect and undesirable behaviour in
        // multi-stage pipelines. However it is the default behaviour
        // because when we start from external inputs we want them to be
        // file scoped.
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));

        // Make sure JENA-820 flag is disabled
        job.getConfiguration().setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false);
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Expect to end up with a single blank node
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes should have diverged
        Assert.assertEquals(2, nodes.size());

    } finally {
        a.delete();
        deleteDirectory(intermediateOutputDir);
    }
}

From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java

License:Apache License

/**
 * Test that starts with two blank nodes in two different files and checks
 * that writing them to a single file does not conflate them
 * //from   ww w . j a  v a  2  s.  co  m
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_identity_01() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Different blank nodes in different files
        List<T> tuples = new ArrayList<>();
        Node bnode1 = NodeFactory.createBlankNode();
        Node bnode2 = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");

        tuples.add(createTuple(bnode1, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);

        tuples.clear();
        tuples.add(createTuple(bnode2, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);

        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - all triples go to a single
            // output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // The Blank nodes should have been given separate identities so we
        // should not be conflating them, this is the opposite problem to
        // that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        // Expect to end up with a single blank node
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());

    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}

From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java

License:Apache License

/**
 * Test that starts with two blank nodes in two different files and checks
 * that writing them to a single file does not conflate them
 * //from www .j av  a2 s .  c o m
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_identity_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Same blank node but in different files so must be treated as
        // different blank nodes and not converge
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");

        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);

        tuples.clear();
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);

        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - all triples go to a single
            // output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // The Blank nodes should have been given separate identities so we
        // should not be conflating them, this is the opposite problem to
        // that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        // Expect to end up with a single blank node
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes must not diverge
        Assert.assertEquals(2, nodes.size());

    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}

From source file:org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormatTests.java

License:Apache License

/**
 * Tests output/*from w  w w.j a  v a 2s  .c  om*/
 * 
 * @param f
 *            File to output to
 * @param num
 *            Number of tuples to output
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testOutput(File f, int num) throws IOException, InterruptedException {
    // Prepare configuration
    Configuration config = this.prepareConfiguration();

    // Set up fake job
    OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat();
    Job job = Job.getInstance(config);
    job.setOutputFormatClass(outputFormat.getClass());
    this.addOutputPath(f, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertNotNull(FileOutputFormat.getOutputPath(context));

    // Output the data
    TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
    RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext);
    Iterator<T> tuples = this.generateTuples(num);
    while (tuples.hasNext()) {
        writer.write(NullWritable.get(), tuples.next());
    }
    writer.close(taskContext);

    // Check output
    File outputFile = this.findOutputFile(this.folder.getRoot(), context);
    Assert.assertNotNull(outputFile);
    this.checkTuples(outputFile, num);
}