List of usage examples for org.apache.hadoop.mapreduce RecordWriter close
public abstract void close(TaskAttemptContext context) throws IOException, InterruptedException;
Close this RecordWriter to future operations.
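close() is where a RecordWriter must flush any buffered output and release its underlying resources; after it returns, the writer may no longer be written to. As a minimal, illustrative sketch (the class and field names TextStreamWriter and out are invented here, not taken from any of the source files listed below), a line-oriented writer might implement it like this:

import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Illustrative RecordWriter: writes each value as a line of text and
// releases the stream in close() so no further writes are possible.
public class TextStreamWriter extends RecordWriter<NullWritable, Text> {

    private final DataOutputStream out;

    public TextStreamWriter(DataOutputStream out) {
        this.out = out;
    }

    @Override
    public void write(NullWritable key, Text value) throws IOException {
        out.write(value.getBytes(), 0, value.getLength());
        out.write('\n');
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        // Flush buffered data and close the stream; the writer must not be
        // used after this call.
        out.close();
    }
}

In the examples that follow, calling code obtains such a writer from an OutputFormat, writes its records, and then calls close(taskContext) before the OutputCommitter commits the task.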
From source file:org.apache.hcatalog.pig.TestE2EScenarios.java
License:Apache License
private void copyTable(String in, String out) throws IOException, InterruptedException {
    Job ijob = new Job();
    Job ojob = new Job();
    HCatInputFormat inpy = new HCatInputFormat();
    inpy.setInput(ijob, null, in);
    HCatOutputFormat oupy = new HCatOutputFormat();
    oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));

    // Test HCatContext
    System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
    if (HCatContext.INSTANCE.getConf().isPresent()) {
        System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get()
                .getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
                        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
    }

    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
    System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
    oupy.setSchema(ojob, tableSchema);
    oupy.checkOutputSpecs(ojob);
    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
    oc.setupJob(ojob);

    for (InputSplit split : inpy.getSplits(ijob)) {
        TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
        TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());

        RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
        rr.initialize(split, rtaskContext);

        OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
        taskOc.setupTask(wtaskContext);
        RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);

        while (rr.nextKeyValue()) {
            rw.write(rr.getCurrentKey(), rr.getCurrentValue());
        }
        rw.close(wtaskContext);
        taskOc.commitTask(wtaskContext);
        rr.close();
    }

    oc.commitJob(ojob);
}
From source file:org.apache.hive.hcatalog.data.transfer.impl.HCatOutputFormatWriter.java
License:Apache License
@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,
            new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Task.java
License:Apache License
/**
 * Closes writer.
 *
 * @throws Exception If fails and logger hasn't been specified.
 */
protected void closeWriter() throws Exception {
    RecordWriter writer = hadoopCtx.writer();

    if (writer != null)
        writer.close(hadoopCtx);
}
From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java
License:Apache License
/**
 * Test that starts with two blank nodes with the same identity in a single
 * file, splits them over two files and checks that we can workaround
 * JENA-820 successfully by setting the
 * {@link RdfIOConstants#GLOBAL_BNODE_IDENTITY} flag for our subsequent job
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public final void blank_node_divergence_01() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved",
            this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_divergence", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_divergence", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Two mentions of the same blank node in the same file
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(a, tuples);

        // Set up fake job which will process the file as a single split
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - each triple goes to a separate output file
            // This is how we force multiple files to be produced
            int taskID = 1;
            while (reader.nextKeyValue()) {
                // Prepare the output writing
                OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
                TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                        createAttemptID(1, ++taskID, 1));
                RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
                writer.close(outputTaskContext);
            }
        }

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the intermediate outputs
        // As described in JENA-820 at this point the blank nodes are consistent,
        // however when we read them from different files they by default get treated
        // as different nodes and so the blank nodes diverge which is incorrect and
        // undesirable behaviour in multi-stage pipelines
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));

        // Enabling this flag works around the JENA-820 issue
        job.getConfiguration().setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, true);
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Expect to end up with a single blank node
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }

        // Nodes should not have diverged
        Assert.assertEquals(1, nodes.size());
    } finally {
        a.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java
License:Apache License
/**
 * Test that starts with two blank nodes with the same identity in a single
 * file, splits them over two files and shows that they diverge in the
 * subsequent job when the JENA-820 workaround is not enabled
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_divergence_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved",
            this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_divergence", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_divergence", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Two mentions of the same blank node in the same file
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(a, tuples);

        // Set up fake job which will process the file as a single split
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - each triple goes to a separate output file
            // This is how we force multiple files to be produced
            int taskID = 1;
            while (reader.nextKeyValue()) {
                // Prepare the output writing
                OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
                TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                        createAttemptID(1, ++taskID, 1));
                RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
                writer.close(outputTaskContext);
            }
        }

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the intermediate outputs
        // As described in JENA-820 at this point the blank nodes are consistent,
        // however when we read them from different files they by default get treated
        // as different nodes and so the blank nodes diverge which is incorrect and
        // undesirable behaviour in multi-stage pipelines. However it is the default
        // behaviour because when we start from external inputs we want them to be
        // file scoped.
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));

        // Make sure JENA-820 flag is disabled
        job.getConfiguration().setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false);
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Expect the blank nodes to have diverged into two distinct nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }

        // Nodes should have diverged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java
License:Apache License
/**
 * Test that starts with two blank nodes in two different files and checks
 * that writing them to a single file does not conflate them
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_identity_01() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved",
            this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Different blank nodes in different files
        List<T> tuples = new ArrayList<>();
        Node bnode1 = NodeFactory.createBlankNode();
        Node bnode2 = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode1, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode2, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);

        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - all triples go to a single output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the intermediate outputs
        // The Blank nodes should have been given separate identities so we should not
        // be conflating them, this is the opposite problem to that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        // Expect to end up with two distinct blank nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }

        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java
License:Apache License
/**
 * Test that starts with the same blank node in two different files and
 * checks that writing them to a single file does not conflate them
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_identity_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved",
            this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Same blank node but in different files so must be treated as
        // different blank nodes and not converge
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);

        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - all triples go to a single output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the intermediate outputs
        // The Blank nodes should have been given separate identities so we should not
        // be conflating them, this is the opposite problem to that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        // Expect to end up with two distinct blank nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }

        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormatTests.java
License:Apache License
/**
 * Tests output
 *
 * @param f
 *            File to output to
 * @param num
 *            Number of tuples to output
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testOutput(File f, int num) throws IOException, InterruptedException {
    // Prepare configuration
    Configuration config = this.prepareConfiguration();

    // Set up fake job
    OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat();
    Job job = Job.getInstance(config);
    job.setOutputFormatClass(outputFormat.getClass());
    this.addOutputPath(f, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertNotNull(FileOutputFormat.getOutputPath(context));

    // Output the data
    TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
    RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext);
    Iterator<T> tuples = this.generateTuples(num);
    while (tuples.hasNext()) {
        writer.write(NullWritable.get(), tuples.next());
    }
    writer.close(taskContext);

    // Check output
    File outputFile = this.findOutputFile(this.folder.getRoot(), context);
    Assert.assertNotNull(outputFile);
    this.checkTuples(outputFile, num);
}
From source file:org.apache.kudu.mapreduce.ITKuduTableOutputFormat.java
License:Apache License
@Test
public void test() throws Exception {
    createTable(TABLE_NAME, getBasicSchema(), getBasicCreateTableOptions());

    KuduTableOutputFormat output = new KuduTableOutputFormat();
    Configuration conf = new Configuration();
    conf.set(KuduTableOutputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
    conf.set(KuduTableOutputFormat.OUTPUT_TABLE_KEY, TABLE_NAME);
    output.setConf(conf);

    String multitonKey = conf.get(KuduTableOutputFormat.MULTITON_KEY);
    KuduTable table = KuduTableOutputFormat.getKuduTable(multitonKey);
    assertNotNull(table);

    Insert insert = table.newInsert();
    PartialRow row = insert.getRow();
    row.addInt(0, 1);
    row.addInt(1, 2);
    row.addInt(2, 3);
    row.addString(3, "a string");
    row.addBoolean(4, true);

    RecordWriter<NullWritable, Operation> rw = output.getRecordWriter(null);
    rw.write(NullWritable.get(), insert);
    rw.close(null);

    AsyncKuduScanner.AsyncKuduScannerBuilder builder = client.newScannerBuilder(table);
    assertEquals(1, countRowsInScan(builder.build()));
}
From source file:org.apache.mahout.classifier.bayes.MultipleOutputFormat.java
License:Apache License
/**
 * Create a composite record writer that can write key/value data to different
 * output files
 *
 * @param fs
 *            the file system to use
 * @param job
 *            the job conf for the job
 * @param name
 *            the leaf file name for the output file (such as "part-00000")
 * @param arg3
 *            a progressable for reporting progress.
 * @return a composite record writer
 * @throws IOException
 */
public RecordWriter<K, V> getRecordWriter(FileSystem fs, Configuration job, String name, Progressable arg3)
        throws IOException {

    final FileSystem myFS = fs;
    final String myName = generateLeafFileName(name);
    final Configuration myJob = job;
    final Progressable myProgressable = arg3;

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        private final TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>();

        @Override
        public void write(K key, V value) throws IOException {
            // get the file name based on the key
            String keyBasedPath = generateFileNameForKeyValue(key, value, myName);

            // get the file name based on the input file name
            String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath);

            // get the actual key
            K actualKey = generateActualKey(key, value);
            V actualValue = generateActualValue(key, value);

            RecordWriter<K, V> rw = this.recordWriters.get(finalPath);
            if (rw == null) {
                // if we don't have the record writer yet for the final path,
                // create one and add it to the cache
                rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable);
                this.recordWriters.put(finalPath, rw);
            }
            try {
                rw.write(actualKey, actualValue);
            } catch (InterruptedException e) {
                // continue
            }
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<K, V> rw : recordWriters.values()) {
                rw.close(context);
            }
            this.recordWriters.clear();
        }
    };
}