com.datasalt.pangool.tuplemr.mapred.lib.output.TupleOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for com.datasalt.pangool.tuplemr.mapred.lib.output.TupleOutputFormat.java

Source

package com.datasalt.pangool.tuplemr.mapred.lib.output;

/**
 * Copyright [2012] [Datasalt Systems S.L.]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Serializable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;

import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.TupleFile;

/** An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes {@link com.datasalt.pangool.io.ITuple}s. */
@SuppressWarnings("serial")
public class TupleOutputFormat extends FileOutputFormat<ITuple, NullWritable> implements Serializable {

    private Schema outputSchema = null;

    /**
     * Empty constructor means the output Schema will be picked from the first Tuple that is emitted. 
     */
    public TupleOutputFormat() {
    }

    /**
     * Providing output schema enables output validation.
     */
    public TupleOutputFormat(Schema outputSchema) {
        this.outputSchema = outputSchema;
    }

    /**
     * Deprecated. Use {@link #TupleOutputFormat(com.datasalt.pangool.io.Schema)} instead.
     */
    @Deprecated
    public TupleOutputFormat(String outputSchema) {
        this.outputSchema = Schema.parse(outputSchema);
    }

    private CompressionCodec getCodec(TaskAttemptContext context) {
        if (getCompressOutput(context)) {
            // find the right codec
            Class<?> codecClass = SequenceFileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
            return (CompressionCodec) ReflectionUtils.newInstance(codecClass, context.getConfiguration());
        }
        return null;
    }

    public RecordWriter<ITuple, NullWritable> getRecordWriter(final TaskAttemptContext context)
            throws IOException, InterruptedException {

        final Configuration conf = context.getConfiguration();

        final CompressionCodec codec = getCodec(context);
        final SequenceFile.CompressionType compressionType = getCompressOutput(context)
                ? SequenceFileOutputFormat.getOutputCompressionType(context)
                : SequenceFile.CompressionType.NONE;
        // get the path of the temporary output file
        final Path file = getDefaultWorkFile(context, "");
        final FileSystem fs = file.getFileSystem(conf);

        return new RecordWriter<ITuple, NullWritable>() {

            TupleFile.Writer out;

            public void write(ITuple key, NullWritable value) throws IOException {
                if (out == null) {
                    if (outputSchema == null) {
                        outputSchema = key.getSchema();
                    }
                    out = new TupleFile.Writer(fs, conf, file, outputSchema, compressionType, codec, context);
                }
                out.append(key);
            }

            public void close(TaskAttemptContext context) throws IOException {
                out.close();
            }
        };
    }
}