ca.uwaterloo.iss4e.hadoop.io.CartesianRecordReader.java Source code

Java tutorial

Introduction

Here is the source code for ca.uwaterloo.iss4e.hadoop.io.CartesianRecordReader.java

Source

package ca.uwaterloo.iss4e.hadoop.io;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.IOException;

/**
 *  Copyright (c) 2014 Xiufeng Liu ( xiufeng.liu@uwaterloo.ca )
 *
 *  This file is free software: you may copy, redistribute and/or modify it
 *  under the terms of the GNU General Public License version 2
 *  as published by the Free Software Foundation.
 *
 *  This file is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses.
 */
/**
 * Record reader that produces the cartesian product of the two sub-splits of a
 * {@link CompositeInputSplit}.
 *
 * <p>For every record of the left split (index 0) the right split (index 1) is read
 * from the beginning. Each emitted pair uses the string form of the left record's
 * <em>value</em> as the output key and the string form of the right record's
 * <em>value</em> as the output value; the keys of the underlying records are ignored.
 *
 * <p>The input format classes used to read each side are looked up in the job
 * configuration under {@code CartesianInputFormat.LEFT_INPUT_FORMAT} and
 * {@code CartesianInputFormat.RIGHT_INPUT_FORMAT}.
 *
 * <p>The type parameters are not used by this class; they are kept so that existing
 * declarations of this type continue to compile.
 */
public class CartesianRecordReader<K1, V1, K2, V2> extends RecordReader<Text, Text> {
    // Reader over the left split; created once in the constructor.
    private RecordReader<?, ?> leftRR = null;
    // Reader over the right split; re-created for every left record, null until then.
    private RecordReader<?, ?> rightRR = null;
    // Input format used to (re)create the right reader.
    private FileInputFormat<?, ?> rightFIF;
    private final InputSplit rightIS;
    private final InputSplit leftIS;
    // Context captured at construction time and used for every right reader.
    private final TaskAttemptContext rightTaskAttemptContext;

    // Reused output objects returned by getCurrentKey()/getCurrentValue().
    private final Text key;
    private final Text value;

    // True when the next call must advance the left reader before reading the right side.
    private boolean goToNextLeft = true;
    // True once the left reader has been exhausted.
    private boolean alldone = false;

    /**
     * Creates the left record reader and instantiates the right input format.
     *
     * @param split              composite split whose element 0 is the left split and
     *                           element 1 is the right split
     * @param taskAttemptContext task context supplying the configuration; also retained
     *                           for creating right-side readers later
     * @throws IOException if an input format class is not configured or cannot be found,
     *                     if creating the left reader fails, or if the thread is
     *                     interrupted while creating it
     */
    public CartesianRecordReader(CompositeInputSplit split, TaskAttemptContext taskAttemptContext)
            throws IOException {

        this.leftIS = split.get(0);
        this.rightIS = split.get(1);
        this.rightTaskAttemptContext = taskAttemptContext;
        this.key = new Text();
        this.value = new Text();
        Configuration conf = rightTaskAttemptContext.getConfiguration();
        try {
            // Create left record reader
            FileInputFormat<?, ?> leftFIF =
                    instantiateInputFormat(conf, CartesianInputFormat.LEFT_INPUT_FORMAT);
            leftRR = leftFIF.createRecordReader(leftIS, taskAttemptContext);

            // Only the format is created for the right side; its reader is built lazily
            // (and rebuilt) in nextKeyValue().
            rightFIF = instantiateInputFormat(conf, CartesianInputFormat.RIGHT_INPUT_FORMAT);
        } catch (ClassNotFoundException e) {
            throw new IOException(e);
        } catch (InterruptedException e) {
            // Preserve the interrupt status for callers before translating the exception.
            Thread.currentThread().interrupt();
            throw new IOException(e);
        }
    }

    /**
     * Instantiates the input format whose class name is stored under {@code confKey}.
     */
    private static FileInputFormat<?, ?> instantiateInputFormat(Configuration conf, String confKey)
            throws IOException, ClassNotFoundException {
        String className = conf.get(confKey);
        if (className == null) {
            throw new IOException("No input format class configured for '" + confKey + "'");
        }
        return (FileInputFormat<?, ?>) ReflectionUtils.newInstance(Class.forName(className), conf);
    }

    /**
     * Initializes the left reader on the left split.
     *
     * @param inputSplit         ignored; the left split captured by the constructor is used
     * @param taskAttemptContext context passed to the left reader
     */
    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
            throws IOException, InterruptedException {
        leftRR.initialize(this.leftIS, taskAttemptContext);
    }

    /**
     * Advances to the next (left, right) pair.
     *
     * @return {@code true} if a pair is available through {@link #getCurrentKey()} and
     *         {@link #getCurrentValue()}, {@code false} once the left reader is exhausted
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        do {
            // If we are to go to the next left key/value pair
            if (goToNextLeft) {
                // Read the next left record; false means no more records
                if (!leftRR.nextKeyValue()) {
                    alldone = true;
                    break;
                }
                // The left record's value becomes the output key.
                key.set(leftRR.getCurrentValue().toString());
                goToNextLeft = false;
                alldone = false;

                // Restart the right data set for this left record.
                resetRightReader();
            }

            // Read the next record from the right data set
            if (rightRR.nextKeyValue()) {
                // The right record's value becomes the output value.
                value.set(rightRR.getCurrentValue().toString());
            } else {
                // The right data set is complete for this left record,
                // so move on to the next left record.
                goToNextLeft = true;
            }

            // Loop again only when the right data set ran out and a new
            // left record has to be fetched.
        } while (goToNextLeft);

        // Return true if a key/value pair was read, false otherwise
        return !alldone;
    }

    /**
     * Closes the current right reader, if any, and opens a fresh one on the right split.
     */
    private void resetRightReader() throws IOException, InterruptedException {
        if (rightRR != null) {
            // Release the previous pass over the right split before starting a new one;
            // otherwise one reader would be left open per left record.
            rightRR.close();
            rightRR = null;
        }
        rightRR = rightFIF.createRecordReader(rightIS, rightTaskAttemptContext);
        rightRR.initialize(rightIS, rightTaskAttemptContext);
    }

    /** Returns the reusable key object holding the current left record's value. */
    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    /** Returns the reusable value object holding the current right record's value. */
    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    /** Returns the progress reported by the left reader only. */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        return leftRR.getProgress();
    }

    /**
     * Closes both underlying readers. The right reader may never have been created
     * (no left record was read), and it is closed even if closing the left one fails.
     */
    @Override
    public void close() throws IOException {
        try {
            if (leftRR != null) {
                leftRR.close();
            }
        } finally {
            if (rightRR != null) {
                rightRR.close();
                rightRR = null;
            }
        }
    }
}