com.cloudera.fts.spark.format.RawFileRecordReader.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.fts.spark.format.RawFileRecordReader.java

Source

/*
 * Copyright 2014 Cloudera Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package com.cloudera.fts.spark.format;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

/**
 * A {@link RecordReader} that produces exactly one record: the key is the path of the split's
 * file (as a string) and the value is the complete content of that file as raw bytes.
 *
 * <p>The file is always read from its first byte until end-of-stream; the split's start offset
 * and length are not consulted. NOTE(review): this presumably relies on the owning input format
 * being non-splittable - confirm against the caller.
 *
 * <p>The whole file is accumulated in memory before it is handed out, so the file has to fit in
 * a single Java byte array and in the task's heap.
 */
public class RawFileRecordReader extends RecordReader<Text, BytesWritable> {
    /** Size in bytes of the scratch buffer used for each read from the underlying stream. */
    private static final int BUFFER_SIZE = 8192;

    /** Path of the file being read; set by {@link #initialize}. */
    private Text key;
    /** Content of the file; set by the first call to {@link #nextKeyValue()}. */
    private BytesWritable value;
    /** Open stream on the file, or {@code null} when no stream is currently held. */
    private FSDataInputStream fileIn = null;
    /** {@code true} once the single record has been produced (or producing it has failed). */
    private boolean finished;

    /**
     * Opens the file referenced by the split and prepares the key.
     *
     * @param inputSplit the split to read; must be a {@link FileSplit}
     * @param context    task context supplying the Hadoop configuration
     * @throws IOException if the file system cannot be resolved or the file cannot be opened
     */
    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        FileSplit split = (FileSplit) inputSplit;
        Path path = split.getPath();
        FileSystem fs = path.getFileSystem(conf);
        fileIn = fs.open(path);
        key = new Text(path.toString());
        finished = false;
    }

    /**
     * Reads the entire file into the current value on the first call and releases the stream.
     *
     * @return {@code true} on the first call (a record is available), {@code false} afterwards
     * @throws IOException if reading or closing the stream fails
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (finished) {
            return false;
        }
        // Mark the reader as exhausted before touching the stream: whatever happens below
        // (including a failing close()), a later call must not try to read the stream again.
        finished = true;

        try {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead;
            // A non-positive count means end-of-stream; stop copying.
            while ((bytesRead = fileIn.read(buffer, 0, BUFFER_SIZE)) > 0) {
                bos.write(buffer, 0, bytesRead);
            }
            value = new BytesWritable(bos.toByteArray());
        } finally {
            closeStream();
        }
        return true;
    }

    /**
     * @return {@code 1} once the single record has been consumed, {@code 0} before that
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        return finished ? 1.0f : 0.0f;
    }

    /**
     * @return the path of the file, as set during {@link #initialize}
     */
    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    /**
     * @return the file content read by {@link #nextKeyValue()}, or {@code null} if nothing has
     *         been read yet
     */
    @Override
    public BytesWritable getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    /**
     * Releases the underlying stream if it is still held. Safe to call more than once.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        closeStream();
    }

    /** Closes the stream at most once by dropping the reference before closing it. */
    private void closeStream() throws IOException {
        if (fileIn != null) {
            FSDataInputStream in = fileIn;
            // Clear the field first so a failing close() is not attempted a second time.
            fileIn = null;
            in.close();
        }
    }
}