com.blackberry.logdriver.mapreduce.gzip.GzipLineRecordReader.java Source code

Java tutorial

Introduction

Here is the source code for com.blackberry.logdriver.mapreduce.gzip.GzipLineRecordReader.java

Source

/** Copyright (c) 2014 BlackBerry Limited
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License. 
 */

package com.blackberry.logdriver.mapreduce.gzip;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class GzipLineRecordReader extends RecordReader<LongWritable, Text> {
    private static final Logger LOG = LoggerFactory.getLogger(GzipLineRecordReader.class);

    private BufferedReader in = null;

    private long pos = 0;

    private LongWritable key;
    private Text value;

    private float progress = 0.0f;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        key = new LongWritable();
        value = new Text();

        Path path = ((FileSplit) split).getPath();

        LOG.info("Starting to process file {}", path);
        in = new BufferedReader(
                new InputStreamReader(new GZIPInputStream(FileSystem.get(context.getConfiguration()).open(path))));
    }

    @Override
    public boolean nextKeyValue() throws IOException {
        String line = null;
        if ((line = in.readLine()) == null) {
            progress = 1.0f;
            return false;
        }

        LOG.trace("Next line is {}", line);
        key.set(++pos);
        value.set(line);
        return true;
    }

    @Override
    public LongWritable getCurrentKey() {
        return key;
    }

    @Override
    public Text getCurrentValue() {
        return value;
    }

    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
        }
    }

    @Override
    public float getProgress() throws IOException {
        return progress;
    }
}