com.blackberry.logdriver.mapreduce.boom.BoomIndividualRecordReader.java Source code

Java tutorial

Introduction

Here is the source code for com.blackberry.logdriver.mapreduce.boom.BoomIndividualRecordReader.java

Source

/** Copyright (c) 2014 BlackBerry Limited
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License. 
 */

package com.blackberry.logdriver.mapreduce.boom;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.mapred.FsInput;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.blackberry.logdriver.Schemas;
import com.blackberry.logdriver.boom.LogLineData;

/**
 * A {@link RecordReader} that reads an Avro "logBlock" file and emits one
 * key/value pair per log line contained in each block record.
 *
 * <p>
 * Each Avro record read from the file carries a {@code blockNumber}, a
 * {@code createTime}, a {@code second} and a list of {@code logLines}. The
 * lines of the current record are queued and handed out one at a time: the key
 * is a {@link LogLineData} populated from the block and line fields, and the
 * value is the line's {@code message} as {@link Text}.
 *
 * <p>
 * The same key and value instances are reused for every call to
 * {@link #nextKeyValue()}.
 */
public class BoomIndividualRecordReader extends RecordReader<LogLineData, Text> {
    private static final Logger LOG = LoggerFactory.getLogger(BoomIndividualRecordReader.class);

    /** Split used for progress reporting; replaced by the one given to initialize(). */
    private FileSplit split;

    /** Byte offset at which this split starts. */
    private long start = 0;
    /** Byte offset at which this split ends (start + length). */
    private long end = 0;
    /** Last position reported by the underlying reader. */
    private long pos = 0;

    /** Avro file reader; stays null for zero length files. */
    private DataFileReader<Record> reader = null;

    /** Block level data shared by all lines of the current record. */
    private LogLineData lld = null;
    /** The "second" field of the current record. */
    private long second = 0;
    /** 1-based counter of the lines handed out from the current record. */
    private long lineNumber = 0;
    /** Lines of the current record that have not been handed out yet. */
    private final Deque<Record> lines = new ArrayDeque<Record>();

    private final LogLineData key = new LogLineData();
    private final Text value = new Text();

    /**
     * Creates a reader for the given split.
     *
     * @param split
     *            the split to read; used for progress reporting until
     *            {@link #initialize(InputSplit, TaskAttemptContext)} is called
     * @param context
     *            the task context (not used by the constructor)
     */
    public BoomIndividualRecordReader(FileSplit split, TaskAttemptContext context) {
        this.split = split;
    }

    /**
     * Closes the underlying Avro reader, if one was opened.
     *
     * @throws IOException
     *             if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        if (reader != null) {
            reader.close();
        }
    }

    /**
     * @return the key populated by the last successful call to
     *         {@link #nextKeyValue()}; the same instance is returned every time
     */
    @Override
    public LogLineData getCurrentKey() {
        return key;
    }

    /**
     * @return the value populated by the last successful call to
     *         {@link #nextKeyValue()}; the same instance is returned every time
     */
    @Override
    public Text getCurrentValue() {
        return value;
    }

    /**
     * Reports how far through the split the reader has progressed.
     *
     * @return a value between 0.0 and 1.0 inclusive; 0 for a zero length split
     * @throws IOException
     *             declared by the overridden method
     */
    @Override
    public float getProgress() throws IOException {
        // This should probably not happen, but if it does, then don't divide by
        // zero.
        long length = split.getLength();
        if (length == 0) {
            return 0;
        }

        // Divide in floating point: long / long would truncate to 0 for the
        // whole time the reader is inside the split.
        float progress = (float) (pos - split.getStart()) / length;

        // The reader position can be before the split start (nothing read yet)
        // or beyond the split end, so keep the result inside [0, 1].
        return Math.min(1.0f, Math.max(0.0f, progress));
    }

    /**
     * Opens the file behind the split and positions the Avro reader at the
     * split's start offset. A zero length file is not opened at all; in that
     * case {@link #nextKeyValue()} always returns {@code false}.
     *
     * @param split
     *            the split to read; must be a {@link FileSplit}
     * @param context
     *            the task context supplying the configuration
     * @throws IOException
     *             if the file cannot be inspected or opened
     * @throws InterruptedException
     *             declared by the overridden method
     */
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        FileSplit fileSplit = (FileSplit) split;

        // Report progress against the split that is actually being read.
        this.split = fileSplit;

        LOG.info("Initializing {}:{}+{}",
                new Object[] { fileSplit.getPath(), fileSplit.getStart(), fileSplit.getLength() });

        // Check for zero length files
        if (fileSplit.getPath().getFileSystem(context.getConfiguration()).getFileStatus(fileSplit.getPath())
                .getLen() == 0) {
            reader = null;
            return;
        }

        GenericDatumReader<Record> datumReader = new GenericDatumReader<Record>(Schemas.getSchema("logBlock"));
        reader = new DataFileReader<Record>(new FsInput(fileSplit.getPath(), context.getConfiguration()),
                datumReader);
        // Read with the schema stored in the file, resolved to the logBlock schema.
        datumReader.setExpected(Schemas.getSchema("logBlock"));
        datumReader.setSchema(reader.getSchema());

        long size = fileSplit.getLength();
        start = fileSplit.getStart();
        end = start + size;

        reader.sync(start);

        // Nothing has been consumed yet, so progress starts at the split start.
        pos = start;
    }

    /**
     * Advances to the next log line, loading further block records from the
     * file whenever the lines of the current record are exhausted.
     *
     * @return {@code true} if a new key/value pair is available, {@code false}
     *         if there is no reader, no further record, or the reader has
     *         passed the end of the split
     * @throws IOException
     *             if reading from the file fails
     */
    @Override
    public boolean nextKeyValue() throws IOException {
        // A record may contain no lines at all, so keep loading records until
        // there is a line to hand out.
        while (lines.isEmpty()) {
            // If we're out of lines in the current record, then get the next record -
            // unless we're out of records or past the end of where we should be.
            if (reader == null || !reader.hasNext() || reader.pastSync(end)) {
                return false;
            }

            Record record = reader.next();
            lld = new LogLineData();
            lld.setBlockNumber((Long) record.get("blockNumber"));
            lld.setCreateTime((Long) record.get("createTime"));
            second = (Long) record.get("second");
            lineNumber = 0;

            // The generic Avro API returns Object; the cast relies on the
            // logBlock schema declaring logLines as a list of records.
            @SuppressWarnings("unchecked")
            List<Record> logLines = (List<Record>) record.get("logLines");
            lines.addAll(logLines);
        }

        Record line = lines.pollFirst();
        long ms = (Long) line.get("ms");
        String message = line.get("message").toString();
        int eventId = (Integer) line.get("eventId");

        ++lineNumber;

        key.set(lld);
        key.setLineNumber(lineNumber);
        // The block stores whole seconds and each line its "ms" part.
        key.setTimestamp(second * 1000 + ms);
        key.setEventId(eventId);

        value.set(message);

        pos = reader.tell();

        return true;
    }

}