// Java tutorial
/* * TextFileRecordReader.java * * Copyright (C) 2016 Pavel Prokhorov (pavelvpster@gmail.com) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package org.interactiverobotics.source_code_crawler.step6; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileSplit; /** * TextFileRecordReader. 
*/ public class TextFileRecordReader extends RecordReader<Text, Text> { private FileSplit fileSplit; private Configuration configuration; private Text key = new Text(); private Text value = new Text(); private boolean processed = false; @Override public void initialize(final InputSplit split, final TaskAttemptContext context) throws IOException, InterruptedException { this.fileSplit = (FileSplit) split; this.configuration = context.getConfiguration(); } @Override public boolean nextKeyValue() throws IOException, InterruptedException { if (!processed) { final byte[] contents = new byte[(int) fileSplit.getLength()]; final Path file = fileSplit.getPath(); final FileSystem fileSystem = file.getFileSystem(configuration); FSDataInputStream in = null; try { in = fileSystem.open(file); IOUtils.readFully(in, contents, 0, contents.length); key.set(file.toString()); value.set(contents, 0, contents.length); } finally { IOUtils.closeStream(in); } processed = true; return true; } return false; } @Override public Text getCurrentKey() throws IOException, InterruptedException { return key; } @Override public Text getCurrentValue() throws IOException, InterruptedException { return value; } @Override public float getProgress() throws IOException { return processed ? 1.0f : 0.0f; } @Override public void close() throws IOException { } }