List of usage examples for the org.apache.hadoop.mapred.LineRecordReader constructor
public LineRecordReader(Configuration job, FileSplit split) throws IOException
From source file:Job1RecordReader.java
License:Apache License
/**
 * Creates a record reader backed by a {@link LineRecordReader} over the given split, and
 * stores the key and value objects created by that line reader.
 *
 * @param job   job configuration handed to the underlying line reader
 * @param split file split handed to the underlying line reader
 * @throws IOException if the underlying line reader cannot be constructed
 */
public Job1RecordReader(JobConf job, FileSplit split) throws IOException {
    this.lineReader = new LineRecordReader(job, split);
    this.lineKey = this.lineReader.createKey();
    this.lineValue = this.lineReader.createValue();
}
From source file:com.alexholmes.hadooputils.sort.SortInputFormatOld.java
License:Apache License
/**
 * Reports the split being read as the task status and returns a {@code SortRecordReader}
 * wrapping a {@link LineRecordReader} over that split.
 *
 * @param genericSplit split to read; cast to {@code FileSplit}
 * @param job          job configuration
 * @param reporter     receives the split's string form as the status
 * @return a {@code SortRecordReader} over the split
 * @throws IOException if the line reader cannot be constructed
 */
@Override
public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, final JobConf job,
        final Reporter reporter) throws IOException {
    final String splitDescription = genericSplit.toString();
    reporter.setStatus(splitDescription);

    final FileSplit fileSplit = (FileSplit) genericSplit;
    final LineRecordReader lineReader = new LineRecordReader(job, fileSplit);
    return new SortRecordReader(job, lineReader);
}
From source file:com.hdfs.concat.crush.KeyValuePreservingTextInputFormat.java
License:Apache License
/**
 * Reports the split being read as the task status and returns a
 * {@code KeyValuePreservingRecordReader} wrapping a {@link LineRecordReader} over that split.
 *
 * @param genericSplit split to read; cast to {@code FileSplit}
 * @param job          job configuration
 * @param reporter     receives the split's string form as the status
 * @return a {@code KeyValuePreservingRecordReader} over the split
 * @throws IOException if the line reader cannot be constructed
 */
@Override
public RecordReader<Text, Text> getRecordReader(InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    String splitDescription = genericSplit.toString();
    reporter.setStatus(splitDescription);

    FileSplit fileSplit = (FileSplit) genericSplit;
    LineRecordReader lineReader = new LineRecordReader(job, fileSplit);
    return new KeyValuePreservingRecordReader(lineReader);
}
From source file:com.inmobi.messaging.consumer.databus.mapred.DatabusRecordReader.java
License:Apache License
/**
 * Creates a record reader backed by a {@link LineRecordReader} over the given split.
 *
 * @param job   job configuration handed to the underlying line reader
 * @param split split to read; cast to {@code FileSplit}
 * @throws IOException if the underlying line reader cannot be constructed
 */
public DatabusRecordReader(JobConf job, InputSplit split) throws IOException {
    FileSplit fileSplit = (FileSplit) split;
    this.lineReader = new LineRecordReader(job, fileSplit);
}
From source file:com.micmiu.hive.fotmater.Base64TextInputFormat.java
License:Apache License
public RecordReader<LongWritable, BytesWritable> getRecordReader(InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException { reporter.setStatus(genericSplit.toString()); Base64LineRecordReader reader = new Base64LineRecordReader( new LineRecordReader(job, (FileSplit) genericSplit)); reader.configure(job);//from w w w . ja v a 2s . c om return reader; }
From source file:edu.ub.ahstfg.indexer.mapred.IndexRecordReader.java
License:Open Source License
/**
 * Sole constructor.
 *
 * <p>Wraps a {@link LineRecordReader} over the given split, stores the key and value objects
 * created by that reader, reads the machine and document counts from the job configuration,
 * and then calls {@code fillDocsPerMapper()}.
 *
 * @param job Job reading the records; also supplies {@code ParamSet.NUM_MACHINES}
 *            (default 10) and {@code ParamSet.NUM_DOCS} (default 1000).
 * @param input Split containing the records.
 * @param reporter Job reporter.
 * @throws IOException if the underlying line reader cannot be constructed.
 */
public IndexRecordReader(JobConf job, FileSplit input, Reporter reporter) throws IOException {
    this.reporter = reporter;
    lineReader = new LineRecordReader(job, input);
    lineKey = lineReader.createKey();
    lineValue = lineReader.createValue();
    numMachines = job.getInt(ParamSet.NUM_MACHINES, 10);
    numDocs = job.getInt(ParamSet.NUM_DOCS, 1000);
    // Integer quotient of documents per machine, and the documents left over after
    // giving every machine that quotient.
    qDocsPerMapper = numDocs / numMachines;
    rDocsPerMapper = numDocs - (qDocsPerMapper * numMachines);
    fillDocsPerMapper();
}
From source file:it.crs4.pydoop.mapred.TextInputFormat.java
License:Apache License
/**
 * Reports the split being read as the task status and returns a {@link LineRecordReader}
 * over that split.
 *
 * @param genericSplit split to read; cast to {@code FileSplit}
 * @param job          job configuration
 * @param reporter     receives the split's string form as the status
 * @return a line reader over the split
 * @throws IOException if the line reader cannot be constructed
 */
public RecordReader<LongWritable, Text> getRecordReader(InputSplit genericSplit, JobConf job,
        Reporter reporter) throws IOException {
    String splitDescription = genericSplit.toString();
    reporter.setStatus(splitDescription);

    FileSplit fileSplit = (FileSplit) genericSplit;
    return new LineRecordReader(job, fileSplit);
}
From source file:mlbench.pagerank.PagerankMerge.java
License:Apache License
/**
 * Entry point of the PageRank merge step.
 *
 * <p>On the O communicator, every input split under {@code inDir} is read line by line; each
 * line with at least two tab-separated fields is sent as (integer first field, second field).
 * On the A communicator, received values are accumulated per key: a value starting with
 * {@code 's'} sets the previous rank, any other value adds its numeric part to the next rank.
 * When the key changes, {@code "v" + next_rank} is written for the finished key and the
 * number of keys whose rank moved by more than {@code converge_threshold} is counted and
 * passed to {@code reduceDiffs}.
 *
 * @param args command-line arguments, forwarded to {@code parseArgs} and {@code MPI_D.Init}
 * @throws IOException if reading the input splits or writing the output fails
 * @throws InterruptedException declared by the original signature
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);
        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                LOG.info(PagerankMerge.class.getSimpleName() + " O start.");
            }
            FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, jobConf,
                    inDir, rank);
            for (FileSplit fsplit : inputs) {
                LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit);
                try {
                    LongWritable key = kvrr.createKey();
                    Text value = kvrr.createValue();
                    while (kvrr.next(key, value)) {
                        String line_text = value.toString();
                        final String[] line = line_text.split("\t");
                        if (line.length >= 2) {
                            MPI_D.Send(new IntWritable(Integer.parseInt(line[0])), new Text(line[1]));
                        }
                    }
                } finally {
                    // Release the split's input stream; previously one reader per split
                    // was left open for the lifetime of the process.
                    kvrr.close();
                }
            }
        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            if (rank == 0) {
                LOG.info(PagerankMerge.class.getSimpleName() + " A start.");
            }
            HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir,
                    IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);
            IntWritable oldKey = null;
            double next_rank = 0;
            double previous_rank = 0;
            double diff = 0;
            int local_diffs = 0;
            random_coeff = (1 - mixing_c) / (double) number_nodes;
            converge_threshold = ((double) 1.0 / (double) number_nodes) / 10;
            // NOTE(review): the key-change logic below presumes Recv delivers pairs grouped
            // by key - confirm against the MPI_D contract.
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                IntWritable key = (IntWritable) keyValue[0];
                Text value = (Text) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                }
                if (!key.equals(oldKey)) {
                    // Key changed: finalize and emit the rank of the previous key.
                    next_rank = next_rank * mixing_c + random_coeff;
                    outrw.write(oldKey, new Text("v" + next_rank));
                    diff = Math.abs(previous_rank - next_rank);
                    if (diff > converge_threshold) {
                        local_diffs += 1;
                    }
                    oldKey = key;
                    next_rank = 0;
                    previous_rank = 0;
                }
                String cur_value_str = value.toString();
                if (cur_value_str.charAt(0) == 's') {
                    previous_rank = Double.parseDouble(cur_value_str.substring(1));
                } else {
                    next_rank += Double.parseDouble(cur_value_str.substring(1));
                }
                keyValue = MPI_D.Recv();
            }
            // Emit the last key. NOTE(review): this is skipped when the last key's previous
            // rank is exactly 0 - confirm that is intended.
            if (previous_rank != 0) {
                next_rank = next_rank * mixing_c + random_coeff;
                outrw.write(oldKey, new Text("v" + next_rank));
                diff = Math.abs(previous_rank - next_rank);
                if (diff > converge_threshold) {
                    local_diffs += 1;
                }
            }
            outrw.close();
            reduceDiffs(local_diffs, rank);
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}
From source file:mlbench.pagerank.PagerankNaive.java
License:Apache License
/**
 * Entry point of the naive PageRank step.
 *
 * <p>On the O communicator, the splits of {@code vecDir} followed by those of
 * {@code edgeDir} are read line by line. Lines starting with {@code '#'} and lines with fewer
 * than two tab-separated fields are skipped. A line whose second field starts with
 * {@code 'v'} is sent as (row id, value); any other line is treated as an edge and sent as
 * (src, dst), plus (dst, src) when {@code make_symmetric} is 1. On the A communicator,
 * received values are collected per key; when the key changes, {@code "s" + rank} is written
 * for the finished key and {@code "v" + rank / outdegree} is written for each of its
 * destination nodes.
 *
 * @param args command-line arguments, forwarded to {@code parseArgs} and {@code MPI_D.Init}
 * @throws IOException if reading the input splits or writing the output fails
 * @throws InterruptedException declared by the original signature
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);
        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                LOG.info(PagerankNaive.class.getSimpleName() + " O start.");
            }
            FileSplit[] inputs1 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    jobConf, edgeDir, rank);
            FileSplit[] inputs2 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    jobConf, vecDir, rank);
            // Vector splits come first, edge splits second.
            FileSplit[] inputs = (FileSplit[]) ArrayUtils.addAll(inputs2, inputs1);
            for (FileSplit fsplit : inputs) {
                LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit);
                try {
                    LongWritable key = kvrr.createKey();
                    Text value = kvrr.createValue();
                    IntWritable k = new IntWritable();
                    Text v = new Text();
                    while (kvrr.next(key, value)) {
                        String line_text = value.toString();
                        // ignore comments in edge file
                        if (line_text.startsWith("#")) {
                            continue;
                        }
                        final String[] line = line_text.split("\t");
                        if (line.length < 2) {
                            continue;
                        }
                        // vector : ROWID VALUE('vNNNN')
                        if (line[1].charAt(0) == 'v') {
                            k.set(Integer.parseInt(line[0]));
                            v.set(line[1]);
                            MPI_D.Send(k, v);
                        } else {
                            /*
                             * In other matrix-vector multiplication, we
                             * output (dst, src) here. However, in PageRank,
                             * the matrix-vector computation formula is M^T
                             * * v. Therefore, we output (src, dst) here.
                             */
                            int src_id = Integer.parseInt(line[0]);
                            int dst_id = Integer.parseInt(line[1]);
                            k.set(src_id);
                            v.set(line[1]);
                            MPI_D.Send(k, v);
                            if (make_symmetric == 1) {
                                k.set(dst_id);
                                v.set(line[0]);
                                MPI_D.Send(k, v);
                            }
                        }
                    }
                } finally {
                    // Release the split's input stream; previously one reader per split
                    // was left open for the lifetime of the process.
                    kvrr.close();
                }
            }
        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            if (rank == 0) {
                LOG.info(PagerankNaive.class.getSimpleName() + " A start.");
            }
            HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir,
                    IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);
            IntWritable oldKey = null;
            double cur_rank = 0;
            ArrayList<Integer> dst_nodes_list = new ArrayList<Integer>();
            // NOTE(review): the key-change logic below presumes Recv delivers pairs grouped
            // by key - confirm against the MPI_D contract.
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                IntWritable key = (IntWritable) keyValue[0];
                Text value = (Text) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                }
                // A new key arrives
                if (!key.equals(oldKey)) {
                    outrw.write(oldKey, new Text("s" + cur_rank));
                    int outdeg = dst_nodes_list.size();
                    if (outdeg > 0) {
                        cur_rank = cur_rank / (double) outdeg;
                    }
                    for (Integer dst_node : dst_nodes_list) {
                        outrw.write(new IntWritable(dst_node), new Text("v" + cur_rank));
                    }
                    oldKey = key;
                    cur_rank = 0;
                    dst_nodes_list = new ArrayList<Integer>();
                }
                // common record
                String line_text = value.toString();
                final String[] line = line_text.split("\t");
                if (line.length == 1) {
                    if (line_text.charAt(0) == 'v') { // vector : VALUE
                        cur_rank = Double.parseDouble(line_text.substring(1));
                    } else { // edge : ROWID
                        dst_nodes_list.add(Integer.parseInt(line[0]));
                    }
                }
                keyValue = MPI_D.Recv();
            }
            // Write the last key. NOTE(review): this is skipped when the last key's rank is
            // exactly 0 - confirm that is intended.
            if (cur_rank != 0) {
                outrw.write(oldKey, new Text("s" + cur_rank));
                int outdeg = dst_nodes_list.size();
                if (outdeg > 0) {
                    cur_rank = cur_rank / (double) outdeg;
                }
                for (Integer dst_node : dst_nodes_list) {
                    outrw.write(new IntWritable(dst_node), new Text("v" + cur_rank));
                }
            }
            outrw.close();
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}
From source file:nl.tudelft.graphalytics.mapreducev2.common.DirectedNodeNeighbourRecordReader.java
License:Apache License
/**
 * Creates a record reader backed by a {@link LineRecordReader} over the given split, and
 * stores the key and value objects created by that line reader.
 *
 * @param job   job configuration handed to the underlying line reader
 * @param split file split handed to the underlying line reader
 * @throws IOException if the underlying line reader cannot be constructed
 */
public DirectedNodeNeighbourRecordReader(JobConf job, FileSplit split) throws IOException {
    lineReader = new LineRecordReader(job, split);
    lineKey = lineReader.createKey();
    // This assignment used to sit behind a trailing line comment on the same physical
    // line, which commented out both it and the closing brace.
    lineValue = lineReader.createValue();
}