Example usage for org.apache.hadoop.mapred LineRecordReader LineRecordReader

Introduction

This page lists example usages of the org.apache.hadoop.mapred LineRecordReader constructor. LineRecordReader reads a FileSplit line by line, producing LongWritable keys (the byte offset of each line within the file) and Text values (the contents of the line).

Prototype

public LineRecordReader(Configuration job, FileSplit split) throws IOException 
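Below is a minimal, self-contained sketch of how this constructor is typically used, following the pattern seen in the examples on this page. The input path and split boundaries are hypothetical; in a real job the FileSplit comes from an InputFormat's getSplits().

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.LineRecordReader;

public class LineRecordReaderSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical split: the first 1024 bytes of a file.
        FileSplit split = new FileSplit(new Path("/tmp/input.txt"), 0, 1024, (String[]) null);

        LineRecordReader reader = new LineRecordReader(conf, split);
        LongWritable key = reader.createKey(); // byte offset of the current line
        Text value = reader.createValue();     // contents of the current line
        try {
            while (reader.next(key, value)) {
                System.out.println(key.get() + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}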

Usage

From source file:Job1RecordReader.java

License:Apache License

public Job1RecordReader(JobConf job, FileSplit split) throws IOException {
    lineReader = new LineRecordReader(job, split);
    lineKey = lineReader.createKey();
    lineValue = lineReader.createValue();
}
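
The excerpt above shows only the constructor of a wrapping RecordReader. For context, a minimal complete wrapper that delegates every RecordReader method to the underlying LineRecordReader might look like the following sketch; the class name is hypothetical.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.LineRecordReader;
import org.apache.hadoop.mapred.RecordReader;

public class DelegatingRecordReader implements RecordReader<LongWritable, Text> {
    private final LineRecordReader lineReader;

    public DelegatingRecordReader(JobConf job, FileSplit split) throws IOException {
        lineReader = new LineRecordReader(job, split);
    }

    public boolean next(LongWritable key, Text value) throws IOException {
        // A real wrapper would transform the key or value here.
        return lineReader.next(key, value);
    }

    public LongWritable createKey() {
        return lineReader.createKey();
    }

    public Text createValue() {
        return lineReader.createValue();
    }

    public long getPos() throws IOException {
        return lineReader.getPos();
    }

    public float getProgress() throws IOException {
        return lineReader.getProgress();
    }

    public void close() throws IOException {
        lineReader.close();
    }
}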

From source file:com.alexholmes.hadooputils.sort.SortInputFormatOld.java

License:Apache License

@Override
public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, final JobConf job,
        final Reporter reporter) throws IOException {
    reporter.setStatus(genericSplit.toString());
    return new SortRecordReader(job, new LineRecordReader(job, (FileSplit) genericSplit));
}

From source file:com.hdfs.concat.crush.KeyValuePreservingTextInputFormat.java

License:Apache License

@Override
public RecordReader<Text, Text> getRecordReader(InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {

    reporter.setStatus(genericSplit.toString());

    return new KeyValuePreservingRecordReader(new LineRecordReader(job, (FileSplit) genericSplit));
}

From source file:com.inmobi.messaging.consumer.databus.mapred.DatabusRecordReader.java

License:Apache License

public DatabusRecordReader(JobConf job, InputSplit split) throws IOException {
    lineReader = new LineRecordReader(job, (FileSplit) split);
}

From source file:com.micmiu.hive.fotmater.Base64TextInputFormat.java

License:Apache License

public RecordReader<LongWritable, BytesWritable> getRecordReader(InputSplit genericSplit, JobConf job,
        Reporter reporter) throws IOException {
    reporter.setStatus(genericSplit.toString());
    Base64LineRecordReader reader = new Base64LineRecordReader(
            new LineRecordReader(job, (FileSplit) genericSplit));
    reader.configure(job);
    return reader;
}

From source file:edu.ub.ahstfg.indexer.mapred.IndexRecordReader.java

License:Open Source License

/**
 * Sole constructor.
 * @param job Job reading the records.
 * @param input Split containing the records.
 * @param reporter Job reporter.
 * @throws IOException if the underlying line reader cannot be opened.
 */
public IndexRecordReader(JobConf job, FileSplit input, Reporter reporter) throws IOException {
    this.reporter = reporter;

    lineReader = new LineRecordReader(job, input);
    lineKey = lineReader.createKey();
    lineValue = lineReader.createValue();

    numMachines = job.getInt(ParamSet.NUM_MACHINES, 10);
    numDocs = job.getInt(ParamSet.NUM_DOCS, 1000);
    qDocsPerMapper = numDocs / numMachines;
    rDocsPerMapper = numDocs - (qDocsPerMapper * numMachines);

    fillDocsPerMapper();
}

From source file:it.crs4.pydoop.mapred.TextInputFormat.java

License:Apache License

public RecordReader<LongWritable, Text> getRecordReader(InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    reporter.setStatus(genericSplit.toString());
    return new LineRecordReader(job, (FileSplit) genericSplit);
}

From source file:mlbench.pagerank.PagerankMerge.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);

        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                LOG.info(PagerankMerge.class.getSimpleName() + " O start.");
            }
            FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, jobConf,
                    inDir, rank);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit);

                LongWritable key = kvrr.createKey();
                Text value = kvrr.createValue();
                {
                    while (kvrr.next(key, value)) {
                        String line_text = value.toString();
                        final String[] line = line_text.split("\t");
                        if (line.length >= 2) {
                            MPI_D.Send(new IntWritable(Integer.parseInt(line[0])), new Text(line[1]));
                        }
                    }
                }
            }

        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            if (rank == 0) {
                LOG.info(PagerankMerge.class.getSimpleName() + " A start.");
            }
            HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir,
                    IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);

            IntWritable oldKey = null;
            double next_rank = 0;
            double previous_rank = 0;
            double diff = 0;
            int local_diffs = 0;
            random_coeff = (1 - mixing_c) / (double) number_nodes;
            converge_threshold = ((double) 1.0 / (double) number_nodes) / 10;
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                IntWritable key = (IntWritable) keyValue[0];
                Text value = (Text) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                }
                if (!key.equals(oldKey)) {
                    next_rank = next_rank * mixing_c + random_coeff;
                    outrw.write(oldKey, new Text("v" + next_rank));
                    diff = Math.abs(previous_rank - next_rank);
                    if (diff > converge_threshold) {
                        local_diffs += 1;
                    }
                    oldKey = key;
                    next_rank = 0;
                    previous_rank = 0;
                }

                String cur_value_str = value.toString();
                if (cur_value_str.charAt(0) == 's') {
                    previous_rank = Double.parseDouble(cur_value_str.substring(1));
                } else {
                    next_rank += Double.parseDouble(cur_value_str.substring(1));
                }

                keyValue = MPI_D.Recv();
            }
            if (previous_rank != 0) {
                next_rank = next_rank * mixing_c + random_coeff;
                outrw.write(oldKey, new Text("v" + next_rank));
                diff = Math.abs(previous_rank - next_rank);
                if (diff > converge_threshold)
                    local_diffs += 1;
            }
            outrw.close();
            reduceDiffs(local_diffs, rank);
        }

        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}

From source file:mlbench.pagerank.PagerankNaive.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);

        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                LOG.info(PagerankNaive.class.getSimpleName() + " O start.");
            }
            FileSplit[] inputs1 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    jobConf, edgeDir, rank);
            FileSplit[] inputs2 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    jobConf, vecDir, rank);
            FileSplit[] inputs = (FileSplit[]) ArrayUtils.addAll(inputs2, inputs1);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit);

                LongWritable key = kvrr.createKey();
                Text value = kvrr.createValue();
                {
                    IntWritable k = new IntWritable();
                    Text v = new Text();
                    while (kvrr.next(key, value)) {
                        String line_text = value.toString();
                        // ignore comments in edge file
                        if (line_text.startsWith("#"))
                            continue;

                        final String[] line = line_text.split("\t");
                        if (line.length < 2)
                            continue;

                        // vector : ROWID VALUE('vNNNN')
                        if (line[1].charAt(0) == 'v') {
                            k.set(Integer.parseInt(line[0]));
                            v.set(line[1]);
                            MPI_D.Send(k, v);
                        } else {
                            /*
                             * In an ordinary matrix-vector multiplication we
                             * would output (dst, src) here. In PageRank,
                             * however, the computation is M^T * v, so we
                             * output (src, dst) instead.
                             */
                            int src_id = Integer.parseInt(line[0]);
                            int dst_id = Integer.parseInt(line[1]);
                            k.set(src_id);
                            v.set(line[1]);
                            MPI_D.Send(k, v);

                            if (make_symmetric == 1) {
                                k.set(dst_id);
                                v.set(line[0]);
                                MPI_D.Send(k, v);
                            }
                        }
                    }
                }
            }

        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            if (rank == 0) {
                LOG.info(PagerankNaive.class.getSimpleName() + " A start.");
            }

            HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir,
                    IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);

            IntWritable oldKey = null;
            int i;
            double cur_rank = 0;
            ArrayList<Integer> dst_nodes_list = new ArrayList<Integer>();
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                IntWritable key = (IntWritable) keyValue[0];
                Text value = (Text) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                }
                // A new key arrives
                if (!key.equals(oldKey)) {
                    outrw.write(oldKey, new Text("s" + cur_rank));
                    int outdeg = dst_nodes_list.size();
                    if (outdeg > 0) {
                        cur_rank = cur_rank / (double) outdeg;
                    }
                    for (i = 0; i < outdeg; i++) {
                        outrw.write(new IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank));
                    }
                    oldKey = key;
                    cur_rank = 0;
                    dst_nodes_list = new ArrayList<Integer>();
                }
                // common record
                String line_text = value.toString();
                final String[] line = line_text.split("\t");
                if (line.length == 1) {
                    if (line_text.charAt(0) == 'v') { // vector : VALUE
                        cur_rank = Double.parseDouble(line_text.substring(1));
                    } else { // edge : ROWID
                        dst_nodes_list.add(Integer.parseInt(line[0]));
                    }
                }
                keyValue = MPI_D.Recv();
            }
            // write the left part
            if (cur_rank != 0) {
                outrw.write(oldKey, new Text("s" + cur_rank));
                int outdeg = dst_nodes_list.size();
                if (outdeg > 0) {
                    cur_rank = cur_rank / (double) outdeg;
                }
                for (i = 0; i < outdeg; i++) {
                    outrw.write(new IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank));
                }
            }
            outrw.close();
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}

From source file:nl.tudelft.graphalytics.mapreducev2.common.DirectedNodeNeighbourRecordReader.java

License:Apache License

public DirectedNodeNeighbourRecordReader(JobConf job, FileSplit split) throws IOException {
    lineReader = new LineRecordReader(job, split);

    lineKey = lineReader.createKey();
    lineValue = lineReader.createValue();
}