Example usage for org.apache.hadoop.mapred LineRecordReader next

List of usage examples for org.apache.hadoop.mapred LineRecordReader next

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.LineRecordReader.next method.

Prototype

public synchronized boolean next(LongWritable key, Text value) throws IOException 

Source Link

Document

Read a line.

Usage

From source file:edu.umn.cs.spatialHadoop.mapred.SpatialInputFormat.java

License:Open Source License

/**
 * Collects into {@code result} the files that should be read from {@code dir}.
 *
 * <p>When {@code SpatialSite.getGlobalIndex} returns {@code null} for {@code dir}, the
 * directory (or glob pattern) is listed and walked recursively. A file whose name ends
 * with {@code .list} is read line by line; each line is resolved against the list file's
 * parent directory and the status of the resulting path is added. Any other file is added
 * as is.
 *
 * <p>When a global index exists, {@code filter.selectCells} is asked to pick partitions
 * and the status of each selected partition file is added.
 *
 * @param fs     file system used for listing and status look-ups
 * @param dir    directory or wildcard pattern to scan
 * @param result list that receives the selected file statuses
 * @param filter block filter used to select partitions when a global index exists
 * @throws IOException if listing the directory or reading a {@code .list} file fails
 */
protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter)
        throws IOException {
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir);
    if (gindex == null) {
        FileStatus[] listStatus;
        if (OperationsParams.isWildcard(dir)) {
            // Wild card
            listStatus = fs.globStatus(dir);
        } else {
            listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter);
        }
        // Add all files under this directory
        for (FileStatus status : listStatus) {
            if (status.isDir()) {
                listStatus(fs, status.getPath(), result, filter);
            } else if (status.getPath().getName().toLowerCase(java.util.Locale.ROOT).endsWith(".list")) {
                // Locale.ROOT keeps the extension check independent of the default locale
                // (e.g. the Turkish dotless i would otherwise break the ".list" match).
                LineRecordReader in = new LineRecordReader(fs.open(status.getPath()), 0, status.getLen(),
                        Integer.MAX_VALUE);
                try {
                    LongWritable key = in.createKey();
                    Text value = in.createValue();
                    while (in.next(key, value)) {
                        result.add(fs.getFileStatus(new Path(status.getPath().getParent(), value.toString())));
                    }
                } finally {
                    // Release the reader even if a listed file is missing or a read fails.
                    in.close();
                }
            } else {
                result.add(status);
            }
        }
    } else {
        final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir;
        // Use the global index to limit files
        filter.selectCells(gindex, new ResultCollector<Partition>() {
            @Override
            public void collect(Partition partition) {
                Path cell_path = new Path(indexDir, partition.filename);
                try {
                    if (!fs.exists(cell_path)) {
                        // Nothing to add: asking for the status of a missing file would only throw.
                        LOG.warn("Matched file not found: " + cell_path);
                        return;
                    }
                    result.add(fs.getFileStatus(cell_path));
                } catch (IOException e) {
                    // collect() cannot throw a checked exception, so report and skip this partition.
                    LOG.warn("Could not get the status of matched file: " + cell_path, e);
                }
            }
        });
    }
}

From source file:edu.umn.cs.spatialHadoop.operations.CatUnion.java

License:Open Source License

/**
 * Read all categories from the category file
 * @param categoryFile/*  w w  w  . j av  a 2  s .  c om*/
 * @param categoryShapes
 * @param idToCategory
 * @throws IOException
 */
private static void readCategories(Path categoryFile, Map<Integer, Integer> idToCategory) throws IOException {
    Map<Integer, String> idToCatName = new HashMap<Integer, String>();
    FileSystem fsCategory = FileSystem.getLocal(new Configuration());
    long categoryFileSize = fsCategory.getFileStatus(categoryFile).getLen();
    if (categoryFileSize > 1024 * 1024)
        LOG.warn("Category file size is big: " + categoryFileSize);
    InputStream inCategory = fsCategory.open(categoryFile);
    LineRecordReader lineReader = new LineRecordReader(inCategory, 0, categoryFileSize, new Configuration());
    LongWritable lineOffset = lineReader.createKey();
    Text line = lineReader.createValue();

    Set<String> catNames = new TreeSet<String>();

    while (lineReader.next(lineOffset, line)) {
        int shape_id = TextSerializerHelper.consumeInt(line, ',');
        String cat_name = line.toString();
        catNames.add(cat_name);
        idToCatName.put(shape_id, cat_name);
    }

    lineReader.close();

    // Change category names to numbers
    Map<String, Integer> cat_name_to_id = new HashMap<String, Integer>();
    int cat_id = 0;
    for (String cat_name : catNames) {
        cat_name_to_id.put(cat_name, cat_id++);
    }

    for (Map.Entry<Integer, String> entry : idToCatName.entrySet()) {
        idToCategory.put(entry.getKey(), cat_name_to_id.get(entry.getValue()));
    }
}

From source file:kogiri.mapreduce.libra.kmersimilarity_m.KmerSimilarityMap.java

License:Open Source License

/**
 * Accumulates the scores of all k-mer similarity result files under {@code outputPath}
 * and writes the accumulated values to a final result file in the same directory.
 *
 * <p>The first line read becomes the accumulator record; the score of every later line is
 * added to it. Each accumulated value {@code i} is written as {@code x-y<TAB>value} with
 * {@code x = i / n}, {@code y = i % n} and {@code n = (int) sqrt(length)} (so the score
 * array is presumably a flattened n-by-n matrix - confirm against the record format).
 *
 * @param outputPath directory holding the partial result files; also receives the final file
 * @param conf       configuration used to resolve the file system and the result files
 * @throws IOException if reading or writing fails, or if no score line was found at all
 */
private void sumScores(Path outputPath, Configuration conf) throws IOException {
    Path[] resultFiles = KmerSimilarityHelper.getAllKmerSimilarityResultFilePath(conf, outputPath.toString());
    FileSystem fs = outputPath.getFileSystem(conf);

    KmerSimilarityOutputRecord scoreRec = null;
    for (Path resultFile : resultFiles) {
        LOG.info("Reading the scores from " + resultFile.toString());
        // Query the status before opening so a failed look-up cannot leak an open stream.
        FileStatus status = fs.getFileStatus(resultFile);
        FSDataInputStream is = fs.open(resultFile);

        LineRecordReader reader = new LineRecordReader(is, 0, status.getLen(), conf);
        try {
            LongWritable off = new LongWritable();
            Text val = new Text();

            while (reader.next(off, val)) {
                if (scoreRec == null) {
                    scoreRec = KmerSimilarityOutputRecord.createInstance(val.toString());
                } else {
                    KmerSimilarityOutputRecord rec2 = KmerSimilarityOutputRecord.createInstance(val.toString());
                    scoreRec.addScore(rec2.getScore());
                }
            }
        } finally {
            // Release the reader even if a line cannot be parsed.
            reader.close();
        }
    }

    if (scoreRec == null) {
        // No result file contained a line; fail with a clear message instead of an NPE below.
        throw new IOException("No k-mer similarity scores found under " + outputPath);
    }

    double[] accumulatedScore = scoreRec.getScore();

    String resultFilename = KmerSimilarityHelper.makeKmerSimilarityFinalResultFileName();
    Path resultFilePath = new Path(outputPath, resultFilename);

    LOG.info("Creating a final score file : " + resultFilePath.toString());

    FSDataOutputStream os = fs.create(resultFilePath);
    try {
        int n = (int) Math.sqrt(accumulatedScore.length);
        for (int i = 0; i < accumulatedScore.length; i++) {
            int x = i / n;
            int y = i % n;

            String k = x + "-" + y;
            String v = Double.toString(accumulatedScore[i]);
            String out = k + "\t" + v + "\n";
            // Explicit charset so the output does not depend on the platform default.
            os.write(out.getBytes("UTF-8"));
        }
    } finally {
        os.close();
    }
}

From source file:libra.core.kmersimilarity_m.KmerSimilarityMap.java

License:Apache License

/**
 * Accumulates the scores of all k-mer similarity result files under {@code outputPath}
 * and writes the accumulated values to a final result file in the same directory.
 *
 * <p>The first line read becomes the accumulator record; the score of every later line is
 * added to it. Each accumulated value {@code i} is written as {@code x-y<TAB>value} with
 * {@code x = i / n}, {@code y = i % n} and {@code n = (int) sqrt(length)} (so the score
 * array is presumably a flattened n-by-n matrix - confirm against the record format).
 * Entries with {@code x == y} are always written as {@code 1.0}.
 *
 * @param outputPath directory holding the partial result files; also receives the final file
 * @param conf       configuration used to resolve the file system and the result files
 * @throws IOException if reading or writing fails, or if no score line was found at all
 */
private void sumScores(Path outputPath, Configuration conf) throws IOException {
    Path[] resultFiles = KmerSimilarityHelper.getAllKmerSimilarityResultFilePath(conf, outputPath.toString());
    FileSystem fs = outputPath.getFileSystem(conf);

    KmerSimilarityOutputRecord scoreRec = null;
    for (Path resultFile : resultFiles) {
        LOG.info("Reading the scores from " + resultFile.toString());
        // Query the status before opening so a failed look-up cannot leak an open stream.
        FileStatus status = fs.getFileStatus(resultFile);
        FSDataInputStream is = fs.open(resultFile);

        LineRecordReader reader = new LineRecordReader(is, 0, status.getLen(), conf);
        try {
            LongWritable off = new LongWritable();
            Text val = new Text();

            while (reader.next(off, val)) {
                if (scoreRec == null) {
                    scoreRec = KmerSimilarityOutputRecord.createInstance(val.toString());
                } else {
                    KmerSimilarityOutputRecord rec2 = KmerSimilarityOutputRecord.createInstance(val.toString());
                    scoreRec.addScore(rec2.getScore());
                }
            }
        } finally {
            // Release the reader even if a line cannot be parsed.
            reader.close();
        }
    }

    if (scoreRec == null) {
        // No result file contained a line; fail with a clear message instead of an NPE below.
        throw new IOException("No k-mer similarity scores found under " + outputPath);
    }

    double[] accumulatedScore = scoreRec.getScore();

    String resultFilename = KmerSimilarityHelper.makeKmerSimilarityFinalResultFileName();
    Path resultFilePath = new Path(outputPath, resultFilename);

    LOG.info("Creating a final score file : " + resultFilePath.toString());

    FSDataOutputStream os = fs.create(resultFilePath);
    try {
        int n = (int) Math.sqrt(accumulatedScore.length);
        for (int i = 0; i < accumulatedScore.length; i++) {
            int x = i / n;
            int y = i % n;

            String k = x + "-" + y;
            String v = Double.toString(accumulatedScore[i]);
            if (x == y) {
                // Diagonal entries are forced to 1.0 regardless of the accumulated value.
                v = Double.toString(1.0);
            }
            String out = k + "\t" + v + "\n";
            // Explicit charset so the output does not depend on the platform default.
            os.write(out.getBytes("UTF-8"));
        }
    } finally {
        os.close();
    }
}

From source file:mlbench.pagerank.PagerankMerge.java

License:Apache License

/**
 * Entry point of the PageRank merge step.
 *
 * <p>A process in the O communicator reads its input splits line by line, splits every
 * line on tabs and, for lines with at least two fields, sends
 * {@code (int field 0, field 1)}. A process in the A communicator receives the pairs and
 * groups consecutive pairs with equal keys: a value starting with {@code 's'} supplies the
 * previous rank, any other value contributes the number after its first character to the
 * next rank. When the key changes, {@code next_rank * mixing_c + random_coeff} is written
 * as {@code "v" + rank} and keys whose rank moved by more than {@code converge_threshold}
 * are counted; the count is finally passed to {@code reduceDiffs}.
 *
 * @param args command-line arguments, handed to {@code parseArgs} and {@code MPI_D.Init}
 * @throws IOException          if reading the input or writing the output fails
 * @throws InterruptedException declared by the original signature
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);

        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                LOG.info(PagerankMerge.class.getSimpleName() + " O start.");
            }
            FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, jobConf,
                    inDir, rank);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit);
                try {
                    LongWritable key = kvrr.createKey();
                    Text value = kvrr.createValue();
                    while (kvrr.next(key, value)) {
                        String line_text = value.toString();
                        final String[] line = line_text.split("\t");
                        if (line.length >= 2) {
                            MPI_D.Send(new IntWritable(Integer.parseInt(line[0])), new Text(line[1]));
                        }
                    }
                } finally {
                    // Close each split's reader; otherwise one open stream leaks per split.
                    kvrr.close();
                }
            }

        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            if (rank == 0) {
                LOG.info(PagerankMerge.class.getSimpleName() + " A start.");
            }
            HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir,
                    IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);

            IntWritable oldKey = null;
            double next_rank = 0;
            double previous_rank = 0;
            double diff = 0;
            int local_diffs = 0;
            random_coeff = (1 - mixing_c) / (double) number_nodes;
            converge_threshold = ((double) 1.0 / (double) number_nodes) / 10;
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                IntWritable key = (IntWritable) keyValue[0];
                Text value = (Text) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                }
                if (!key.equals(oldKey)) {
                    // Key changed: finish the previous key before starting the new one.
                    next_rank = next_rank * mixing_c + random_coeff;
                    outrw.write(oldKey, new Text("v" + next_rank));
                    diff = Math.abs(previous_rank - next_rank);
                    if (diff > converge_threshold) {
                        local_diffs += 1;
                    }
                    oldKey = key;
                    next_rank = 0;
                    previous_rank = 0;
                }

                String cur_value_str = value.toString();
                if (cur_value_str.charAt(0) == 's') {
                    // 's' prefix: rank of this key from the previous iteration.
                    previous_rank = Double.parseDouble(cur_value_str.substring(1));
                } else {
                    // Any other prefix: partial contribution to the new rank.
                    next_rank += Double.parseDouble(cur_value_str.substring(1));
                }

                keyValue = MPI_D.Recv();
            }
            // Flush the last key; it is only written when a previous rank was seen for it.
            if (previous_rank != 0) {
                next_rank = next_rank * mixing_c + random_coeff;
                outrw.write(oldKey, new Text("v" + next_rank));
                diff = Math.abs(previous_rank - next_rank);
                if (diff > converge_threshold)
                    local_diffs += 1;
            }
            outrw.close();
            reduceDiffs(local_diffs, rank);
        }

        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}

From source file:mlbench.pagerank.PagerankNaive.java

License:Apache License

/**
 * Entry point of the naive PageRank multiplication step.
 *
 * <p>A process in the O communicator reads the vector splits followed by the edge splits.
 * Lines starting with {@code #} and lines with fewer than two tab-separated fields are
 * skipped. A line whose second field starts with {@code 'v'} is sent as
 * {@code (int field 0, field 1)}; any other line is treated as an edge and sent as
 * {@code (src, dst)}, plus {@code (dst, src)} when {@code make_symmetric == 1}.
 *
 * <p>A process in the A communicator groups consecutive pairs with equal keys. For each
 * key it writes {@code "s" + rank} for the key itself and {@code "v" + rank / outdegree}
 * for every destination node collected for that key.
 *
 * @param args command-line arguments, handed to {@code parseArgs} and {@code MPI_D.Init}
 * @throws IOException          if reading the input or writing the output fails
 * @throws InterruptedException declared by the original signature
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);

        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                LOG.info(PagerankNaive.class.getSimpleName() + " O start.");
            }
            FileSplit[] inputs1 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    jobConf, edgeDir, rank);
            FileSplit[] inputs2 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    jobConf, vecDir, rank);
            // Vector splits (inputs2) come first, edge splits (inputs1) after them.
            FileSplit[] inputs = (FileSplit[]) ArrayUtils.addAll(inputs2, inputs1);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit);
                try {
                    LongWritable key = kvrr.createKey();
                    Text value = kvrr.createValue();
                    // Reused for every Send call in this split.
                    IntWritable k = new IntWritable();
                    Text v = new Text();
                    while (kvrr.next(key, value)) {
                        String line_text = value.toString();
                        // ignore comments in edge file
                        if (line_text.startsWith("#"))
                            continue;

                        final String[] line = line_text.split("\t");
                        if (line.length < 2)
                            continue;

                        // vector : ROWID VALUE('vNNNN')
                        if (line[1].charAt(0) == 'v') {
                            k.set(Integer.parseInt(line[0]));
                            v.set(line[1]);
                            MPI_D.Send(k, v);
                        } else {
                            /*
                             * In other matrix-vector multiplications we output
                             * (dst, src) here. However, in PageRank the
                             * matrix-vector computation formula is M^T * v,
                             * therefore we output (src, dst) here.
                             */
                            int src_id = Integer.parseInt(line[0]);
                            int dst_id = Integer.parseInt(line[1]);
                            k.set(src_id);
                            v.set(line[1]);
                            MPI_D.Send(k, v);

                            if (make_symmetric == 1) {
                                k.set(dst_id);
                                v.set(line[0]);
                                MPI_D.Send(k, v);
                            }
                        }
                    }
                } finally {
                    // Close each split's reader; otherwise one open stream leaks per split.
                    kvrr.close();
                }
            }

        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            if (rank == 0) {
                LOG.info(PagerankNaive.class.getSimpleName() + " A start.");
            }

            HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir,
                    IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);

            IntWritable oldKey = null;
            int i;
            double cur_rank = 0;
            ArrayList<Integer> dst_nodes_list = new ArrayList<Integer>();
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                IntWritable key = (IntWritable) keyValue[0];
                Text value = (Text) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                }
                // A new key arrives
                if (!key.equals(oldKey)) {
                    outrw.write(oldKey, new Text("s" + cur_rank));
                    int outdeg = dst_nodes_list.size();
                    if (outdeg > 0) {
                        // Spread the rank evenly over the outgoing edges.
                        cur_rank = cur_rank / (double) outdeg;
                    }
                    for (i = 0; i < outdeg; i++) {
                        outrw.write(new IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank));
                    }
                    oldKey = key;
                    cur_rank = 0;
                    dst_nodes_list = new ArrayList<Integer>();
                }
                // common record
                String line_text = value.toString();
                final String[] line = line_text.split("\t");
                if (line.length == 1) {
                    if (line_text.charAt(0) == 'v') { // vector : VALUE
                        cur_rank = Double.parseDouble(line_text.substring(1));
                    } else { // edge : ROWID
                        dst_nodes_list.add(Integer.parseInt(line[0]));
                    }
                }
                keyValue = MPI_D.Recv();
            }
            // write the left part; the last key is only flushed when its rank is non-zero
            if (cur_rank != 0) {
                outrw.write(oldKey, new Text("s" + cur_rank));
                int outdeg = dst_nodes_list.size();
                if (outdeg > 0) {
                    cur_rank = cur_rank / (double) outdeg;
                }
                for (i = 0; i < outdeg; i++) {
                    outrw.write(new IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank));
                }
            }
            outrw.close();
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}