List of usage examples for org.apache.hadoop.mapred.LineRecordReader.next
public synchronized boolean next(LongWritable key, Text value) throws IOException
From source file:edu.umn.cs.spatialHadoop.mapred.SpatialInputFormat.java
License:Open Source License
protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter) throws IOException { GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir); if (gindex == null) { FileStatus[] listStatus;//from ww w. j ava 2 s.com if (OperationsParams.isWildcard(dir)) { // Wild card listStatus = fs.globStatus(dir); } else { listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter); } // Add all files under this directory for (FileStatus status : listStatus) { if (status.isDir()) { listStatus(fs, status.getPath(), result, filter); } else if (status.getPath().getName().toLowerCase().endsWith(".list")) { LineRecordReader in = new LineRecordReader(fs.open(status.getPath()), 0, status.getLen(), Integer.MAX_VALUE); LongWritable key = in.createKey(); Text value = in.createValue(); while (in.next(key, value)) { result.add(fs.getFileStatus(new Path(status.getPath().getParent(), value.toString()))); } in.close(); } else { result.add(status); } } } else { final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir; // Use the global index to limit files filter.selectCells(gindex, new ResultCollector<Partition>() { @Override public void collect(Partition partition) { try { Path cell_path = new Path(indexDir, partition.filename); if (!fs.exists(cell_path)) LOG.warn("Matched file not found: " + cell_path); result.add(fs.getFileStatus(cell_path)); } catch (IOException e) { e.printStackTrace(); } } }); } }
From source file:edu.umn.cs.spatialHadoop.operations.CatUnion.java
License:Open Source License
/** * Read all categories from the category file * @param categoryFile/* w w w . j av a 2 s . c om*/ * @param categoryShapes * @param idToCategory * @throws IOException */ private static void readCategories(Path categoryFile, Map<Integer, Integer> idToCategory) throws IOException { Map<Integer, String> idToCatName = new HashMap<Integer, String>(); FileSystem fsCategory = FileSystem.getLocal(new Configuration()); long categoryFileSize = fsCategory.getFileStatus(categoryFile).getLen(); if (categoryFileSize > 1024 * 1024) LOG.warn("Category file size is big: " + categoryFileSize); InputStream inCategory = fsCategory.open(categoryFile); LineRecordReader lineReader = new LineRecordReader(inCategory, 0, categoryFileSize, new Configuration()); LongWritable lineOffset = lineReader.createKey(); Text line = lineReader.createValue(); Set<String> catNames = new TreeSet<String>(); while (lineReader.next(lineOffset, line)) { int shape_id = TextSerializerHelper.consumeInt(line, ','); String cat_name = line.toString(); catNames.add(cat_name); idToCatName.put(shape_id, cat_name); } lineReader.close(); // Change category names to numbers Map<String, Integer> cat_name_to_id = new HashMap<String, Integer>(); int cat_id = 0; for (String cat_name : catNames) { cat_name_to_id.put(cat_name, cat_id++); } for (Map.Entry<Integer, String> entry : idToCatName.entrySet()) { idToCategory.put(entry.getKey(), cat_name_to_id.get(entry.getValue())); } }
From source file:kogiri.mapreduce.libra.kmersimilarity_m.KmerSimilarityMap.java
License:Open Source License
/**
 * Accumulates the scores of all k-mer similarity result files and writes the final
 * score file under {@code outputPath}.
 *
 * <p>The first record read becomes the accumulator and the score of every further
 * record is added to it. The accumulated score array is then written as one
 * {@code "x-y<TAB>value"} line per element, where {@code x} and {@code y} are the row
 * and column of the element in an {@code n x n} layout with
 * {@code n = (int) sqrt(length)}.
 *
 * @param outputPath the directory that holds the partial results and receives the final file
 * @param conf the configuration used to locate the result files and the file system
 * @throws IOException if reading or writing fails, or if no score record is found
 */
private void sumScores(Path outputPath, Configuration conf) throws IOException {
  Path[] resultFiles = KmerSimilarityHelper.getAllKmerSimilarityResultFilePath(conf, outputPath.toString());
  FileSystem fs = outputPath.getFileSystem(conf);

  KmerSimilarityOutputRecord scoreRec = null;
  for (Path resultFile : resultFiles) {
    LOG.info("Reading the scores from " + resultFile.toString());
    FileStatus status = fs.getFileStatus(resultFile);
    FSDataInputStream is = fs.open(resultFile);
    LineRecordReader reader = null;
    try {
      reader = new LineRecordReader(is, 0, status.getLen(), conf);
      LongWritable off = new LongWritable();
      Text val = new Text();
      while (reader.next(off, val)) {
        if (scoreRec == null) {
          scoreRec = KmerSimilarityOutputRecord.createInstance(val.toString());
        } else {
          KmerSimilarityOutputRecord rec2 = KmerSimilarityOutputRecord.createInstance(val.toString());
          scoreRec.addScore(rec2.getScore());
        }
      }
    } finally {
      // Close the reader on every path; fall back to the raw stream if the reader
      // could not be constructed.
      if (reader != null) {
        reader.close();
      } else {
        is.close();
      }
    }
  }

  if (scoreRec == null) {
    // No result file, or only empty ones: fail with a clear message rather than an NPE.
    throw new IOException("No k-mer similarity score records found under " + outputPath);
  }

  double[] accumulatedScore = scoreRec.getScore();

  String resultFilename = KmerSimilarityHelper.makeKmerSimilarityFinalResultFileName();
  Path resultFilePath = new Path(outputPath, resultFilename);

  LOG.info("Creating a final score file : " + resultFilePath.toString());

  FSDataOutputStream os = fs.create(resultFilePath);
  try {
    int n = (int) Math.sqrt(accumulatedScore.length);
    for (int i = 0; i < accumulatedScore.length; i++) {
      int x = i / n;
      int y = i % n;

      String k = x + "-" + y;
      String v = Double.toString(accumulatedScore[i]);
      String out = k + "\t" + v + "\n";
      // Explicit charset so the output does not depend on the platform default.
      os.write(out.getBytes("UTF-8"));
    }
  } finally {
    os.close();
  }
}
From source file:libra.core.kmersimilarity_m.KmerSimilarityMap.java
License:Apache License
private void sumScores(Path outputPath, Configuration conf) throws IOException { Path[] resultFiles = KmerSimilarityHelper.getAllKmerSimilarityResultFilePath(conf, outputPath.toString()); FileSystem fs = outputPath.getFileSystem(conf); KmerSimilarityOutputRecord scoreRec = null; for (Path resultFile : resultFiles) { LOG.info("Reading the scores from " + resultFile.toString()); FSDataInputStream is = fs.open(resultFile); FileStatus status = fs.getFileStatus(resultFile); LineRecordReader reader = new LineRecordReader(is, 0, status.getLen(), conf); LongWritable off = new LongWritable(); Text val = new Text(); while (reader.next(off, val)) { if (scoreRec == null) { scoreRec = KmerSimilarityOutputRecord.createInstance(val.toString()); } else { KmerSimilarityOutputRecord rec2 = KmerSimilarityOutputRecord.createInstance(val.toString()); scoreRec.addScore(rec2.getScore()); }//from w ww.ja va2 s .co m } reader.close(); } double[] accumulatedScore = scoreRec.getScore(); String resultFilename = KmerSimilarityHelper.makeKmerSimilarityFinalResultFileName(); Path resultFilePath = new Path(outputPath, resultFilename); LOG.info("Creating a final score file : " + resultFilePath.toString()); FSDataOutputStream os = fs.create(resultFilePath); int n = (int) Math.sqrt(accumulatedScore.length); for (int i = 0; i < accumulatedScore.length; i++) { int x = i / n; int y = i % n; String k = x + "-" + y; String v = Double.toString(accumulatedScore[i]); if (x == y) { v = Double.toString(1.0); } String out = k + "\t" + v + "\n"; os.write(out.getBytes()); } os.close(); }
From source file:mlbench.pagerank.PagerankMerge.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) public static void main(String[] args) throws IOException, InterruptedException { try {//from w w w .j a v a 2 s . c om parseArgs(args); HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); JobConf jobConf = new JobConf(confPath); if (MPI_D.COMM_BIPARTITE_O != null) { // O communicator int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); if (rank == 0) { LOG.info(PagerankMerge.class.getSimpleName() + " O start."); } FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, jobConf, inDir, rank); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit); LongWritable key = kvrr.createKey(); Text value = kvrr.createValue(); { while (kvrr.next(key, value)) { String line_text = value.toString(); final String[] line = line_text.split("\t"); if (line.length >= 2) { MPI_D.Send(new IntWritable(Integer.parseInt(line[0])), new Text(line[1])); } } } } } else if (MPI_D.COMM_BIPARTITE_A != null) { // A communicator int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); if (rank == 0) { LOG.info(PagerankMerge.class.getSimpleName() + " A start."); } HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir, IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A); IntWritable oldKey = null; double next_rank = 0; double previous_rank = 0; double diff = 0; int local_diffs = 0; random_coeff = (1 - mixing_c) / (double) number_nodes; converge_threshold = ((double) 1.0 / (double) number_nodes) / 10; Object[] keyValue = MPI_D.Recv(); while (keyValue != null) { IntWritable key = (IntWritable) keyValue[0]; Text value = (Text) keyValue[1]; if (oldKey == null) { oldKey = key; } if (!key.equals(oldKey)) { next_rank = next_rank * mixing_c + random_coeff; 
outrw.write(oldKey, new Text("v" + next_rank)); diff = Math.abs(previous_rank - next_rank); if (diff > converge_threshold) { local_diffs += 1; } oldKey = key; next_rank = 0; previous_rank = 0; } String cur_value_str = value.toString(); if (cur_value_str.charAt(0) == 's') { previous_rank = Double.parseDouble(cur_value_str.substring(1)); } else { next_rank += Double.parseDouble(cur_value_str.substring(1)); } keyValue = MPI_D.Recv(); } if (previous_rank != 0) { next_rank = next_rank * mixing_c + random_coeff; outrw.write(oldKey, new Text("v" + next_rank)); diff = Math.abs(previous_rank - next_rank); if (diff > converge_threshold) local_diffs += 1; } outrw.close(); reduceDiffs(local_diffs, rank); } MPI_D.Finalize(); } catch (MPI_D_Exception e) { e.printStackTrace(); } }
From source file:mlbench.pagerank.PagerankNaive.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) public static void main(String[] args) throws IOException, InterruptedException { try {//www .jav a2s . c o m parseArgs(args); HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); JobConf jobConf = new JobConf(confPath); if (MPI_D.COMM_BIPARTITE_O != null) { // O communicator int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); if (rank == 0) { LOG.info(PagerankNaive.class.getSimpleName() + " O start."); } FileSplit[] inputs1 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, jobConf, edgeDir, rank); FileSplit[] inputs2 = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, jobConf, vecDir, rank); FileSplit[] inputs = (FileSplit[]) ArrayUtils.addAll(inputs2, inputs1); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; LineRecordReader kvrr = new LineRecordReader(jobConf, fsplit); LongWritable key = kvrr.createKey(); Text value = kvrr.createValue(); { IntWritable k = new IntWritable(); Text v = new Text(); while (kvrr.next(key, value)) { String line_text = value.toString(); // ignore comments in edge file if (line_text.startsWith("#")) continue; final String[] line = line_text.split("\t"); if (line.length < 2) continue; // vector : ROWID VALUE('vNNNN') if (line[1].charAt(0) == 'v') { k.set(Integer.parseInt(line[0])); v.set(line[1]); MPI_D.Send(k, v); } else { /* * In other matrix-vector multiplication, we * output (dst, src) here However, In PageRank, * the matrix-vector computation formula is M^T * * v. Therefore, we output (src,dst) here. 
*/ int src_id = Integer.parseInt(line[0]); int dst_id = Integer.parseInt(line[1]); k.set(src_id); v.set(line[1]); MPI_D.Send(k, v); if (make_symmetric == 1) { k.set(dst_id); v.set(line[0]); MPI_D.Send(k, v); } } } } } } else if (MPI_D.COMM_BIPARTITE_A != null) { // A communicator int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); if (rank == 0) { LOG.info(PagerankNaive.class.getSimpleName() + " A start."); } HadoopWriter<IntWritable, Text> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir, IntWritable.class, Text.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A); IntWritable oldKey = null; int i; double cur_rank = 0; ArrayList<Integer> dst_nodes_list = new ArrayList<Integer>(); Object[] keyValue = MPI_D.Recv(); while (keyValue != null) { IntWritable key = (IntWritable) keyValue[0]; Text value = (Text) keyValue[1]; if (oldKey == null) { oldKey = key; } // A new key arrives if (!key.equals(oldKey)) { outrw.write(oldKey, new Text("s" + cur_rank)); int outdeg = dst_nodes_list.size(); if (outdeg > 0) { cur_rank = cur_rank / (double) outdeg; } for (i = 0; i < outdeg; i++) { outrw.write(new IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank)); } oldKey = key; cur_rank = 0; dst_nodes_list = new ArrayList<Integer>(); } // common record String line_text = value.toString(); final String[] line = line_text.split("\t"); if (line.length == 1) { if (line_text.charAt(0) == 'v') { // vector : VALUE cur_rank = Double.parseDouble(line_text.substring(1)); } else { // edge : ROWID dst_nodes_list.add(Integer.parseInt(line[0])); } } keyValue = MPI_D.Recv(); } // write the left part if (cur_rank != 0) { outrw.write(oldKey, new Text("s" + cur_rank)); int outdeg = dst_nodes_list.size(); if (outdeg > 0) { cur_rank = cur_rank / (double) outdeg; } for (i = 0; i < outdeg; i++) { outrw.write(new IntWritable(dst_nodes_list.get(i)), new Text("v" + cur_rank)); } } outrw.close(); } MPI_D.Finalize(); } catch (MPI_D_Exception e) { e.printStackTrace(); } }