List of usage examples for org.apache.hadoop.mapreduce.lib.output MapFileOutputFormat getEntry
public static <K extends WritableComparable<?>, V extends Writable> Writable getEntry(MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value) throws IOException
From source file:com.github.ygf.pagerank.InLinksTopNReducer.java
License:Apache License
@Override protected void cleanup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); Path titlesDir = new Path(conf.get("inlinks.titles_dir")); MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf); Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>(); IntWritable page = new IntWritable(); Text title = new Text(); int[] inLinks = new int[topN.size()]; String[] titles = new String[topN.size()]; for (int i = inLinks.length - 1; i >= 0; i--) { Map.Entry<Integer, Integer> entry = topN.poll(); page.set(entry.getValue());//from w ww .ja v a2 s . com MapFileOutputFormat.getEntry(readers, partitioner, page, title); inLinks[i] = entry.getKey(); titles[i] = title.toString(); } for (MapFile.Reader reader : readers) { reader.close(); } for (int i = 0; i < inLinks.length; i++) { context.write(new IntWritable(inLinks[i]), new Text(titles[i])); } }
From source file:com.github.ygf.pagerank.PageRankIterationMapper.java
License:Apache License
@Override public void map(ShortArrayWritable inKey, MatrixBlockWritable inValue, Context context) throws IOException, InterruptedException { // This task gets each block M_{i,j}, loads the corresponding stripe j // of the vector v_{k-1} and produces the partial result of the stripe i // of the vector v_k. Configuration conf = context.getConfiguration(); int iter = Integer.parseInt(conf.get("pagerank.iteration")); int numPages = Integer.parseInt(conf.get("pagerank.num_pages")); short blockSize = Short.parseShort(conf.get("pagerank.block_size")); Writable[] blockIndexes = inKey.get(); short i = ((ShortWritable) blockIndexes[0]).get(); short j = ((ShortWritable) blockIndexes[1]).get(); int vjSize = (j > numPages / blockSize) ? (numPages % blockSize) : blockSize; FloatWritable[] vj = new FloatWritable[vjSize]; if (iter == 1) { // Initial PageRank vector with 1/n for all pages. for (int k = 0; k < vj.length; k++) { vj[k] = new FloatWritable(1.0f / numPages); }//www . ja v a2s . c o m } else { // Load the stripe j of the vector v_{k-1} from the MapFiles. Path outputDir = MapFileOutputFormat.getOutputPath(context).getParent(); Path vjDir = new Path(outputDir, "v" + (iter - 1)); MapFile.Reader[] readers = MapFileOutputFormat.getReaders(vjDir, conf); Partitioner<ShortWritable, FloatArrayWritable> partitioner = new HashPartitioner<ShortWritable, FloatArrayWritable>(); ShortWritable key = new ShortWritable(j); FloatArrayWritable value = new FloatArrayWritable(); MapFileOutputFormat.getEntry(readers, partitioner, key, value); Writable[] writables = value.get(); for (int k = 0; k < vj.length; k++) { vj[k] = (FloatWritable) writables[k]; } for (MapFile.Reader reader : readers) { reader.close(); } } // Initialize the partial result i of the vector v_k. int viSize = (i > numPages / blockSize) ? 
(numPages % blockSize) : blockSize; FloatWritable[] vi = new FloatWritable[viSize]; for (int k = 0; k < vi.length; k++) { vi[k] = new FloatWritable(0); } // Multiply M_{i,j} by the stripe j of the vector v_{k-1} to obtain the // partial result i of the vector v_k. Writable[][] blockColumns = inValue.get(); for (int k = 0; k < blockColumns.length; k++) { Writable[] blockColumn = blockColumns[k]; if (blockColumn.length > 0) { int vDegree = ((ShortWritable) blockColumn[0]).get(); for (int columnIndex = 1; columnIndex < blockColumn.length; columnIndex++) { int l = ((ShortWritable) blockColumn[columnIndex]).get(); vi[l].set(vi[l].get() + (1.0f / vDegree) * vj[k].get()); } } } context.write(new ShortWritable(i), new FloatArrayWritable(vi)); }
From source file:com.github.ygf.pagerank.PageRankTopNReducer.java
License:Apache License
@Override protected void cleanup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); Path titlesDir = new Path(conf.get("pagerank.titles_dir")); MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf); Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>(); IntWritable page = new IntWritable(); Text title = new Text(); float[] pageRanks = new float[topN.size()]; String[] titles = new String[topN.size()]; // The order of the entries is reversed. The priority queue is in // non-decreasing order and we want the highest PageRank first. for (int i = pageRanks.length - 1; i >= 0; i--) { Map.Entry<Float, Integer> entry = topN.poll(); // Get the title of the page from the title index. page.set(entry.getValue());/* w w w .ja v a 2s.com*/ MapFileOutputFormat.getEntry(readers, partitioner, page, title); pageRanks[i] = entry.getKey(); titles[i] = title.toString(); } for (MapFile.Reader reader : readers) { reader.close(); } for (int i = 0; i < pageRanks.length; i++) { context.write(new FloatWritable(pageRanks[i]), new Text(titles[i])); } }
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Looks up a single key in a directory of MapFiles and prints the station id and
 * year parsed from the matching record.
 *
 * @param args exactly two arguments: the MapFile directory path and the integer key
 * @return 0 on success; -1 if the argument count is wrong or the key is not found
 * @throws Exception if the key argument is not an integer or the MapFiles cannot be
 *                   opened or read
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));

    Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
    try {
        // The partitioner picks which reader to query for the key.
        Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
        Text val = new Text();
        Writable entry = MapFileOutputFormat.getEntry(readers, partitioner, key, val);
        if (entry == null) {
            System.err.println("Key not found: " + key);
            return -1;
        }
        NcdcRecordParser parser = new NcdcRecordParser();
        parser.parse(val.toString());
        System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
        return 0;
    } finally {
        // The readers hold open files; release them on every exit path.
        for (Reader reader : readers) {
            reader.close();
        }
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { JobBuilder.printUsage(this, "<path> <key>"); return -1; }//from w w w . ja v a2 s . com Path path = new Path(args[0]); IntWritable key = new IntWritable(Integer.parseInt(args[1])); FileSystem fs = path.getFileSystem(getConf()); Reader[] readers = /*[*/MapFileOutputFormat.getReaders(fs, path, getConf())/*]*/; Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>(); Text val = new Text(); Writable entry = /*[*/MapFileOutputFormat.getEntry(readers, partitioner, key, val)/*]*/; if (entry == null) { System.err.println("Key not found: " + key); return -1; } NcdcRecordParser parser = new NcdcRecordParser(); parser.parse(val.toString()); System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear()); return 0; }