Usage examples for `org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat#getReaders`.
public static MapFile.Reader[] getReaders(Path dir, Configuration conf) throws IOException
From source file:com.github.ygf.pagerank.InLinksTopNReducer.java
License:Apache License
/**
 * Emits the top-N pages by in-link count, resolving each page id to its
 * title via the MapFile title index built earlier in the pipeline.
 *
 * @param context the reducer context used to read config and emit output
 * @throws IOException if the title index cannot be read or output fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("inlinks.titles_dir"));
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    try {
        Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
        IntWritable page = new IntWritable();
        Text title = new Text();
        int[] inLinks = new int[topN.size()];
        String[] titles = new String[topN.size()];
        // Drain the priority queue in reverse: it yields entries in
        // non-decreasing order, but we want the highest counts first.
        for (int i = inLinks.length - 1; i >= 0; i--) {
            Map.Entry<Integer, Integer> entry = topN.poll();
            // Look up the page title in the index by page id.
            page.set(entry.getValue());
            MapFileOutputFormat.getEntry(readers, partitioner, page, title);
            inLinks[i] = entry.getKey();
            titles[i] = title.toString();
        }
        for (int i = 0; i < inLinks.length; i++) {
            context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
        }
    } finally {
        // Close the index readers even if a lookup or write throws.
        for (MapFile.Reader reader : readers) {
            reader.close();
        }
    }
}
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private int getNumPages(Configuration conf, Path titlesDir) throws Exception { int numPages = 0; IntWritable pageNumber = new IntWritable(); MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf); for (int i = 0; i < readers.length; i++) { readers[i].finalKey(pageNumber); if (pageNumber.get() > numPages) { numPages = pageNumber.get(); }// w w w . j ava 2 s .c om } for (MapFile.Reader reader : readers) { reader.close(); } return numPages; }
From source file:com.github.ygf.pagerank.PageRankIterationMapper.java
License:Apache License
@Override public void map(ShortArrayWritable inKey, MatrixBlockWritable inValue, Context context) throws IOException, InterruptedException { // This task gets each block M_{i,j}, loads the corresponding stripe j // of the vector v_{k-1} and produces the partial result of the stripe i // of the vector v_k. Configuration conf = context.getConfiguration(); int iter = Integer.parseInt(conf.get("pagerank.iteration")); int numPages = Integer.parseInt(conf.get("pagerank.num_pages")); short blockSize = Short.parseShort(conf.get("pagerank.block_size")); Writable[] blockIndexes = inKey.get(); short i = ((ShortWritable) blockIndexes[0]).get(); short j = ((ShortWritable) blockIndexes[1]).get(); int vjSize = (j > numPages / blockSize) ? (numPages % blockSize) : blockSize; FloatWritable[] vj = new FloatWritable[vjSize]; if (iter == 1) { // Initial PageRank vector with 1/n for all pages. for (int k = 0; k < vj.length; k++) { vj[k] = new FloatWritable(1.0f / numPages); }/*from www . ja v a 2 s . c om*/ } else { // Load the stripe j of the vector v_{k-1} from the MapFiles. Path outputDir = MapFileOutputFormat.getOutputPath(context).getParent(); Path vjDir = new Path(outputDir, "v" + (iter - 1)); MapFile.Reader[] readers = MapFileOutputFormat.getReaders(vjDir, conf); Partitioner<ShortWritable, FloatArrayWritable> partitioner = new HashPartitioner<ShortWritable, FloatArrayWritable>(); ShortWritable key = new ShortWritable(j); FloatArrayWritable value = new FloatArrayWritable(); MapFileOutputFormat.getEntry(readers, partitioner, key, value); Writable[] writables = value.get(); for (int k = 0; k < vj.length; k++) { vj[k] = (FloatWritable) writables[k]; } for (MapFile.Reader reader : readers) { reader.close(); } } // Initialize the partial result i of the vector v_k. int viSize = (i > numPages / blockSize) ? 
(numPages % blockSize) : blockSize; FloatWritable[] vi = new FloatWritable[viSize]; for (int k = 0; k < vi.length; k++) { vi[k] = new FloatWritable(0); } // Multiply M_{i,j} by the stripe j of the vector v_{k-1} to obtain the // partial result i of the vector v_k. Writable[][] blockColumns = inValue.get(); for (int k = 0; k < blockColumns.length; k++) { Writable[] blockColumn = blockColumns[k]; if (blockColumn.length > 0) { int vDegree = ((ShortWritable) blockColumn[0]).get(); for (int columnIndex = 1; columnIndex < blockColumn.length; columnIndex++) { int l = ((ShortWritable) blockColumn[columnIndex]).get(); vi[l].set(vi[l].get() + (1.0f / vDegree) * vj[k].get()); } } } context.write(new ShortWritable(i), new FloatArrayWritable(vi)); }
From source file:com.github.ygf.pagerank.PageRankTopNReducer.java
License:Apache License
@Override protected void cleanup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); Path titlesDir = new Path(conf.get("pagerank.titles_dir")); MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf); Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>(); IntWritable page = new IntWritable(); Text title = new Text(); float[] pageRanks = new float[topN.size()]; String[] titles = new String[topN.size()]; // The order of the entries is reversed. The priority queue is in // non-decreasing order and we want the highest PageRank first. for (int i = pageRanks.length - 1; i >= 0; i--) { Map.Entry<Float, Integer> entry = topN.poll(); // Get the title of the page from the title index. page.set(entry.getValue());//from w w w . ja v a2 s . com MapFileOutputFormat.getEntry(readers, partitioner, page, title); pageRanks[i] = entry.getKey(); titles[i] = title.toString(); } for (MapFile.Reader reader : readers) { reader.close(); } for (int i = 0; i < pageRanks.length; i++) { context.write(new FloatWritable(pageRanks[i]), new Text(titles[i])); } }
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Looks up a single key in a MapFile-formatted job output and prints the
 * station id and year parsed from the matching record.
 *
 * @param args args[0] is the MapFile output directory, args[1] the integer key
 * @return 0 on success, -1 on bad usage or if the key is not found
 * @throws Exception if the MapFiles cannot be opened or read
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));
    Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
    try {
        // Use the same partitioner the job used, so the lookup goes to the
        // one MapFile partition that can contain the key.
        Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
        Text val = new Text();
        Writable entry = MapFileOutputFormat.getEntry(readers, partitioner, key, val);
        if (entry == null) {
            System.err.println("Key not found: " + key);
            return -1;
        }
        NcdcRecordParser parser = new NcdcRecordParser();
        parser.parse(val.toString());
        System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
        return 0;
    } finally {
        // Close all partition readers on every exit path.
        for (Reader reader : readers) {
            reader.close();
        }
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Looks up a key in a MapFile-formatted job output and prints every
 * consecutive record stored under that key (station id and year per line).
 *
 * @param args args[0] is the MapFile output directory, args[1] the integer key
 * @return 0 on success, -1 on bad usage or if the key is not found
 * @throws Exception if the MapFiles cannot be opened or read
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));
    Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
    try {
        Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
        Text val = new Text();
        // vv LookupRecordsByTemperature-ReaderFragment
        Reader reader = readers[partitioner.getPartition(key, val, readers.length)];
        // ^^ LookupRecordsByTemperature-ReaderFragment
        Writable entry = reader.get(key, val);
        if (entry == null) {
            System.err.println("Key not found: " + key);
            return -1;
        }
        NcdcRecordParser parser = new NcdcRecordParser();
        IntWritable nextKey = new IntWritable();
        // get() positioned the reader at the first match; keep scanning
        // forward while the key is unchanged to print all duplicates.
        do {
            parser.parse(val.toString());
            System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
        } while (reader.next(nextKey, val) && key.equals(nextKey));
        return 0;
    } finally {
        // Close all partition readers on every exit path.
        for (Reader reader : readers) {
            reader.close();
        }
    }
}
From source file:org.apache.nutch.segment.TestSegmentMergerCrawlDatums.java
License:Apache License
/** * Checks the merged segment and removes the stuff again. * /*www . java2 s.c o m*/ * @param the * test directory * @param the * merged segment * @return the final status */ protected byte checkMergedSegment(Path testDir, Path mergedSegment) throws Exception { // Get a MapFile reader for the <Text,CrawlDatum> pairs MapFile.Reader[] readers = MapFileOutputFormat .getReaders(new Path(mergedSegment, CrawlDatum.FETCH_DIR_NAME), conf); Text key = new Text(); CrawlDatum value = new CrawlDatum(); byte finalStatus = 0x0; for (MapFile.Reader reader : readers) { while (reader.next(key, value)) { LOG.info("Reading status for: " + key.toString() + " > " + CrawlDatum.getStatusName(value.getStatus())); // Only consider fetch status if (CrawlDatum.hasFetchStatus(value) && key.toString().equals("http://nutch.apache.org/")) { finalStatus = value.getStatus(); } } // Close the reader again reader.close(); } // Remove the test directory again fs.delete(testDir, true); LOG.info("Final fetch status for: http://nutch.apache.org/ > " + CrawlDatum.getStatusName(finalStatus)); // Return the final status return finalStatus; }