List of usage examples for org.apache.hadoop.mapred.MapFileOutputFormat.getEntry
public static <K extends WritableComparable, V extends Writable> Writable getEntry(MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value) throws IOException
From source file:org.apache.nutch.crawl.CrawlDbReader.java
License:Apache License
public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException { Text key = new Text(url); CrawlDatum val = new CrawlDatum(); openReaders(crawlDb, config);//w w w . j av a 2 s. c om CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers, new HashPartitioner<Text, CrawlDatum>(), key, val); return res; }
From source file:org.apache.nutch.crawl.LinkDbReader.java
License:Apache License
public Inlinks getInlinks(Text url) throws IOException { if (readers == null) { synchronized (this) { readers = MapFileOutputFormat.getReaders(fs, new Path(directory, LinkDb.CURRENT_NAME), getConf()); }// w ww.j a va 2 s . c o m } return (Inlinks) MapFileOutputFormat.getEntry(readers, PARTITIONER, url, new Inlinks()); }
From source file:org.apache.nutch.scoring.webgraph.LoopReader.java
License:Apache License
/** * Prints loopset for a single url. The loopset information will show any * outlink url the eventually forms a link cycle. * /*from w ww . j av a 2 s . c o m*/ * @param webGraphDb The WebGraph to check for loops * @param url The url to check. * * @throws IOException If an error occurs while printing loopset information. */ public void dumpUrl(Path webGraphDb, String url) throws IOException { // open the readers fs = FileSystem.get(getConf()); loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb, Loops.LOOPS_DIR), getConf()); // get the loopset for a given url, if any Text key = new Text(url); LoopSet loop = new LoopSet(); MapFileOutputFormat.getEntry(loopReaders, new HashPartitioner<Text, LoopSet>(), key, loop); // print out each loop url in the set System.out.println(url + ":"); for (String loopUrl : loop.getLoopSet()) { System.out.println(" " + loopUrl); } // close the readers FSUtils.closeReaders(loopReaders); }
From source file:org.apache.nutch.scoring.webgraph.NodeReader.java
License:Apache License
/** * Prints the content of the Node represented by the url to system out. * /*from w ww . j av a 2 s. c om*/ * @param webGraphDb The webgraph from which to get the node. * @param url The url of the node. * * @throws IOException If an error occurs while getting the node. */ public void dumpUrl(Path webGraphDb, String url) throws IOException { fs = FileSystem.get(getConf()); nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb, WebGraph.NODE_DIR), getConf()); // open the readers, get the node, print out the info, and close the readers Text key = new Text(url); Node node = new Node(); MapFileOutputFormat.getEntry(nodeReaders, new HashPartitioner<Text, Node>(), key, node); System.out.println(url + ":"); System.out.println(" inlink score: " + node.getInlinkScore()); System.out.println(" outlink score: " + node.getOutlinkScore()); System.out.println(" num inlinks: " + node.getNumInlinks()); System.out.println(" num outlinks: " + node.getNumOutlinks()); FSUtils.closeReaders(nodeReaders); }
From source file:org.archive.tnh.nutch.Segments.java
License:Apache License
/**
 * Looks up the entry stored under {@code key} in the parse-text readers and
 * returns its string form.
 *
 * @param key the lookup key; wrapped in a {@link Text} before the lookup
 * @return {@code toString()} of the entry returned by
 *         {@code MapFileOutputFormat.getEntry}, or {@code null} when that call
 *         returns {@code null}
 * @throws IOException if reading the entry fails
 */
public String get(String key) throws IOException {
    final Text lookupKey = new Text(key);
    final Writable entry = MapFileOutputFormat.getEntry(
            this.parseTextReaders, PARTITIONER, lookupKey, new ParseText());
    return (entry == null) ? null : entry.toString();
}