Example usage for org.apache.hadoop.mapred MapFileOutputFormat getEntry

List of usage examples for org.apache.hadoop.mapred MapFileOutputFormat getEntry

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.MapFileOutputFormat.getEntry.

Prototype

public static <K extends WritableComparable, V extends Writable> Writable getEntry(MapFile.Reader[] readers,
        Partitioner<K, V> partitioner, K key, V value) throws IOException 

Source Link

Document

Get an entry from output generated by this class.

Usage

From source file:org.apache.nutch.crawl.CrawlDbReader.java

License:Apache License

/**
 * Looks up the entry stored for a url in the given crawl db.
 *
 * @param crawlDb location of the crawl db, passed through to {@code openReaders}
 * @param url the url to look up; it is wrapped in a {@code Text} key
 * @param config configuration passed through to {@code openReaders}
 * @return the result of {@code MapFileOutputFormat.getEntry}, cast to {@code CrawlDatum}
 * @throws IOException if opening the readers or reading the entry fails
 */
public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
    // the readers field must be populated before the lookup below
    openReaders(crawlDb, config);

    Text lookupKey = new Text(url);
    CrawlDatum holder = new CrawlDatum();
    HashPartitioner<Text, CrawlDatum> partitioner = new HashPartitioner<Text, CrawlDatum>();
    return (CrawlDatum) MapFileOutputFormat.getEntry(readers, partitioner, lookupKey, holder);
}

From source file:org.apache.nutch.crawl.LinkDbReader.java

License:Apache License

/**
 * Looks up the inlinks entry stored for a url, opening the readers on first use.
 *
 * @param url the url used as the lookup key
 * @return the result of {@code MapFileOutputFormat.getEntry}, cast to {@code Inlinks}
 * @throws IOException if opening the readers or reading the entry fails
 */
public Inlinks getInlinks(Text url) throws IOException {

    // The null check and the assignment both happen under the lock, so
    // concurrent first calls cannot each open (and then leak) their own
    // set of readers.
    synchronized (this) {
        if (readers == null) {
            readers = MapFileOutputFormat.getReaders(fs, new Path(directory, LinkDb.CURRENT_NAME), getConf());
        }
    }

    return (Inlinks) MapFileOutputFormat.getEntry(readers, PARTITIONER, url, new Inlinks());
}

From source file:org.apache.nutch.scoring.webgraph.LoopReader.java

License:Apache License

/**
 * Prints loopset for a single url.  The loopset information will show any
 * outlink url the eventually forms a link cycle.
 * /*from w ww .  j av  a  2  s  . c o  m*/
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 * 
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {

    // open the readers
    fs = FileSystem.get(getConf());
    loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb, Loops.LOOPS_DIR), getConf());

    // get the loopset for a given url, if any
    Text key = new Text(url);
    LoopSet loop = new LoopSet();
    MapFileOutputFormat.getEntry(loopReaders, new HashPartitioner<Text, LoopSet>(), key, loop);

    // print out each loop url in the set
    System.out.println(url + ":");
    for (String loopUrl : loop.getLoopSet()) {
        System.out.println("  " + loopUrl);
    }

    // close the readers
    FSUtils.closeReaders(loopReaders);
}

From source file:org.apache.nutch.scoring.webgraph.NodeReader.java

License:Apache License

/**
 * Prints the content of the Node represented by the url to system out.
 * /*from w  ww .  j av  a  2  s.  c om*/
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 * 
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {

    fs = FileSystem.get(getConf());
    nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb, WebGraph.NODE_DIR), getConf());

    // open the readers, get the node, print out the info, and close the readers
    Text key = new Text(url);
    Node node = new Node();
    MapFileOutputFormat.getEntry(nodeReaders, new HashPartitioner<Text, Node>(), key, node);
    System.out.println(url + ":");
    System.out.println("  inlink score: " + node.getInlinkScore());
    System.out.println("  outlink score: " + node.getOutlinkScore());
    System.out.println("  num inlinks: " + node.getNumInlinks());
    System.out.println("  num outlinks: " + node.getNumOutlinks());
    FSUtils.closeReaders(nodeReaders);
}

From source file:org.archive.tnh.nutch.Segments.java

License:Apache License

/**
 * Looks up the parse-text entry stored under a key and returns its string form.
 *
 * @param key the lookup key; it is wrapped in a {@code Text}
 * @return {@code toString()} of the entry, or {@code null} when the lookup returns {@code null}
 * @throws IOException if reading the entry fails
 */
public String get(String key) throws IOException {
    Text lookupKey = new Text(key);
    Writable entry = MapFileOutputFormat.getEntry(this.parseTextReaders, PARTITIONER, lookupKey,
            new ParseText());

    return (entry == null) ? null : entry.toString();
}