Example usage for org.apache.hadoop.mapred MapFileOutputFormat getEntry

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.MapFileOutputFormat.getEntry.

Prototype

public static <K extends WritableComparable, V extends Writable> Writable getEntry(MapFile.Reader[] readers,
        Partitioner<K, V> partitioner, K key, V value) throws IOException

Source Link

Document

Get an entry from output generated by this class.

Usage

From source file:org.apache.nutch.crawl.CrawlDbReader.java

License:Apache License

/**
 * Looks up the entry stored for a single url in the given crawl db.
 *
 * @param crawlDb crawl db location, handed to openReaders
 * @param url the url used as the lookup key
 * @param config configuration handed to openReaders
 * @return whatever MapFileOutputFormat.getEntry returns for the url, cast to CrawlDatum
 * @throws IOException propagated from opening the readers or from the lookup
 */
public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
    final Text lookupKey = new Text(url);
    final CrawlDatum holder = new CrawlDatum();

    // The readers have to be opened before the lookup below can use them.
    openReaders(crawlDb, config);

    final HashPartitioner<Text, CrawlDatum> partitioner = new HashPartitioner<Text, CrawlDatum>();
    return (CrawlDatum) MapFileOutputFormat.getEntry(readers, partitioner, lookupKey, holder);
}

From source file:org.apache.nutch.crawl.LinkDbReader.java

License:Apache License

/**
 * Returns the inlinks entry stored for the given url, opening the map file
 * readers on first use.
 *
 * @param url the url used as the lookup key
 * @return whatever MapFileOutputFormat.getEntry returns for the url, cast to Inlinks
 * @throws IOException propagated from opening the readers or from the lookup
 */
public Inlinks getInlinks(Text url) throws IOException {

    if (readers == null) {
        synchronized (this) {
            // Re-check under the lock: another thread may have opened the readers
            // between the unsynchronized check above and acquiring the monitor.
            // Without this, both threads open a reader set and the first is leaked.
            // NOTE(review): the unsynchronized fast-path read is only fully safe if
            // the readers field is declared volatile - confirm at its declaration.
            if (readers == null) {
                readers = MapFileOutputFormat.getReaders(fs, new Path(directory, LinkDb.CURRENT_NAME), getConf());
            }
        }
    }

    return (Inlinks) MapFileOutputFormat.getEntry(readers, PARTITIONER, url, new Inlinks());
}

From source file:org.apache.nutch.scoring.webgraph.LoopReader.java

License:Apache License

/**
 * Prints loopset for a single url.  The loopset information will show any
 * outlink url the eventually forms a link cycle.
 * /*from w ww .  j av  a  2  s  . c o  m*/
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 * 
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {

    // open the readers
    fs = FileSystem.get(getConf());
    loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb, Loops.LOOPS_DIR), getConf());

    // get the loopset for a given url, if any
    Text key = new Text(url);
    LoopSet loop = new LoopSet();
    MapFileOutputFormat.getEntry(loopReaders, new HashPartitioner<Text, LoopSet>(), key, loop);

    // print out each loop url in the set
    System.out.println(url + ":");
    for (String loopUrl : loop.getLoopSet()) {
        System.out.println("  " + loopUrl);
    }

    // close the readers
    FSUtils.closeReaders(loopReaders);
}

From source file:org.apache.nutch.scoring.webgraph.NodeReader.java

License:Apache License

/**
 * Prints the content of the Node represented by the url to system out.
 * /*from w  ww .  j av  a  2  s.  c om*/
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 * 
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {

    fs = FileSystem.get(getConf());
    nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb, WebGraph.NODE_DIR), getConf());

    // open the readers, get the node, print out the info, and close the readers
    Text key = new Text(url);
    Node node = new Node();
    MapFileOutputFormat.getEntry(nodeReaders, new HashPartitioner<Text, Node>(), key, node);
    System.out.println(url + ":");
    System.out.println("  inlink score: " + node.getInlinkScore());
    System.out.println("  outlink score: " + node.getOutlinkScore());
    System.out.println("  num inlinks: " + node.getNumInlinks());
    System.out.println("  num outlinks: " + node.getNumOutlinks());
    FSUtils.closeReaders(nodeReaders);
}

From source file:org.archive.tnh.nutch.Segments.java

License:Apache License

/**
 * Looks up the parse text entry stored under the given key.
 *
 * @param key the lookup key; it is wrapped in a Text for the lookup
 * @return toString() of the entry returned by the lookup, or null when the
 *         lookup returns null
 * @throws IOException propagated from the lookup
 */
public String get(String key) throws IOException {
    final Text lookupKey = new Text(key);
    final ParseText holder = new ParseText();
    final Writable entry = MapFileOutputFormat.getEntry(this.parseTextReaders, PARTITIONER, lookupKey, holder);

    return (entry != null) ? entry.toString() : null;
}