Example usage for org.apache.hadoop.io MapFile.Reader MapFile.Reader

List of usage examples for org.apache.hadoop.io MapFile.Reader MapFile.Reader

Introduction

This page collects usage examples for the org.apache.hadoop.io.MapFile.Reader constructor.

Prototype

@Deprecated
public Reader(FileSystem fs, String dirName, Configuration conf) throws IOException 

Source Link

Document

Construct a map reader for the named map.

Usage

From source file:com.datatorrent.contrib.hdht.HadoopFilePerformanceTest.java

License:Open Source License

/**
 * Times three ways of reading the test MapFile: a sequential scan with
 * {@code next()}, {@code get(key)} lookups for indices {@code 0..testSize-1}
 * in order, and {@code get(key)} lookups at random indices. Each phase is
 * bracketed by startTimer()/stopTimer() and its duration is logged.
 *
 * @throws Exception if the test file cannot be written or read
 */
@Test
public void testMapFileRead() throws Exception {

    logger.info("Reading {} with {} key/value pairs", Testfile.MAPFILE.filename(),
            String.format("%,d", testSize));

    // Write the fixture once; a second identical write only adds setup time.
    writeMapFile();

    Text key = new Text();
    Text value = new Text();

    // Set amount of memory to use for buffer
    float bufferPercent = 0.25f;
    int bufferSize = (int) (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax() * bufferPercent);

    MapFile.Reader reader = new MapFile.Reader(Testfile.MAPFILE.filepath(), conf,
            SequenceFile.Reader.bufferSize(bufferSize));

    // Close the reader even when one of the read phases throws.
    try {
        startTimer();
        reader.reset();
        while (reader.next(key, value)) {
            // Intentionally empty: only the read itself is being timed.
        }
        logger.info("Duration for reader.next() SEQUENTIAL keys: {}", stopTimer(Testfile.MAPFILE, "READ-SEQ"));

        startTimer();
        reader.reset();
        for (int i = 0; i < testSize; i++) {
            key.set(getKey(i));
            reader.get(key, value);
        }
        logger.info("Duration for reader.get(key) SEQUENTIAL keys: {}", stopTimer(Testfile.MAPFILE, "READ-SEQ-ID"));

        Random random = new Random();
        startTimer();
        for (int i = 0; i < testSize; i++) {
            key.set(getKey(random.nextInt(testSize)));
            reader.get(key, value);
        }
        logger.info("Duration for reader.get(key) RANDOM keys: {}", stopTimer(Testfile.MAPFILE, "READ-RAND"));
    } finally {
        reader.close();
    }

}

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Prepares the fixture: writes the demo MapFile at MAP_URI, opens the file
 * system and a reader for it, and creates empty key/value holders of the
 * classes reported by the reader.
 *
 * @throws IOException if the demo file cannot be written or opened
 */
@Before
    public void setUp() throws IOException {
        final String[] demoArgs = { MAP_URI };
        MapFileWriteDemo.main(demoArgs);

        final Configuration configuration = new Configuration();
        final URI mapLocation = URI.create(MAP_URI);
        fs = FileSystem.get(mapLocation, configuration);

        reader = new MapFile.Reader(fs, MAP_URI, configuration);

        // Instantiate holders matching whatever key/value classes the file declares.
        final Class<?> keyType = reader.getKeyClass();
        final Class<?> valueType = reader.getValueClass();
        key = (WritableComparable<?>) ReflectionUtils.newInstance(keyType, configuration);
        value = (Writable) ReflectionUtils.newInstance(valueType, configuration);
    }

From source file:io.aos.hdfs.MapFileSeekTest.java

License:Apache License

/**
 * Test fixture setup. Runs MapFileWriteDemo to create the MapFile at MAP_URI,
 * then opens it and allocates reusable key and value objects whose classes
 * are taken from the opened reader.
 *
 * @throws IOException if writing the demo file or opening it fails
 */
@Before
public void setUp() throws IOException {
    // Step 1: produce the MapFile the tests will read.
    MapFileWriteDemo.main(new String[] { MAP_URI });

    // Step 2: open the file system that hosts it and a reader on top.
    Configuration hadoopConf = new Configuration();
    URI target = URI.create(MAP_URI);
    fs = FileSystem.get(target, hadoopConf);
    reader = new MapFile.Reader(fs, MAP_URI, hadoopConf);

    // Step 3: create key/value instances of the classes stored in the file.
    Object freshKey = ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
    Object freshValue = ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
    key = (WritableComparable<?>) freshKey;
    value = (Writable) freshValue;
}

From source file:org.archive.jbs.tools.Dump.java

License:Apache License

/**
 * Dumps every key/value pair found at {@code inputPath} by passing each pair
 * to {@code output(key, value, mode)}.
 *
 * <p>The path is first opened as a MapFile; if that fails with an
 * {@link IOException} it is opened as a SequenceFile instead. Whichever
 * reader is opened is closed before this method returns, including when
 * reading or output fails.
 *
 * @param fs        file system holding the input
 * @param inputPath path of the MapFile or SequenceFile to dump
 * @param mode      passed through unchanged to {@code output}
 * @throws IOException if the path can be opened as neither a MapFile nor a
 *                     SequenceFile (the SequenceFile failure is the cause)
 * @throws Exception   if reading or emitting a record fails
 */
public void dump(FileSystem fs, Path inputPath, int mode) throws Exception {
    Configuration conf = getConf();

    MapFile.Reader mapReader = null;
    try {
        mapReader = new MapFile.Reader(fs, inputPath.toString(), conf);
    } catch (IOException ignored) {
        // Deliberately ignored: not a MapFile, so fall back to SequenceFile below.
    }

    if (mapReader != null) {
        try {
            WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(mapReader.getKeyClass(),
                    conf);
            Writable value = (Writable) ReflectionUtils.newInstance(mapReader.getValueClass(), conf);

            while (mapReader.next(key, value)) {
                output(key, value, mode);
            }
        } finally {
            mapReader.close();
        }
        return;
    }

    // Not a MapFile...try a SequenceFile.
    SequenceFile.Reader seqReader;
    try {
        seqReader = new SequenceFile.Reader(fs, inputPath, conf);
    } catch (IOException ioe) {
        // Neither MapFile nor SequenceFile; keep the underlying failure as the cause.
        throw new IOException("Cannot open file: " + inputPath, ioe);
    }

    try {
        WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(seqReader.getKeyClass(),
                conf);
        Writable value = (Writable) ReflectionUtils.newInstance(seqReader.getValueClass(), conf);

        while (seqReader.next(key, value)) {
            output(key, value, mode);
        }
    } finally {
        seqReader.close();
    }
}

From source file:org.archive.nutchwax.tools.PageRanker.java

License:Apache License

/**
 * Command-line entry point: scans one or more MapFiles and writes a
 * {@code "<count> <url>"} line to the output file for every record whose key
 * is a {@code Text} starting with "http" and whose count is at least the
 * threshold. The count is the value itself for {@code IntWritable} values and
 * {@code size()} for {@code Inlinks} values; any other value type yields -1.
 *
 * <p>Without {@code -p}, each input argument is treated as a linkdb and the
 * directories under {@code <arg>/current} are used as the MapFiles.
 *
 * @param args {@code [-p] [-t threshold] outputFile <linkdb|paths>...}
 * @return 0 on success; -1 on a usage error, if the output file already
 *         exists, or if an exception occurs while reading the MapFiles
 * @throws Exception if a file system call made before the MapFiles are read
 *                   (existence check, directory listing, output creation) fails
 */
public int run(String[] args) throws Exception {
    String usage = "Usage: PageRanker [OPTIONS] outputFile <linkdb|paths>\n"
            + "Emit PageRank values for URLs in linkDb(s).  Suitable for use with\n"
            + "PageRank scoring filter.\n" + "\n" + "OPTIONS:\n"
            + "  -p              Use exact path as given, don't assume it's a typical\n"
            + "                    linkdb with \"current/part-nnnnn\" subdirs.\n"
            + "  -t threshold    Do not emit records with less than this many inlinks.\n"
            + "                    Default value 10.";
    if (args.length < 1) {
        // The usage text already begins with "Usage: "; do not prefix it again.
        System.err.println(usage);
        return -1;
    }

    boolean exactPath = false;
    int threshold = 10;

    // Consume leading options. startsWith (rather than charAt(0)) keeps an
    // empty-string argument from throwing StringIndexOutOfBoundsException.
    int pos = 0;
    for (; pos < args.length && args[pos].startsWith("-"); pos++) {
        if (args[pos].equals("-p")) {
            exactPath = true;
        } else if (args[pos].equals("-t")) {
            pos++;
            if (args.length - pos < 1) {
                System.err.println("Error: missing argument to -t option");
                return -1;
            }
            try {
                threshold = Integer.parseInt(args[pos]);
            } catch (NumberFormatException nfe) {
                System.err.println("Error: bad value for -t option: " + args[pos]);
                return -1;
            }
        }
    }

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    if (pos >= args.length) {
        System.err.println("Error: missing outputFile");
        return -1;
    }

    Path outputPath = new Path(args[pos++]);
    if (fs.exists(outputPath)) {
        System.err.println("Error: outputFile already exists: " + outputPath);
        return -1;
    }

    if (pos >= args.length) {
        System.err.println("Error: missing linkdb");
        return -1;
    }

    List<Path> mapfiles = new ArrayList<Path>();

    // If we are using exact paths, add each one to the list.
    // Otherwise, assume the given path is to a linkdb and look for
    // <linkdbPath>/current/part-nnnnn sub-dirs.
    if (exactPath) {
        for (; pos < args.length; pos++) {
            mapfiles.add(new Path(args[pos]));
        }
    } else {
        for (; pos < args.length; pos++) {
            FileStatus[] fstats = fs.listStatus(new Path(args[pos] + "/current"),
                    HadoopFSUtil.getPassDirectoriesFilter(fs));
            mapfiles.addAll(Arrays.asList(HadoopFSUtil.getPaths(fstats)));
        }
    }

    System.out.println("mapfiles = " + mapfiles);

    PrintWriter output = new PrintWriter(
            new OutputStreamWriter(fs.create(outputPath).getWrappedStream(), "UTF-8"));

    try {
        for (Path p : mapfiles) {
            MapFile.Reader reader = new MapFile.Reader(fs, p.toString(), conf);

            // Release each reader as soon as its file has been scanned.
            try {
                WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(),
                        conf);
                Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

                while (reader.next(key, value)) {
                    if (!(key instanceof Text))
                        continue;

                    String toUrl = ((Text) key).toString();

                    // HACK: Should make this into some externally configurable regex.
                    if (!toUrl.startsWith("http"))
                        continue;

                    // Stays -1 for unsupported value types.
                    int count = -1;
                    if (value instanceof IntWritable) {
                        count = ((IntWritable) value).get();
                    } else if (value instanceof Inlinks) {
                        Inlinks inlinks = (Inlinks) value;

                        count = inlinks.size();
                    }

                    if (count < threshold)
                        continue;

                    output.println(count + " " + toUrl);
                }
            } finally {
                reader.close();
            }
        }

        return 0;
    } catch (Exception e) {
        LOG.fatal("PageRanker: " + StringUtils.stringifyException(e));
        return -1;
    } finally {
        output.flush();
        output.close();
    }
}

From source file:org.archive.nutchwax.tools.ParseTextCombiner.java

License:Apache License

/**
 * Command-line entry point: merges the input MapFiles into one new MapFile.
 *
 * <p>One record is read from every input up front; then the smallest current
 * key (per the {@code WritableComparator} for the first input's key class) is
 * appended to the output and replaced by the next record from the same input,
 * until every input is exhausted. The key and value classes of the output are
 * taken from the first input. Presumably all inputs share those classes and
 * are individually sorted, as MapFiles are expected to be.
 *
 * <p>All readers and the writer are closed before returning, including when
 * opening, reading or appending fails.
 *
 * @param args {@code [-v] output input...}; {@code -v} enables verbose tracing
 * @return 0 on success, 1 on a usage error, -1 if the output already exists
 * @throws Exception if an input cannot be opened or read, or the output
 *                   cannot be created or written
 */
public int run(String[] args) throws Exception {
    String usage = "Usage: ParseTextCombiner [-v] output input...\n";

    // The usage text already begins with "Usage: "; do not prefix it again.
    if (args.length < 1 || args[0].equals("-h")) {
        System.err.println(usage);
        return 1;
    }

    int argStart = 0;
    if (args[argStart].equals("-v")) {
        verbose = true;
        argStart = 1;
    }

    if (args.length - argStart < 2) {
        System.err.println(usage);
        return 1;
    }

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Path outputPath = new Path(args[argStart]);
    if (fs.exists(outputPath)) {
        System.err.println("ERROR: output already exists: " + outputPath);
        return -1;
    }

    MapFile.Reader[] readers = new MapFile.Reader[args.length - argStart - 1];
    MapFile.Writer writer = null;

    try {
        for (int pos = argStart + 1; pos < args.length; pos++) {
            readers[pos - argStart - 1] = new MapFile.Reader(fs, args[pos], conf);
        }

        WritableComparable[] keys = new WritableComparable[readers.length];
        Writable[] values = new Writable[readers.length];

        WritableComparator wc = WritableComparator.get((Class<WritableComparable>) readers[0].getKeyClass());

        writer = new MapFile.Writer(conf, fs, outputPath.toString(),
                (Class<WritableComparable>) readers[0].getKeyClass(), readers[0].getValueClass());

        int readCount = 0;
        int writeCount = 0;

        // Prime the merge with the first record of every input.
        for (int i = 0; i < readers.length; i++) {
            WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(readers[i].getKeyClass(),
                    conf);
            Writable value = (Writable) ReflectionUtils.newInstance(readers[i].getValueClass(), conf);

            if (readers[i].next(key, value)) {
                keys[i] = key;
                values[i] = value;

                readCount++;
                if (verbose)
                    System.out.println("read: " + i + ": " + key);
            } else {
                // Not even one key/value pair in the map.
                System.out.println("WARN: No key/value pairs in mapfile: " + args[i + argStart + 1]);
                try {
                    readers[i].close();
                } catch (IOException ignored) {
                    // Best effort: nothing was read from this input.
                }
                readers[i] = null;
            }
        }

        while (true) {
            int candidate = -1;

            // A null key marks an exhausted input.
            for (int i = 0; i < keys.length; i++) {
                if (keys[i] == null)
                    continue;

                if (candidate < 0) {
                    candidate = i;
                } else if (wc.compare(keys[i], keys[candidate]) < 0) {
                    candidate = i;
                }
            }

            if (candidate < 0) {
                if (verbose)
                    System.out.println("Candidate < 0, all done.");
                break;
            }

            // Candidate is the index of the "smallest" key.

            // Write it out.
            writer.append(keys[candidate], values[candidate]);
            writeCount++;
            if (verbose)
                System.out.println("write: " + candidate + ": " + keys[candidate]);

            // Now read in a new value from the corresponding reader.
            if (!readers[candidate].next(keys[candidate], values[candidate])) {
                if (verbose)
                    System.out.println(
                            "No more key/value pairs in (" + candidate + "): " + args[candidate + argStart + 1]);

                // No more key/value pairs left in this reader.
                try {
                    readers[candidate].close();
                } catch (IOException ignored) {
                    // Best effort: this input has been fully consumed.
                }
                readers[candidate] = null;
                keys[candidate] = null;
                values[candidate] = null;
            } else {
                readCount++;
                if (verbose)
                    System.out.println("read: " + candidate + ": " + keys[candidate]);
            }
        }

        System.out.println("Total # records in : " + readCount);
        System.out.println("Total # records out: " + writeCount);

        // On success a close failure must surface, so it is not swallowed here.
        writer.close();
        writer = null;

        return 0;
    } finally {
        // Only reached with a live writer when something above threw.
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException ignored) {
                // The original failure is the one worth reporting.
            }
        }
        // Close any reader still open (failure path, or an input left unopened is null).
        for (int i = 0; i < readers.length; i++) {
            if (readers[i] != null) {
                try {
                    readers[i].close();
                } catch (IOException ignored) {
                    // Best effort cleanup of a read-only handle.
                }
                readers[i] = null;
            }
        }
    }
}

From source file:org.hadoop.tdg.TestPseudoHadoop.java

License:Apache License

/**
 * sorted sequence file/*from   w w  w  .  j  a  v a2 s.  com*/
 *
 * @throws IOException
 */
@Test
public void mapFileIO() throws IOException {
    LongWritable key = new LongWritable();
    Text value = new Text();
    MapFile.Writer writer = null;
    try {
        writer = new MapFile.Writer(fs.getConf(), fs, DST, key.getClass(), value.getClass());
        for (int i = 0; i < 100; i++) {
            key.set(i);
            value.set(DATA[i % DATA.length]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }

    MapFile.Reader reader = null;
    try {
        reader = new MapFile.Reader(fs, DST, fs.getConf());
        LongWritable readerKey = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf());
        Text readerValue = (Text) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf());
        while (reader.next(readerKey, readerValue)) {
            System.out.printf("%s\t%s\n", readerKey, readerValue);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}