Example usage of the org.apache.hadoop.io.MapFile.Reader constructor

List of usage examples for the org.apache.hadoop.io.MapFile.Reader constructor


On this page you can find example usages of the org.apache.hadoop.io.MapFile.Reader constructor.


public Reader(FileSystem fs, String dirName, Configuration conf) throws IOException 

Source Link


Construct a map reader for the named map.


From source file:com.datatorrent.contrib.hdht.HadoopFilePerformanceTest.java

License:Open Source License

/**
 * Calls {@code writeMapFile()}, then reads {@code Testfile.MAPFILE} back through a
 * {@code MapFile.Reader} in three passes (one {@code next()} loop and two {@code get()}
 * loops), logging the value returned by {@code stopTimer(...)} after each pass.
 *
 * NOTE(review): this excerpt is truncated. The reader construction stops in the middle of
 * its argument list and none of the loop bodies are closed, so it does not compile as shown.
 *
 * @throws Exception declared by the signature; the visible code does not show which call throws
 */
public void testMapFileRead() throws Exception {

    logger.info("Reading {} with {} key/value pairs", Testfile.MAPFILE.filename(),
            String.format("%,d", testSize));
    writeMapFile();

    // Reusable holders that the reader fills on every next()/get() call.
    Text key = new Text();
    Text value = new Text();


    // Set amount of memory to use for buffer
    float bufferPercent = 0.25f;
    // A quarter of the maximum heap size reported by the memory MX bean, narrowed to int.
    // NOTE(review): bufferSize is not referenced in the visible lines; presumably it is
    // passed in the cut-off constructor arguments below (confirm against the full source).
    int bufferSize = (int) (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax() * bufferPercent);

    // NOTE(review): the remaining constructor arguments are missing from this excerpt.
    MapFile.Reader reader = new MapFile.Reader(Testfile.MAPFILE.filepath(), conf,

    // Pass 1: iterate over every entry with next().
    while (reader.next(key, value)) {
        //logger.debug("read key:{} value:{}", key, value);
    logger.info("Duration for reader.next() SEQUENTIAL keys: {}", stopTimer(Testfile.MAPFILE, "READ-SEQ"));

    // Pass 2: testSize lookups with get().
    // NOTE(review): no statement setting `key` is visible here; it may have been dropped.
    for (int i = 0; i < testSize; i++) {
        reader.get(key, value);
        //logger.debug("{}:{}", key, value);
    logger.info("Duration for reader.get(key) SEQUENTIAL keys: {}", stopTimer(Testfile.MAPFILE, "READ-SEQ-ID"));

    // Pass 3: testSize lookups with get(), logged as "RANDOM keys".
    // NOTE(review): `random` is not used in the visible lines; the key selection is missing.
    Random random = new Random();
    for (int i = 0; i < testSize; i++) {
        reader.get(key, value);
        //logger.debug("{}:{}", key, value);
    logger.info("Duration for reader.get(key) RANDOM keys: {}", stopTimer(Testfile.MAPFILE, "READ-RAND"));


From source file:crunch.MaxTemperature.java

License:Apache License

    /**
     * Prepares the fixture: invokes {@code MapFileWriteDemo.main} with {@code MAP_URI} as its
     * only argument, opens {@code MAP_URI} with a {@code MapFile.Reader}, and allocates key
     * and value holders of the classes the reader reports.
     *
     * @throws IOException propagated from the calls made here
     */
    public void setUp() throws IOException {
        final String[] demoArgs = { MAP_URI };
        MapFileWriteDemo.main(demoArgs);

        final Configuration configuration = new Configuration();
        fs = FileSystem.get(URI.create(MAP_URI), configuration);
        reader = new MapFile.Reader(fs, MAP_URI, configuration);

        // Instantiate the holders reflectively so they match whatever types the file stores.
        final Object freshKey = ReflectionUtils.newInstance(reader.getKeyClass(), configuration);
        key = (WritableComparable<?>) freshKey;

        final Object freshValue = ReflectionUtils.newInstance(reader.getValueClass(), configuration);
        value = (Writable) freshValue;
    }

From source file:io.aos.hdfs.MapFileSeekTest.java

License:Apache License

/**
 * Prepares the fixture: invokes {@code MapFileWriteDemo.main} with {@code MAP_URI}, opens
 * {@code MAP_URI} with a {@code MapFile.Reader}, and creates key/value holders of the
 * classes the reader reports.
 *
 * NOTE(review): the closing brace of this method is missing from the excerpt.
 *
 * @throws IOException propagated from the calls made here
 */
public void setUp() throws IOException {
    MapFileWriteDemo.main(new String[] { MAP_URI });

    Configuration conf = new Configuration();
    fs = FileSystem.get(URI.create(MAP_URI), conf);

    reader = new MapFile.Reader(fs, MAP_URI, conf);
    // Instantiate the holders reflectively so they match whatever types the file stores.
    key = (WritableComparable<?>) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

From source file:org.archive.jbs.tools.Dump.java

License:Apache License

/**
 * Reads every key/value pair from {@code inputPath} and passes each pair to
 * {@code output(key, value, mode)}. The path is first opened as a MapFile; if that throws
 * an {@code IOException}, it is opened as a SequenceFile instead.
 *
 * NOTE(review): this excerpt is truncated. Several blocks are never closed and two
 * {@code newInstance(...)} calls are cut off mid-argument-list.
 *
 * @param fs        file system used to open the readers
 * @param inputPath location of the file to dump
 * @param mode      forwarded unchanged to {@code output(...)}
 * @throws Exception an {@code IOException} with message "Cannot open file: ..." is thrown
 *                   when the SequenceFile open fails too
 */
public void dump(FileSystem fs, Path inputPath, int mode) throws Exception {
    Configuration conf = getConf();

    MapFile.Reader mapReader = null;
    SequenceFile.Reader seqReader = null;
    try {
        mapReader = new MapFile.Reader(fs, inputPath.toString(), conf);
    } catch (IOException ioe) {
        // Hrm, try a sequence file...
        // mapReader stays null here, which selects the SequenceFile branch below.

    if (mapReader != null) {
        // NOTE(review): the second argument of the next call is missing from this excerpt.
        WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(mapReader.getKeyClass(),
        Writable value = (Writable) ReflectionUtils.newInstance(mapReader.getValueClass(), conf);

        while (mapReader.next(key, value)) {
            output(key, value, mode);
    } else {
        // Not a MapFile...try a SequenceFile.
        try {
            seqReader = new SequenceFile.Reader(fs, inputPath, conf);
        } catch (IOException ioe) {
            // Hrm, neither MapFile nor SequenceFile.
            // NOTE(review): the caught exception is not attached as the cause of the new one.
            throw new IOException("Cannot open file: " + inputPath);

        // NOTE(review): the second argument of the next call is missing from this excerpt.
        WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(seqReader.getKeyClass(),
        Writable value = (Writable) ReflectionUtils.newInstance(seqReader.getValueClass(), conf);

        while (seqReader.next(key, value)) {
            output(key, value, mode);

From source file:org.archive.nutchwax.tools.PageRanker.java

License:Apache License

 */
/**
 * Command-line entry point for PageRanker. It parses leading {@code -p} / {@code -t}
 * options, takes the next argument as the output file, collects MapFile paths from the
 * remaining arguments, and writes one {@code "<count> <url>"} line per record to the output.
 *
 * NOTE(review): this excerpt is truncated. Many blocks are never closed, several {@code if}
 * statements have no visible body, and some calls are cut off mid-argument-list.
 *
 * @param args command-line arguments as described by the usage string below
 * @return 0 on success; -1 on argument errors, when the output file already exists, or
 *         when an exception is caught while processing
 * @throws Exception declared by the signature
 */
public int run(String[] args) throws Exception {
    String usage = "Usage: PageRanker [OPTIONS] outputFile <linkdb|paths>\n"
            + "Emit PageRank values for URLs in linkDb(s).  Suitable for use with\n"
            + "PageRank scoring filter.\n" + "\n" + "OPTIONS:\n"
            + "  -p              Use exact path as given, don't assume it's a typical\n"
            + "                    linkdb with \"current/part-nnnnn\" subdirs.\n"
            + "  -t threshold    Do not emit records with less than this many inlinks.\n"
            + "                    Default value 10.";
    if (args.length < 1) {
        // NOTE(review): `usage` already begins with "Usage: ", so this prints the prefix twice.
        System.err.println("Usage: " + usage);
        return -1;

    boolean exactPath = false;
    int threshold = 10;

    // Consume leading arguments that start with '-'.
    // NOTE(review): charAt(0) throws on an empty-string argument.
    int pos = 0;
    for (; pos < args.length && args[pos].charAt(0) == '-'; pos++) {
        if (args[pos].equals("-p")) {
            exactPath = true;
        if (args[pos].equals("-t")) {
            // NOTE(review): as shown, pos is not advanced past "-t" before the check and the
            // parse below, so the check can never fire and parseInt would receive "-t".
            // A `pos++` has presumably been dropped from this excerpt; confirm against the
            // full source.
            if (args.length - pos < 1) {
                System.err.println("Error: missing argument to -t option");
                return -1;
            try {
                threshold = Integer.parseInt(args[pos]);
            } catch (NumberFormatException nfe) {
                System.err.println("Error: bad value for -t option: " + args[pos]);
                return -1;

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    if (pos >= args.length) {
        System.err.println("Error: missing outputFile");
        return -1;

    // The first non-option argument is the output file; refuse to overwrite it.
    Path outputPath = new Path(args[pos++]);
    if (fs.exists(outputPath)) {
        // NOTE(review): "Erorr" is misspelled in this runtime message.
        System.err.println("Erorr: outputFile already exists: " + outputPath);
        return -1;

    if (pos >= args.length) {
        System.err.println("Error: missing linkdb");
        return -1;

    List<Path> mapfiles = new ArrayList<Path>();

    // If we are using exact paths, add each one to the list.
    // Otherwise, assume the given path is to a linkdb and look for
    // <linkdbPath>/current/part-nnnnn sub-dirs.
    if (exactPath) {
        for (; pos < args.length; pos++) {
            mapfiles.add(new Path(args[pos]));
    } else {
        for (; pos < args.length; pos++) {
            // NOTE(review): the rest of this listStatus(...) call and the code that adds its
            // results to `mapfiles` are missing from this excerpt.
            FileStatus[] fstats = fs.listStatus(new Path(args[pos] + "/current"),

    System.out.println("mapfiles = " + mapfiles);

    // Text output is encoded as UTF-8.
    PrintWriter output = new PrintWriter(
            new OutputStreamWriter(fs.create(outputPath).getWrappedStream(), "UTF-8"));

    try {
        for (Path p : mapfiles) {
            MapFile.Reader reader = new MapFile.Reader(fs, p.toString(), conf);

            // NOTE(review): the second argument of the next call is missing from this excerpt.
            WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(),
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                // NOTE(review): the body of this `if` is missing from the excerpt.
                if (!(key instanceof Text))

                String toUrl = ((Text) key).toString();

                // HACK: Should make this into some externally configurable regex.
                // NOTE(review): the body of this `if` is missing from the excerpt.
                if (!toUrl.startsWith("http"))

                // count stays -1 when the value is neither an IntWritable nor an Inlinks.
                int count = -1;
                if (value instanceof IntWritable) {
                    count = ((IntWritable) value).get();
                } else if (value instanceof Inlinks) {
                    Inlinks inlinks = (Inlinks) value;

                    count = inlinks.size();

                // NOTE(review): the body of this `if` is missing from the excerpt.
                if (count < threshold)

                output.println(count + " " + toUrl);

        return 0;
    } catch (Exception e) {
        LOG.fatal("PageRanker: " + StringUtils.stringifyException(e));
        return -1;
    } finally {

From source file:org.archive.nutchwax.tools.ParseTextCombiner.java

License:Apache License

 */
/**
 * Command-line entry point for ParseTextCombiner. It opens every input argument as a
 * {@code MapFile.Reader} and, on each pass of the main loop, appends the key/value pair with
 * the smallest key (per the {@code WritableComparator} for the first reader's key class) to
 * a {@code MapFile.Writer} on the output path, then advances the reader that pair came from.
 *
 * NOTE(review): this excerpt is truncated. Many blocks are never closed, some statements are
 * cut off, and the {@code try} blocks that precede "Don't care" are empty as shown.
 *
 * @param args {@code [-v] output input...}, per the usage string below
 * @return 1 after printing usage, -1 when the output path already exists, 0 on completion
 * @throws Exception declared by the signature
 */
public int run(String[] args) throws Exception {
    String usage = "Usage: ParseTextCombiner [-v] output input...\n";

    // NOTE(review): `usage` already begins with "Usage: ", so each println below prints the
    // prefix twice.
    if (args.length < 1) {
        System.err.println("Usage: " + usage);
        return 1;

    if (args[0].equals("-h")) {
        System.err.println("Usage: " + usage);
        return 1;

    // An optional leading "-v" enables verbose output and shifts the positional arguments.
    int argStart = 0;
    if (args[argStart].equals("-v")) {
        verbose = true;
        argStart = 1;

    // At least one output and one input must remain after the optional flag.
    if (args.length - argStart < 2) {
        System.err.println("Usage: " + usage);
        return 1;

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Path outputPath = new Path(args[argStart]);
    if (fs.exists(outputPath)) {
        System.err.println("ERROR: output already exists: " + outputPath);
        return -1;

    // One reader per input argument; reader index i corresponds to args[i + argStart + 1].
    MapFile.Reader[] readers = new MapFile.Reader[args.length - argStart - 1];
    for (int pos = argStart + 1; pos < args.length; pos++) {
        readers[pos - argStart - 1] = new MapFile.Reader(fs, args[pos], conf);

    // keys[i] / values[i] hold the current pair of reader i; keys[i] is null when it has none.
    WritableComparable[] keys = new WritableComparable[readers.length];
    Writable[] values = new Writable[readers.length];

    // The comparator and the writer's key/value classes are taken from the first reader only.
    WritableComparator wc = WritableComparator.get((Class<WritableComparable>) readers[0].getKeyClass());

    MapFile.Writer writer = new MapFile.Writer(conf, fs, outputPath.toString(),
            (Class<WritableComparable>) readers[0].getKeyClass(), readers[0].getValueClass());

    // NOTE(review): neither counter is incremented anywhere in the visible lines.
    int readCount = 0;
    int writeCount = 0;

    // Load the first pair from every reader.
    for (int i = 0; i < readers.length; i++) {
        // NOTE(review): the second argument of the next call is missing from this excerpt.
        WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(readers[i].getKeyClass(),
        Writable value = (Writable) ReflectionUtils.newInstance(readers[i].getValueClass(), conf);

        if (readers[i].next(key, value)) {
            keys[i] = key;
            values[i] = value;

            if (verbose)
                System.out.println("read: " + i + ": " + key);
        } else {
            // Not even one key/value pair in the map.
            System.out.println("WARN: No key/value pairs in mapfile: " + args[i + argStart + 1]);
            // NOTE(review): this try block is empty as shown; presumably it closed the reader.
            try {
            } catch (IOException ioe) {
                /* Don't care */ }
            readers[i] = null;

    while (true) {
        int candidate = -1;

        // Find the index holding the smallest current key; the strict `< 0` comparison means
        // the lowest index wins when keys compare equal.
        for (int i = 0; i < keys.length; i++) {
            // NOTE(review): the body of this `if` is missing from the excerpt.
            if (keys[i] == null)

            if (candidate < 0) {
                candidate = i;
            } else if (wc.compare(keys[i], keys[candidate]) < 0) {
                candidate = i;

        // No candidate means every keys[] slot is null.
        // NOTE(review): the statement that leaves the loop is not visible in this excerpt.
        if (candidate < 0) {
            if (verbose)
                System.out.println("Candidate < 0, all done.");

        // Candidate is the index of the "smallest" key.

        // Write it out.
        writer.append(keys[candidate], values[candidate]);
        if (verbose)
            System.out.println("write: " + candidate + ": " + keys[candidate]);

        // Now read in a new value from the corresponding reader.
        if (!readers[candidate].next(keys[candidate], values[candidate])) {
            // NOTE(review): the start of the statement below is missing from this excerpt.
            if (verbose)
                        "No more key/value pairs in (" + candidate + "): " + args[candidate + argStart + 1]);

            // No more key/value pairs left in this reader.
            // NOTE(review): this try block is empty as shown; presumably it closed the reader.
            try {
            } catch (IOException ioe) {
                /* Don't care */ }
            // Null out the slot so the selection loop no longer considers this reader.
            readers[candidate] = null;
            keys[candidate] = null;
            values[candidate] = null;
        } else {
            if (verbose)
                System.out.println("read: " + candidate + ": " + keys[candidate]);

    System.out.println("Total # records in : " + readCount);
    System.out.println("Total # records out: " + writeCount);


    return 0;

From source file:org.hadoop.tdg.TestPseudoHadoop.java

License:Apache License

 * sorted sequence file
 * @throws IOException
public void mapFileIO() throws IOException {
    LongWritable key = new LongWritable();
    Text value = new Text();
    MapFile.Writer writer = null;
    try {
        writer = new MapFile.Writer(fs.getConf(), fs, DST, key.getClass(), value.getClass());
        for (int i = 0; i < 100; i++) {
            value.set(DATA[i % DATA.length]);
            writer.append(key, value);
    } finally {

    MapFile.Reader reader = null;
    try {
        reader = new MapFile.Reader(fs, DST, fs.getConf());
        LongWritable readerKey = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf());
        Text readerValue = (Text) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf());
        while (reader.next(readerKey, readerValue)) {
            System.out.printf("%s\t%s\n", readerKey, readerValue);
    } finally {