Source code for edu.brown.cs.mapreduce.benchmarks.Benchmark2.java

Java tutorial

Introduction

Here is the source code for edu.brown.cs.mapreduce.benchmarks.Benchmark2.java

Source

/***************************************************************************
*   Copyright (C) 2008 by Andy Pavlo, Brown University                    *
*   http://www.cs.brown.edu/~pavlo/                                       *
*                                                                         *
*   Permission is hereby granted, free of charge, to any person obtaining *
*   a copy of this software and associated documentation files (the       *
*   "Software"), to deal in the Software without restriction, including   *
*   without limitation the rights to use, copy, modify, merge, publish,   *
*   distribute, sublicense, and/or sell copies of the Software, and to    *
*   permit persons to whom the Software is furnished to do so, subject to *
*   the following conditions:                                             *
*                                                                         *
*   The above copyright notice and this permission notice shall be        *
*   included in all copies or substantial portions of the Software.       *
*                                                                         *
*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       *
*   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    *
*   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*
*   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR     *
*   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
*   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
*   OTHER DEALINGS IN THE SOFTWARE.                                       *
***************************************************************************/
package edu.brown.cs.mapreduce.benchmarks;

import java.io.*;
import java.util.Iterator;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.join.TupleWritable;
import org.apache.hadoop.util.*;
import edu.brown.cs.mapreduce.BenchmarkBase;

public class Benchmark2 extends Configured implements Tool {

    public static class BaseMap extends MapReduceBase {
        protected boolean USE_SUBSTRING = false;
        protected boolean SHOWN_ERROR = false;

        public void configure(JobConf job) {
            super.configure(job);
            //
            // Get the page rank from the conf object
            //
            String property = job.get(BenchmarkBase.PROPERTY_BENCHMARKS2_SUBSTRING);
            if (property == null) {
                this.USE_SUBSTRING = Boolean.parseBoolean(property);
            }
            //this.USE_SUBSTRING = true; // HACK FOR NOW
            //         System.out.println("USE_SUBSTRING: " + this.USE_SUBSTRING);
        }
    }

    /**
     * Mapper for pre-parsed {@link TupleWritable} records. Emits the third
     * tuple field (the ad revenue) keyed by the source key, optionally
     * truncated to its first 7 characters when USE_SUBSTRING is set.
     */
    public static class TupleWritableMap extends BaseMap
            implements Mapper<Text, TupleWritable, Text, DoubleWritable> {
        public void map(Text key, TupleWritable value, OutputCollector<Text, DoubleWritable> output,
                Reporter reporter) throws IOException {
            // The revenue lives in the third tuple position (index 2).
            DoubleWritable revenue = (DoubleWritable) value.get(2);
            Text outputKey = this.USE_SUBSTRING ? new Text(key.toString().substring(0, 7)) : key;
            output.collect(outputKey, revenue);
        }
    }

    /**
     * Mapper for delimited text records. Splits the value on VALUE_DELIMITER,
     * parses the third field as the ad revenue, and emits it keyed by the
     * source key (optionally truncated to 7 characters when USE_SUBSTRING is
     * set). Malformed records are reported to stderr and skipped; the full
     * field dump is printed only once per mapper (SHOWN_ERROR).
     */
    public static class TextMap extends BaseMap implements Mapper<Text, Text, Text, DoubleWritable> {
        public void map(Text key, Text value, OutputCollector<Text, DoubleWritable> output, Reporter reporter)
                throws IOException {
            //
            // Split the value using VALUE_DELIMITER into separate fields.
            // The *third* field should be our revenue.
            //
            String fields[] = value.toString().split("\\" + BenchmarkBase.VALUE_DELIMITER);
            if (fields.length == 0) {
                return;
            }
            //
            // FIX: validate the field count up front instead of using an
            // ArrayIndexOutOfBoundsException for control flow. Error output is
            // unchanged from the original behavior.
            //
            if (fields.length <= 2) {
                System.err.println("ERROR: Invalid record for key '" + key + "'");
                if (!this.SHOWN_ERROR) {
                    for (int i = 0; i < fields.length; i++) {
                        System.err.println("[" + i + "] " + fields[i]);
                    } // FOR
                    this.SHOWN_ERROR = true;
                }
                return;
            }
            try {
                double revenue = Double.parseDouble(fields[2]);
                if (this.USE_SUBSTRING) {
                    key = new Text(key.toString().substring(0, 7));
                }
                output.collect(key, new DoubleWritable(revenue));
            } catch (NumberFormatException ex) {
                // Unparseable revenue field: report and skip this record.
                System.err.println("ERROR: Invalid adRevenue field for key '" + key + "'");
                ex.printStackTrace();
            }
        }
    } // END CLASS

    /**
     * Sums all revenue values observed for a key and emits a single total.
     * Registered as both the combiner and the reducer for this job.
     */
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, DoubleWritable, Text, DoubleWritable> {
        public void reduce(Text key, Iterator<DoubleWritable> values, OutputCollector<Text, DoubleWritable> output,
                Reporter reporter) throws IOException {
            double total = 0.0;
            while (values.hasNext()) {
                DoubleWritable amount = values.next();
                total += amount.get();
            }
            output.collect(key, new DoubleWritable(total));
        }
    } // END CLASS

    /**
     * Configures and launches the benchmark job: picks the input format and
     * mapper implementation from the benchmark options, wires up the shared
     * sum reducer/combiner, then runs the job and the optional combine pass.
     *
     * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
     */
    public int run(String[] args) throws Exception {
        BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);
        JobConf job = base.getJobConf();

        // Sequence files arrive pre-parsed; plain text is read as key/value lines.
        if (base.getSequenceFile()) {
            job.setInputFormat(SequenceFileInputFormat.class);
        } else {
            job.setInputFormat(KeyValueTextInputFormat.class);
        }
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        // Tuple data uses the pre-parsed mapper; otherwise split text fields.
        if (base.getTupleData()) {
            job.setMapperClass(Benchmark2.TupleWritableMap.class);
        } else {
            job.setMapperClass(Benchmark2.TextMap.class);
        }
        job.setCombinerClass(Benchmark2.Reduce.class);
        job.setReducerClass(Benchmark2.Reduce.class);

        try {
            base.runJob(job);
            if (base.getCombine()) {
                base.runCombine();
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(1);
        }
        return 0;
    }
}