edu.ucsb.cs.knn.KnnDriver.java Source code

Java tutorial

Introduction

Here is the source code for edu.ucsb.cs.knn.KnnDriver.java

Source

/**
 * Copyright 2012-2013 The Regents of the University of California
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on
 * an "AS IS"; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations under
 * the License.
 * 
 * Author: maha alabduljalil <maha (at) cs.ucsb.edu>
 * @Since Aug 18, 2012
 */

package edu.ucsb.cs.knn;

/*
 * Track1: Learning to predict users' ratings of musical items. Items can be tracks,
 * albums, artists and genres. Items form a hierarchy, such that each track belongs to
 * an album, albums belong to artists, and together they are tagged by genres.
 * 
 * Project Goal:
 *  - Boost performance for KNN generation.
 *  - Make sure code execution time is state-of-art compatiable.<br>
 *  - Demonstrate the scalability of the code using Yahoo dataset.<br>
 *  
 *  Dataset:
 *  - Training set: used to build the knn of each song. 
 *  - Validation set: used to query the above results and validate it's producing correct values.
 *  - Test set: now that the contest is over, it's similar to validation (used to be hidden and used
 *  for choosing winners and calculating errors in prediction-RMSE).
 */
import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ProgramDriver;

import edu.ucsb.cs.knn.hybrid.HybridMain;
import edu.ucsb.cs.knn.inverted.InvertedMain;
import edu.ucsb.cs.knn.preprocess.SongInvertedIndexMain;
import edu.ucsb.cs.knn.preprocess.UserForwardIndexMain;
import edu.ucsb.cs.knn.query.QueryMain;
import edu.ucsb.cs.knn.types.SequenceReader;

/**
 * The starting point of execution. This class present the job options available
 * to run then execute user's choice.
 * 
 * @author Maha
 * 
 */
public class KnnDriver {

    public static final String INPUT_DIR_PROPERTY = "knn.input.dir";
    public static final String OUTPUT_DIR_PROPERTY = "knn.output.dir";
    public static final String THRESHOLD_PROPERTY = "knn.sim.threshold";
    public static final float THRESHOLD_VALUE = 0.85f;

    /**
     * Presents the available job options for the user to choose from. Then
     * invoke the class responsible for configuring the job.
     */
    public static void main(String argv[]) {

        int exitCode = -1;
        ProgramDriver pgd = new ProgramDriver();
        try {
            pgd.addClass("invindex", SongInvertedIndexMain.class, "Generate song inverted index for Yahoo data.");
            pgd.addClass("forindex", UserForwardIndexMain.class, "Generate user forward index for Yahoo data.");
            pgd.addClass("invertedknn", InvertedMain.class, " knn over songs inverted index.");
            pgd.addClass("hybridknn", HybridMain.class, " not yet done ...knn over songs forward index.");
            // pgd.addClass("estimateknn", InvertedMain.class, " TO-DO.");
            pgd.addClass("readseq", SequenceReader.class, " View sequence file as text to read.");
            pgd.addClass("queryknn", QueryMain.class, " Query the KNN results for recommendation.");
            pgd.driver(argv);
            exitCode = 0;
        } catch (Throwable e) {
            e.printStackTrace();
        }
        System.exit(exitCode);
    }

    /**
     * Submit the configured job to Hadoop JobTracker to start the process.
     */
    public static void run(JobConf job) throws IOException {

        job.setJarByClass(KnnDriver.class); // This method sets the jar
        String ret = stars() + "\nKnnDriver(" + job.getJobName() + ")\n" + "  Input Path:  {";
        Path inputs[] = FileInputFormat.getInputPaths(job);
        for (int ctr = 0; ctr < inputs.length; ctr++) {
            if (ctr > 0) {
                ret += "\n                ";
            }
            ret += inputs[ctr].toString();
        }
        ret += "}\n";
        ret += "  Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + "  Map Tasks:    "
                + job.getNumMapTasks() + "\n" + "  Reduce Tasks: " + job.getNumReduceTasks() + "\n";
        ret += "  Threshold:    " + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE) + "\n";
        System.out.println(ret);
        //
        // run job
        //
        JobClient.runJob(job);
    }

    /**
     * Prints stars to the output console.
     */
    public static String stars() {
        return new String(new char[77]).replace("\0", "*");
    }

}