Usage examples for org.apache.hadoop.mapred.JobConf.setJarByClass
public void setJarByClass(Class cls)
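Before the project-specific examples below, here is a minimal sketch (not taken from any of the listed projects) of where setJarByClass typically sits in an old-API driver: it is called on the JobConf so Hadoop can locate the jar that contains the driver class and ship it to the task JVMs. The class name PassThroughDriver and the argument-based input/output paths are hypothetical placeholders.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class PassThroughDriver {

    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf();
        // setJarByClass tells Hadoop to ship the jar containing this class to the
        // cluster, so task JVMs can load the job's mapper/reducer classes.
        job.setJarByClass(PassThroughDriver.class);
        job.setJobName("pass-through");

        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // hypothetical input dir
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // hypothetical output dir

        JobClient.runJob(job);
    }
}

Packaged into a jar and launched with the hadoop jar command, this identity job simply copies its input through, which is enough to see setJarByClass in action; the real drivers below follow the same pattern with project-specific mappers, formats and configuration.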
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException {
    // job.set("mapred.job.tracker", "local");
    // job.set("fs.default.name", "file:///");
    JobConf job = new JobConf();
    job.setJarByClass(HybridDriver.class);
    new GenericOptionsParser(job, args);
    setMapperAndRunner(job);
    job.setMapOutputKeyClass(DocDocWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(DocDocWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    Path inputPath = new Path(INPUT_DIR);
    CustomSequenceFileInputFormat.addInputPath(job, inputPath);
    Path outputPath = new Path(OUTPUT_DIR);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    job.setBoolean("fs.hdfs.impl.disable.cache", true); // xun: not sure if needed

    if (job.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE)) {
        job.setInputFormat(CustomSequenceFileInputFormat.class);
        Long splitMB = job.getLong(Config.SPLIT_MB_PROPERTY, Config.SPLIT_MB_VALUE) * 1024 * 1024;
        job.setLong("mapred.min.split.size", splitMB);
        job.setLong("mapred.max.split.size", splitMB);
        job.setLong("dfs.block.size", splitMB);
    } else {
        // Comment the following splitter call for www experiments; it assumes no splitting
        // of partitions for load balancing, should be fixed.
        Splitter.configure(job, inputPath); // remove comment unless for www
        job.setInputFormat(NonSplitableSequenceInputFormat.class); // remove comment
    }

    // SIGIR'14 two-stage balancing (not yet fully incorporated)
    if (job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE) != 0) {
        TwoStageLoadbalancing.main(job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE),
                new Path(PartDriver.OUTPUT_DIR), job);
    }

    JobSubmitter.run(job, "SIMILARITY", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));

    if (job.getBoolean(Config.CONVERT_TEXT_PROPERTY, Config.CONVERT_TEXT_VALUE))
        IDMappingJob(args);
}
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
public static void IDMappingJob(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJarByClass(HybridDriver.class);
    job.setJobName("Converting binary similarity scores to text");
    job.setMapperClass(IDMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Path inputPath = new Path(OUTPUT_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    Path outputPath = new Path("SimilarityScores");
    job.setOutputFormat(TextOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); // remove? not sure
    JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.knn.KnnDriver.java
License:Apache License
/**
 * Submit the configured job to the Hadoop JobTracker to start the process.
 */
public static void run(JobConf job) throws IOException {
    job.setJarByClass(KnnDriver.class); // This method sets the jar

    String ret = stars() + "\nKnnDriver(" + job.getJobName() + ")\n" + " Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + " Map Tasks: "
            + job.getNumMapTasks() + "\n" + " Reduce Tasks: " + job.getNumReduceTasks() + "\n";
    ret += " Threshold: " + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE) + "\n";
    System.out.println(ret);

    //
    // run job
    //
    JobClient.runJob(job);
}
From source file:edu.ucsb.cs.lsh.minhash.MinHashLshDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException {
    JobConf job = new JobConf();
    job.setJarByClass(MinHashLshDriver.class);
    job.setJobName(MinHashLshDriver.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    String inputDir = args[0];
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));

    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    JobSubmitter.run(job, "LSH", job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.partitioning.cosine.CosineAllPartitionMain.java
License:Apache License
/**
 * Job3: Core cosine partitioning, with skipping based on each partition's
 * maximum vector length, size and weight.
 */
public static JobConf runCosinePartition(JobConf job, String[] args) throws IOException {
    new GenericOptionsParser(job, args);
    job.setJobName(Partitioner.class.getSimpleName() + " + " + CosineAllPartitionMain.class.getSimpleName());
    job.setJarByClass(CosineAllPartitionMain.class);

    job = setMapReduce(job, CosineAllPartMapper.class, IdentityReducer.class);
    job = setInputOutput(job, new Path(Partitioner.OUTPUT_DIR), interPath);
    JobSubmitter.run(job, "Cosine Partitioning",
            job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
    FileSystem.get(job).delete(new Path(Partitioner.OUTPUT_DIR), true);
    return job;
}
From source file:edu.ucsb.cs.partitioning.cosine.HolderCosinePartitionMain.java
License:Apache License
/**
 * Executes four jobs. Job1 does norm-sorting. Job2 is a regular Java
 * program that uniformly partitions the records. Job3 runs the cosine
 * partitioner, and finally Job4 is the organizer that renames files into Gij
 * and removes unnecessary partitioning through merging.
 */
public static void main(String[] args) throws IOException {
    runSort(args, "normsort"); // sorts based on p-norm // remove comments
    JobConf job = runUniformPartition(args, 1); // remove comment
    runCosinePartition(job, args, HolderCosinePartitionMain.class, CosineNormPartMapper.class);
    rewritePartitions(job); // rename to job !! remove comment
    job.setJarByClass(HolderCosinePartitionMain.class);
}
From source file:edu.ucsb.cs.partitioning.cosine.Partitioner.java
License:Apache License
/**
 * Uniformly partitions the sequence vectors given the number of partitions
 * set in the configuration file. It also writes information about its
 * partitions (maximum p-norms, weights, or norms/weights/lengths) to a file
 * that guides the skipping in the core static partitioning that follows.
 *
 * @param norm_weight_all
 * @return
 */
public static JobConf main(String[] args, int norm_weight_all) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJarByClass(Partitioner.class);
    System.out.println(
            JobSubmitter.stars() + "\n Running partitioner to prepare uniform partitions (Single JVM) ");

    String inputDir = SortDriver.OUTPUT_DIR, maxDir;
    if (norm_weight_all == 1)
        maxDir = "/maxpnorm"; // maxDir = inputDir + "/maxpnorm";
    else if (norm_weight_all == 2)
        maxDir = "/maxweight"; // maxDir = inputDir + "/maxweight";
    else
        maxDir = "/maxall"; // maxDir = inputDir + "/maxall";

    if (!(new Path(inputDir).getFileSystem(job)).exists(new Path(inputDir)))
        throw new UnsupportedOperationException("ERROR: " + inputDir + " directory not set.");

    job.set(MAX_DIR_PATH, maxDir);
    job.set(Config.NUM_PARTITIONS_PROPERTY,
            Integer.toString(produceStaticParitions(job, inputDir, OUTPUT_DIR, maxDir,
                    job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE), norm_weight_all)));
    return job;
}
From source file:edu.ucsb.cs.partitioning.jaccard.JaccardCoarsePartitionMain.java
License:Apache License
public static void main(String[] args) throws IOException {
    runSort(args, "lengthsort");
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(JaccardCoarsePartitionMain.class.getSimpleName());
    job.setJarByClass(JaccardCoarsePartitionMain.class);

    //
    // set input & output & threshold & numPartitions
    //
    String inputDir = PartDriver.INPUT_DIR; // String inputDir = SortDriver.OUTPUT_DIR;
    FileSystem.get(job).delete(new Path(PartDriver.OUTPUT_DIR), true);
    float threshold = job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE);
    int nPartitions = job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE);

    //
    // run regular java program
    //
    System.out.println(JobSubmitter.stars() + "\n Running Sequential Job: jaccard coarse 1D partitioning "
            + "\n Threshold: " + threshold);
    FileSystem hdfs = produceStaticParitions(inputDir, PartDriver.OUTPUT_DIR, nPartitions);
    produceSkipList(true, threshold, nPartitions, hdfs, job);
    Collector.printJaccardStatistics(job, PartDriver.OUTPUT_DIR);
}
From source file:edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException {
    JobConf job = new JobConf();
    job.setJarByClass(LshPartitionMain.class);
    job.setJobName(LshPartitionMain.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    String inputDir = args[0];
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));

    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    run(job);
}
From source file:edu.ucsb.cs.preprocessing.sequence.SeqWriter.java
License:Apache License
/**
 * Runs a map-only MR job to convert an input directory of numeric-valued
 * records to Hadoop sequence format. It assumes text input in the format
 * [id feature weight ...].
 */
public static void writeSequence() throws IOException {
    JobConf job = new JobConf();
    job.setJobName("Convert text vectors to hadoop sequence");
    job.setJarByClass(SeqWriter.class);
    job.setMapperClass(SeqMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(FeatureWeightArrayWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(INPUT_DIR));
    FileSystem.get(job).delete(new Path(HashPagesDriver.IDS_FILE2), true);
    Path outputPath = new Path(OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    JobSubmitter.run(job, "PREPROCESS", -1);
}