Example usage for org.apache.hadoop.mapreduce Job setJarByClass

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job#setJarByClass, taken from open-source projects.

Prototype

public void setJarByClass(Class<?> cls) 

Document

Set the Jar by finding where a given class came from.
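
A minimal driver sketch shows the typical call site: the driver passes its own class to setJarByClass so Hadoop can locate the jar that contains the user code and ship it to the cluster. The class name WordCountDriver and the command-line arguments below are hypothetical, not taken from the examples on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setJarByClass example");

        // Resolve the job jar from the jar file that contains this driver class.
        job.setJarByClass(WordCountDriver.class);

        // A real job would also set its mapper, reducer and output key/value classes here;
        // they are omitted so this sketch stays focused on setJarByClass.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}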

Usage

From source file: basic.PartitionGraph.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(RANGE, "use range partitioner"));

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions")
            .create(NUM_PARTITIONS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(NUM_PARTITIONS)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inPath = cmdline.getOptionValue(INPUT);
    String outPath = cmdline.getOptionValue(OUTPUT);
    int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS));
    boolean useRange = cmdline.hasOption(RANGE);

    LOG.info("Tool name: " + PartitionGraph.class.getSimpleName());
    LOG.info(" - input dir: " + inPath);
    LOG.info(" - output dir: " + outPath);
    LOG.info(" - num partitions: " + numParts);
    LOG.info(" - node cnt: " + nodeCount);
    LOG.info(" - use range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.setInt("NodeCount", nodeCount);

    Job job = Job.getInstance(conf);
    job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath);
    job.setJarByClass(PartitionGraph.class);

    job.setNumReduceTasks(numParts);

    FileInputFormat.setInputPaths(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);

    job.waitForCompletion(true);

    return 0;
}
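
The run(...) method above follows the Hadoop Tool interface, so it is typically launched through ToolRunner, which strips generic options (-D, -conf, -libjars, ...) before passing the remaining arguments on. A minimal entry point would look like the following sketch; this main method is an assumption, not part of the listed source file.

public static void main(String[] args) throws Exception {
    // Hypothetical entry point: ToolRunner wires the parsed Configuration into getConf().
    ToolRunner.run(new PartitionGraph(), args);
}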

From source file: bb.BranchAndBound.java

License: Apache License

static Job getJob(String input, String output, String dataDir, int iteration) throws Exception {
    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);
    FileStatus[] fileStatus = hdfs.listStatus(new Path(input));
    for (int i = 0; i < fileStatus.length; ++i) {
        if (fileStatus[i].getLen() == 0) {
            hdfs.delete(fileStatus[i].getPath(), true);
        }
    }
    DistributedCache.addCacheFile(new URI(dataDir + "/data"), conf);
    Job ret = new Job(conf, dataDir + "_iteration_" + iteration);
    ret.setJarByClass(BranchAndBound.class);
    ret.setMapperClass(BBMapper1.class);
    ret.setReducerClass(BBReducer.class);
    //ret.setReducerClass(MergeReducer.class);
    FileInputFormat.setInputPaths(ret, new Path(input));
    //if( iteration > 7 ) FileInputFormat.setMinInputSplitSize(ret, 67108864);
    FileOutputFormat.setOutputPath(ret, new Path(output));
    ret.setOutputKeyClass(NullWritable.class);
    ret.setOutputValueClass(Text.class);
    return ret;
}
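
This example relies on the older DistributedCache.addCacheFile(URI, Configuration) helper and the Job(Configuration, String) constructor, both of which are deprecated in Hadoop 2.x. A roughly equivalent sketch with the newer API, reusing the names from the method above and assuming equivalent behavior:

Job ret = Job.getInstance(conf, dataDir + "_iteration_" + iteration);
ret.setJarByClass(BranchAndBound.class);
// Register the cache file on the Job itself instead of going through DistributedCache.
ret.addCacheFile(new URI(dataDir + "/data"));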

From source file: be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License: Apache License

private boolean runAprioriOncPhaseOnce(FIMOptions opt, long nrLines, int i, String info, String outputDir,
        String cacheFile) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
    int prefixSize = opt.prefixLength;

    System.out.println(
            "[AprioriPhase]: Phase: " + i + " input: " + opt.inputFile + ", output: " + opt.outputDir);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputDir), SplitByKTextInputFormat.class,
            AprioriPhaseMapper.class, Text.class, Text.class, AprioriPhaseReducer.class, Text.class,
            IntWritable.class, TextOutputFormat.class);

    job.setJobName(info);
    job.setJarByClass(BigFIMDriver.class);

    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    if (nrLines != -1) {
        conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
    }

    if (cacheFile != null) {
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), conf);
    }

    runJob(job, info);

    if (prefixSize <= i
            && job.getCounters().findCounter(COUNTER_GROUPNAME, COUNTER_NRLARGEPREFIXGROUPS).getValue() == 0) {
        return false;
    }
    if (prefixSize < i) {
        System.out.println(
                "[AprioriPhase]: Prefix group length updated! Now " + (i) + " instead of " + prefixSize);
    }
    return true;
}

From source file: be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License: Apache License

private void startCreatePrefixGroups(FIMOptions opt, int phase)
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Path path = new Path(opt.outputDir + separator + "tg" + phase);
    for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;
        System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
                + ", cache: " + cacheFile);

        Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
                ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
                IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

        job.setJobName("Create Prefix Groups");
        job.setJarByClass(BigFIMDriver.class);
        job.setNumReduceTasks(1);

        Configuration conf = job.getConfiguration();
        setConfigurationValues(conf, opt);
        conf.setInt(PREFIX_LENGTH_KEY, phase);

        addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());

        runJob(job, "Prefix Creation");
    }
}

From source file: be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License: Apache License

private void startMining(FIMOptions opt) throws IOException, ClassNotFoundException, InterruptedException {
    String inputFilesDir = opt.outputDir + separator + "pg" + separator;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);

    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);

    job.setJobName("Start Mining");
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    List<Path> inputPaths = new ArrayList<Path>();

    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }

    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));

    runJob(job, "Mining");
}

From source file: be.uantwerpen.adrem.disteclat.DistEclatDriver.java

License: Apache License

/**
 * Starts the first MapReduce cycle. First, the transaction file is partitioned into a number of chunks that are
 * handed to different mappers. Each mapper reads a chunk and returns the items together with their partial tid-lists.
 * The reducer concatenates the partial tid-lists, discards the infrequent items, sorts the frequent ones by ascending
 * frequency, and divides the singletons among the available mappers.
 *
 * This method generates three files, the frequent singletons (OSingletonsTids), the order file for singletons based
 * on ascending frequency (OSingletonsOrder) and the singletons distribution file (OSingletonsDistribution).
 * 
 * @param outputFile
 * @param opt
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void readHorizontalDb(String outputFile, FIMOptions opt)
        throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("[ItemReading]: input: " + opt.inputFile + ", output: " + outputFile);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
            ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ItemReaderReducer.class,
            IntWritable.class, Writable.class, TextOutputFormat.class);

    job.setJobName("Read Singletons");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    addNamedOutput(job, OSingletonsDistribution, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsOrder, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsTids, SequenceFileOutputFormat.class, IntWritable.class,
            IntMatrixWritable.class);

    runJob(job, "Item Reading");
}

From source file: be.uantwerpen.adrem.disteclat.DistEclatDriver.java

License: Apache License

/**
 * Starts the second MapReduce cycle. Each mapper gets a list of singletons from which it should start building X-FIs.
 * Each mapper uses Eclat to quickly compute the list of X-FIs. The total set of X-FIs is collected by the reducer and
 * then divided into independent sets. All itemsets computed from level 1 to X are reported directly. The distribution
 * of seeds is obtained by some allocation scheme, e.g., Round-Robin, Lowest-Frequency, ...
 * 
 * This method generates three files, the frequent itemsets from level 1 to X (OFises), the prefix groups
 * (OPrefixGroups) and the prefix distribution file (OPrefixDistribution).
 * 
 * @param inputDir
 * @param outputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startPrefixComputation(String inputDir, String outputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

    String inputFile = inputDir + separator + OSingletonsDistribution + rExt;
    String singletonsOrderFile = inputDir + separator + OSingletonsOrder + rExt;
    String singletonsTidsFile = inputDir + separator + OSingletonsTids + rExt;

    System.out.println("[PrefixComputation]: input: " + inputFile);

    Job job = prepareJob(new Path(inputFile), new Path(outputDir), NLineInputFormat.class,
            PrefixComputerMapper.class, Text.class, IntMatrixWritable.class, PrefixComputerReducer.class,
            IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

    job.setJobName("Compute Prefixes");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    addCacheFile(new URI(singletonsOrderFile.replace(" ", "%20")), job.getConfiguration());
    addCacheFile(new URI(singletonsTidsFile.replace(" ", "%20")), job.getConfiguration());

    runJob(job, "Partition Prefixes");
}

From source file: be.uantwerpen.adrem.disteclat.DistEclatDriver.java

License: Apache License

/**
 * Starts the third MapReduce cycle. Each mapper reads the prefix groups assigned to it and computes the collection of
 * closed sets. All information is reported to the reducer which finally writes the output to disk.
 * 
 * @param inputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startMining(String inputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

    String inputFilesDir = inputDir;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);

    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);

    job.setJobName("Start Mining");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    List<Path> inputPaths = new ArrayList<Path>();

    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }

    if (inputPaths.isEmpty()) {
        System.out.println("[StartMining]: No prefixes to extend further");
        return;
    }

    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));

    runJob(job, "Mining");
}

From source file: be.uantwerpen.adrem.hadoop.util.Tools.java

License: Apache License

@SuppressWarnings("rawtypes")
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {
    Job job = new Job(new Configuration());

    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
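
The BigFIMDriver and DistEclatDriver examples above call this helper and then immediately pin the jar to the driver class itself, overriding the jar inferred from the mapper or reducer. A typical call, condensed from those examples:

Job job = prepareJob(new Path(opt.inputFile), new Path(outputDir), SplitByKTextInputFormat.class,
        AprioriPhaseMapper.class, Text.class, Text.class, AprioriPhaseReducer.class, Text.class,
        IntWritable.class, TextOutputFormat.class);
// Override the jar inferred from the mapper/reducer with the driver's own class.
job.setJarByClass(BigFIMDriver.class);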

From source file: be.ugent.intec.halvade.MapReduceRunner.java

License: Open Source License

protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, true,
            halvadeOpts.useBamInput);
    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
            // add every file in directory
            FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    FileInputFormat.addInputPath(pass1Job, file.getPath());
                }
            }
        } else {
            FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }

    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    boolean skipPass1 = false;
    if (outFs.exists(new Path(tmpOutDir))) {
        // check if genome already exists
        skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
        if (skipPass1)
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
        else {
            Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
            Logger.INFO("ERROR: Please remove this directory before trying again.");
            System.exit(-2);
        }
    }
    if (!skipPass1) {
        FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
        pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);

        pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
        pass1Job.setMapOutputKeyClass(GenomeSJ.class);
        pass1Job.setMapOutputValueClass(Text.class);

        pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
        pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
        pass1Job.setNumReduceTasks(1);
        pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
        pass1Job.setOutputKeyClass(LongWritable.class);
        pass1Job.setOutputValueClass(Text.class);

        return runTimedJob(pass1Job, "Halvade pass 1 Job");
    } else
        return 0;
}