List of usage examples for org.apache.hadoop.mapreduce Job setJarByClass
public void setJarByClass(Class<?> cls)
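Before the third-party examples below, here is a minimal, self-contained sketch of where setJarByClass(Class<?>) fits in a typical driver: it tells Hadoop which jar to ship to the cluster by locating the jar that contains the given class. All names in this sketch (WordCountDriver, TokenizerMapper, SumReducer, the argument paths) are hypothetical placeholders and are not taken from any example on this page.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");

        // setJarByClass makes Hadoop ship the jar containing this class to the cluster;
        // without it, map/reduce tasks on remote nodes can fail with ClassNotFoundException.
        job.setJarByClass(WordCountDriver.class);

        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that any class packaged in the job jar works as the argument; the examples below variously pass the driver class, a mapper, or a reducer, and the effect is the same as long as the class lives in the jar that should be distributed.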
From source file:basic.PartitionGraph.java
License:Apache License
/** Runs this tool. */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(RANGE, "use range partitioner"));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions")
            .create(NUM_PARTITIONS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(NUM_PARTITIONS)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inPath = cmdline.getOptionValue(INPUT);
    String outPath = cmdline.getOptionValue(OUTPUT);
    int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS));
    boolean useRange = cmdline.hasOption(RANGE);

    LOG.info("Tool name: " + PartitionGraph.class.getSimpleName());
    LOG.info(" - input dir: " + inPath);
    LOG.info(" - output dir: " + outPath);
    LOG.info(" - num partitions: " + numParts);
    LOG.info(" - node cnt: " + nodeCount);
    LOG.info(" - use range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.setInt("NodeCount", nodeCount);

    Job job = Job.getInstance(conf);
    job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath);
    job.setJarByClass(PartitionGraph.class);

    job.setNumReduceTasks(numParts);

    FileInputFormat.setInputPaths(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);
    job.waitForCompletion(true);

    return 0;
}
From source file:bb.BranchAndBound.java
License:Apache License
static Job getJob(String input, String output, String dataDir, int iteration) throws Exception {
    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);

    // Remove empty input files before submitting the job.
    FileStatus[] fileStatus = hdfs.listStatus(new Path(input));
    for (int i = 0; i < fileStatus.length; ++i) {
        if (fileStatus[i].getLen() == 0) {
            hdfs.delete(fileStatus[i].getPath());
        }
    }

    DistributedCache.addCacheFile(new URI(dataDir + "/data"), conf);

    Job ret = new Job(conf, dataDir + "_iteration_" + iteration);
    ret.setJarByClass(BranchAndBound.class);
    ret.setMapperClass(BBMapper1.class);
    ret.setReducerClass(BBReducer.class);
    //ret.setReducerClass(MergeReducer.class);

    FileInputFormat.setInputPaths(ret, new Path(input));
    //if( iteration > 7 ) FileInputFormat.setMinInputSplitSize(ret, 67108864);
    FileOutputFormat.setOutputPath(ret, new Path(output));

    ret.setOutputKeyClass(NullWritable.class);
    ret.setOutputValueClass(Text.class);

    return ret;
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private boolean runAprioriOncPhaseOnce(FIMOptions opt, long nrLines, int i, String info, String outputDir,
        String cacheFile) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
    int prefixSize = opt.prefixLength;

    System.out.println("[AprioriPhase]: Phase: " + i + " input: " + opt.inputFile + ", output: " + opt.outputDir);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputDir), SplitByKTextInputFormat.class,
            AprioriPhaseMapper.class, Text.class, Text.class, AprioriPhaseReducer.class, Text.class,
            IntWritable.class, TextOutputFormat.class);

    job.setJobName(info);
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    if (nrLines != -1) {
        conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
    }

    if (cacheFile != null) {
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), conf);
    }

    runJob(job, info);

    if (prefixSize <= i
            && job.getCounters().findCounter(COUNTER_GROUPNAME, COUNTER_NRLARGEPREFIXGROUPS).getValue() == 0) {
        return false;
    }
    if (prefixSize < i) {
        System.out.println(
                "[AprioriPhase]: Prefix group length updated! Now " + (i) + " instead of " + prefixSize);
    }
    return true;
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startCreatePrefixGroups(FIMOptions opt, int phase)
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Path path = new Path(opt.outputDir + separator + "tg" + phase);
    for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;

        System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
                + ", cache: " + cacheFile);

        Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
                ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
                IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

        job.setJobName("Create Prefix Groups");
        job.setJarByClass(BigFIMDriver.class);
        job.setNumReduceTasks(1);

        Configuration conf = job.getConfiguration();
        setConfigurationValues(conf, opt);
        conf.setInt(PREFIX_LENGTH_KEY, phase);

        addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());

        runJob(job, "Prefix Creation");
    }
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startMining(FIMOptions opt) throws IOException, ClassNotFoundException, InterruptedException {
    String inputFilesDir = opt.outputDir + separator + "pg" + separator;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);

    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);

    job.setJobName("Start Mining");
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    List<Path> inputPaths = new ArrayList<Path>();
    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }

    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));

    runJob(job, "Mining");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the first MapReduce cycle. The transaction file is partitioned into a number of chunks that are handed
 * to different mappers. Each mapper reads a chunk and emits the items together with their partial tid-lists. The
 * reducer concatenates the partial tid-lists, discards the infrequent items, sorts the frequent ones by ascending
 * frequency and divides the singletons among the available mappers.
 *
 * This method generates three files: the frequent singletons (OSingletonsTids), the order file for singletons
 * based on ascending frequency (OSingletonsOrder) and the singletons distribution file (OSingletonsDistribution).
 *
 * @param outputFile
 * @param opt
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void readHorizontalDb(String outputFile, FIMOptions opt)
        throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("[ItemReading]: input: " + opt.inputFile + ", output: " + outputFile);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
            ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ItemReaderReducer.class,
            IntWritable.class, Writable.class, TextOutputFormat.class);

    job.setJobName("Read Singletons");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    addNamedOutput(job, OSingletonsDistribution, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsOrder, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsTids, SequenceFileOutputFormat.class, IntWritable.class,
            IntMatrixWritable.class);

    runJob(job, "Item Reading");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the second MapReduce cycle. Each mapper gets a list of singletons from which it should start building
 * X-FIs, and uses Eclat to quickly compute the list of X-FIs. The complete set of X-FIs is collected by the
 * reducer, which then divides it into independent sets. All sets that have been computed from level 1 to X are
 * already reported. The distribution of seeds is obtained by some allocation scheme, e.g., Round-Robin,
 * Lowest-Frequency, ...
 *
 * This method generates three files: the frequent itemsets from level 1 to X (OFises), the prefix groups
 * (OPrefixGroups) and the prefix distribution file (OPrefixDistribution).
 *
 * @param inputDir
 * @param outputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startPrefixComputation(String inputDir, String outputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

    String inputFile = inputDir + separator + OSingletonsDistribution + rExt;
    String singletonsOrderFile = inputDir + separator + OSingletonsOrder + rExt;
    String singletonsTidsFile = inputDir + separator + OSingletonsTids + rExt;

    System.out.println("[PrefixComputation]: input: " + inputFile);

    Job job = prepareJob(new Path(inputFile), new Path(outputDir), NLineInputFormat.class,
            PrefixComputerMapper.class, Text.class, IntMatrixWritable.class, PrefixComputerReducer.class,
            IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

    job.setJobName("Compute Prefixes");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    addCacheFile(new URI(singletonsOrderFile.replace(" ", "%20")), job.getConfiguration());
    addCacheFile(new URI(singletonsTidsFile.replace(" ", "%20")), job.getConfiguration());

    runJob(job, "Partition Prefixes");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the third MapReduce cycle. Each mapper reads the prefix groups assigned to it and computes the
 * collection of closed sets. All information is reported to the reducer, which finally writes the output to
 * disk.
 *
 * @param inputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startMining(String inputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

    String inputFilesDir = inputDir;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);

    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);

    job.setJobName("Start Mining");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    List<Path> inputPaths = new ArrayList<Path>();
    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }

    if (inputPaths.isEmpty()) {
        System.out.println("[StartMining]: No prefixes to extend further");
        return;
    }

    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));

    runJob(job, "Mining");
}
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
@SuppressWarnings("rawtypes") public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? extends OutputFormat> outputFormat) throws IOException { Job job = new Job(new Configuration()); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }//from ww w . j a v a 2 s. c o m job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, true,
            halvadeOpts.useBamInput);

    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);

    FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
            // add every file in directory
            FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    FileInputFormat.addInputPath(pass1Job, file.getPath());
                }
            }
        } else {
            FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }

    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    boolean skipPass1 = false;
    if (outFs.exists(new Path(tmpOutDir))) {
        // check if genome already exists
        skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
        if (skipPass1) {
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
        } else {
            Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
            Logger.INFO("ERROR: Please remove this directory before trying again.");
            System.exit(-2);
        }
    }

    if (!skipPass1) {
        FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
        pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);

        pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
        pass1Job.setMapOutputKeyClass(GenomeSJ.class);
        pass1Job.setMapOutputValueClass(Text.class);

        pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
        pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
        pass1Job.setNumReduceTasks(1);
        pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
        pass1Job.setOutputKeyClass(LongWritable.class);
        pass1Job.setOutputValueClass(Text.class);

        return runTimedJob(pass1Job, "Halvade pass 1 Job");
    } else {
        return 0;
    }
}