Example usage for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getConfiguration.

Prototype

public Configuration getConfiguration() 

Document

Return the configuration for the job.
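
For orientation, here is a minimal sketch of the pattern the examples below all follow: obtain the job's Configuration through getConfiguration() and set job-specific properties on it before submission. The key my.app.phase is a hypothetical placeholder, not a Hadoop property.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        // Job.getInstance copies the incoming Configuration; getConfiguration()
        // returns the job's own copy, so changes here affect only this job.
        Job job = Job.getInstance(new Configuration(), "sketch");

        Configuration conf = job.getConfiguration();
        conf.setInt("my.app.phase", 1); // hypothetical application-specific key

        // ... set mapper, reducer, input and output here, then submit the job.
    }
}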

Usage

From source file: be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License: Apache License

private void startCreatePrefixGroups(FIMOptions opt, int phase)
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Path path = new Path(opt.outputDir + separator + "tg" + phase);
    for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;
        System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
                + ", cache: " + cacheFile);

        Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
                ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
                IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

        job.setJobName("Create Prefix Groups");
        job.setJarByClass(BigFIMDriver.class);
        job.setNumReduceTasks(1);

        Configuration conf = job.getConfiguration();
        setConfigurationValues(conf, opt);
        conf.setInt(PREFIX_LENGTH_KEY, phase);

        addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());

        runJob(job, "Prefix Creation");
    }
}
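
A note on the addCacheFile call above: it is a helper from the project's Tools class, not the Hadoop API. On Hadoop 2.x the same effect can be had directly on the job; a sketch, assuming the URI is already percent-encoded as in the code above:

    job.addCacheFile(new URI(cacheFile.replace(" ", "%20")));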

From source file: be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License: Apache License

private void startMining(FIMOptions opt) throws IOException, ClassNotFoundException, InterruptedException {
    String inputFilesDir = opt.outputDir + separator + "pg" + separator;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);

    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);

    job.setJobName("Start Mining");
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    List<Path> inputPaths = new ArrayList<Path>();

    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }

    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));

    runJob(job, "Mining");
}
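
Unlike the DistEclatDriver variant of startMining further down, this version does not guard against an empty glob result before calling setInputPaths. A defensive check, as a sketch, would be:

    if (listStatus == null || listStatus.length == 0) {
        System.out.println("[StartMining]: no bucket files found under " + inputFilesDir);
        return;
    }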

From source file: be.uantwerpen.adrem.disteclat.DistEclatDriver.java

License: Apache License

/**
 * Starts the first MapReduce cycle. First, the transaction file is partitioned into a number of chunks that are
 * handed to different mappers. Each mapper reads its chunk and returns the items together with their partial
 * tid-lists. The reducer concatenates the partial tid-lists, discards the infrequent items, sorts the frequent
 * ones by ascending frequency, and divides the singletons among the available mappers.
 *
 * This method generates three files, the frequent singletons (OSingletonsTids), the order file for singletons based
 * on ascending frequency (OSingletonsOrder) and the singletons distribution file (OSingletonsDistribution).
 * 
 * @param outputFile path of the output directory for this cycle
 * @param opt        the FIM options for this run
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void readHorizontalDb(String outputFile, FIMOptions opt)
        throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("[ItemReading]: input: " + opt.inputFile + ", output: " + outputFile);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
            ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ItemReaderReducer.class,
            IntWritable.class, Writable.class, TextOutputFormat.class);

    job.setJobName("Read Singletons");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    addNamedOutput(job, OSingletonsDistribution, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsOrder, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsTids, SequenceFileOutputFormat.class, IntWritable.class,
            IntMatrixWritable.class);

    runJob(job, "Item Reading");
}
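
addNamedOutput is again a project helper. Assuming it simply forwards to Hadoop's MultipleOutputs, the equivalent direct call would look like this (a sketch; OSingletonsTids is the project's own name constant):

    import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

    MultipleOutputs.addNamedOutput(job, OSingletonsTids, SequenceFileOutputFormat.class,
            IntWritable.class, IntMatrixWritable.class);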

From source file: be.uantwerpen.adrem.disteclat.DistEclatDriver.java

License: Apache License

/**
 * Starts the second MapReduce cycle. Each mapper gets a list of singletons from which it should start building
 * X-FIs, using Eclat to compute them quickly. The reducer collects the complete set of X-FIs and divides it into
 * independent sets. All sets that have been computed from level 1 to X are reported at this point. The
 * distribution of seeds is obtained by some allocation scheme, e.g., Round-Robin, Lowest-Frequency, ...
 * 
 * This method generates three files, the frequent itemsets from level 1 to X (OFises), the prefix groups
 * (OPrefixGroups) and the prefix distribution file (OPrefixDistribution).
 * 
 * @param inputDir  directory holding the output of the first cycle
 * @param outputDir directory to write the prefix computation output to
 * @param opt       the FIM options for this run
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startPrefixComputation(String inputDir, String outputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

    String inputFile = inputDir + separator + OSingletonsDistribution + rExt;
    String singletonsOrderFile = inputDir + separator + OSingletonsOrder + rExt;
    String singletonsTidsFile = inputDir + separator + OSingletonsTids + rExt;

    System.out.println("[PrefixComputation]: input: " + inputFile);

    Job job = prepareJob(new Path(inputFile), new Path(outputDir), NLineInputFormat.class,
            PrefixComputerMapper.class, Text.class, IntMatrixWritable.class, PrefixComputerReducer.class,
            IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

    job.setJobName("Compute Prefixes");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    addCacheFile(new URI(singletonsOrderFile.replace(" ", "%20")), job.getConfiguration());
    addCacheFile(new URI(singletonsTidsFile.replace(" ", "%20")), job.getConfiguration());

    runJob(job, "Partition Prefixes");
}

From source file: be.uantwerpen.adrem.disteclat.DistEclatDriver.java

License: Apache License

/**
 * Starts the third MapReduce cycle. Each mapper reads the prefix groups assigned to it and computes the collection of
 * closed sets. All information is reported to the reducer, which finally writes the output to disk.
 * 
 * @param inputDir directory containing the prefix groups to mine
 * @param opt      the FIM options for this run
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startMining(String inputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

    String inputFilesDir = inputDir;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);

    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);

    job.setJobName("Start Mining");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    List<Path> inputPaths = new ArrayList<Path>();

    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }

    if (inputPaths.isEmpty()) {
        System.out.println("[StartMining]: No prefixes to extend further");
        return;
    }

    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));

    runJob(job, "Mining");
}

From source file: be.uantwerpen.adrem.hadoop.util.Tools.java

License: Apache License

@SuppressWarnings("rawtypes")
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {
    Job job = new Job(new Configuration());

    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
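
prepareJob is built on the deprecated Job(Configuration) constructor and the old mapred.* property names. On Hadoop 2.x the following replacements behave the same; this is a sketch of the modern equivalents, not part of the original Tools class:

    Job job = Job.getInstance(new Configuration());
    Configuration jobConf = job.getConfiguration();

    // The mapred.* keys used above are deprecated aliases of these:
    jobConf.set("mapreduce.input.fileinputformat.inputdir", inputPath.toString());
    jobConf.setBoolean("mapreduce.map.output.compress", true);
    jobConf.set("mapreduce.output.fileoutputformat.outputdir", outputPath.toString());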

From source file: biglayer.AutoCoder.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
       System.out.println("args: " + Arrays.toString(args));
       HelpFormatter formatter = new HelpFormatter();
       formatter.setWidth(120);
       formatter.printHelp(this.getClass().getName(), options);
       ToolRunner.printGenericCommandUsage(System.out);
       return -1;
    }*/

    //String inputPath = cmdline.getOptionValue(INPUT);
    //String outputPath = cmdline.getOptionValue(OUTPUT);

    String inputPath = "qiwang321/MNIST-mingled-key/part*";
    String outputPath = "shangfu/layeroutput";

    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();

    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    Job job0 = Job.getInstance(conf);
    job0.setJobName(AutoCoder.class.getSimpleName());
    job0.setJarByClass(AutoCoder.class);
    job0.setNumReduceTasks(reduceTasks);

    job0.getConfiguration().setInt("layer_ind", 0);

    FileInputFormat.setInputPaths(job0, new Path(inputPath));
    FileOutputFormat.setOutputPath(job0, new Path(outputPath + "_0"));

    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setOutputFormatClass(SequenceFileOutputFormat.class);

    job0.setMapOutputKeyClass(PairOfInts.class);
    job0.setMapOutputValueClass(ModelNode.class);
    job0.setOutputKeyClass(PairOfInts.class);
    job0.setOutputValueClass(ModelNode.class);

    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);
    job0.setPartitionerClass(MyPartitioner.class);
    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath + "_0");
    FileSystem.get(getConf()).delete(outputDir, true);
    long startTime = System.currentTimeMillis();
    long codeStart = System.currentTimeMillis();
    double codeTimeSum = 0;
    job0.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    for (int iterations = 1; iterations < GlobalUtil.NUM_LAYER + 1; iterations++) {
        Job job1 = Job.getInstance(conf);
        job1.setJobName(AutoCoder.class.getSimpleName());
        job1.setJarByClass(AutoCoder.class);
        job1.setNumReduceTasks(reduceTasks);
        job1.getConfiguration().setInt("layer_ind", iterations);
        FileInputFormat.setInputPaths(job1, new Path(outputPath + "_" + (iterations - 1)));
        FileOutputFormat.setOutputPath(job1, new Path(outputPath + "_" + iterations + "_train"));

        LOG.info("Tool: " + AutoCoder.class.getSimpleName());
        LOG.info(" - input path: " + outputPath + "_" + (iterations - 1));
        LOG.info(" - output path: " + outputPath + "_" + iterations + "_train");
        LOG.info(" - number of reducers: " + reduceTasks);

        job1.setInputFormatClass(SequenceFileInputFormat.class);
        job1.setOutputFormatClass(SequenceFileOutputFormat.class);

        job1.setMapOutputKeyClass(PairOfInts.class);
        job1.setMapOutputValueClass(ModelNode.class);
        job1.setOutputKeyClass(PairOfInts.class);
        job1.setOutputValueClass(ModelNode.class);

        job1.setMapperClass(MyMapper.class);
        job1.setReducerClass(MyReducer_Train.class);
        job1.setPartitionerClass(MyPartitioner.class);
        // Delete the output directory if it exists already.
        outputDir = new Path(outputPath + "_" + iterations + "_train");
        FileSystem.get(getConf()).delete(outputDir, true);
        startTime = System.currentTimeMillis();
        job1.waitForCompletion(true);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

        Job job2 = Job.getInstance(conf);
        job2.setJobName(AutoCoder.class.getSimpleName());
        job2.setJarByClass(AutoCoder.class);
        job2.setNumReduceTasks(reduceTasks);
        job2.getConfiguration().setInt("layer_ind", iterations);
        FileInputFormat.setInputPaths(job2, new Path(outputPath + "_" + (iterations + "_train")));
        FileOutputFormat.setOutputPath(job2, new Path(outputPath + "_" + iterations));

        LOG.info("Tool: " + AutoCoder.class.getSimpleName());
        LOG.info(" - input path: " + outputPath + "_" + iterations + "_train");
        LOG.info(" - output path: " + outputPath + "_" + iterations);
        LOG.info(" - number of reducers: " + reduceTasks);

        job2.setInputFormatClass(SequenceFileInputFormat.class);
        job2.setOutputFormatClass(SequenceFileOutputFormat.class);

        job2.setMapOutputKeyClass(PairOfInts.class);
        job2.setMapOutputValueClass(ModelNode.class);
        job2.setOutputKeyClass(PairOfInts.class);
        job2.setOutputValueClass(ModelNode.class);

        job2.setMapperClass(MyMapper.class);
        job2.setReducerClass(MyReducer_GenData.class);
        job2.setPartitionerClass(MyPartitioner.class);
        // Delete the output directory if it exists already.
        outputDir = new Path(outputPath + "_" + iterations);
        FileSystem.get(getConf()).delete(outputDir, true);
        startTime = System.currentTimeMillis();
        job2.waitForCompletion(true);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    }

    LOG.info(" - input path: " + outputPath + "_" + GlobalUtil.NUM_LAYER);
    LOG.info(" - output path: " + outputPath);
    reduceTasks = 1;
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job_super = Job.getInstance(conf);
    job_super.setJobName(AutoCoder.class.getSimpleName());
    job_super.setJarByClass(AutoCoder.class);
    job_super.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job_super, new Path(outputPath + "_" + GlobalUtil.NUM_LAYER));
    FileOutputFormat.setOutputPath(job_super, new Path(outputPath));

    job_super.setInputFormatClass(SequenceFileInputFormat.class);
    job_super.setOutputFormatClass(SequenceFileOutputFormat.class);

    job_super.setMapOutputKeyClass(PairOfInts.class);
    job_super.setMapOutputValueClass(ModelNode.class);
    job_super.setOutputKeyClass(NullWritable.class);
    job_super.setOutputValueClass(NullWritable.class);

    job_super.setMapperClass(MyMapper_Super.class);
    job_super.setReducerClass(MyReducer_Super.class);
    job_super.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    job_super.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    Log.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds,  " + codeTimeSum
            + " seconds.");
    //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks);

    return 0;
}
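
The layer loop above works because Job.getInstance(conf) makes a copy of the passed-in Configuration: each of job0, job1 and job2 therefore sees its own layer_ind value through getConfiguration(), while the shared conf is left untouched. A minimal sketch of that behavior:

    Configuration shared = new Configuration();

    Job a = Job.getInstance(shared);
    a.getConfiguration().setInt("layer_ind", 0); // visible only to job a

    Job b = Job.getInstance(shared);
    b.getConfiguration().setInt("layer_ind", 1); // visible only to job b

    // shared.get("layer_ind") is still null here.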

From source file: bigmodel.AutoCoderLocal.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT) + "/part-r-00000";
    String outputPath = cmdline.getOptionValue(OUTPUT);
    String dataPath = cmdline.getOptionValue(INPUT) + "/common";
    //String inputPath = "/home/qiwang321/mapreduce-data/data/in-mingled1-5/part*";
    //String outputPath = "output";
    //String dataPath = "/home/qiwang321/mapreduce-data/data/in-mingled1-5/common";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoderLocal.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();
    initialParameters(conf);

    conf.set("dataPath", dataPath);

    Job job = Job.getInstance(conf);
    job.setJobName(AutoCoderLocal.class.getSimpleName());
    job.setJarByClass(AutoCoderLocal.class);
    // set the path of the information of k clusters in this iteration
    job.getConfiguration().set("sidepath", inputPath + "/side_output");
    job.setNumReduceTasks(reduceTasks);

    dataShuffle();

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setMinInputSplitSize(job, 1000 * 1024 * 1024);
    FileInputFormat.setMaxInputSplitSize(job, 1000 * 1024 * 1024);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(ModelNode.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SuperModel.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks);

    return 0;
}
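
One detail specific to this variant: pinning the minimum and maximum input split size to the same value (here 1000 * 1024 * 1024 bytes) forces FileInputFormat to cut splits of exactly that size for splittable inputs, which effectively fixes the number of mappers for a given input. The same idea, written with a long literal to avoid any risk of int overflow:

    FileInputFormat.setMinInputSplitSize(job, 1000L * 1024 * 1024);
    FileInputFormat.setMaxInputSplitSize(job, 1000L * 1024 * 1024);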

From source file: boostingPL.driver.AdaBoostPLDriver.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    int status = commandAnalysis(args);
    if (status != 0) {
        return status;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    job.setJobName("AdaBoostPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " "
            + numLinesPerMap + " " + numIterations);
    job.setJarByClass(AdaBoostPLDriver.class);

    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dataPath);
    NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap);

    if (runModel.equals("train")) {
        job.setMapperClass(AdaBoostPLMapper.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ClassifierWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(ClassifierWritable.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, modelPath);
    } else {
        job.setMapperClass(AdaBoostPLTestMapper.class);
        job.setReducerClass(AdaBoostPLTestReducer.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
    }

    Configuration conf = job.getConfiguration();
    conf.set("BoostingPL.boostingName", "AdaBoost");
    conf.set("BoostingPL.numIterations", String.valueOf(numIterations));
    conf.set("BoostingPL.modelPath", modelPath.toString());
    if (metadataPath == null) {
        conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata");
    } else {
        conf.set("BoostingPL.metadata", metadataPath.toString());
    }
    if (outputFolder != null) {
        conf.set("BoostingPL.outputFolder", outputFolder.toString());
    }

    LOG.info(StringUtils.arrayToString(args));
    return job.waitForCompletion(true) ? 0 : -1;
}
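
The numeric options above are stored with String.valueOf, which works but is unnecessary: Configuration has typed accessors. A sketch with the same project-specific key:

    conf.setInt("BoostingPL.numIterations", numIterations);
    // and on the task side:
    int iters = context.getConfiguration().getInt("BoostingPL.numIterations", 10);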

From source file: boostingPL.driver.SAMMEPLDriver.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    int status = commandAnalysis(args);
    if (status != 0) {
        return status;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    job.setJobName("SAMMEPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " "
            + numLinesPerMap + " " + numIterations);
    job.setJarByClass(SAMMEPLDriver.class);

    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dataPath);
    NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap);
    FileSystem fs = modelPath.getFileSystem(getConf());
    if (fs.exists(modelPath)) {
        fs.delete(modelPath, true);
    }
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, modelPath);

    if (runModel.equals("train")) {
        job.setMapperClass(AdaBoostPLMapper.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ClassifierWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(ClassifierWritable.class);
    } else {
        job.setMapperClass(AdaBoostPLTestMapper.class);
        job.setReducerClass(AdaBoostPLTestReducer.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
    }

    Configuration conf = job.getConfiguration();
    conf.set("BoostingPL.boostingName", "SAMME");
    conf.set("BoostingPL.numIterations", String.valueOf(numIterations));
    conf.set("BoostingPL.modelPath", modelPath.toString());
    if (metadataPath == null) {
        conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata");
    } else {
        conf.set("BoostingPL.metadata", metadataPath.toString());
    }
    if (outputFolder != null) {
        conf.set("BoostingPL.outputFolder", outputFolder.toString());
    }

    LOG.info(StringUtils.arrayToString(args));
    return job.waitForCompletion(true) ? 0 : -1;
}