Usage examples for org.apache.hadoop.mapreduce.Job.setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
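The class passed to setCombinerClass must be a Reducer whose input and output key/value types both match the job's map output types, because the combiner runs on map-side intermediate data before the shuffle. Before the project-specific examples below, here is a minimal, self-contained sketch of the typical pattern; the class name WordCountDriver is illustrative, and the mapper/reducer bodies follow the standard word-count example rather than any source file listed here.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    // Emits (word, 1) for every token in an input line.
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts for a word; associative and commutative, so it is safe
    // to reuse as both combiner and reducer.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(TokenizerMapper.class);
        // The combiner's input/output types must match the map output types
        // (Text, IntWritable); IntSumReducer satisfies this.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Because Hadoop may invoke the combiner zero, one, or several times per map output partition, only associative and commutative reductions (such as summing counts or merging vectors, as in the examples below) are safe to reuse as combiners.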
From source file:com.sanjay.mapreduce.SiCombiner.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);/*from w w w . j av a2s . c om*/ } Job job = new Job(conf, "word count"); job.setJarByClass(SiCombiner.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setPartitionerClass(WordPartitioner.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setNumReduceTasks(5); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.scaleoutsoftware.soss.hserver.examples.NamedMapWordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: wordcount <input map> <output map> <threshold>");
        System.exit(2);
    }
    final int threshold = new Integer(otherArgs[2]);

    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap(otherArgs[0],
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap(otherArgs[1],
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addJar("wordcount.jar").load();

    // Create hServer job
    Job job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(NamedMapWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Set named maps as input and output
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute job
    job.waitForCompletion(true);

    // Assign invocation grid to the map, so parallel operation can be performed
    outputMap.setInvocationGrid(grid);

    // Run query to find words that are used more than threshold frequency
    Iterable<Text> words = outputMap.executeParallelQuery(new UsageFrequencyCondition(threshold));

    // Unload the invocation grid
    grid.unload();

    // Output resulting words and their frequencies
    System.out.println("Following words were used more than " + threshold + " times:");
    for (Text word : words) {
        System.out.println("\"" + word.toString() + "\" was used " + outputMap.get(word) + " times.");
    }
}
From source file:com.siwind.routingloop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);// w ww. j a v a 2s . c om } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } Path outputpath = new Path(otherArgs[otherArgs.length - 1]); FileSystem.get(conf).delete(outputpath, true); FileOutputFormat.setOutputPath(job, outputpath); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("lambda", null, "regularization parameter", true);
    addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
    addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
    addOption("numFeatures", null, "dimension of the feature space", true);
    addOption("numIterations", null, "number of iterations", true);
    addOption("numUsers", null, "number of users", true);
    addOption("numItems", null, "number of items", true);
    addOption("blockSize", null, "dfs block size.", false);
    //addOption("runIterations", null, "true or false for iterations", true);
    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }
    numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures"));
    numIterations = Integer.parseInt(parsedArgs.get("--numIterations"));
    lambda = Double.parseDouble(parsedArgs.get("--lambda"));
    alpha = Double.parseDouble(parsedArgs.get("--alpha"));
    implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback"));
    numUsers = Integer.parseInt(parsedArgs.get("--numUsers"));
    numItems = Integer.parseInt(parsedArgs.get("--numItems"));
    dfsBlockSize = getOption("blockSize") == null ? 64 * 1024 * 1024 : Long.parseLong(getOption("blockSize"));

    /*
     * compute the factorization A = U M'
     *
     * where A (users x items) is the matrix of known ratings
     *       U (users x features) is the representation of users in the feature space
     *       M (items x features) is the representation of items in the feature space
     */

    /* create A' */
    Job itemRatings = prepareJob(getInputPath(), pathToItemRatings(), TextInputFormat.class,
            ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    itemRatings.setCombinerClass(VectorSumReducer.class);
    itemRatings.waitForCompletion(true);
    //numItems =
    //    (int) itemRatings.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
    log.info("Number of Items\t{}", numItems);

    /* create A */
    Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class,
            IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class,
            VectorWritable.class);
    userRatings.setCombinerClass(MergeVectorsCombiner.class);
    userRatings.waitForCompletion(true);
    //numUsers =
    //    (int) userRatings.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
    log.info("Number of Users\t{}", numUsers);

    /* count item per user */
    Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnts"),
            SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class, IntWritable.class,
            UserItemCntsReducer.class, IntWritable.class, IntWritable.class, SequenceFileOutputFormat.class);
    userItemCntsJob.setJobName("user ratings count");
    userItemCntsJob.setCombinerClass(UserItemCntsReducer.class);
    userItemCntsJob.waitForCompletion(true);

    //TODO this could be fiddled into one of the upper jobs
    Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
            AverageRatingMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
            IntWritable.class, VectorWritable.class);
    averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
    averageItemRatings.waitForCompletion(true);

    Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf());

    /* create an initial M */
    initializeM(averageRatings);

    for (int currentIteration = 0; currentIteration < numIterations; currentIteration++) {
        DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1),
                getTempPath("Mtemp" + String.valueOf(currentIteration - 1)), numItems, numFeatures);
        curM.setConf(new Configuration());
        DistributedRowMatrix YtransposeY = curM.times(curM);

        // broadcast M, read A row-wise, recompute U row-wise
        //log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
        runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1),
                YtransposeY.getRowPath(), numItems);

        DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration),
                getTempPath("Utmp" + String.valueOf(currentIteration)), numUsers, numFeatures);
        curU.setConf(new Configuration());
        DistributedRowMatrix XtransposeX = curU.times(curU);

        // set up index of U
        //CreateMapFileFromSeq.createMapFile(pathToU(currentIteration));

        // broadcast U, read A' row-wise, recompute M row-wise
        //log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
        runDistributedImplicitSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration),
                XtransposeX.getRowPath(), numUsers);
    }
    return 0;
}
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("lambda", null, "regularization parameter", true);
    addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
    addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
    addOption("numFeatures", null, "dimension of the feature space", true);
    addOption("numIterations", null, "number of iterations", true);
    addOption("indexSizes", null, "index sizes Path", true);
    addOption("startIteration", null, "start iteration number", String.valueOf(0));
    addOption("oldM", null, "old M matrix Path.", null);
    addOption("largeUserFeatures", null, "true if user x feature matrix is too large for memory",
            String.valueOf(true));
    addOption("rmseCurve", null, "true if want to extract rmse curve", String.valueOf(true));
    addOption("cleanUp", null, "true if want to clean up temporary matrix", String.valueOf(true));
    addOption("useTransform", null, "true if using logarithm as transform", String.valueOf(true));
    addOption("rateIndex", null, "0 based index for rate column in input file.", String.valueOf(2));
    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    try {
        /** step 0: fetch dimensions of training set matrix. */
        Map<String, String> indexSizesTmp = ALSMatrixUtil.fetchTextFiles(new Path(getOption("indexSizes")),
                DELIMETER, Arrays.asList(0), Arrays.asList(1));
        numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures"));
        numIterations = Integer.parseInt(parsedArgs.get("--numIterations"));
        lambda = Double.parseDouble(parsedArgs.get("--lambda"));
        alpha = Double.parseDouble(parsedArgs.get("--alpha"));
        implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback"));
        numUsers = Integer.parseInt(indexSizesTmp.get("0"));
        numItems = Integer.parseInt(indexSizesTmp.get("1"));
        numTaskTrackers = HadoopClusterUtil.getNumberOfTaskTrackers(getConf()) * multiplyMapTasks;
        startIteration = Integer.parseInt(parsedArgs.get("--startIteration"));
        largeUserFeatures = Boolean.parseBoolean(getOption("largeUserFeatures"));
        useRMSECurve = Boolean.parseBoolean(getOption("rmseCurve"));
        cleanUp = Boolean.parseBoolean(getOption("cleanUp"));
        useTransform = Boolean.parseBoolean(getOption("useTransform"));
        rateIndex = Integer.parseInt(getOption("rateIndex"));

        FileSystem fs = FileSystem.get(getConf());
        if (!fs.exists(pathToTransformed())) {
            if (useTransform) {
                // transform price into rating
                Job transformJob = prepareJob(getInputPath(), pathToTransformed(), TextInputFormat.class,
                        TransformColumnValueMapper.class, NullWritable.class, Text.class,
                        TextOutputFormat.class);
                transformJob.waitForCompletion(true);
            } else {
                FileUtil.copy(FileSystem.get(getConf()), getInputPath(), FileSystem.get(getConf()),
                        pathToTransformed(), false, getConf());
            }
        }
        /*
        if (getOption("oldM") != null) {
            runOnetimeSolver(pathToTransformed(), getOutputPath("U"), new Path(getOption("oldM")));
            return 0;
        }
        */

        /*
         * compute the factorization A = U M'
         *
         * where A (users x items) is the matrix of known ratings
         *       U (users x features) is the representation of users in the feature space
         *       M (items x features) is the representation of items in the feature space
         */
        if (startIteration == 0) {
            if (!fs.exists(pathToItemRatings())) {
                // create A'
                Job itemRatings = prepareJob(pathToTransformed(), pathToItemRatings(), TextInputFormat.class,
                        ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class,
                        VectorSumReducer.class, IntWritable.class, VectorWritable.class,
                        SequenceFileOutputFormat.class);
                itemRatings.setCombinerClass(VectorSumReducer.class);

                long matrixSizeExp = (long) (8L * numUsers * numFeatures * SAFE_MARGIN);
                long memoryThreshold = HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT
                        / (long) HadoopClusterUtil.MAP_TASKS_PER_NODE;
                int numTaskPerDataNode = Math.max(1,
                        (int) (HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / (double) matrixSizeExp));
                //log.info("matrix Size: " + matrixSizeExp + ", memoryThreshold: " + memoryThreshold + ", numTaskPerDataNode: " + numTaskPerDataNode);
                if (matrixSizeExp > memoryThreshold) {
                    //log.info("A: {}", numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
                    int numReducer = Math.min(
                            numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()),
                            HadoopClusterUtil.getMaxMapTasks(getConf()));
                    //log.info("Number Of Reducer: " + numReducer);
                    itemRatings.setNumReduceTasks(numReducer);
                }
                itemRatings.waitForCompletion(true);
            }

            if (!fs.exists(pathToUserRatings())) {
                Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class,
                        IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class,
                        VectorWritable.class);
                userRatings.setNumReduceTasks(HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
                userRatings.setCombinerClass(MergeVectorsCombiner.class);
                userRatings.setNumReduceTasks(HadoopClusterUtil.getMaxMapTasks(getConf()));
                userRatings.waitForCompletion(true);
            }

            if (!fs.exists(getOutputPath("userItemCnt"))) {
                // count items per user
                Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnt"),
                        SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class,
                        IntWritable.class, SequenceFileOutputFormat.class);
                userItemCntsJob.setJobName("user ratings count");
                userItemCntsJob.waitForCompletion(true);
            }

            if (!fs.exists(getTempPath("averageRatings"))) {
                //TODO this could be fiddled into one of the upper jobs
                Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
                        AverageRatingMapper.class, IntWritable.class, VectorWritable.class,
                        MergeVectorsReducer.class, IntWritable.class, VectorWritable.class);
                averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
                averageItemRatings.waitForCompletion(true);
            }

            if (!fs.exists(new Path(pathToM(-1), "part-m-00000"))) {
                Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf());
                /** create an initial M */
                initializeM(averageRatings);
            }
        }

        for (int currentIteration = startIteration; currentIteration < numIterations; currentIteration++) {
            DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1),
                    getTempPath("Mtemp/tmp-" + String.valueOf(currentIteration - 1) + "/M"), numItems,
                    numFeatures);
            curM.setConf(getConf());
            DistributedRowMatrix YtransposeY = curM.times(curM);

            /** broadcast M, read A row-wise, recompute U row-wise */
            log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
            runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1),
                    YtransposeY.getRowPath(), numItems, false);

            DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration),
                    getTempPath("Utmp/tmp-" + String.valueOf(currentIteration) + "/U"), numUsers, numFeatures);
            curU.setConf(getConf());
            DistributedRowMatrix XtransposeX = curU.times(curU);

            /** broadcast U, read A' row-wise, recompute M row-wise */
            log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
            runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration),
                    XtransposeX.getRowPath(), numUsers, largeUserFeatures);

            /** calculate rmse on each updated matrix U, M and decide whether to iterate further */
            if (currentIteration > startIteration && useRMSECurve) {
                Pair<Integer, Double> UsquaredError = calculateMatrixDistanceSquared(
                        pathToU(currentIteration - 1), pathToU(currentIteration), currentIteration);
                Pair<Integer, Double> MsquaredError = calculateMatrixDistanceSquared(
                        pathToM(currentIteration - 1), pathToM(currentIteration), currentIteration);
                String currentRMSE = currentIteration + DELIMETER + UsquaredError.getFirst() + DELIMETER
                        + UsquaredError.getSecond() + DELIMETER + MsquaredError.getFirst() + DELIMETER
                        + MsquaredError.getSecond() + DefaultOptionCreator.NEWLINE;
                rmsePerIteration += currentRMSE;
                log.info("iteration {}: {}", currentIteration, currentRMSE);
            }
            if (currentIteration >= startIteration + 2 && cleanUp) {
                fs.deleteOnExit(pathToU(currentIteration - 2));
                fs.deleteOnExit(pathToM(currentIteration - 2));
            }
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        if (useRMSECurve) {
            HadoopClusterUtil.writeToHdfs(getConf(), getOutputPath("RMSE"), rmsePerIteration);
        }
    }
}
From source file:com.sohu.rdc.inf.cdn.offline.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);/*from ww w. java 2 s . c om*/ } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setInputFormatClass(LzoTextInputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.springsource.insight.plugin.hadoop.WordCount.java
License:Open Source License
public int run(String[] args) throws Exception {
    String INPUT = "src/test/resources";
    String OUTPUT = "target/out";

    Configuration conf = new Configuration();
    File targetFolder = FileUtil.detectTargetFolder(getClass());
    if (targetFolder == null) {
        throw new IllegalStateException("Cannot detect target folder");
    }
    File tempFolder = new File(targetFolder, "temp");
    conf.set("hadoop.tmp.dir", tempFolder.getAbsolutePath());

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileUtils.deleteDirectory(new File(OUTPUT)); // delete old output data
    FileInputFormat.addInputPath(job, new Path(INPUT));
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT));

    return job.waitForCompletion(true) ? 0 : -1;
}
From source file:com.sreejith.loganalyzer.mapreduce.LogDriver.java
License:Apache License
public static void main(String[] args) throws Exception {
    Job job = new Job();
    job.setJarByClass(LogDriver.class);
    job.setJobName("Log Analyzer");

    job.setMapperClass(LogMapper.class);
    job.setPartitionerClass(LogPartitioner.class);
    job.setCombinerClass(LogReducer.class);
    job.setReducerClass(LogReducer.class);
    job.setNumReduceTasks(2);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}
From source file:com.studium.millionsong.mapreduce.CompleteToStripped.java
public static void main(String[] args) throws Exception {
    Path inputPath = new Path("/millionSong/complete.csv");
    Path outputPath = new Path("/millionSong/result/run1");

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "From complete to stripped dataset");

    // Job configuration:
    // 0. Set the jar which contains this class
    job.setJarByClass(CompleteToStripped.class);

    // 1. Which Mapper and Reducer should be used
    job.setMapperClass(CompleteStrippedMapper.class);
    job.setReducerClass(StrippedReducer.class);

    // 2. Which are the output datatypes of the mapper and reducer functions
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // 3. Set local combiner for data reduction
    job.setCombinerClass(StrippedReducer.class);

    // 4. Where are the input file(s)
    // The default FileInputFormat is TextInputFormat, so the correct
    // implementation is used automatically.
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.talis.labs.pagerank.mapreduce.CheckConvergence.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CheckConvergence <input path> <output path>");
        return -1;
    }
    FileSystem.get(getConf()).delete(new Path(args[1]), true);

    Job job = new Job(getConf(), "CheckConvergence");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(CheckConvergenceMapper.class);
    job.setCombinerClass(CheckConvergenceReducer.class);
    job.setReducerClass(CheckConvergenceReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}