Usage examples for org.apache.hadoop.mapred.JobConf.setInputFormat
public void setInputFormat(Class<? extends InputFormat> theClass)
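setInputFormat selects the InputFormat implementation that the old (org.apache.hadoop.mapred) API uses to split and read the job's input, and it must be set on the JobConf before the job is submitted. As a minimal sketch of the call pattern, assuming hypothetical "in" and "out" paths and an identity map-only job:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;

public class SetInputFormatSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetInputFormatSketch.class);
        conf.setJobName("SetInputFormatSketch");

        // setInputFormat chooses how input is split and read;
        // TextInputFormat yields <LongWritable offset, Text line> records.
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        conf.setMapperClass(IdentityMapper.class);
        conf.setNumReduceTasks(0); // map-only job

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Hypothetical paths, for illustration only.
        FileInputFormat.setInputPaths(conf, new Path("in"));
        FileOutputFormat.setOutputPath(conf, new Path("out"));

        JobClient.runJob(conf);
    }
}

The full examples below follow the same shape: configure the JobConf, call setInputFormat (and usually setOutputFormat), then submit via JobClient.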
From source file:edu.umd.cloud9.webgraph.driver.BuildIndexableAnchorCollection.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf());
    FileSystem fs = FileSystem.get(conf);

    String collectionPath = DriverUtil.argValue(args, DriverUtil.CL_INPUT);
    String outputPath = DriverUtil.argValue(args, DriverUtil.CL_OUTPUT);
    String docnoMappingClass = DriverUtil.argValue(args, DriverUtil.CL_DOCNO_MAPPING_CLASS);
    String docnoMapping = DriverUtil.argValue(args, DriverUtil.CL_DOCNO_MAPPING);
    int numReducers = Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_NUMBER_OF_REDUCERS));

    if (DriverUtil.argExists(args, DriverUtil.CL_MAX_LENGTH)) {
        conf.setInt("Cloud9.maxContentLength",
                Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_MAX_LENGTH)));
    }
    conf.set("Cloud9.DocnoMappingClass", docnoMappingClass);

    LOG.info("Tool name: BuildAnchorTextForwardIndex");
    LOG.info(" - collection path: " + collectionPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - docno-mapping class: " + docnoMappingClass);
    LOG.info(" - docno-mapping file: " + docnoMapping);
    if (args.length == 6) {
        LOG.info(" - maximum content length: " + conf.getInt("Cloud9.maxContentLength", 0));
    }

    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setJobName("BuildIndexableAnchorCollection");
    conf.setJarByClass(BuildIndexableAnchorCollection.class);
    conf.setNumMapTasks(100);
    conf.setNumReduceTasks(numReducers);
    DistributedCache.addCacheFile(new URI(docnoMapping), conf);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
    SequenceFileInputFormat.setInputPaths(conf, new Path(collectionPath));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IndexableAnchorText.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(outputPath), true);

    RunningJob job = JobClient.runJob(conf);
    return 0;
}
From source file:edu.umd.cloud9.webgraph.driver.ClueWebAnchorTextForwardIndexHttpServer.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 3) {
        System.out.println("usage: [index-file] [docno-mapping-data-files] [clue-forward-index-root]");
        // [clue-forward-index-root: /shared/ClueWeb09/collection.compressed.block/
        System.exit(-1);
    }

    String indexFile = otherArgs[0];
    String mappingFile = otherArgs[1];
    String clueIndexRoot = otherArgs[2].endsWith("/") ? otherArgs[2] : otherArgs[2] + "/";

    String cluewebForwardIndex = "";
    for (int i = 1; i < 10; i++)
        cluewebForwardIndex += clueIndexRoot + "findex.en.0" + i + ".dat" + SEPARATOR + " ";
    cluewebForwardIndex += clueIndexRoot + "findex.en.10.dat";

    LOG.info("Launching DocumentForwardIndexHttpServer");
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - docno mapping data file: " + mappingFile);
    LOG.info(" - ClueWeb09 index root:" + clueIndexRoot);

    FileSystem fs = FileSystem.get(conf);

    Random rand = new Random();
    int r = rand.nextInt();

    // This tmp file serves as a rendezvous point.
    Path tmpPath = new Path("/tmp/" + r);
    if (fs.exists(tmpPath)) {
        fs.delete(tmpPath, true);
    }

    JobConf job = new JobConf(conf, ClueWebAnchorTextForwardIndexHttpServer.class);
    job.setJobName("ForwardIndexServer:" + indexFile);
    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setNumMapTasks(1);
    job.setNumReduceTasks(0);

    job.setInputFormat(NullInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(ServerMapper.class);

    job.set("IndexFile", indexFile);
    job.set("DocnoMappingDataFile", mappingFile);
    job.set("TmpPath", tmpPath.toString());
    job.set("ClueWebIndexFiles", cluewebForwardIndex);

    JobClient client = new JobClient(job);
    client.submitJob(job);

    LOG.info("Waiting for server to start up...");
    while (!fs.exists(tmpPath)) {
        Thread.sleep(50000);
        LOG.info("...");
    }

    FSDataInputStream in = fs.open(tmpPath);
    String host = in.readUTF();
    in.close();

    LOG.info("host: " + host);
    LOG.info("port: 8888");
}
From source file:edu.umd.cloud9.webgraph.driver.GenerateTabDelimitedWebGraph.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 4) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf(), GenerateTabDelimitedWebGraph.class);
    FileSystem fs = FileSystem.get(conf);

    String inPath = DriverUtil.argValue(args, "-webgraph") + "/" + DriverUtil.OUTPUT_WEBGRAPH;
    String outPath = DriverUtil.argValue(args, "-output");

    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);

    if (fs.exists(outputPath)) {
        fs.delete(outputPath);
    }

    conf.setJobName("TabDelimWebGraph");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    conf.set("mapreduce.task.timeout", "60000000");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(MyMapper.class);

    JobClient.runJob(conf);
    return 0;
}
From source file:edu.umd.cloud9.webgraph.driver.SortWebGraph.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf(), SortWebGraph.class);
    FileSystem fs = FileSystem.get(conf);

    String inputPath = args[0];
    String outputPath = args[1];
    int numberOfDocuments = Integer.parseInt(args[2]);
    int numMappers = 1;
    int numReducers = Integer.parseInt(args[3]);

    conf.setJobName("SortWebGraph");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    conf.set("mapreduce.task.timeout", "60000000");

    if (numberOfDocuments == 0) {
        numberOfDocuments = DEFAULT_NUMBER_OF_DOCUMENTS;
    }
    conf.setInt("Cloud9.NumberOfDocuments", numberOfDocuments);

    conf.setNumMapTasks(numMappers);
    conf.setNumReduceTasks(numReducers);
    conf.setMapperClass(IdentityMapper.class);
    conf.setPartitionerClass(Partition.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(ArrayListWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("SortAnchorText");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of documents: "
            + conf.getInt("Cloud9.NumberOfDocuments", DEFAULT_NUMBER_OF_DOCUMENTS));

    fs.delete(new Path(outputPath));
    JobClient.runJob(conf);
    return 0;
}
From source file:edu.umd.cloud9.webgraph.ExtractLinks.java
License:Apache License
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), ExtractLinks.class);
    FileSystem fs = FileSystem.get(conf);

    int numMappers = conf.getInt("Cloud9.Mappers", 1);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");
    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    if (!fs.exists(new Path(mappingFile)))
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");

    DistributedCache.addCacheFile(new URI(mappingFile), conf);

    conf.setJobName("ExtractLinks");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);

    conf.setNumMapTasks(numMappers);
    conf.setNumReduceTasks(numReducers);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ArrayListWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    if (!fs.exists(new Path(outputPath))) {
        JobClient.runJob(conf);
    } else {
        LOG.info(outputPath + " already exists! Skipping this step...");
    }

    return 0;
}
From source file:edu.umn.cs.spatialHadoop.nasa.DistributedAggregateSpatioTemporalIndexer.java
License:Open Source License
/**
 * Builds a bunch of AggregateQuadTrees using a MapReduce job.
 *
 * @param inputPathsDictionaryPath
 * @param params
 * @throws IOException
 */
public static void aggregateQuadTreeMapReduce(Path inputPathsDictionaryPath, OperationsParams params)
        throws IOException {
    // Configure a map-reduce job.
    JobConf job = new JobConf(params, DistributedAggregateSpatioTemporalIndexer.class);

    Path outputPath;
    String outputPathPrefix = "aggQuadTree_";
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(outputPathPrefix + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("AggregateQuadTree");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapperClass(AggregateQuadTreeMaper.class);
    job.set(HDFSIndexPath, hdfsIndexPath.toString());

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    TextInputFormat.setInputPaths(job, inputPathsDictionaryPath);
    TextOutputFormat.setOutputPath(job, outputPath);

    if (job.getBoolean("local", false)) {
        // Enforce local execution if explicitly set by the user or for small files.
        job.set("mapred.job.tracker", "local");
        // Use multithreading too.
        job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, 16);
    }
    job.setNumReduceTasks(0);

    // Submit the job.
    JobClient.runJob(job);

    outFs.delete(outputPath, true);
}
From source file:edu.umn.cs.spatialHadoop.operations.Aggregate.java
License:Open Source License
/**
 * Computes the minimum and maximum values over a set of files by issuing a
 * MapReduce job that does the aggregation.
 * @param files
 * @param params
 * @return
 * @throws IOException
 */
public static MinMax aggregateMapReduce(Path[] files, OperationsParams params) throws IOException {
    Shape plotRange = params.getShape("rect");

    JobConf job = new JobConf(params, Aggregate.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path("agg_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("Aggregate");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(MinMax.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeInputFormat.class);
    job.setClass("shape", NASAPoint.class, Shape.class);
    if (plotRange != null) {
        job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    }
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, files);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job.
    JobClient.runJob(job);

    // Read the job result.
    FileStatus[] results = outFs.listStatus(outputPath);
    MinMax minMax = new MinMax();
    for (FileStatus status : results) {
        if (status.getLen() > 0 && status.getPath().getName().startsWith("part-")) {
            BufferedReader reader = new BufferedReader(new InputStreamReader(outFs.open(status.getPath())));
            String line;
            MinMax value = new MinMax();
            while ((line = reader.readLine()) != null) {
                value.fromText(new Text(line));
                minMax.expand(value);
            }
            reader.close();
        }
    }

    outFs.delete(outputPath, true);
    return minMax;
}
From source file:edu.umn.cs.spatialHadoop.operations.CatUnion.java
License:Open Source License
/**
 * Calculates the union of a set of shapes categorized by some user-defined
 * category.
 * @param shapeFile - Input file that contains shapes
 * @param categoryFile - Category file that contains the category of each
 * shape. Shapes not appearing in this file are not generated in output.
 * @param output - An output file that contains each category and the union
 * of all shapes in it. Each line contains the category, then a comma,
 * then the union represented as text.
 * @throws IOException
 */
public static void unionMapReduce(Path shapeFile, Path categoryFile, Path output, OperationsParams params)
        throws IOException {
    JobConf job = new JobConf(params, CatUnion.class);
    job.setJobName("Union");

    // Check output file existence.
    FileSystem outFs = output.getFileSystem(job);
    if (outFs.exists(output)) {
        if (params.getBoolean("overwrite", false)) {
            outFs.delete(output, true);
        } else {
            throw new RuntimeException("Output path already exists and -overwrite flag is not set");
        }
    }

    // Set map and reduce.
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks() * 9 / 10));

    job.setMapperClass(UnionMapper.class);
    job.setCombinerClass(UnionReducer.class);
    job.setReducerClass(UnionReducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Set input and output.
    job.setInputFormat(ShapeLineInputFormat.class);
    TextInputFormat.addInputPath(job, shapeFile);
    DistributedCache.addCacheFile(categoryFile.toUri(), job);

    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, output);

    // Start the job.
    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.operations.ClosestPairHadoop.java
License:Open Source License
/**
 * Computes the closest pair of points in a file by issuing two rounds of
 * MapReduce jobs.
 * @param file
 * @param params
 * @throws IOException
 */
public static <S extends Shape> void cloesetPair(Path file, OperationsParams params) throws IOException {
    // Try to get file MBR from the MBRs of blocks.
    JobConf job = new JobConf(params, ClosestPairHadoop.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(file.getName() + ".closest_pair_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));
    outFs.delete(outputPath, true);

    job.setJobName("ClosestPair");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Point.class);

    job.setMapperClass(Map0.class);
    job.setReducerClass(Reduce0.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeArrayInputFormat.class);
    // job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.setInputPaths(job, file);

    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the first-round job.
    JobClient.runJob(job);

    //////////////////////////////////////////////////////////////////////////

    System.out.println("Begin second round!");
    // Second round.
    job = new JobConf(params, ClosestPairHadoop.class);
    job.setJobName("Second Round");
    job.setOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Point.class);

    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);

    clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeArrayInputFormat.class);
    // job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.setInputPaths(job, outputPath); // The previous output is the current input.

    Path newPath = new Path(outputPath.getName() + "_result");
    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, newPath);

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.operations.Contains.java
License:Open Source License
public static <S extends Shape> long contains(Path[] inFiles, Path userOutputPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Contains.class);

    LOG.info("Contains journey starts ....");
    FileSystem inFs = inFiles[0].getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        FileSystem outFs = FileSystem.get(job);
        do {
            outputPath = new Path(inFiles[0].getName() + ".sjmr_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    }
    FileSystem outFs = outputPath.getFileSystem(job);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setJobName("Within");
    job.setMapperClass(ContainsMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IndexedText.class);
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setLong("mapred.min.split.size",
            Math.max(inFs.getFileStatus(inFiles[0]).getBlockSize(),
                    inFs.getFileStatus(inFiles[1]).getBlockSize()));

    job.setReducerClass(ContainsReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    job.setInputFormat(ShapeLineInputFormat.class);
    if (job.getBoolean("output", true))
        job.setOutputFormat(TextOutputFormat.class);
    else
        job.setOutputFormat(NullOutputFormat.class);
    ShapeLineInputFormat.setInputPaths(job, inFiles);

    // Calculate and set the dimensions of the grid to use in the map phase.
    long total_size = 0;
    Rectangle mbr = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
    for (Path file : inFiles) {
        FileSystem fs = file.getFileSystem(params);
        Rectangle file_mbr = FileMBR.fileMBR(file, params);
        mbr.expand(file_mbr);
        total_size += FileUtil.getPathSize(fs, file);
    }
    // If the largest file is globally indexed, use its partitions.
    total_size += total_size * job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.2f);
    int sjmrPartitioningGridFactor = params.getInt(PartitioiningFactor, 20);
    int num_cells = (int) Math.max(1,
            total_size * sjmrPartitioningGridFactor / outFs.getDefaultBlockSize(outputPath));
    LOG.info("Number of cells is configured to be " + num_cells);

    OperationsParams.setInactiveModeFlag(job, InactiveMode, isReduceInactive);
    OperationsParams.setJoiningThresholdPerOnce(job, JoiningThresholdPerOnce, joiningThresholdPerOnce);
    OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly);

    GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
    gridInfo.calculateCellDimensions(num_cells);
    OperationsParams.setShape(job, PartitionGrid, gridInfo);

    TextOutputFormat.setOutputPath(job, outputPath);
    if (OperationsParams.isLocal(job, inFiles)) {
        // Enforce local execution if explicitly set by the user or for small files.
        job.set("mapred.job.tracker", "local");
    }

    // Start the job.
    RunningJob runningJob = JobClient.runJob(job);
    Counters counters = runningJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();

    return resultCount;
}