Example usage for org.apache.hadoop.mapreduce.filecache DistributedCache addCacheFile

List of usage examples for org.apache.hadoop.mapreduce.filecache DistributedCache addCacheFile

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.filecache.DistributedCache.addCacheFile drawn from open-source projects.

Prototype

@Deprecated
public static void addCacheFile(URI uri, Configuration conf) 

Document

Add a file to be localized to the conf.
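
Before the project examples, here is a minimal, self-contained sketch of the call. The path, symlink name, and job name are hypothetical, not taken from the examples below; the optional "#name" fragment on the URI names the symlink under which the file appears in each task's working directory once symlinks are enabled.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;

public class CacheFileExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Register an HDFS file; "#lookup.dat" becomes the symlink name in the task directory.
        DistributedCache.addCacheFile(new URI("/user/demo/lookup.dat#lookup.dat"), conf);
        DistributedCache.createSymlink(conf);
        // Build the job from the configuration that now carries the cache entry.
        Job job = new Job(conf, "cache-file-example");
        // ... set mapper/reducer, input/output paths, then job.waitForCompletion(true) ...
    }
}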

Usage

From source file:bdss.cmu.edu.Sort.java

License:Apache License

/**
 * The main driver for sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    // Set user-supplied (possibly default) job configs
    job = new Job(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    job.setNumReduceTasks(num_reduces);

    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, conf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with "
            + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
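
Note the pattern at the end of this driver: the partition file produced by InputSampler.writePartitionFile is both pointed to via TotalOrderPartitioner.setPartitionFile and pushed to the distributed cache with a "#_sortPartitioning" fragment, so each task has a local copy of the split points available. On newer Hadoop releases the same registration can be written without the deprecated static helper (same partitionUri as above):

    job.addCacheFile(partitionUri);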

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *     PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    Class<? extends Partitioner> topClass;
    try {
        topClass = getTotalOrderPartitionerClass();
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed getting TotalOrderPartitioner", e);
    }
    //partition
    job.setPartitionerClass(topClass);
    //Set the key class for the job output data
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    //Set the value class for job outputs
    job.setOutputValueClass(KeyValue.class);
    // output format: HFile
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(SingleColumnReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    // look up the start key of each region
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");

    // one reduce task per region
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + UUID.randomUUID());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    partitionsPath.makeQualified(fs);

    URI cacheUri;
    try {
        // Below we make explicit reference to the bundled TOP. It's cheating:
        // we assume the constant defined in the HBase-bundled TOP is the same as in
        // Hadoop (whether 0.20, 0.22, etc.)
        /*
          cacheUri = new URI(partitionsPath.toString() + "#" +
            org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner.DEFAULT_PATH);
            */
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    // Set compression algorithms based on column families
    configureCompression(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    LOG.info("Incremental table output configured.");
}
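
In this HBase bulk-load setup the partitions file is attached with a fragment equal to TotalOrderPartitioner.DEFAULT_PATH and symlinks are enabled, so every reduce task finds the region boundaries under the partitioner's default file name without any per-task configuration.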

From source file:com.phantom.hadoop.examples.Sort.java

License:Apache License

/**
 * The main driver for sort program. Invoke this method to submit the
 * map/reduce job.
 * 
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    // Set user-supplied (possibly default) job configs
    job = new Job(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    job.setNumReduceTasks(num_reduces);

    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, conf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with "
            + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
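
This driver is essentially the same as the bdss.cmu.edu.Sort example above; the only difference in the cache-related block is that the partition file is registered with TotalOrderPartitioner.setPartitionFile(conf, partitionFile) on the original conf rather than on job.getConfiguration().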

From source file:gobblin.runtime.mapreduce.MRJobLauncher.java

License:Apache License

@VisibleForTesting
static void serializeJobState(FileSystem fs, Path mrJobDir, Configuration conf, JobState jobState, Job job)
        throws IOException {
    Path jobStateFilePath = new Path(mrJobDir, JOB_STATE_FILE_NAME);
    // Write the job state with an empty task set (work units are read by the mapper from a different file)
    try (DataOutputStream dataOutputStream = new DataOutputStream(fs.create(jobStateFilePath))) {
        jobState.write(dataOutputStream, false);
    }

    job.getConfiguration().set(ConfigurationKeys.JOB_STATE_FILE_PATH_KEY, jobStateFilePath.toString());

    DistributedCache.addCacheFile(jobStateFilePath.toUri(), job.getConfiguration());
    job.getConfiguration().set(ConfigurationKeys.JOB_STATE_DISTRIBUTED_CACHE_NAME, jobStateFilePath.getName());
}
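
Here the job state file is added to the cache by its bare HDFS URI (no "#" fragment), and its file name is stored separately under JOB_STATE_DISTRIBUTED_CACHE_NAME, presumably so the mapper side can pick the right entry out of the localized cache files.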

From source file:gobblin.runtime.mapreduce.MRJobLauncher.java

License:Apache License

/**
 * Add local non-jar files the job depends on to DistributedCache.
 */
@SuppressWarnings("deprecation")
private void addLocalFiles(Path jobFileDir, String jobFileList, Configuration conf) throws IOException {
    DistributedCache.createSymlink(conf);
    for (String jobFile : SPLITTER.split(jobFileList)) {
        Path srcJobFile = new Path(jobFile);
        // DistributedCache requires absolute path, so we need to use makeQualified.
        Path destJobFile = new Path(this.fs.makeQualified(jobFileDir), srcJobFile.getName());
        // Copy the file from local file system to HDFS
        this.fs.copyFromLocalFile(srcJobFile, destJobFile);
        // Create a URI that is in the form path#symlink
        URI destFileUri = URI.create(destJobFile.toUri().getPath() + "#" + destJobFile.getName());
        LOG.info(String.format("Adding %s to DistributedCache", destFileUri));
        // Finally add the file to DistributedCache with a symlink named after the file name
        DistributedCache.addCacheFile(destFileUri, conf);
    }
}
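
A hypothetical task-side counterpart (a sketch, not part of MRJobLauncher): because each file is registered as path#fileName with symlinks enabled, a task can open the localized copy through a symlink named after the file, or enumerate the localized files through the same deprecated API. The mapper class and the properties file name below are assumptions for illustration.

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;

public class CacheReadingMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @SuppressWarnings("deprecation")
    @Override
    protected void setup(Context context) throws IOException {
        // Option 1: the symlink in the task working directory carries the original file name.
        File viaSymlink = new File("job.properties"); // hypothetical file name
        // Option 2: list every file the framework localized for this task.
        Path[] localFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
        // ... read whichever copy is needed before map() runs ...
    }
}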

From source file:gobblin.runtime.mapreduce.MRJobLauncher.java

License:Apache License

/**
 * Add non-jar files already on HDFS that the job depends on to DistributedCache.
 */
@SuppressWarnings("deprecation")
private void addHDFSFiles(String jobFileList, Configuration conf) {
    DistributedCache.createSymlink(conf);
    jobFileList = PasswordManager.getInstance(this.jobProps).readPassword(jobFileList);
    for (String jobFile : SPLITTER.split(jobFileList)) {
        Path srcJobFile = new Path(jobFile);
        // Create a URI that is in the form path#symlink
        URI srcFileUri = URI.create(srcJobFile.toUri().getPath() + "#" + srcJobFile.getName());
        LOG.info(String.format("Adding %s to DistributedCache", srcFileUri));
        // Finally add the file to DistributedCache with a symlink named after the file name
        DistributedCache.addCacheFile(srcFileUri, conf);
    }
}

From source file:ipldataanalysis4.IPLDataAnalysis4.java

@Override
public int run(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.printf("Two parameters are required for Data Analysis for IPL- <input dir> <output dir>\n");
        return -1;
    }
    Configuration conf = new Configuration();
    DistributedCache.addCacheFile(new URI("testFile"), conf);
    Job job = new Job(getConf(), "Job1");
    long milliSeconds = 1000 * 60 * 60;
    conf.setLong("mapred.task.timeout", milliSeconds);

    job.setJarByClass(IPLDataAnalysis4.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(BloomMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);
    //job.setReducerClass(DataAnalysisReducer.class);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;

}
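
Note that this example registers "testFile" (and the timeout) on a freshly created conf, while the Job is constructed from getConf(); as written, those settings do not reach the submitted job. Registering the file on job.getConfiguration(), or passing conf into the Job constructor, would be needed for tasks to see the cached file.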

From source file:nl.sanoma.hdt.report.generator.ReportGeneratorDriver.java

License:Open Source License

/**
 * Job to join the data and the metadata from distributed cache and
 * calculate the revenue by quarter and most popular product category for user
 *
 * @param dBPath the path of the import MapFile
 * @param inputPath the path of the logs directory
 * @param outputPath the path of the output directory
 * @return returns the exitCode of the job
 * @throws IOException
 * @throws URISyntaxException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Boolean generateReport(String dBPath, String inputPath, String outputPath)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();

    job.setJobName("Repor Generator");
    DistributedCache.addCacheFile(new URI(dBPath), conf);
    job.setJarByClass(ReportGeneratorDriver.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setPartitionerClass(KeyDataPartitioner.class);
    job.setGroupingComparatorClass(KeyDataGroupingComparator.class);
    job.setSortComparatorClass(KeyDataComparator.class);
    job.setMapperClass(ReportGeneratorMapper.class);
    job.setMapOutputKeyClass(KeyData.class);
    job.setMapOutputValueClass(ValueData.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReportGeneratorReducer.class);
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true);
}
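
The metadata MapFile at dBPath is only registered here; the mapper is expected to locate it among the localized cache files at task setup time (for example via DistributedCache.getLocalCacheFiles) and open it to perform the join. That retrieval happens outside this snippet.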

From source file:org.apache.gobblin.runtime.mapreduce.MRJobLauncher.java

License:Apache License

@VisibleForTesting
static void serializeJobState(FileSystem fs, Path mrJobDir, Configuration conf, JobState jobState, Job job)
        throws IOException {
    Path jobStateFilePath = new Path(mrJobDir, JOB_STATE_FILE_NAME);
    // Write the job state with an empty task set (work units are read by the mapper from a different file)
    try (DataOutputStream dataOutputStream = new DataOutputStream(fs.create(jobStateFilePath))) {
        jobState.write(dataOutputStream, false, conf.getBoolean(SERIALIZE_PREVIOUS_WORKUNIT_STATES_KEY,
                DEFAULT_SERIALIZE_PREVIOUS_WORKUNIT_STATES));
    }

    job.getConfiguration().set(ConfigurationKeys.JOB_STATE_FILE_PATH_KEY, jobStateFilePath.toString());

    DistributedCache.addCacheFile(jobStateFilePath.toUri(), job.getConfiguration());
    job.getConfiguration().set(ConfigurationKeys.JOB_STATE_DISTRIBUTED_CACHE_NAME, jobStateFilePath.getName());
}

From source file:org.apache.sysml.runtime.matrix.CSVReblockMR.java

License:Apache License

private static JobReturn runCSVReblockJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos,
        long[] rlens, long[] clens, int[] brlens, int[] bclens, String reblockInstructions,
        String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes,
        String[] outputs, OutputInfo[] outputInfos, Path counterFile, String[] smallestFiles) throws Exception {
    JobConf job;
    job = new JobConf(ReblockMR.class);
    job.setJobName("CSV-Reblock-MR");

    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false,
            ConvertTarget.CELL);

    job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, smallestFiles);

    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);

    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCSVReblockInstructions(job, reblockInstructions);

    //set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);

    //set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    //set up custom map/reduce configurations 
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);

    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, null,
            reblockInstructions, null, otherInstructionsInReducer, resultIndexes);

    MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, null,
            reblockInstructions, null, null, otherInstructionsInReducer, resultIndexes, mapoutputIndexes,
            false);

    MatrixCharacteristics[] stats = ret.stats;

    //set up the number of reducers
    int numRed = WriteCSVMR.determineNumReducers(rlens, clens, config.getIntValue(DMLConfig.NUM_REDUCERS),
            ret.numReducerGroups);
    job.setNumReduceTasks(numRed);

    // Print the complete instruction
    //if (LOG.isTraceEnabled())
    //   inst.printCompelteMRJobInstruction(stats);

    // Update resultDimsUnknown based on computed "stats"
    byte[] resultDimsUnknown = new byte[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
            resultDimsUnknown[i] = (byte) 1;
        } else {
            resultDimsUnknown[i] = (byte) 0;
        }
    }

    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true,
            true);

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVReblockMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(BlockRow.class);

    //configure reducer
    job.setReducerClass(CSVReblockReducer.class);

    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    Path cachefile = new Path(counterFile, "part-00000");
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);
    job.set(ROWID_FILE_NAME, cachefile.toString());

    RunningJob runjob = JobClient.runJob(job);

    MapReduceTool.deleteFileIfExistOnHDFS(counterFile, job);

    /* Process different counters */

    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        // number of non-zeros
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        //   System.out.println("result #"+resultIndexes[i]+" ===>\n"+stats[i]);
    }
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
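
Two handles to the same data are set up in this job: part-00000 of the counter file is pushed through the cache with a symlink, and its full path is also recorded under ROWID_FILE_NAME in the job configuration, presumably so tasks can resolve the row-offset information either through the local symlink or the configured path.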