Example usage for org.apache.hadoop.mapreduce Job getCounters

List of usage examples for org.apache.hadoop.mapreduce Job getCounters

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce Job getCounters.

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
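
Before the project-specific examples below, here is a minimal, self-contained driver sketch of the usual pattern: wait for the job to finish, then read a built-in counter from the Counters object that getCounters() returns. The class name GetCountersDemo and the word-count setup are illustrative only and are not taken from any of the listed projects.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class GetCountersDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "getCounters-demo");
        job.setJarByClass(GetCountersDemo.class);
        // TokenCounterMapper and IntSumReducer are stock Hadoop library classes, used here only as filler.
        job.setMapperClass(TokenCounterMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("Job failed!");
        }

        // getCounters() aggregates the task counters of the finished job and may throw IOException.
        Counters counters = job.getCounters();
        long mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
        System.out.println("Map input records: " + mapInputRecords);
    }
}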

Usage

From source file: org.apache.mahout.graph.components.FindKTrussesJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();

    addOption("k", "k", "The k parameter of the k-trusses to find.");

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();
    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));

    int k = Integer.parseInt(parsedArgs.get("--k")); // extract parameter

    AtomicInteger currentPhase = new AtomicInteger();
    Configuration conf = new Configuration();

    Path simplifyInputPath = inputPath;
    Path simplifyOutputPath = new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Simplify the graph first
         */
        SimplifyGraphJob simplifyGraphJob = new SimplifyGraphJob();
        simplifyGraphJob.setConf(conf);
        simplifyGraphJob.run(new String[] { "--input", simplifyInputPath.toString(), "--output",
                simplifyOutputPath.toString(), "--tempDir", tempDirPath.toString() });
    }

    Path currentTrussesDirPath = simplifyOutputPath;

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        while (true) {
            /*
             * Augment the simplified graph with degrees
             */
            // scatter the edges to each of the vertices and count degree
            Path augmentInputPath = currentTrussesDirPath;
            Path augmentOutputPath = new Path(tempDirPath,
                    "augment" + String.valueOf(System.currentTimeMillis()));

            AugmentGraphWithDegreesJob augmentGraphWithDegreesJob = new AugmentGraphWithDegreesJob();
            augmentGraphWithDegreesJob.setConf(conf);
            augmentGraphWithDegreesJob.run(new String[] { "--input", augmentInputPath.toString(), "--output",
                    augmentOutputPath.toString(), "--tempDir",
                    new Path(tempDirPath, String.valueOf(System.currentTimeMillis())).toString(), });

            /*
             * Enumerate triangles in the graph
             */
            Path enumerateInputPath = augmentOutputPath;
            // scatter the edges to lower degree vertex and build open triads
            Path enumerateOutputPath = new Path(tempDirPath,
                    "enumerate" + String.valueOf(System.currentTimeMillis()));

            EnumerateTrianglesJob enumerateTrianglesJob = new EnumerateTrianglesJob();
            enumerateTrianglesJob.setConf(conf);
            enumerateTrianglesJob.run(new String[] { "--input", enumerateInputPath.toString(), "--output",
                    enumerateOutputPath.toString(), "--tempDir",
                    new Path(tempDirPath, String.valueOf(System.currentTimeMillis())).toString(), });

            /*
             * Drop edges with insufficient support
             */
            Path checkSupportInputPath = enumerateOutputPath;
            Path checkSupportOutputPath = new Path(tempDirPath,
                    "support" + String.valueOf(System.currentTimeMillis()));
            Job checkTrianglesForSupport = prepareJob(checkSupportInputPath, checkSupportOutputPath,
                    SequenceFileInputFormat.class, SplitTrianglesToEdgesMapper.class, UndirectedEdge.class,
                    IntWritable.class, DropUnsupportedEdgesReducer.class, UndirectedEdge.class,
                    NullWritable.class, SequenceFileOutputFormat.class);

            checkTrianglesForSupport.setCombinerClass(IntSumReducer.class);
            checkTrianglesForSupport.getConfiguration().setInt(K, k);
            checkTrianglesForSupport.waitForCompletion(true);

            currentTrussesDirPath = checkSupportOutputPath;

            long droppedEdges = checkTrianglesForSupport.getCounters().findCounter(Counter.DROPPED_EDGES)
                    .getValue();
            log.info("{} edges were dropped", droppedEdges);
            if (droppedEdges == 0L) {
                break;
            }

        }
    }

    Path componentsInputPath = new Path(tempDirPath, "converted" + String.valueOf(System.currentTimeMillis()));
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Prepare the input for FindComponents
         */
        Job convertFormat = prepareJob(currentTrussesDirPath, componentsInputPath,
                SequenceFileInputFormat.class, PrepareInputMapper.class, Vertex.class, FlaggedVertex.class,
                Reducer.class, Vertex.class, FlaggedVertex.class, SequenceFileOutputFormat.class);
        convertFormat.waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Find the components of the remaining graph
         */
        FindComponentsJob componentsJob = new FindComponentsJob();
        componentsJob.setConf(conf);
        componentsJob.run(new String[] { "--input", componentsInputPath.toString(), "--output",
                outputPath.toString(), "--tempDir", tempDirPath.toString(), });
    }
    return 0;
}
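
The loop above terminates based on the DROPPED_EDGES counter that the driver reads back through getCounters() after each iteration. The reducer-side half of that round trip, DropUnsupportedEdgesReducer, is not listed on this page; the following is only a hedged sketch of how such a reducer could feed the counter, using generic Text keys and a made-up "min.support" configuration key in place of Mahout's actual types and the K constant used above.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class DroppingReducerSketch extends Reducer<Text, IntWritable, Text, NullWritable> {

    // Same counter name as in the driver code above; everything else here is illustrative.
    public enum Counter { DROPPED_EDGES }

    @Override
    protected void reduce(Text edge, Iterable<IntWritable> counts, Context ctx)
            throws IOException, InterruptedException {
        int support = 0;
        for (IntWritable c : counts) {
            support += c.get();
        }
        if (support >= ctx.getConfiguration().getInt("min.support", 1)) {
            ctx.write(edge, NullWritable.get());
        } else {
            // The driver observes this value as
            // job.getCounters().findCounter(Counter.DROPPED_EDGES).getValue()
            // and stops iterating once it reaches zero.
            ctx.getCounter(Counter.DROPPED_EDGES).increment(1);
        }
    }
}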

From source file: org.apache.mahout.math.stats.entropy.ConditionalEntropy.java

License: Apache License

/**
 * Groups and counts by key and value.
 * SQL-like: SELECT key, value, COUNT(*) FROM x GROUP BY key, value
 */
private void groupAndCountByKeyAndValue() throws IOException, ClassNotFoundException, InterruptedException {

    Job job = prepareJob(getInputPath(), keyValueCountPath, SequenceFileInputFormat.class,
            GroupAndCountByKeyAndValueMapper.class, StringTuple.class, VarIntWritable.class,
            VarIntSumReducer.class, StringTuple.class, VarIntWritable.class, SequenceFileOutputFormat.class);
    job.setCombinerClass(VarIntSumReducer.class);
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    numberItems = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
            .getValue();

}
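
This job (and the Entropy job below) resolves the map-input-record count through the legacy group name "org.apache.hadoop.mapred.Task$Counter", which targets the older mapred-era counter group. On current Hadoop releases the same value can also be read through the TaskCounter enum in org.apache.hadoop.mapreduce; a small equivalent fragment, assuming the same job variable as in the listing above, would be:

// Equivalent enum-based lookup (requires import org.apache.hadoop.mapreduce.TaskCounter).
numberItems = job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();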

From source file: org.apache.mahout.math.stats.entropy.Entropy.java

License: Apache License

/**
 * Groups the items and counts the occurrences of each of them.
 * SQL-like: SELECT item, COUNT(*) FROM x GROUP BY item
 *
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void groupAndCount() throws IOException, ClassNotFoundException, InterruptedException {

    Class<? extends Mapper> mapper = "key".equals(source) ? KeyCounterMapper.class : ValueCounterMapper.class;

    Job job = prepareJob(getInputPath(), tempPath, SequenceFileInputFormat.class, mapper, Text.class,
            VarIntWritable.class, VarIntSumReducer.class, Text.class, VarIntWritable.class,
            SequenceFileOutputFormat.class);
    job.setCombinerClass(VarIntSumReducer.class);
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    numberItems = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
            .getValue();

}

From source file: org.apache.mahout.utils.nlp.collocations.llr.CollocDriver.java

License: Apache License

/**
 * pass1: generate collocations, ngrams
 */
private static long generateCollocations(Path input, Path output, Configuration baseConf, boolean emitUnigrams,
        int maxNGramSize, int reduceTasks, int minSupport)
        throws IOException, ClassNotFoundException, InterruptedException {

    Configuration con = new Configuration(baseConf);
    con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    con.setInt(CollocReducer.MIN_SUPPORT, minSupport);

    Job job = new Job(con);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(GramKey.class);
    job.setMapOutputValueClass(Gram.class);
    job.setPartitionerClass(GramKeyPartitioner.class);
    job.setGroupingComparatorClass(GramKeyGroupComparator.class);

    job.setOutputKeyClass(Gram.class);
    job.setOutputValueClass(Gram.class);

    job.setCombinerClass(CollocCombiner.class);

    FileInputFormat.setInputPaths(job, input);

    Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CollocMapper.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(CollocReducer.class);
    job.setNumReduceTasks(reduceTasks);

    job.waitForCompletion(true);

    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}

From source file: org.apache.mahout.vectorizer.collocations.llr.CollocDriver.java

License: Apache License

/**
 * pass1: generate collocations, ngrams
 */
private static long generateCollocations(Path input, Path output, Configuration baseConf, boolean emitUnigrams,
        int maxNGramSize, int reduceTasks, int minSupport)
        throws IOException, ClassNotFoundException, InterruptedException {

    Configuration con = new Configuration(baseConf);
    con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    con.setInt(CollocReducer.MIN_SUPPORT, minSupport);

    Job job = new Job(con);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(GramKey.class);
    job.setMapOutputValueClass(Gram.class);
    job.setPartitionerClass(GramKeyPartitioner.class);
    job.setGroupingComparatorClass(GramKeyGroupComparator.class);

    job.setOutputKeyClass(Gram.class);
    job.setOutputValueClass(Gram.class);

    job.setCombinerClass(CollocCombiner.class);

    FileInputFormat.setInputPaths(job, input);

    Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CollocMapper.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(CollocReducer.class);
    job.setNumReduceTasks(reduceTasks);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}

From source file: org.apache.mrql.CrossProductOperation.java

License: Apache License

/** The CrossProduct physical operator (similar to block-nested loop)
 * @param mx              left mapper
 * @param my              right mapper
 * @param reduce_fnc      reducer
 * @param acc_fnc         optional accumulator function
 * @param zero            optional the zero value for the accumulator
 * @param X               the left source
 * @param Y               the right source (stored in distributed cache)
 * @param stop_counter    optional counter used in repeat operation
 * @return a new data source that contains the result
 */
public final static DataSet crossProduct(Tree mx, // left mapper
        Tree my, // right mapper
        Tree reduce_fnc, // reducer
        Tree acc_fnc, // optional accumulator function
        Tree zero, // optional the zero value for the accumulator
        DataSet X, // the left source
        DataSet Y, // the right source (stored in distributed cache)
        String stop_counter) // optional counter used in repeat operation
        throws Exception {
    DataSet ds = MapOperation.cMap(my, null, null, Y, "-");
    conf = MapReduceEvaluator.clear_configuration(conf);
    String newpath = new_path(conf);
    conf.set("mrql.reducer", reduce_fnc.toString());
    conf.set("mrql.mapper", mx.toString());
    if (zero != null) {
        conf.set("mrql.accumulator", acc_fnc.toString());
        conf.set("mrql.zero", zero.toString());
    } else
        conf.set("mrql.zero", "");
    conf.set("mrql.counter", stop_counter);
    setupSplits(new DataSet[] { X, Y }, conf);
    Job job = new Job(conf, newpath);
    distribute_compiled_arguments(job.getConfiguration());
    job.setJarByClass(MapReducePlan.class);
    job.setOutputKeyClass(MRContainer.class);
    job.setOutputValueClass(MRContainer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    PathFilter pf = new PathFilter() {
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };
    for (DataSource p : ds.source) {
        Path path = new Path(p.path);
        for (FileStatus s : path.getFileSystem(conf).listStatus(path, pf))
            DistributedCache.addCacheFile(s.getPath().toUri(), job.getConfiguration());
    }
    for (DataSource p : X.source)
        MultipleInputs.addInputPath(job, new Path(p.path),
                (Class<? extends MapReduceMRQLFileInputFormat>) p.inputFormat, crossProductMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(newpath));
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);
    long c = (stop_counter.equals("-")) ? 0 : job.getCounters().findCounter("mrql", stop_counter).getValue();
    return new DataSet(new BinaryDataSource(newpath, conf), c, outputRecords(job));
}
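
All of the MRQL operators that follow share one convention around getCounters(): the optional stop_counter name is stored in the configuration under "mrql.counter", the tasks increment a counter with that name in the "mrql" group, and the driver reads the total back after waitForCompletion to decide whether a surrounding repeat loop should continue. The task-side half of that convention is not shown on this page; the mapper below is only a hedged sketch of it, not actual MRQL code.

import java.io.IOException;

import org.apache.hadoop.mapreduce.Mapper;

public class StopCounterMapperSketch<K, V> extends Mapper<K, V, K, V> {

    private String stopCounter;

    @Override
    protected void setup(Context ctx) {
        // Matches the key the drivers above set with conf.set("mrql.counter", stop_counter).
        stopCounter = ctx.getConfiguration().get("mrql.counter", "-");
    }

    @Override
    protected void map(K key, V value, Context ctx) throws IOException, InterruptedException {
        ctx.write(key, value); // identity pass-through; real MRQL mappers evaluate the compiled map function
        if (!"-".equals(stopCounter)) {
            // Read back by the driver as job.getCounters().findCounter("mrql", stop_counter).getValue().
            ctx.getCounter("mrql", stopCounter).increment(1);
        }
    }
}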

From source file: org.apache.mrql.GroupByJoinPlan.java

License: Apache License

/** the GroupByJoin operation:
 *      an equi-join combined with a group-by implemented using hashing
 * @param left_join_key_fnc   left join key function from a to k
 * @param right_join_key_fnc  right join key function from b to k
 * @param left_groupby_fnc    left group-by function from a to k1
 * @param right_groupby_fnc   right group-by function from b to k2
 * @param accumulator_fnc     accumulator function from (c,(a,b)) to c
 * @param zero                the left zero of accumulator of type c
 * @param reduce_fnc          reduce function from ((k1,k2),c) to d
 * @param X                   left data set of type {a}
 * @param Y                   right data set of type {b}
 * @param num_reducers        number of reducers
 * @param n                   left dimension of the reducer grid
 * @param m                   right dimension of the reducer grid
 * @param stop_counter        optional counter used in repeat operation
 * @return a DataSet that contains the result of type {d}
 */
public final static DataSet groupByJoin(Tree left_join_key_fnc, // left join key function
        Tree right_join_key_fnc, // right join key function
        Tree left_groupby_fnc, // left group-by function
        Tree right_groupby_fnc, // right group-by function
        Tree accumulator_fnc, // accumulator function
        Tree zero, // the left zero of accumulator
        Tree reduce_fnc, // reduce function
        DataSet X, // left data set
        DataSet Y, // right data set
        int num_reducers, // number of reducers
        int n, int m, // dimensions of the reducer grid
        String stop_counter) // optional counter used in repeat operation
        throws Exception {
    conf = MapReduceEvaluator.clear_configuration(conf);
    String newpath = new_path(conf);
    conf.set("mrql.join.key.left", left_join_key_fnc.toString());
    conf.set("mrql.join.key.right", right_join_key_fnc.toString());
    conf.set("mrql.groupby.left", left_groupby_fnc.toString());
    conf.set("mrql.groupby.right", right_groupby_fnc.toString());
    conf.setInt("mrql.m", m);
    conf.setInt("mrql.n", n);
    conf.set("mrql.accumulator", accumulator_fnc.toString());
    conf.set("mrql.zero", zero.toString());
    conf.set("mrql.reducer", reduce_fnc.toString());
    conf.set("mrql.counter", stop_counter);
    setupSplits(new DataSet[] { X, Y }, conf);
    Job job = new Job(conf, newpath);
    distribute_compiled_arguments(job.getConfiguration());
    job.setMapOutputKeyClass(GroupByJoinKey.class);
    job.setJarByClass(GroupByJoinPlan.class);
    job.setOutputKeyClass(MRContainer.class);
    job.setOutputValueClass(MRContainer.class);
    job.setPartitionerClass(GroupByJoinPartitioner.class);
    job.setSortComparatorClass(GroupByJoinSortComparator.class);
    job.setGroupingComparatorClass(GroupByJoinGroupingComparator.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(newpath));
    for (DataSource p : X.source)
        MultipleInputs.addInputPath(job, new Path(p.path),
                (Class<? extends MapReduceMRQLFileInputFormat>) p.inputFormat, MapperLeft.class);
    for (DataSource p : Y.source)
        MultipleInputs.addInputPath(job, new Path(p.path),
                (Class<? extends MapReduceMRQLFileInputFormat>) p.inputFormat, MapperRight.class);
    job.setReducerClass(JoinReducer.class);
    if (num_reducers > 0)
        job.setNumReduceTasks(num_reducers);
    job.waitForCompletion(true);
    long c = (stop_counter.equals("-")) ? 0 : job.getCounters().findCounter("mrql", stop_counter).getValue();
    DataSource s = new BinaryDataSource(newpath, conf);
    s.to_be_merged = false;
    return new DataSet(s, c, MapReducePlan.outputRecords(job));
}

From source file: org.apache.mrql.JoinOperation.java

License: Apache License

/** The MapReduce2 physical operator (a reduce-side join)
 * @param mx             left mapper function
 * @param my             right mapper function
 * @param combine_fnc    optional in-mapper combiner function
 * @param reduce_fnc     reducer function
 * @param acc_fnc        optional accumulator function
 * @param zero           optional the zero value for the accumulator
 * @param X              left data set
 * @param Y              right data set
 * @param num_reduces    number of reducers
 * @param stop_counter   optional counter used in repeat operation
 * @param orderp         does the result need to be ordered?
 * @return a new data source that contains the result
 */
public final static DataSet mapReduce2(Tree mx, // left mapper function
        Tree my, // right mapper function
        Tree combine_fnc, // optional in-mapper combiner function
        Tree reduce_fnc, // reducer function
        Tree acc_fnc, // optional accumulator function
        Tree zero, // optional the zero value for the accumulator
        DataSet X, // left data set
        DataSet Y, // right data set
        int num_reduces, // number of reducers
        String stop_counter, // optional counter used in repeat operation
        boolean orderp) // does the result need to be ordered?
        throws Exception {
    conf = MapReduceEvaluator.clear_configuration(conf);
    String newpath = new_path(conf);
    conf.set("mrql.mapper.left", mx.toString());
    conf.set("mrql.mapper.right", my.toString());
    if (combine_fnc != null)
        conf.set("mrql.combiner", combine_fnc.toString());
    conf.set("mrql.reducer", reduce_fnc.toString());
    if (zero != null) {
        conf.set("mrql.accumulator", acc_fnc.toString());
        conf.set("mrql.zero", zero.toString());
    } else
        conf.set("mrql.zero", "");
    conf.set("mrql.counter", stop_counter);
    setupSplits(new DataSet[] { X, Y }, conf);
    Job job = new Job(conf, newpath);
    distribute_compiled_arguments(job.getConfiguration());
    job.setMapOutputKeyClass(JoinKey.class);
    job.setJarByClass(MapReducePlan.class);
    job.setOutputKeyClass(MRContainer.class);
    job.setOutputValueClass(MRContainer.class);
    job.setPartitionerClass(MRContainerJoinPartitioner.class);
    job.setSortComparatorClass(MRContainerSortComparator.class);
    job.setGroupingComparatorClass(MRContainerGroupingComparator.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(newpath));
    for (DataSource p : X.source)
        MultipleInputs.addInputPath(job, new Path(p.path),
                (Class<? extends MapReduceMRQLFileInputFormat>) p.inputFormat, MapperLeft.class);
    for (DataSource p : Y.source)
        MultipleInputs.addInputPath(job, new Path(p.path),
                (Class<? extends MapReduceMRQLFileInputFormat>) p.inputFormat, MapperRight.class);
    if (Config.trace && PlanGeneration.streamed_MapReduce2_reducer(reduce_fnc))
        System.out.println("Streamed MapReduce2 reducer");
    job.setReducerClass(JoinReducer.class);
    if (num_reduces > 0)
        job.setNumReduceTasks(num_reduces);
    job.waitForCompletion(true);
    long c = (stop_counter.equals("-")) ? 0 : job.getCounters().findCounter("mrql", stop_counter).getValue();
    DataSource s = new BinaryDataSource(newpath, conf);
    s.to_be_merged = orderp;
    return new DataSet(s, c, outputRecords(job));
}

From source file: org.apache.mrql.MapJoinOperation.java

License: Apache License

/** The fragment-replicate join (map-side join) physical operator
 * @param probe_map_fnc    left mapper function
 * @param built_map_fnc    right mapper function
 * @param reduce_fnc       reducer function
 * @param acc_fnc          optional accumulator function
 * @param zero             optional the zero value for the accumulator
 * @param probe_dataset    the map source
 * @param built_dataset    stored in distributed cache
 * @param stop_counter     optional counter used in repeat operation
 * @return a new data source that contains the result
 */
public final static DataSet mapJoin(Tree probe_map_fnc, // left mapper function
        Tree built_map_fnc, // right mapper function
        Tree reduce_fnc, // reducer function
        Tree acc_fnc, // optional accumulator function
        Tree zero, // optional the zero value for the accumulator
        DataSet probe_dataset, // the map source
        DataSet built_dataset, // stored in distributed cache
        String stop_counter) // optional counter used in repeat operation
        throws Exception {
    DataSet ds = MapOperation.cMap(built_map_fnc, null, null, built_dataset, "-");
    conf = MapReduceEvaluator.clear_configuration(conf);
    String newpath = new_path(conf);
    conf.set("mrql.inMap.reducer", reduce_fnc.toString());
    conf.set("mrql.probe_mapper", probe_map_fnc.toString());
    conf.set("mrql.counter", stop_counter);
    if (zero != null) {
        conf.set("mrql.accumulator", acc_fnc.toString());
        conf.set("mrql.zero", zero.toString());
    } else
        conf.set("mrql.zero", "");
    setupSplits(new DataSet[] { probe_dataset, built_dataset }, conf);
    Job job = new Job(conf, newpath);
    distribute_compiled_arguments(job.getConfiguration());
    job.setJarByClass(MapReducePlan.class);
    job.setOutputKeyClass(MRContainer.class);
    job.setOutputValueClass(MRContainer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    PathFilter pf = new PathFilter() {
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };
    for (DataSource p : ds.source) { // distribute the built dataset
        Path path = new Path(p.path);
        for (FileStatus s : path.getFileSystem(conf).listStatus(path, pf))
            DistributedCache.addCacheFile(s.getPath().toUri(), job.getConfiguration());
    }
    for (DataSource p : probe_dataset.source)
        MultipleInputs.addInputPath(job, new Path(p.path),
                (Class<? extends MapReduceMRQLFileInputFormat>) p.inputFormat, mapJoinMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(newpath));
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);
    long c = (stop_counter.equals("-")) ? 0 : job.getCounters().findCounter("mrql", stop_counter).getValue();
    return new DataSet(new BinaryDataSource(newpath, conf), c, outputRecords(job));
}

From source file: org.apache.mrql.MapOperation.java

License: Apache License

/** The cMap physical operator
 * @param map_fnc       mapper function
 * @param acc_fnc       optional accumulator function
 * @param zero          optional the zero value for the accumulator
 * @param source        input data source
 * @param stop_counter  optional counter used in repeat operation
 * @return a new data source that contains the result
 */
public final static DataSet cMap(Tree map_fnc, // mapper function
        Tree acc_fnc, // optional accumulator function
        Tree zero, // optional the zero value for the accumulator
        DataSet source, // input data source
        String stop_counter) // optional counter used in repeat operation
        throws Exception {
    conf = MapReduceEvaluator.clear_configuration(conf);
    String newpath = new_path(conf);
    conf.set("mrql.mapper", map_fnc.toString());
    conf.set("mrql.counter", stop_counter);
    if (zero != null) {
        conf.set("mrql.accumulator", acc_fnc.toString());
        conf.set("mrql.zero", zero.toString());
    } else
        conf.set("mrql.zero", "");
    setupSplits(source, conf);
    Job job = new Job(conf, newpath);
    distribute_compiled_arguments(job.getConfiguration());
    job.setJarByClass(MapReducePlan.class);
    job.setOutputKeyClass(MRContainer.class);
    job.setOutputValueClass(MRContainer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    for (DataSource p : source.source)
        MultipleInputs.addInputPath(job, new Path(p.path),
                (Class<? extends MapReduceMRQLFileInputFormat>) p.inputFormat, cMapMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(newpath));
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);
    long c = (stop_counter.equals("-")) ? 0 : job.getCounters().findCounter("mrql", stop_counter).getValue();
    return new DataSet(new BinaryDataSource(newpath, conf), c, outputRecords(job));
}