Example usage for org.apache.hadoop.mapred JobConf setOutputValueClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass) 

Document

Set the value class for job outputs.
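
The call declares the type of the job's final output values; unless setMapOutputValueClass is used, it also describes the map output values. A minimal, self-contained sketch of where the call fits in an old-API driver (the IdentityDriver class name and the argument handling are illustrative, not taken from the examples below): an identity pass-through job whose final records are (LongWritable, Text), so the output value class is Text.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class IdentityDriver {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(IdentityDriver.class);
        conf.setJobName("identity-passthrough");

        // TextInputFormat produces (LongWritable offset, Text line) records,
        // and the identity mapper/reducer pass them through unchanged.
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        // Declare the types of the job's final output records. These also
        // serve as the map output types unless setMapOutputKeyClass /
        // setMapOutputValueClass override them.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}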

Usage

From source file:invertedIndex.startJob.java

public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(lineIndexMapper.class);
        conf.setCombinerClass(lineIndexReducer.class);
        conf.setReducerClass(lineIndexReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        // Delete the output directory if it already exists; otherwise the job fails on startup.
        outputDir.getFileSystem(conf).delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

    } catch (Exception e) {
        e.printStackTrace();
    }
}
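
Note that this example deletes the output directory before submitting the job: with the old mapred API, FileOutputFormat's output check fails the job with a FileAlreadyExistsException if the output path already exists.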

From source file:io.aos.t4f.hadoop.mapred.WordCountMapReduceTest.java

License:Apache License

/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the 
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCountMapReduceTest.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(WordCountMapper.class);
    conf.setCombinerClass(WordCountReducer.class);
    conf.setReducerClass(WordCountReducer.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:io.aos.t4f.hadoop.mapred.WordCountTest.java

License:Apache License

private JobConf createJobConf() {
    JobConf conf = mrCluster.createJobConf();
    conf.setJobName("wordcount test");

    conf.setMapperClass(WordCountMapper.class);
    conf.setReducerClass(WordCountReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, output);
    return conf;
}
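
Here the map output classes are set explicitly even though they match the final output classes; that is redundant but harmless. Calling setMapOutputKeyClass and setMapOutputValueClass is only required when the intermediate map output types differ from the types declared with setOutputKeyClass and setOutputValueClass.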

From source file:io.dstream.tez.TezDAGBuilder.java

License:Apache License

private JobConf buildJobConf(Class<? extends Writable> keyClass, Class<? extends Writable> valueClass) {
    JobConf jobConf = new JobConf(this.tezClient.getTezConfiguration());
    jobConf.setOutputKeyClass(keyClass);
    jobConf.setOutputValueClass(valueClass);
    return jobConf;
}

From source file:io.fluo.stress.trie.Generate.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 4) {
        log.error("Usage: " + this.getClass().getSimpleName()
                + " <numMappers> <numbersPerMapper> <max> <output dir>");
        System.exit(-1);
    }

    int numMappers = Integer.parseInt(args[0]);
    int numPerMapper = Integer.parseInt(args[1]);
    long max = Long.parseLong(args[2]);
    Path out = new Path(args[3]);

    Preconditions.checkArgument(numMappers > 0, "numMappers <= 0");
    Preconditions.checkArgument(numPerMapper > 0, "numPerMapper <= 0");
    Preconditions.checkArgument(max > 0, "max <= 0");

    JobConf job = new JobConf(getConf());

    job.setJobName(this.getClass().getName());

    job.setJarByClass(Generate.class);

    job.setInt(TRIE_GEN_NUM_PER_MAPPER_PROP, numPerMapper);
    job.setInt(TRIE_GEN_NUM_MAPPERS_PROP, numMappers);
    job.setLong(TRIE_GEN_MAX_PROP, max);

    job.setInputFormat(RandomLongInputFormat.class);

    job.setNumReduceTasks(0);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, out);

    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : -1;
}
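
Since this is a map-only job (setNumReduceTasks(0)), the output key/value classes describe the records the mappers write directly through SequenceFileOutputFormat; NullWritable serves as a placeholder value type because only the keys carry data here.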

From source file:io.fluo.stress.trie.NumberIngest.java

License:Apache License

public static void main(String[] args) throws IOException, ConfigurationException {

    // Parse arguments
    if (args.length != 4) {
        log.error("Usage: NumberIngest <numMappers> <numbersPerMapper> <nodeSize> <fluoProps>");
        System.exit(-1);
    }
    int numMappers = Integer.parseInt(args[0]);
    int numPerMapper = Integer.parseInt(args[1]);
    int nodeSize = Integer.parseInt(args[2]);
    String fluoPropsPath = args[3];

    String hadoopPrefix = System.getenv("HADOOP_PREFIX");
    if (hadoopPrefix == null) {
        hadoopPrefix = System.getenv("HADOOP_HOME");
        if (hadoopPrefix == null) {
            log.error("HADOOP_PREFIX or HADOOP_HOME needs to be set!");
            System.exit(-1);
        }
    }

    // create test name
    String testId = String.format("test-%d", (new Date().getTime() / 1000));
    String testDir = "/trie-stress/" + testId;

    setupHdfs(hadoopPrefix, testDir, numMappers, numPerMapper);

    JobConf ingestConf = new JobConf(NumberIngest.class);
    ingestConf.setJobName("NumberIngest");

    FluoConfiguration config = new FluoConfiguration(new File(fluoPropsPath));

    loadConfig(ingestConf, ConfigurationConverter.getProperties(config));
    ingestConf.setInt(TRIE_NODE_SIZE_PROP, nodeSize);

    ingestConf.setOutputKeyClass(LongWritable.class);
    ingestConf.setOutputValueClass(IntWritable.class);
    ingestConf.setMapperClass(NumberIngest.IngestMapper.class);
    ingestConf.setReducerClass(NumberIngest.UniqueReducer.class);

    FileInputFormat.setInputPaths(ingestConf, new Path(testDir + "/input/"));
    FileOutputFormat.setOutputPath(ingestConf, new Path(testDir + "/unique/"));

    RunningJob ingestJob = JobClient.runJob(ingestConf);
    ingestJob.waitForCompletion();
    if (ingestJob.isSuccessful()) {

        JobConf countConf = new JobConf(NumberIngest.class);
        countConf.setJobName("NumberCount");

        countConf.setOutputKeyClass(Text.class);
        countConf.setOutputValueClass(LongWritable.class);
        countConf.setMapperClass(NumberIngest.CountMapper.class);
        countConf.setReducerClass(NumberIngest.CountReducer.class);

        FileInputFormat.setInputPaths(countConf, new Path(testDir + "/unique/"));
        FileOutputFormat.setOutputPath(countConf, new Path(testDir + "/output/"));

        RunningJob countJob = JobClient.runJob(countConf);
        countJob.waitForCompletion();
        if (countJob.isSuccessful()) {
            log.info("Ingest and count jobs were successful");
            log.info("Output can be viewed @ " + testDir);
            System.exit(0);
        } else {
            log.error("Count job failed for " + testId);
        }
    } else {
        log.error("Ingest job failed.  Skipping count job for " + testId);
    }

    System.exit(-1);
}

From source file:io.hops.erasure_coding.MapReduceEncoder.java

License:Apache License

/**
 * Creates a new job conf based on the configuration passed in.
 *
 * @param conf the base configuration
 * @return the configured JobConf
 */
private static JobConf createJobConf(Configuration conf) {
    JobConf jobconf = new JobConf(conf, MapReduceEncoder.class);
    jobName = NAME + " " + dateForm.format(new Date(BaseEncodingManager.now()));
    jobconf.setUser(BaseEncodingManager.JOBUSER);
    jobconf.setJobName(jobName);
    jobconf.setMapSpeculativeExecution(false);
    RaidUtils.parseAndSetOptions(jobconf, SCHEDULER_OPTION_LABEL);

    jobconf.setJarByClass(MapReduceEncoder.class);
    jobconf.setInputFormat(DistRaidInputFormat.class);
    jobconf.setOutputKeyClass(Text.class);
    jobconf.setOutputValueClass(Text.class);

    jobconf.setMapperClass(DistRaidMapper.class);
    jobconf.setNumReduceTasks(0);
    return jobconf;
}

From source file:it.isislab.sof.core.engine.hadoop.sshclient.utils.simulation.executor.SOF.java

License:Apache License

public static void main(String[] args) {

    /*
     * Example arguments:
     * aids /home/michele/Scrivania/aids netlogo /home/michele/Scrivania/aids/aids.nlogo /home/michele/Scrivania/aids/input.tmp /home/michele/Scrivania/aids/output /home/michele/Scrivania/aids/output.xml false pepp ciao
     */

    /*         try {//Runtime.getRuntime().exec("rm -r /home/lizard87/Desktop/mason_test/output");
        Runtime.getRuntime().exec("rm -r /home/michele/Scrivania/aids/output");
       } catch (IOException e) {e.printStackTrace();}*/

    if (args.length < 9 || args.length == 11 || args.length == 12 || args.length >= 15) {

        System.out.println("Usage:");
        System.out.println("java -jar SCUD.jar " + "<simulation_name> " + "<simulation_path_home> "
                + "<simulation_type[mason |netlogo |generic]>" + "<simulation_generic_interpreter_path>"
                + "<simultion_program_path> " + "<simulation_mapper_input_path> "
                + "<simulation_mapper_output_path> " + "<simulation_output_domain_xmlfile> "
                + "<simulation_input_path> " + "<<simulation_rating_path>>" + "<oneshot[one|loop]> "
                + "<author_name> " + "<simulation_description> " + "<path_interpreter_evaluate_file> "
                + "<evaluate_file_path>");
        System.exit(-1);
    }

    Configuration conf = null;
    JobConf job = null;

    String AUTHOR = null;/*author name*/
    String SIMULATION_NAME = null;/*simulation name*/
    String SIMULATION_HOME = null;/*path simulation*/
    String SIM_TYPE = null;/*mason, netlogo, generic*/
    String SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH = null;
    String SIM_EXECUTABLE_SIMULATION_PROGRAM = null; /*executable program *.jar | *.nlogo*/
    String SIM_EXECUTION_INPUT_DATA_MAPPER = null;/*input.data path */
    String SIM_EXECUTION_OUTPUT_MAPPER = null;/*output loop(i) path*/
    String SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = null;/*path of domain file */
    String SIM_EXECUTION_INPUT_XML = null;/*execution input path*/
    boolean ISLOOP = false;/*false[one] | true[loop]*/
    //String DESCRIPTION=null;/*simulations' description*/
    String INTERPRETER_REMOTE_PATH_EVALUATION = null;/*remote program bin path for executing EvalFoo*/
    String EXECUTABLE_RATING_FILE = null;/*path of rating file*/
    String SIM_RATING_PATH = null;

    // aids /home/michele/Scrivania/aids netlogo /home/michele/Scrivania/aids/aids.nlogo /home/michele/Scrivania/aids/input.tmp /home/michele/Scrivania/aids/output /home/michele/Scrivania/aids/domain.xml /home/michele/Scrivania/aids/input loop pepp ciao /usr/bin/python /home/michele/Scrivania/aids/evaluate.py 

    if (args.length == 13) {
        SIMULATION_NAME = args[0];
        SIMULATION_HOME = args[1];
        SIM_TYPE = args[2];
        SIM_EXECUTABLE_SIMULATION_PROGRAM = args[3];
        SIM_EXECUTION_INPUT_DATA_MAPPER = args[4];
        SIM_EXECUTION_OUTPUT_MAPPER = args[5];
        SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[6];
        SIM_EXECUTION_INPUT_XML = args[7];
        SIM_RATING_PATH = args[8];
        ISLOOP = Boolean.parseBoolean(args[9]);
        AUTHOR = args[10];
        //DESCRIPTION=args[11];
        INTERPRETER_REMOTE_PATH_EVALUATION = args[11];
        EXECUTABLE_RATING_FILE = args[12];
        //   System.out.println(DESCRIPTION);
        //System.out.println(INTERPRETER_REMOTE_PATH_EVALUATION);

    }

    else if (args.length == 9) {
        SIMULATION_NAME = args[0];
        SIMULATION_HOME = args[1];
        SIM_TYPE = args[2];
        SIM_EXECUTABLE_SIMULATION_PROGRAM = args[3];
        SIM_EXECUTION_INPUT_DATA_MAPPER = args[4];
        SIM_EXECUTION_OUTPUT_MAPPER = args[5];
        SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[6];
        ISLOOP = Boolean.parseBoolean(args[7]);
        AUTHOR = args[8];
        //DESCRIPTION=args[9];
    }

    else if (args.length == 14) {
        SIMULATION_NAME = args[0];
        SIMULATION_HOME = args[1];
        SIM_TYPE = args[2];
        SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH = args[3];
        SIM_EXECUTABLE_SIMULATION_PROGRAM = args[4];
        SIM_EXECUTION_INPUT_DATA_MAPPER = args[5];
        SIM_EXECUTION_OUTPUT_MAPPER = args[6];
        SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[7];
        SIM_EXECUTION_INPUT_XML = args[8];
        SIM_RATING_PATH = args[9];
        ISLOOP = Boolean.parseBoolean(args[10]);
        AUTHOR = args[11];
        //   DESCRIPTION=args[12];
        INTERPRETER_REMOTE_PATH_EVALUATION = args[12];
        EXECUTABLE_RATING_FILE = args[13];

    }

    else if (args.length == 10) {
        SIMULATION_NAME = args[0];
        SIMULATION_HOME = args[1];
        SIM_TYPE = args[2];
        SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH = args[3];
        SIM_EXECUTABLE_SIMULATION_PROGRAM = args[4];
        SIM_EXECUTION_INPUT_DATA_MAPPER = args[5];
        SIM_EXECUTION_OUTPUT_MAPPER = args[6];
        SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[7];
        ISLOOP = Boolean.parseBoolean(args[8]);
        AUTHOR = args[9];
        //   DESCRIPTION=args[10];
    }

    if (!(SIM_TYPE.equalsIgnoreCase("mason") || SIM_TYPE.equalsIgnoreCase("netlogo")
            || SIM_TYPE.equalsIgnoreCase("generic"))) {
        System.exit(-2);
    }

    conf = new Configuration();
    job = new JobConf(conf, SOF.class);
    job.setJobName(SIMULATION_NAME/*SIMULATION NAME*/);
    job.set("simulation.home", SIMULATION_HOME);
    job.set("simulation.name", SIMULATION_NAME);
    job.set("simulation.type", SIM_TYPE);

    if (SIM_TYPE.equalsIgnoreCase("generic")) {
        job.set("simulation.interpreter.genericsim", SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH);
    }

    job.set("simulation.program.simulation", SIM_EXECUTABLE_SIMULATION_PROGRAM);
    job.set("simulation.executable.input", SIM_EXECUTION_INPUT_DATA_MAPPER);
    job.set("simulation.executable.output", SIM_EXECUTION_OUTPUT_MAPPER);
    job.setBoolean("simulation.executable.mode", ISLOOP);
    //job.set("simulation.executable.mode", ISLOOP);
    job.set("simulation.executable.author", AUTHOR);
    //job.set("simulation.executable.description", DESCRIPTION);
    job.set("simulation.description.output.domain", SIM_DESCRIPTION_OUTPUT_XML_DOMAIN);

    /**
     * GENERATES THE .TMP FILE
     * COMMENT OUT THE LINE
     * LOCAL TEST
     * FOR LOCAL USE ONLY
     */
    //XmlToText.convertXmlFileToFileText(conf,"/home/lizard87/Desktop/mason_test/input.xml");
    //XmlToText.convertXmlFileToFileText(conf,"/home/lizard87/Desktop/input.xml");
    //XmlToText.convertXmlFileToFileText(conf,"/home/lizard87/Desktop/aids/input.xml");

    if (ISLOOP) {
        job.set("simulation.description.input", SIM_EXECUTION_INPUT_XML);
        job.set("simulation.program.rating", EXECUTABLE_RATING_FILE);
        //job.set("simulation.interpreter.selection", INTERPRETER_REMOTE_PATH_SELECTION);
        job.set("simulation.interpreter.rating", INTERPRETER_REMOTE_PATH_EVALUATION);
        job.set("simulation.executable.loop.rating", SIM_RATING_PATH);
    }

    FileInputFormat.addInputPath(job, new Path(SIM_EXECUTION_INPUT_DATA_MAPPER)/*DIRECTORY INPUT*/);
    FileOutputFormat.setOutputPath(job, new Path(SIM_EXECUTION_OUTPUT_MAPPER));

    if (SIM_TYPE.equalsIgnoreCase("mason")) {
        job.setMapperClass(SOFMapperMason.class);
        job.setReducerClass(SOFReducerMason.class);

    } else if (SIM_TYPE.equalsIgnoreCase("netlogo")) {

        job.setMapperClass(SOFMapperNetLogo.class);
        job.setReducerClass(SOFReducerNetLogo.class);
    } else if (SIM_TYPE.equalsIgnoreCase("generic")) {
        job.setMapperClass(SOFMapperGeneric.class);
        job.setReducerClass(SOFReducerGeneric.class);
    }

    job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
    job.setOutputValueClass(org.apache.hadoop.io.Text.class);

    JobClient jobc;

    try {
        jobc = new JobClient(job);
        System.out.println(jobc + " " + job);
        // JobClient.runJob blocks until the job completes.
        RunningJob runjob = JobClient.runJob(job);
        System.exit(runjob.isSuccessful() ? 0 : 1);
    } catch (IOException e) {

        e.printStackTrace();
    }

}

From source file:Iterator.SpeciesIterDriver2.java

@SuppressWarnings("deprecation")
public static void main(String[] args) {

    int iterationCount = 0;

    while (iterationCount <= 20) {

        System.out.println("Running Iteration - " + iterationCount);
        JobClient client = new JobClient();
        JobConf conf = new JobConf(SpeciesIterDriver2.class);
        conf.setJobName("Species Iter - " + iterationCount);

        // This property is set to generate 5 reducer tasks
        conf.setNumReduceTasks(5);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        //output-iterator-0 contains the input data
        FileInputFormat.setInputPaths(conf, new Path("output-iterator-" + iterationCount));
        iterationCount++;
        FileOutputFormat.setOutputPath(conf, new Path("output-iterator-" + iterationCount));

        conf.setMapperClass(SpeciesIterMapper2.class);
        conf.setReducerClass(SpeciesIterReducer2.class);
        conf.setCombinerClass(SpeciesIterReducer2.class);

        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

From source file:ivory.core.index.MergeGlobalStatsAcrossIndexSegments.java

License:Apache License

public int runTool() throws Exception {

    JobConf conf = new JobConf(getConf(), MergeGlobalStatsAcrossIndexSegments.class);
    FileSystem fs = FileSystem.get(conf);

    String collectionName = conf.get("Ivory.CollectionName");
    String indexPaths = conf.get("Ivory.IndexPaths");
    String dataOutputPath = conf.get("Ivory.DataOutputPath");
    int dfThreshold = conf.getInt("Ivory.DfThreshold", 0);

    // first, compute size of global term space
    Path tmpPaths = new Path("/tmp/index-paths.txt");

    FSDataOutputStream out = fs.create(tmpPaths, true);
    for (String s : indexPaths.split(",")) {
        out.write((s + "\n").getBytes());
    }
    out.close();

    LOG.info("Job: ComputeNumberOfTermsAcrossIndexSegments");
    conf.setJobName("ComputeNumberOfTermsAcrossIndexSegments:" + collectionName);

    FileInputFormat.addInputPath(conf, tmpPaths);

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(NLineInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(PairOfIntLong.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    long startTime = System.currentTimeMillis();
    RunningJob job = JobClient.runJob(conf);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    Counters counters = job.getCounters();

    long totalNumTerms = counters.findCounter("org.apache.hadoop.mapred.Task$Counter", 6, "REDUCE_INPUT_GROUPS")
            .getCounter();

    LOG.info("total number of terms in global dictionary = " + totalNumTerms);

    // now build the dictionary
    fs.delete(new Path(dataOutputPath), true);

    conf = new JobConf(getConf(), MergeGlobalStatsAcrossIndexSegments.class);

    LOG.info("Job: MergeGlobalStatsAcrossIndexSegments");
    conf.setJobName("MergeGlobalStatsAcrossIndexSegments:" + collectionName);

    FileInputFormat.addInputPath(conf, tmpPaths);

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(NLineInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(PairOfIntLong.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    conf.setLong("Ivory.IndexNumberOfTerms", totalNumTerms);

    startTime = System.currentTimeMillis();
    job = JobClient.runJob(conf);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // compute the number of docs, collection length, and average doc length
    long collectionLength = 0;
    int docCount = 0;
    for (String index : indexPaths.split(",")) {
        LOG.info("reading stats for " + index);

        RetrievalEnvironment env = new RetrievalEnvironment(index, fs);

        long l = env.readCollectionLength();
        int n = env.readCollectionDocumentCount();

        LOG.info(" - CollectionLength: " + l);
        LOG.info(" - CollectionDocumentCount: " + n);

        collectionLength += l;
        docCount += n;
    }

    float avgdl = (float) collectionLength / docCount;

    LOG.info("all index segments: ");
    LOG.info(" - CollectionLength: " + collectionLength);
    LOG.info(" - CollectionDocumentCount: " + docCount);
    LOG.info(" - AverageDocumentLenght: " + avgdl);

    RetrievalEnvironment env = new RetrievalEnvironment(dataOutputPath, fs);

    env.writeCollectionAverageDocumentLength(avgdl);
    env.writeCollectionLength(collectionLength);
    env.writeCollectionDocumentCount(docCount);

    return 0;
}