Example usage for org.apache.hadoop.mapred JobConf setOutputFormat

List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf setOutputFormat.

Prototype

public void setOutputFormat(Class<? extends OutputFormat> theClass) 

Source Link

Document

Set the OutputFormat implementation for the map-reduce job.

Usage

From source file:ivory.preprocess.BuildIntDocVectorsForwardIndex.java

License:Apache License

/**
 * Configures and runs the "BuildIntDocVectorsForwardIndex" MapReduce job over the
 * int doc vectors directory of the index.
 *
 * <p>Reads {@code Ivory.IndexPath}, {@code Ivory.NumMapTasks} and
 * {@code Ivory.BuildWeighted} from the job configuration; the last one selects
 * between the weighted and unweighted int doc vector locations. The job is not
 * run when the int doc vectors are missing or the forward index already exists.
 *
 * @return always 0, including when the job is skipped
 * @throws Exception propagated from file system access or from running the job
 */
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), BuildIntDocVectorsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    int mapTasks = conf.getInt("Ivory.NumMapTasks", 0);
    String collectionName = env.readCollectionName();
    boolean buildWeighted = conf.getBoolean("Ivory.BuildWeighted", false);

    sLogger.info("Tool: BuildIntDocVectorsIndex");
    sLogger.info(" - IndexPath: " + indexPath);
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info(" - BuildWeighted: " + buildWeighted);
    sLogger.info(" - NumMapTasks: " + mapTasks);

    String intDocVectorsPath;
    String forwardIndexPath;
    if (buildWeighted) {
        intDocVectorsPath = env.getWeightedIntDocVectorsDirectory();
        forwardIndexPath = env.getWeightedIntDocVectorsForwardIndex();
    } else {
        intDocVectorsPath = env.getIntDocVectorsDirectory();
        forwardIndexPath = env.getIntDocVectorsForwardIndex();
    }

    // Built once and reused both for the existence check and as the job input.
    Path inputPath = new Path(intDocVectorsPath);
    if (!fs.exists(inputPath)) {
        // A missing input is a failure, so report it at ERROR level; the return
        // value stays 0 because callers may rely on it.
        sLogger.error("Error: IntDocVectors don't exist!");
        return 0;
    }

    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("IntDocVectorIndex already exists: skipping!");
        return 0;
    }

    conf.setJobName("BuildIntDocVectorsForwardIndex:" + collectionName);

    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    // No job-level output is declared.
    // NOTE(review): presumably the single reducer writes the forward index itself; confirm in MyReducer.
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}

From source file:ivory.preprocess.BuildTermDocVectors.java

License:Apache License

/**
 * Runs the map-only "BuildTermDocVectors" job for a collection and then writes
 * the doc-length data.
 *
 * <p>Configuration keys read: {@code Ivory.IndexPath}, {@code Ivory.NumMapTasks},
 * {@code Ivory.CollectionName}, {@code Ivory.CollectionPath} (a single path or a
 * comma-separated list), {@code Ivory.InputFormat}, {@code Ivory.Tokenizer} and
 * {@code Ivory.DocnoMappingClass}.
 *
 * <p>The term doc vector job is skipped when its output directory already
 * exists; the doc-length step is skipped when the doc-length data already exists.
 *
 * @return always 0, including when the docno mapping file is missing
 * @throws Exception propagated from file system access, from loading the input
 *         format class by name, or from running the job
 */
public int runTool() throws Exception {
    // Create a new JobConf, inheriting from the configuration of this PowerTool.
    JobConf conf = new JobConf(getConf(), BuildTermDocVectors.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get("Ivory.IndexPath");
    int mapTasks = conf.getInt("Ivory.NumMapTasks", 0);

    String collectionName = conf.get("Ivory.CollectionName");
    String collectionPath = conf.get("Ivory.CollectionPath");
    String inputFormat = conf.get("Ivory.InputFormat");
    String tokenizer = conf.get("Ivory.Tokenizer");
    String mappingClass = conf.get("Ivory.DocnoMappingClass");

    sLogger.info("PowerTool: BuildTermDocVectors");
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info(" - CollectionPath: " + collectionPath);
    sLogger.info(" - InputFormat: " + inputFormat);
    sLogger.info(" - Tokenizer: " + tokenizer);
    sLogger.info(" - DocnoMappingClass: " + mappingClass);
    sLogger.info(" - NumMapTasks: " + mapTasks);
    sLogger.info(" - NumReduceTasks: " + 0);

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    Path mappingFile = env.getDocnoMappingData();

    if (!fs.exists(mappingFile)) {
        sLogger.error("Error, docno mapping data file " + mappingFile + " doesn't exist!");
        return 0;
    }

    DistributedCache.addCacheFile(mappingFile.toUri(), conf);

    conf.setJobName("BuildTermDocVectors:" + collectionName);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(0);

    // The collection may be given as one path or as a comma-separated list.
    if (collectionPath.indexOf(",") == -1) {
        FileInputFormat.setInputPaths(conf, new Path(collectionPath));
        sLogger.info("Adding input path " + collectionPath);
    } else {
        String[] paths = collectionPath.split(",");
        for (String p : paths) {
            FileInputFormat.addInputPath(conf, new Path(p));
            sLogger.info("Adding input path " + p);
        }
    }

    Path outputPath = new Path(env.getTermDocVectorsDirectory());
    if (fs.exists(outputPath)) {
        sLogger.info("TermDocVectors already exist: Skipping!");
    } else {
        // Record the collection settings in the index before running the job.
        env.writeCollectionName(collectionName);
        env.writeCollectionPath(collectionPath);
        env.writeInputFormat(inputFormat);
        env.writeDocnoMappingClass(mappingClass);
        env.writeTokenizerClass(tokenizer);

        conf.set("mapred.child.java.opts", "-Xmx2048m");
        conf.setInt("mapred.task.timeout", 60000000);

        FileOutputFormat.setOutputPath(conf, outputPath);

        // The input format is only known by name at runtime, so the compiler
        // cannot check this cast; the suppression is limited to this declaration.
        @SuppressWarnings("unchecked")
        Class<? extends InputFormat> inputFormatClass =
                (Class<? extends InputFormat>) Class.forName(inputFormat);
        conf.setInputFormat(inputFormatClass);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.RECORD);

        conf.setMapOutputKeyClass(IntWritable.class);
        conf.setMapOutputValueClass(LazyTermDocVector.class);
        conf.setOutputKeyClass(IntWritable.class);
        conf.setOutputValueClass(LazyTermDocVector.class);

        conf.setMapperClass(MyMapper.class);

        long jobStartTime = System.currentTimeMillis();
        RunningJob job = JobClient.runJob(conf);
        sLogger.info("Job Finished in " + (System.currentTimeMillis() - jobStartTime) / 1000.0 + " seconds");

        Counters counters = job.getCounters();

        // Write out the number of documents counted by the job.
        int jobDocCount = (int) counters.findCounter(Docs.Total).getCounter();
        env.writeCollectionDocumentCount(jobDocCount);
    }

    if (fs.exists(env.getDoclengthsData())) {
        sLogger.info("DocLength data exists: Skipping!");
        return 0;
    }

    // Read the count back from the index so this step also works when the
    // term doc vector job above was skipped.
    int collectionDocCount = env.readCollectionDocumentCount();
    long startTime = System.currentTimeMillis();
    writeDoclengthsData(collectionDocCount);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}

From source file:ivory.preprocess.BuildTermDocVectors.java

License:Apache License

/**
 * Runs the single-mapper "DocLengthTable" job and records the docno offset and
 * the average document length in the index.
 *
 * <p>The job receives the document count, the doc-lengths input directory and
 * the doc-length data file location through configuration properties.
 *
 * @param collectionDocCount number of documents in the collection; passed to the
 *        job and used as the divisor for the average document length
 * @throws IOException propagated from file system access or from running the job
 */
private void writeDoclengthsData(int collectionDocCount) throws IOException {
    // NOTE(review): the jar is located via GetTermCount.class, not the enclosing
    // class; looks like a copy-paste leftover. Confirm both ship in the same jar.
    JobConf jobConf = new JobConf(getConf(), GetTermCount.class);

    String indexPath = jobConf.get("Ivory.IndexPath");
    String collectionName = jobConf.get("Ivory.CollectionName");
    int docnoOffset = jobConf.getInt("Ivory.DocnoOffset", 0);

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, FileSystem.get(jobConf));

    Path docLengthsFile = env.getDoclengthsData();
    Path docLengthsDir = env.getDoclengthsDirectory();

    sLogger.info("Writing doc length data to " + docLengthsFile + "...");

    jobConf.setJobName("DocLengthTable:" + collectionName);

    // One map task, no reducers, and no speculative duplicates of that task.
    jobConf.setNumMapTasks(1);
    jobConf.setNumReduceTasks(0);
    jobConf.setSpeculativeExecution(false);

    // Parameters handed to the mapper through the configuration.
    jobConf.setInt("Ivory.CollectionDocumentCount", collectionDocCount);
    jobConf.set("InputPath", docLengthsDir.toString());
    jobConf.set("DocLengthDataFile", docLengthsFile.toString());
    jobConf.set("mapred.child.java.opts", "-Xmx4096m");

    jobConf.setMapperClass(DocLengthDataWriterMapper.class);
    jobConf.setInputFormat(NullInputFormat.class);
    jobConf.setOutputFormat(NullOutputFormat.class);

    RunningJob finishedJob = JobClient.runJob(jobConf);

    env.writeDocnoOffset(docnoOffset);

    long sumOfDocLengths = finishedJob.getCounters()
            .findCounter(DocLengths.SumOfDocLengths)
            .getCounter();

    // Float division: a zero document count yields NaN or Infinity, not an exception.
    float averageDocLength = (float) sumOfDocLengths / collectionDocCount;
    env.writeCollectionAverageDocumentLength(averageDocLength);
}

From source file:ivory.preprocess.BuildTermDocVectorsForwardIndex.java

License:Apache License

/**
 * Configures and runs the "BuildTermDocVectorsForwardIndex" MapReduce job over
 * the term doc vectors directory of the index.
 *
 * <p>Reads {@code Ivory.IndexPath} and {@code Ivory.NumMapTasks} from the job
 * configuration. The job is not run when the term doc vectors are missing or
 * the forward index already exists.
 *
 * @return always 0, including when the job is skipped
 * @throws Exception propagated from file system access or from running the job
 */
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), BuildTermDocVectorsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    int mapTasks = conf.getInt("Ivory.NumMapTasks", 0);
    String collectionName = env.readCollectionName();

    sLogger.info("Tool: BuildTermDocVectorsIndex");
    sLogger.info(" - IndexPath: " + indexPath);
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info(" - NumMapTasks: " + mapTasks);

    // Built once and reused both for the existence check and as the job input.
    Path inputPath = new Path(env.getTermDocVectorsDirectory());
    if (!fs.exists(inputPath)) {
        // A missing input is a failure, so report it at ERROR level; the return
        // value stays 0 because callers may rely on it.
        sLogger.error("Error: TermDocVectors don't exist!");
        return 0;
    }

    if (fs.exists(new Path(env.getTermDocVectorsForwardIndex()))) {
        sLogger.info("TermDocVectorIndex already exists: skipping!");
        return 0;
    }

    conf.setJobName("BuildTermDocVectorsForwardIndex:" + collectionName);

    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    // No job-level output is declared.
    // NOTE(review): presumably the single reducer writes the forward index itself; confirm in MyReducer.
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}

From source file:ivory.preprocess.BuildTermIdMap.java

License:Apache License

/**
 * Configures and runs the "BuildTermIdMap" MapReduce job over the term df/cf
 * directory of the index.
 *
 * <p>Reads {@code Ivory.IndexPath}, {@code Ivory.CollectionName},
 * {@code Ivory.NumMapTasks} and {@code Ivory.MinSplitSize} from the job
 * configuration. The job is not run when the index path is missing or when any
 * of the term / term-id data files already exists. The job's declared output
 * goes to the index temp directory, which is deleted before and after the run.
 *
 * @return always 0, including when the job is skipped
 * @throws Exception propagated from file system access or from running the job
 */
public int runTool() throws Exception {
    // Create a new JobConf, inheriting from the configuration of this PowerTool.
    JobConf conf = new JobConf(getConf(), BuildTermIdMap.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get("Ivory.IndexPath");
    String collectionName = conf.get("Ivory.CollectionName");

    int mapTasks = conf.getInt("Ivory.NumMapTasks", 0);
    int reduceTasks = 1;
    int minSplitSize = conf.getInt("Ivory.MinSplitSize", 0);

    sLogger.info("PowerTool: BuildTermIdMap");
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info(" - IndexPath: " + indexPath);
    sLogger.info(" - NumMapTasks: " + mapTasks);
    sLogger.info(" - NumReduceTasks: " + reduceTasks);

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    if (!fs.exists(new Path(indexPath))) {
        sLogger.error("index path doesn't existing: skipping!");
        return 0;
    }

    Path termsFilePath = new Path(env.getIndexTermsData());
    Path termIDsFilePath = new Path(env.getIndexTermIdsData());
    Path idToTermFilePath = new Path(env.getIndexTermIdMappingData());
    Path dfByTermFilePath = new Path(env.getDfByTermData());
    Path cfByTermFilePath = new Path(env.getCfByTermData());
    Path dfByIntFilePath = new Path(env.getDfByIntData());
    Path cfByIntFilePath = new Path(env.getCfByIntData());

    // Any one of the output files already being present is enough to skip.
    if (fs.exists(termsFilePath) || fs.exists(termIDsFilePath) || fs.exists(idToTermFilePath)
            || fs.exists(dfByTermFilePath) || fs.exists(cfByTermFilePath) || fs.exists(dfByIntFilePath)
            || fs.exists(cfByIntFilePath)) {
        sLogger.info("term and term id data exist: skipping!");
        return 0;
    }

    // Clear any leftover temp output so the job's output path does not exist yet.
    Path tmpPath = new Path(env.getTempDirectory());
    fs.delete(tmpPath, true);

    conf.setJobName("BuildTermIdMap:" + collectionName);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    conf.setInt("Ivory.CollectionTermCount", (int) env.readCollectionTermCount());
    conf.setInt("mapred.min.split.size", minSplitSize);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    FileInputFormat.setInputPaths(conf, new Path(env.getTermDfCfDirectory()));
    FileOutputFormat.setOutputPath(conf, tmpPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(PairOfIntLong.class);
    conf.setOutputKeyClass(Text.class);

    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    // The RunningJob handle is not needed here, so it is not kept.
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // The declared job output is discarded.
    // NOTE(review): presumably MyReducer writes the term / term-id files directly; confirm.
    fs.delete(tmpPath, true);

    return 0;
}

From source file:ivory.preprocess.GetTermCount.java

License:Apache License

/**
 * Configures and runs the "GetTermCount" MapReduce job over the term doc
 * vectors, then records the collection term count and collection length taken
 * from the job counters.
 *
 * <p>Reads the index path, the number of map and reduce tasks, and the min/max
 * df settings from the job configuration via keys in {@code Constants}. The job
 * is not run when the index path is missing or when the term df/cf output
 * directory already exists.
 *
 * @return always 0, including when the job is skipped
 * @throws Exception propagated from file system access or from running the job
 */
public int runTool() throws Exception {
    // Create a new JobConf, inheriting from the configuration of this PowerTool.
    JobConf conf = new JobConf(getConf(), GetTermCount.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get(Constants.IndexPath);
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    // Check the index path before asking the environment to read anything,
    // so the guard is reached even when the index is missing.
    if (!fs.exists(new Path(indexPath))) {
        sLogger.error("index path doesn't existing: skipping!");
        return 0;
    }

    int mapTasks = conf.getInt(Constants.NumMapTasks, 0);
    int reduceTasks = conf.getInt(Constants.NumReduceTasks, 0);

    String collectionName = env.readCollectionName();
    String termDocVectorsPath = env.getTermDocVectorsDirectory();
    String termDfCfPath = env.getTermDfCfDirectory();

    sLogger.info("PowerTool: GetTermCount");
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info(" - NumMapTasks: " + mapTasks);
    sLogger.info(" - NumReduceTasks: " + reduceTasks);
    sLogger.info(" - MinDf: " + conf.getInt(Constants.MinDf, 0));
    sLogger.info(" - MaxDf: " + conf.getInt(Constants.MaxDf, Integer.MAX_VALUE));

    Path outputPath = new Path(termDfCfPath);
    if (fs.exists(outputPath)) {
        // Existing output is a normal skip, not a failure, so log it at INFO.
        sLogger.info("TermDfCf directory exist: skipping!");
        return 0;
    }

    conf.setJobName("GetTermCount:" + collectionName);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    FileInputFormat.setInputPaths(conf, new Path(termDocVectorsPath));
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(PairOfIntLong.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(PairOfIntLong.class);

    conf.setMapperClass(MyMapper.class);
    conf.setCombinerClass(MyCombiner.class);
    conf.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    RunningJob job = JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    Counters counters = job.getCounters();

    // Write out the number of terms reported by the job.
    // NOTE: this value is not the same as the number of postings, because
    // postings for non-English terms are discarded, or as a result of the df cut.
    // NOTE(review): the counter is a long narrowed to int; confirm the term
    // count cannot exceed Integer.MAX_VALUE.
    int collectionTermCount = (int) counters.findCounter(Statistics.Terms).getCounter();
    env.writeCollectionTermCount(collectionTermCount);

    long collectionLength = counters.findCounter(Statistics.SumOfDocLengths).getCounter();
    env.writeCollectionLength(collectionLength);
    return 0;
}

From source file:ivory.ptc.AnchorTextInvertedIndex.java

License:Apache License

/**
 * Configures and runs the "BuildAnchorTextInvertedIndex" MapReduce job.
 *
 * <p>Reads {@code Ivory.InputPath}, {@code Ivory.OutputPath},
 * {@code Ivory.NumMapTasks} (default 1), {@code Ivory.NumReduceTasks}
 * (default 100) and {@code Ivory.WeightingSchemeParameters} from the job
 * configuration. Each weighting scheme parameter is added to the distributed
 * cache as a URI. Any existing output path is deleted before the job starts.
 *
 * @return always 0
 * @throws Exception propagated from URI parsing, file system access or from
 *         running the job
 */
@Override
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), AnchorTextInvertedIndex.class);
    FileSystem fs = FileSystem.get(conf);
    String inPath = conf.get("Ivory.InputPath");
    String outPath = conf.get("Ivory.OutputPath");
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = conf.getInt("Ivory.NumMapTasks", 1);
    int reduceTasks = conf.getInt("Ivory.NumReduceTasks", 100);
    String weightingSchemeParameters = conf.get("Ivory.WeightingSchemeParameters");

    LOG.info("BuildAnchorTextInvertedIndex");
    LOG.info(" - input path: " + inPath);
    LOG.info(" - output path: " + outPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - weighting scheme: " + conf.get("Ivory.WeightingScheme"));
    LOG.info(" - weighting scheme parameters: " + weightingSchemeParameters);

    // NOTE(review): this throws NullPointerException when
    // Ivory.WeightingSchemeParameters is unset; confirm the key is mandatory.
    String[] params = weightingSchemeParameters.split(PARAMETER_SEPARATER);
    for (String param : params) {
        DistributedCache.addCacheFile(new URI(param), conf);
    }

    conf.setJobName("BuildAnchorTextInvertedIndex");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx4096m");
    conf.setInt("mapred.task.timeout", 60000000);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(AnchorTextTarget.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Remove stale output recursively; the single-argument delete(Path) is
    // deprecated in favour of this explicit form.
    fs.delete(outputPath, true);
    JobClient.runJob(conf);
    return 0;
}

From source file:ivory.ptc.driver.XMLFormatJudgments.java

License:Apache License

/**
 * Configures and runs the "FormatPseudoJudgments" MapReduce job, which reads a
 * sequence file and writes text output using one map task and one reduce task.
 *
 * @param args exactly three arguments: the input path, the output path, and the
 *        URI of the docno mapping file (added to the distributed cache)
 * @return -1 after printing usage when the argument count is wrong, otherwise 0
 * @throws Exception propagated from URI parsing, file system access or from
 *         running the job
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }
    JobConf conf = new JobConf(getConf(), XMLFormatJudgments.class);
    // Command line arguments
    String inPath = args[0];
    String outPath = args[1];
    String docnoMapping = args[2];
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = 1;
    int reduceTasks = 1;

    conf.setJobName("FormatPseudoJudgments");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    DistributedCache.addCacheFile(new URI(docnoMapping), conf);
    // Remove stale output recursively; the single-argument delete(Path) is
    // deprecated in favour of this explicit form.
    FileSystem.get(conf).delete(outputPath, true);
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapOutputKeyClass(PseudoQuery.class);
    conf.setMapOutputValueClass(PseudoJudgments.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);
    return 0;
}

From source file:ivory.ptc.driver.XMLFormatQueries.java

License:Apache License

/**
 * Configures and runs the "FormatPseudoQueries" MapReduce job, which reads a
 * sequence file and writes text output using one map task and one reduce task.
 *
 * @param args exactly two arguments: the input path and the output path
 * @return -1 after printing usage when the argument count is wrong, otherwise 0
 * @throws Exception propagated from file system access or from running the job
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf(), XMLFormatQueries.class);
    // Command line arguments
    String inPath = args[0];
    String outPath = args[1];
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = 1;
    int reduceTasks = 1;

    conf.setJobName("FormatPseudoQueries");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    // Remove stale output recursively; the single-argument delete(Path) is
    // deprecated in favour of this explicit form.
    FileSystem.get(conf).delete(outputPath, true);
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapOutputKeyClass(PseudoQuery.class);
    conf.setMapOutputValueClass(PseudoJudgments.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(MyReducer.class);
    JobClient.runJob(conf);
    return 0;
}

From source file:ivory.ptc.SortedPseudoTestCollection.java

License:Apache License

/**
 * Configures and runs the "SortedPTC" MapReduce job, which reads and writes
 * sequence files keyed by {@code PseudoQuery} with {@code PseudoJudgments}
 * values, using one map task and one reduce task.
 *
 * <p>Reads {@code Ivory.InputPath} and {@code Ivory.OutputPath} from the job
 * configuration and logs the judgment extractor, sampling criterion and query
 * scorer settings. Any existing output path is deleted before the job starts.
 *
 * @return always 0
 * @throws Exception propagated from file system access or from running the job
 */
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), SortedPseudoTestCollection.class);
    FileSystem fs = FileSystem.get(conf);
    String inPath = conf.get("Ivory.InputPath");
    String outPath = conf.get("Ivory.OutputPath");
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = 1;
    int reduceTasks = 1;

    LOG.info("SortedPseudoTestCollection");
    // Log the values already read above instead of looking them up again.
    LOG.info(" - Input path: " + inPath);
    LOG.info(" - Output path: " + outPath);
    LOG.info(" - JudgmentExtractor: " + conf.get("Ivory.JudgmentExtractor"));
    LOG.info(" - JudgmentExtractorParameters: " + conf.get("Ivory.JudgmentExtractorParameters"));
    LOG.info(" - SamplingCriterion: " + conf.get("Ivory.SamplingCriterion"));
    LOG.info(" - SamplingCriterionParameters: " + conf.get("Ivory.SamplingCriterionParameters"));
    LOG.info(" - QueryScorer: " + conf.get("Ivory.QueryScorer"));

    conf.setJobName("SortedPTC");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx4096m");

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapOutputKeyClass(PseudoQuery.class);
    conf.setMapOutputValueClass(PseudoJudgments.class);
    conf.setOutputKeyClass(PseudoQuery.class);
    conf.setOutputValueClass(PseudoJudgments.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Remove stale output recursively; the single-argument delete(Path) is
    // deprecated in favour of this explicit form.
    fs.delete(outputPath, true);
    JobClient.runJob(conf);
    return 0;
}