Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.Job#setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the map output data.
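
Before the project examples below, here is a minimal, self-contained driver sketch showing where setMapOutputKeyClass typically sits. WordLengthDriver, WordLengthMapper, and MaxLengthReducer are hypothetical classes written for illustration only (they do not come from any of the projects listed under Usage). Setting the map output key/value classes explicitly is needed whenever they differ from the final output classes configured via setOutputKeyClass/setOutputValueClass.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class WordLengthDriver {

    // Hypothetical mapper: emits (word, word length) pairs.
    public static class WordLengthMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String word : value.toString().split("\\s+")) {
                if (!word.isEmpty()) {
                    context.write(new Text(word), new IntWritable(word.length()));
                }
            }
        }
    }

    // Hypothetical reducer: emits the maximum length seen for each word, as text.
    public static class MaxLengthReducer extends Reducer<Text, IntWritable, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int max = 0;
            for (IntWritable v : values) {
                max = Math.max(max, v.get());
            }
            context.write(key, new Text("maxlen=" + max));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word-length");
        job.setJarByClass(WordLengthDriver.class);

        job.setMapperClass(WordLengthMapper.class);
        job.setReducerClass(MaxLengthReducer.class);

        // Types emitted by the mapper; these must match the reducer's input types.
        // They differ from the final output types, which is why they are set explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Final types written by the reducer.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}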

Usage

From source file:com.mozilla.socorro.hadoop.CrashCountToHbase.java

License:LGPL

/**
 * @param args command line arguments
 * @return the configured Job
 * @throws IOException
 */
public Job initJob(String[] args) throws IOException {
    Job job = new Job(getConf());
    job.setJobName(NAME);
    job.setJarByClass(CrashCountToHbase.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));

    job.setMapperClass(CrashCountToHBaseMapper.class);
    job.setReducerClass(CrashCountToHBaseReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job;
}

From source file:com.mozilla.socorro.hadoop.DumpSizeTrends.java

License:LGPL

/**
 * @param args command line arguments
 * @return the configured Job
 * @throws IOException
 * @throws ParseException
 */
public Job initJob(String[] args) throws IOException, ParseException {
    conf.set("mapred.child.java.opts", "-Xmx1024m");
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);

    Map<byte[], byte[]> columns = new HashMap<byte[], byte[]>();
    columns.put(RAW_DATA_BYTES, DUMP_BYTES);
    columns.put(META_DATA_BYTES, JSON_BYTES);
    columns.put(PROCESSED_DATA_BYTES, JSON_BYTES);
    Job job = CrashReportJob.initJob(NAME, getConf(), DumpSizeTrends.class, DumpSizeTrendsMapper.class, null,
            DumpSizeTrendsReducer.class, columns, Text.class, Text.class, new Path(args[0]));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    return job;
}

From source file:com.msd.gin.halyard.tools.HalyardBulkLoad.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkload [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D"
                + SKIP_INVALID_PROPERTY + "=true] [-D" + SPLIT_BITS_PROPERTY + "=8] [-D"
                + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY
                + "=true] <input_path(s)> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class,
            RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000L);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + args[1] + " -> " + args[2]);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    Map<String, Integer> contextSplitsMap = new HashMap<>();
    for (Map.Entry<String, String> me : getConf().getValByRegex(CONTEXT_SPLIT_REGEXP).entrySet()) {
        int splits = Integer.parseInt(me.getKey().substring(me.getKey().lastIndexOf('.') + 1));
        StringTokenizer stk = new StringTokenizer(me.getValue(), ",");
        while (stk.hasMoreTokens()) {
            contextSplitsMap.put(stk.nextToken(), splits);
        }
    }
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true,
            getConf().getInt(SPLIT_BITS_PROPERTY, 3), contextSplitsMap)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
                hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}

From source file:com.msd.gin.halyard.tools.HalyardBulkUpdate.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkupdate [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D"
                + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY
                + "=true] <input_file_with_SPARQL_queries> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000L);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);
    getConf().setStrings(TABLE_NAME_PROPERTY, args[2]);
    Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + args[1] + " -> " + args[2]);
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setJarByClass(HalyardBulkUpdate.class);
    job.setMapperClass(SPARQLMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], false, 0, null)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
                hTable.getRegionLocator());
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Update Completed..");
            return 0;
        }
    }
    return -1;
}

From source file:com.msd.gin.halyard.tools.HalyardHiveLoad.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: hiveload -D" + RDF_MIME_TYPE_PROPERTY + "='application/ld+json' [-D"
                + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + HIVE_DATA_COLUMN_INDEX_PROPERTY + "=3] [-D"
                + BASE_URI_PROPERTY + "='http://my_base_uri/'] [-D" + HalyardBulkLoad.SPLIT_BITS_PROPERTY
                + "=8] [-D" + HalyardBulkLoad.DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D"
                + HalyardBulkLoad.OVERRIDE_CONTEXT_PROPERTY
                + "=true] <hive_table_name> <output_path> <hbase_table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class,
            RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000L);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);
    Job job = Job.getInstance(getConf(), "HalyardHiveLoad -> " + args[1] + " -> " + args[2]);
    int i = args[0].indexOf('.');
    HCatInputFormat.setInput(job, i > 0 ? args[0].substring(0, i) : null, args[0].substring(i + 1));
    job.setJarByClass(HalyardHiveLoad.class);
    job.setMapperClass(HiveMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    Map<String, Integer> contextSplitsMap = new HashMap<>();
    for (Map.Entry<String, String> me : getConf().getValByRegex(HalyardBulkLoad.CONTEXT_SPLIT_REGEXP)
            .entrySet()) {
        int splits = Integer.parseInt(me.getKey().substring(me.getKey().lastIndexOf('.') + 1));
        StringTokenizer stk = new StringTokenizer(me.getValue(), ",");
        while (stk.hasMoreTokens()) {
            contextSplitsMap.put(stk.nextToken(), splits);
        }
    }
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true,
            getConf().getInt(HalyardBulkLoad.SPLIT_BITS_PROPERTY, 3), contextSplitsMap)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
                hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}

From source file:com.msd.gin.halyard.tools.HalyardParallelExport.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(newOption("h", null, "Prints this help"));
    options.addOption(newOption("v", null, "Prints version"));
    options.addOption(newOption("s", "source_htable", "Source HBase table with Halyard RDF store"));
    options.addOption(newOption("q", "sparql_query",
            "SPARQL tuple or graph query with use of '" + PARALLEL_SPLIT_FUNCTION_URI + "' function"));
    options.addOption(newOption("t", "target_url",
            "file://<path>/<file_name>{0}.<ext> or hdfs://<path>/<file_name>{0}.<ext> or jdbc:<jdbc_connection>/<table_name>"));
    options.addOption(newOption("p", "property=value", "JDBC connection properties"));
    options.addOption(newOption("l", "driver_classpath", "JDBC driver classpath delimited by ':'"));
    options.addOption(newOption("c", "driver_class", "JDBC driver class name"));
    try {
        CommandLine cmd = new PosixParser().parse(options, args);
        if (args.length == 0 || cmd.hasOption('h')) {
            printHelp(options);
            return -1;
        }
        if (cmd.hasOption('v')) {
            Properties p = new Properties();
            try (InputStream in = HalyardExport.class
                    .getResourceAsStream("/META-INF/maven/com.msd.gin.halyard/hbasesail/pom.properties")) {
                if (in != null)
                    p.load(in);
            }
            System.out.println("Halyard Parallel Export version " + p.getProperty("version", "unknown"));
            return 0;
        }
        if (!cmd.getArgList().isEmpty())
            throw new ExportException("Unknown arguments: " + cmd.getArgList().toString());
        for (char c : "sqt".toCharArray()) {
            if (!cmd.hasOption(c))
                throw new ExportException("Missing mandatory option: " + c);
        }
        for (char c : "sqtlc".toCharArray()) {
            String s[] = cmd.getOptionValues(c);
            if (s != null && s.length > 1)
                throw new ExportException("Multiple values for option: " + c);
        }
        String source = cmd.getOptionValue('s');
        String query = cmd.getOptionValue('q');
        if (!query.contains(PARALLEL_SPLIT_FUNCTION_NAME)) {
            throw new ExportException("Parallel export SPARQL query must contain '"
                    + PARALLEL_SPLIT_FUNCTION_URI + "' function.");
        }
        String target = cmd.getOptionValue('t');
        if ((target.startsWith("file:") || target.startsWith("hdfs:")) && !target.contains("{0}")) {
            throw new ExportException(
                    "Parallel export file target must contain '{0}' counter in the file path or name.");
        }
        getConf().set(SOURCE, source);
        getConf().set(QUERY, query);
        getConf().set(TARGET, target);
        String driver = cmd.getOptionValue('c');
        if (driver != null) {
            getConf().set(JDBC_DRIVER, driver);
        }
        String props[] = cmd.getOptionValues('p');
        if (props != null) {
            for (int i = 0; i < props.length; i++) {
                props[i] = Base64.encodeBase64String(props[i].getBytes(UTF8));
            }
            getConf().setStrings(JDBC_PROPERTIES, props);
        }
        TableMapReduceUtil.addDependencyJars(getConf(), HalyardExport.class, NTriplesUtil.class, Rio.class,
                AbstractRDFHandler.class, RDFFormat.class, RDFParser.class, HTable.class,
                HBaseConfiguration.class, AuthenticationProtos.class, Trace.class);
        HBaseConfiguration.addHbaseResources(getConf());
        Job job = Job.getInstance(getConf(), "HalyardParallelExport " + source + " -> " + target);
        String cp = cmd.getOptionValue('l');
        if (cp != null) {
            String jars[] = cp.split(":");
            for (int i = 0; i < jars.length; i++) {
                File f = new File(jars[i]);
                if (!f.isFile())
                    throw new ExportException("Invalid JDBC driver classpath element: " + jars[i]);
                job.addFileToClassPath(new Path(f.toURI()));
                jars[i] = f.getName();
            }
            job.getConfiguration().setStrings(JDBC_CLASSPATH, jars);
        }
        job.setJarByClass(HalyardParallelExport.class);
        job.setMaxMapAttempts(1);
        job.setMapperClass(ParallelExportMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Void.class);
        job.setNumReduceTasks(0);
        job.setInputFormatClass(IndexedInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            LOG.info("Parallel Export Completed..");
            return 0;
        }
        return -1;
    } catch (RuntimeException exp) {
        System.out.println(exp.getMessage());
        printHelp(options);
        throw exp;
    }

}

From source file:com.mvdb.platform.action.VersionMerge.java

License:Apache License

public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");
    ActionUtils.setUpInitFileProperty();
    //        LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
    //        StatusPrinter.print(lc);

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Also add lastMergedTimeStamp, mergeUptoTimestamp, and the passive db name (mv1 or mv2)
    if (otherArgs.length != 3) {
        System.err.println("Usage: versionmerge <customer-directory> <last-merged-dir-name> <last-copied-dir-name>");
        System.exit(2);
    }
    //Example: file:/home/umesh/.mvdb/etl/data/alpha
    //Example: hdfs://localhost:9000/data/alpha
    String customerDirectory = otherArgs[0];
    String lastMergedDirName = otherArgs[1];
    String lastCopiedDirName = otherArgs[2];

    org.apache.hadoop.conf.Configuration conf1 = new org.apache.hadoop.conf.Configuration();
    //conf1.addResource(new Path("/home/umesh/ops/hadoop-1.2.0/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf1);

    Path topPath = new Path(customerDirectory);

    //Clean scratch db
    Path passiveDbPath = new Path(topPath, "db/mv1");
    Path tempDbPath = new Path(topPath, "db/tmp-" + (int) (Math.random() * 100000));
    if (hdfsFileSystem.exists(tempDbPath)) {
        boolean success = hdfsFileSystem.delete(tempDbPath, true);
        if (success == false) {
            System.err.println(String.format("Unable to delete temp directory %s", tempDbPath.toString()));
            System.exit(1);
        }
    }
    // The last three parameters are hardcoded; the nulls must be replaced later after changing the input parameters.
    Path[] inputPaths = getInputPaths(hdfsFileSystem, topPath, lastMergedDirName, lastCopiedDirName, null);
    Set<String> tableNameSet = new HashSet<String>();
    for (Path path : inputPaths) {
        tableNameSet.add(path.getName());
    }

    Job job = new Job(conf, "versionmerge");
    job.setJarByClass(VersionMerge.class);
    job.setMapperClass(VersionMergeMapper.class);
    job.setReducerClass(VersionMergeReducer.class);
    job.setMapOutputKeyClass(MergeKey.class);
    job.setMapOutputValueClass(BytesWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    String lastDirName = null;
    if (inputPaths != null && inputPaths.length > 1) {
        lastDirName = inputPaths[(inputPaths.length) - 2].getParent().getName();
    }
    for (Path inputPath : inputPaths) {
        FileInputFormat.addInputPath(job, inputPath);
    }
    FileOutputFormat.setOutputPath(job, tempDbPath);

    for (String table : tableNameSet) {
        if (table.endsWith(".dat") == false) {
            continue;
        }
        table = table.replaceAll("-", "");
        table = table.replaceAll(".dat", "");
        MultipleOutputs.addNamedOutput(job, table, SequenceFileOutputFormat.class, Text.class,
                BytesWritable.class);
    }
    boolean success = job.waitForCompletion(true);
    System.out.println("Success:" + success);
    System.out.println(ManagementFactory.getRuntimeMXBean().getName());
    if (success && lastDirName != null) {
        ActionUtils.setConfigurationValue(new Path(customerDirectory).getName(),
                ConfigurationKeys.LAST_MERGE_TO_MVDB_DIRNAME, lastDirName);
    }
    //hdfsFileSystem.delete(passiveDbPath, true);
    //hdfsFileSystem.rename(tempDbPath, passiveDbPath);
    System.exit(success ? 0 : 1);
}

From source file:com.netflix.Aegisthus.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());

    job.setJarByClass(Aegisthus.class);
    CommandLine cl = getOptions(args);
    if (cl == null) {
        return 1;
    }
    job.setInputFormatClass(AegisthusInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(CassReducer.class);
    List<Path> paths = Lists.newArrayList();
    if (cl.hasOption(OPT_INPUT)) {
        for (String input : cl.getOptionValues(OPT_INPUT)) {
            paths.add(new Path(input));
        }
    }
    if (cl.hasOption(OPT_INPUTDIR)) {
        paths.addAll(getDataFiles(job.getConfiguration(), cl.getOptionValue(OPT_INPUTDIR)));
    }
    TextInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
    TextOutputFormat.setOutputPath(job, new Path(cl.getOptionValue(OPT_OUTPUT)));

    job.submit();
    System.out.println(job.getJobID());
    System.out.println(job.getTrackingURL());
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:com.pagerankcalculator.TwitterPageRank.java

public int sortPagerank(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#3 Sorting Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankSortingMapper.class);
    job.setReducerClass(PageRankSortingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(1);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setSortComparatorClass(DoubleSortDescComparator.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());

    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.phantom.hadoop.examples.BaileyBorweinPlouffe.java

License:Apache License

/** Create and setup a job */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}