Example usage for org.apache.hadoop.mapreduce Job setReduceSpeculativeExecution

List of usage examples for org.apache.hadoop.mapreduce Job setReduceSpeculativeExecution

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setReduceSpeculativeExecution.

Prototype

public void setReduceSpeculativeExecution(boolean speculativeExecution) 

Document

Turn speculative execution on or off for this job for reduce tasks.
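
A minimal sketch of how this method is typically called while configuring a job (the class name, job name, and configuration below are placeholders, not taken from the examples that follow):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class DisableReduceSpeculationSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // "example-job" is a placeholder job name.
        Job job = Job.getInstance(conf, "example-job");
        // Turn speculative execution off for reduce tasks only; map tasks are
        // unaffected. This is useful when reducers write to an external system
        // and duplicate speculative attempts could cause duplicate writes.
        job.setReduceSpeculativeExecution(false);
    }
}

Disabling reduce-side speculation is the recurring pattern in the output formats below, which write directly to databases, HBase, or Hive.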

Usage

From source file:com.abel.hwfs.custom.output.SetSizeDBOutputFormat.java

License:Apache License

private static MyDBConfiguration setOutput(Job job, String tableName) throws IOException {
    job.setOutputFormatClass(SetSizeDBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    MyDBConfiguration dbConf = new MyDBConfiguration(job.getConfiguration());

    dbConf.setOutputTableName(tableName);
    return dbConf;
}

From source file:com.cloudera.sqoop.shims.Apache22HadoopShim.java

License:Apache License

@Override
public void setJobReduceSpeculativeExecution(Job job, boolean isEnabled) {
    job.setReduceSpeculativeExecution(isEnabled);
}

From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java

License:Apache License

/**
 * Configures the job to write to an HBase table through this output format.
 * 
 * @param job
 *            the job to configure
 * @param tableName
 *            the output HBase table name
 * @return the HbaseConfiguration wrapping the job configuration
 */
public static HbaseConfiguration setOutput(Job job, String tableName) {
    job.setOutputFormatClass(HbaseOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    HbaseConfiguration dbConf = new HbaseConfiguration(job.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);
    dbConf.setOutputHBaseTableName(tableName);
    return dbConf;
}

From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java

License:Apache License

@Override
public void handle(Job conf) throws Exception {
    HbaseConfiguration hConf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);

    // Validate the output HBase table name
    String tableName = hConf.getOutputHBaseTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "HBase??<" + HbaseConfiguration.OUTPUT_TABLE + ">?.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the output field names and resolve the column family names
    String hbaseFieldNames = hConf.getOutputHBaseFieldNames();
    this.vParamTargetFamilyNames(hbaseFieldNames, hConf);
    hConf.setOutputHBaseFamilyNames(this.getHBaseFamilyNames(hbaseFieldNames));

    // Validate the row key rule
    String rowKeyRule = hConf.getOutputHBaseRowKeyRule();
    if (null == rowKeyRule || rowKeyRule.trim().length() <= 0) {
        String meg = "<" + HbaseConfiguration.OUTPUT_ROWKEY_RULE + ">";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the HFile maximum file size
    long hfileMaxfilesize = hConf.getOutputHBaseHFileMaxfilesize();
    if (hfileMaxfilesize <= 0) {
        String meg = "HFile<" + HbaseConfiguration.OUTPUT_HFILE_MAXFILESIZE + ">0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the memstore flush size
    long memstoreFlushSize = hConf.getOutputHBaseMemstoreFlushSize();
    if (memstoreFlushSize <= 0) {
        String meg = "memstore flushHDFS?<"
                + HbaseConfiguration.OUTPUT_MEMSTORE_FLUSHSIZE + ">??0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the column block size
    int colmunBlocksize = hConf.getOutputHBaseColmunBlocksize();
    if (colmunBlocksize <= 0) {
        String meg = "??<" + HbaseConfiguration.OUTPUT_COLMUN_BLOCKSIZE + ">0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the maximum number of column versions
    int colmunMaxversion = hConf.getOutputHBaseColmunMaxversion();
    if (colmunMaxversion <= 0) {
        String meg = "?<" + HbaseConfiguration.OUTPUT_COLMUN_MAXVERSION + ">0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the minimum number of column versions
    int colmunMinversion = hConf.getOutputHBaseColmunMinversion();
    if (colmunMinversion <= 0) {
        String meg = "??<" + HbaseConfiguration.OUTPUT_COLMUN_MINVERSION + ">0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the commit buffer length
    int commitBufferLength = hConf.getOutputHBaseBufferLength();
    if (commitBufferLength <= 0) {
        String meg = "????<" + HbaseConfiguration.OUTPUT_SET_COMMIT_BUFFERLENGTH
                + ">0";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the HBase WAL flag
    int walFlag = hConf.getOutputHBaseSetWalFlags();
    if (!(walFlag == -1 || (walFlag >= 0 && walFlag <= 4))) {
        String meg = "The WAL flag <" + HbaseConfiguration.OUTPUT_SET_WAL_FLAG
                + "> must be -1 or in the range [0-4].";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate that the output table exists
    if (!validateTable(hConf)) {
        String errorInfo = "HBase output table, validate Execption!";
        MRLog.error(LOG, errorInfo);
        throw new Exception(errorInfo);
    }

    conf.setOutputFormatClass(HbaseOutputFormat.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    conf.setReducerClass(DBReducer.class);

    // Print the table description
    printTableDesc(tableName, hConf.getConf());
}

From source file:com.ery.hadoop.mrddx.hive.HiveOutputFormat.java

License:Apache License

/**
 * Configures the job to write to a Hive table through this output format.
 * 
 * @param job the job to configure
 * @param tableName the output Hive table name
 */
public static void setOutput(Job job, String tableName) {
    job.setOutputFormatClass(HiveOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    HiveConfiguration dbConf = new HiveConfiguration(job.getConfiguration());
    dbConf.setOutputHiveTableName(tableName);
}

From source file:com.ery.hadoop.mrddx.hive.HiveOutputFormat.java

License:Apache License

@Override
public void handle(Job conf) throws Exception {
    // Validate the Hive output configuration
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());
    // Validate the row separator
    String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "<" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS + ">";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Validate the field separator
    String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "<" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS + ">";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    boolean para = hconf.getOutputHiveCompress();
    // Validate the compression codec (see HDFSUtils.CompressCodec)
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR]?<" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC
                + ">?.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Validate the MR output target path
    String outTargetpath = hconf.getOutputTargetFilePath();
    hconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG,
                "The MR output target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH + "> is not set.");
    }

    // Validate the Hive connection URL
    String hiveUrl = hconf.getOutPutHiveConfigUrl();
    if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive??<" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL
                + ">?.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Hive connection user name
    String hiveUser = hconf.getOutPutHiveConfigUser();
    if (null == hiveUser || hiveUser.trim().length() <= 0) {
        LOG.warn("[MR WARN]hive???<" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + ">.");
    }

    // Hive connection password
    String hivePwd = hconf.getOutPutHiveConfigPassword();
    if (null == hivePwd || hivePwd.trim().length() <= 0) {
        LOG.warn("[MR WARN]hive??<" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + ">.");
    }

    // Validate the output Hive table name
    String tableName = hconf.getOutputHiveTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive??<" + HiveConfiguration.OUTPUT_TABLE + ">?.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the partition fields
    String partitionField[] = hconf.getOutputHivePartitionField();
    if (null != partitionField && partitionField.length > 0) {
        // Output field names must be set when partition fields are used
        String[] outputFieldName = hconf.getOutputFieldNames();
        if (null == outputFieldName || outputFieldName.length <= 0) {
            String meg = "<" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + ">.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        for (int i = 0; i < partitionField.length; i++) {
            boolean isExist = false;
            for (String s : outputFieldName) {
                if (s.equals(partitionField[i])) {
                    isExist = true;
                    break;
                }
            }

            if (!isExist) {
                String meg = "" + partitionField[i] + "<"
                        + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD + ">?<"
                        + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + "";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }

        String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
        if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
            String meg = "<" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + ">.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
        if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
            String meg = "???<" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + ">.";
            MRLog.warn(LOG, meg);
        }

        long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
        if (orderOutputFileMaxCount == 0) {
            String meg = "?<" + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                    + ">0 -1(??).";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
    }

    // DDL statement to execute before writing
    String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
    if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
        LOG.warn("[MR WARN]hive?<" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + ">.");
    }

    try {
        executeDDLHQL(hconf);
        MRLog.info(LOG, "execute ddl hive sql success!");
    } catch (SQLException e) {
        MRLog.error(LOG, "execute ddl hive sql error!");
        e.printStackTrace();
    }

    conf.setReduceSpeculativeExecution(false);
    conf.setOutputFormatClass(HiveOutputFormat.class);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    if (null != partitionField && partitionField.length > 0) {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBPartitionReducer.class);
    } else {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBReducer.class);
    }
}

From source file:com.msd.gin.halyard.tools.HalyardBulkLoad.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkload [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D"
                + SKIP_INVALID_PROPERTY + "=true] [-D" + SPLIT_BITS_PROPERTY + "=8] [-D"
                + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY
                + "=true] <input_path(s)> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class,
            RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000l);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + args[1] + " -> " + args[2]);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    Map<String, Integer> contextSplitsMap = new HashMap<>();
    for (Map.Entry<String, String> me : getConf().getValByRegex(CONTEXT_SPLIT_REGEXP).entrySet()) {
        int splits = Integer.parseInt(me.getKey().substring(me.getKey().lastIndexOf('.') + 1));
        StringTokenizer stk = new StringTokenizer(me.getValue(), ",");
        while (stk.hasMoreTokens()) {
            contextSplitsMap.put(stk.nextToken(), splits);
        }
    }
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true,
            getConf().getInt(SPLIT_BITS_PROPERTY, 3), contextSplitsMap)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
                hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}

From source file:com.msd.gin.halyard.tools.HalyardBulkUpdate.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkupdate [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D"
                + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY
                + "=true] <input_file_with_SPARQL_queries> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000l);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);
    getConf().setStrings(TABLE_NAME_PROPERTY, args[2]);
    Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + args[1] + " -> " + args[2]);
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setJarByClass(HalyardBulkUpdate.class);
    job.setMapperClass(SPARQLMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], false, 0, null)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
                hTable.getRegionLocator());
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Update Completed..");
            return 0;
        }
    }
    return -1;
}

From source file:com.msd.gin.halyard.tools.HalyardHiveLoad.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: hiveload -D" + RDF_MIME_TYPE_PROPERTY + "='application/ld+json' [-D"
                + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + HIVE_DATA_COLUMN_INDEX_PROPERTY + "=3] [-D"
                + BASE_URI_PROPERTY + "='http://my_base_uri/'] [-D" + HalyardBulkLoad.SPLIT_BITS_PROPERTY
                + "=8] [-D" + HalyardBulkLoad.DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D"
                + HalyardBulkLoad.OVERRIDE_CONTEXT_PROPERTY
                + "=true] <hive_table_name> <output_path> <hbase_table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class,
            RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000l);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);
    Job job = Job.getInstance(getConf(), "HalyardHiveLoad -> " + args[1] + " -> " + args[2]);
    int i = args[0].indexOf('.');
    HCatInputFormat.setInput(job, i > 0 ? args[0].substring(0, i) : null, args[0].substring(i + 1));
    job.setJarByClass(HalyardHiveLoad.class);
    job.setMapperClass(HiveMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    Map<String, Integer> contextSplitsMap = new HashMap<>();
    for (Map.Entry<String, String> me : getConf().getValByRegex(HalyardBulkLoad.CONTEXT_SPLIT_REGEXP)
            .entrySet()) {
        int splits = Integer.parseInt(me.getKey().substring(me.getKey().lastIndexOf('.') + 1));
        StringTokenizer stk = new StringTokenizer(me.getValue(), ",");
        while (stk.hasMoreTokens()) {
            contextSplitsMap.put(stk.nextToken(), splits);
        }
    }
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true,
            getConf().getInt(HalyardBulkLoad.SPLIT_BITS_PROPERTY, 3), contextSplitsMap)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
                hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}

From source file:com.xiaomi.linden.hadoop.indexing.job.LindenJob.java

License:Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
    logger.info("input dir:" + dir);
    Path inputPath = new Path(StringUtils.unEscapeString(dir));
    Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
    String indexPath = conf.get(LindenJobConfig.INDEX_PATH);

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    if (fs.exists(new Path(indexPath))) {
        fs.delete(new Path(indexPath), true);
    }

    int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
    Shard[] shards = createShards(indexPath, numShards);

    Shard.setIndexShards(conf, shards);

    //empty trash;
    (new Trash(conf)).expunge();

    Job job = Job.getInstance(conf, "linden-hadoop-indexing");
    job.setJarByClass(LindenJob.class);
    job.setMapperClass(LindenMapper.class);
    job.setCombinerClass(LindenCombiner.class);
    job.setReducerClass(LindenReducer.class);
    job.setMapOutputKeyClass(Shard.class);
    job.setMapOutputValueClass(IntermediateForm.class);
    job.setOutputKeyClass(Shard.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(IndexUpdateOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    job.setNumReduceTasks(numShards);

    String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
    if (lindenSchemaFile == null) {
        throw new IOException("no schema file is found");
    }
    logger.info("Adding schema file: " + lindenSchemaFile);
    job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
    String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
    if (lindenPropertiesFile == null) {
        throw new IOException("no linden properties file is found");
    }
    logger.info("Adding linden properties file: " + lindenPropertiesFile);
    job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    Path[] inputs = FileInputFormat.getInputPaths(job);
    StringBuilder buffer = new StringBuilder(inputs[0].toString());
    for (int i = 1; i < inputs.length; i++) {
        buffer.append(",");
        buffer.append(inputs[i].toString());
    }
    logger.info("mapreduce.input.dir = " + buffer.toString());
    logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
    logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
    logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
    logger.info("mapreduce.input.format.class = " + job.getInputFormatClass());
    logger.info("mapreduce.output.format.class = " + job.getOutputFormatClass());
    logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed");
    }
    return 0;
}