Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

List of usage examples for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the job output data.
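As a quick orientation before the project examples below, here is a minimal, self-contained sketch (not taken from any of those projects; class and path names are placeholders) showing where setOutputKeyClass typically sits in a job driver: it declares the key type of the job's final output and must be called before the job is submitted, otherwise the IllegalStateException noted in the prototype is thrown.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class LineCountExample {

    // Emits one ("lines", 1) pair per input line.
    public static class LineMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private static final Text KEY = new Text("lines");

        @Override
        protected void map(LongWritable offset, Text line, Context context)
                throws IOException, InterruptedException {
            context.write(KEY, ONE);
        }
    }

    // Sums the counts for each key.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "line count");
        job.setJarByClass(LineCountExample.class);
        job.setMapperClass(LineMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);

        // Key/value classes of the job output; because the map output uses the same
        // types, no separate setMapOutputKeyClass/setMapOutputValueClass call is needed.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output paths taken from the command line (placeholder arguments).
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

When the mapper emits key/value types different from the reducer output, setMapOutputKeyClass and setMapOutputValueClass must be set separately, as the first example below does with setMapOutputKeyClass.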

Usage

From source file:com.elixir.hadoop.Chromo.FragmentCoverage.java

License:Apache License

public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "position");
    job.setJarByClass(FragmentCoverage.class);

    job.setMapperClass(CoverageMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setNumReduceTasks(5);
    job.setMapOutputKeyClass(com.elixir.hadoop.Chromo.SecondrySort.IntPair.class);
    //job.setSpeculativeExecution(true);
    job.setPartitionerClass(ChromoPartitioner.class);
    job.setGroupingComparatorClass(com.elixir.hadoop.Chromo.SecondrySort.FirstGroupingComparator.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);

    job.setOutputValueClass(IntWritable.class);
    //   job.setOutputFormatClass(Text.class);

    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.elixir.hadoop.FragmentCoverage.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "position");
    job.setJarByClass(FragmentCoverage.class);
    job.setMapperClass(CoverageMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.elixir.hadoop.OddEven.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "oddeven");
    job.setJarByClass(OddEven.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.elixir.hadoop.Word.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.elixir.hadoop.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ema.hadoop.bestclient.BestClient.java

public static void main(String[] args) throws Exception {

    if (args.length != 4) {
        System.err.println("Usage: BestClient <input path> <output path> <date start> <date end>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(BestClient.class);
    job.setJobName("Best client job");

    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.setStrings("dates", args[2], args[3]);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(BCMapper.class);
    job.setReducerClass(BCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ema.hadoop.wordcount.WordCount.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(WordCount.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper.class);
    job.setReducerClass(WCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ema.hadoop.wordcount.WordCount_cache.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    // First we write the stop word list
    // it could also be a file manually loaded into HDFS

    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/stop_words.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    for (String w : stopwords) {
        br.write(w + "\n");
    }

    br.close();
    hdfs.close();

    Job job = Job.getInstance();
    job.addCacheFile(new Path("hdfs://localhost:9000/user/student/stop_words.txt").toUri());

    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java

License:Apache License

@Override
public void handle(Job conf) throws Exception {
    HbaseConfiguration hConf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);

    // Validate the output HBase table name
    String tableName = hConf.getOutputHBaseTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "The HBase output table name <" + HbaseConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the HBase field names and set the column family names
    String hbaseFieldNames = hConf.getOutputHBaseFieldNames();
    this.vParamTargetFamilyNames(hbaseFieldNames, hConf);
    hConf.setOutputHBaseFamilyNames(this.getHBaseFamilyNames(hbaseFieldNames));

    // Validate the row key rule
    String rowKeyRule = hConf.getOutputHBaseRowKeyRule();
    if (null == rowKeyRule || rowKeyRule.trim().length() <= 0) {
        String meg = "The HBase row key rule <" + HbaseConfiguration.OUTPUT_ROWKEY_RULE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the HFile maximum file size
    long hfileMaxfilesize = hConf.getOutputHBaseHFileMaxfilesize();
    if (hfileMaxfilesize <= 0) {
        String meg = "The HFile maximum file size <" + HbaseConfiguration.OUTPUT_HFILE_MAXFILESIZE
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the memstore flush size (threshold for flushing to HDFS)
    long memstoreFlushSize = hConf.getOutputHBaseMemstoreFlushSize();
    if (memstoreFlushSize <= 0) {
        String meg = "The memstore flush size <"
                + HbaseConfiguration.OUTPUT_MEMSTORE_FLUSHSIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the column family block size
    int colmunBlocksize = hConf.getOutputHBaseColmunBlocksize();
    if (colmunBlocksize <= 0) {
        String meg = "The column block size <" + HbaseConfiguration.OUTPUT_COLMUN_BLOCKSIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the column maximum version count
    int colmunMaxversion = hConf.getOutputHBaseColmunMaxversion();
    if (colmunMaxversion <= 0) {
        String meg = "The column maximum version <" + HbaseConfiguration.OUTPUT_COLMUN_MAXVERSION + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the column minimum version count
    int colmunMinversion = hConf.getOutputHBaseColmunMinversion();
    if (colmunMinversion <= 0) {
        String meg = "The column minimum version <" + HbaseConfiguration.OUTPUT_COLMUN_MINVERSION + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the commit buffer length
    int commitBufferLength = hConf.getOutputHBaseBufferLength();
    if (commitBufferLength <= 0) {
        String meg = "The commit buffer length <" + HbaseConfiguration.OUTPUT_SET_COMMIT_BUFFERLENGTH
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the HBase WAL flag
    int walFlag = hConf.getOutputHBaseSetWalFlags();
    if (!(walFlag == -1 || (walFlag >= 0 && walFlag <= 4))) {
        String meg = "The WAL flag <" + HbaseConfiguration.OUTPUT_SET_WAL_FLAG
                + "> must be -1 or within [0-4].";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the target HBase table
    if (!validateTable(hConf)) {
        String errorInfo = "HBase output table, validate Execption!";
        MRLog.error(LOG, errorInfo);
        throw new Exception(errorInfo);
    }

    conf.setOutputFormatClass(HbaseOutputFormat.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    conf.setReducerClass(DBReducer.class);

    // Print the table descriptor
    printTableDesc(tableName, hConf.getConf());
}

From source file:com.ery.hadoop.mrddx.hive.HiveOutputFormat.java

License:Apache License

@Override
public void handle(Job conf) throws Exception {
    /**
     * Validate the output configuration.
     */
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());
    // Row separator of the Hive output file
    String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "The row separator <" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Field separator of the Hive output file
    String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "The field separator <" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    boolean para = hconf.getOutputHiveCompress();
    // Compression codec (see HDFSUtils.CompressCodec)
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR] The compression codec <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC
                + "> is not supported.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Target path of the MR output
    String outTargetpath = hconf.getOutputTargetFilePath();
    hconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG,
                "The MR output target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH + "> is not set.");
    }

    // Hive connection URL
    String hiveUrl = hconf.getOutPutHiveConfigUrl();
    if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
        String meg = "[MR ERROR] The Hive connection URL <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL
                + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Hive connection user name
    String hiveUser = hconf.getOutPutHiveConfigUser();
    if (null == hiveUser || hiveUser.trim().length() <= 0) {
        LOG.warn("[MR WARN] The Hive connection user <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + "> is not set.");
    }

    // Hive connection password
    String hivePwd = hconf.getOutPutHiveConfigPassword();
    if (null == hivePwd || hivePwd.trim().length() <= 0) {
        LOG.warn("[MR WARN] The Hive connection password <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + "> is not set.");
    }

    // Hive table name
    String tableName = hconf.getOutputHiveTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR] The Hive table name <" + HiveConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Hive partition fields
    String partitionField[] = hconf.getOutputHivePartitionField();
    if (null != partitionField && partitionField.length > 0) {
        // Output field names
        String[] outputFieldName = hconf.getOutputFieldNames();
        if (null == outputFieldName || outputFieldName.length <= 0) {
            String meg = "The output field names <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + "> are not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        for (int i = 0; i < partitionField.length; i++) {
            boolean isExist = false;
            for (String s : outputFieldName) {
                if (s.equals(partitionField[i])) {
                    isExist = true;
                    break;
                }
            }

            if (!isExist) {
                String meg = "" + partitionField[i] + "<"
                        + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD + ">?<"
                        + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + "";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }

        String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
        if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
            String meg = "<" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + ">.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
        if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
            String meg = "???<" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + ">.";
            MRLog.warn(LOG, meg);
        }

        long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
        if (orderOutputFileMaxCount == 0) {
            String meg = "?<" + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                    + ">0 -1(??).";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
    }

    // Hive DDL statement to execute
    String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
    if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
        LOG.warn("[MR WARN] The Hive DDL HQL <" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + "> is not set.");
    }
    }

    try {
        executeDDLHQL(hconf);
        MRLog.info(LOG, "execute ddl hive sql success!");
    } catch (SQLException e) {
        MRLog.error(LOG, "execute ddl hive sql error!");
        e.printStackTrace();
    }

    conf.setReduceSpeculativeExecution(false);
    conf.setOutputFormatClass(HiveOutputFormat.class);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    if (null != partitionField && partitionField.length > 0) {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBPartitionReducer.class);
    } else {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBReducer.class);
    }
}