Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

List of usage examples for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the job output data.
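As a quick orientation before the project examples below, here is a minimal, self-contained sketch (not taken from any of those projects; class and path names are placeholders) showing where setOutputKeyClass typically sits in a job driver: it declares the key type of the job's final output and must be called before the job is submitted, otherwise the IllegalStateException noted in the prototype is thrown.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class LineCountExample {

    // Emits one ("lines", 1) pair per input line.
    public static class LineMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private static final Text KEY = new Text("lines");

        @Override
        protected void map(LongWritable offset, Text line, Context context)
                throws IOException, InterruptedException {
            context.write(KEY, ONE);
        }
    }

    // Sums the counts for each key.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "line count");
        job.setJarByClass(LineCountExample.class);
        job.setMapperClass(LineMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);

        // Key/value classes of the job output; because the map output uses the same
        // types, no separate setMapOutputKeyClass/setMapOutputValueClass call is needed.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output paths taken from the command line (placeholder arguments).
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

When the mapper emits key/value types different from the reducer output, setMapOutputKeyClass and setMapOutputValueClass must be set separately, as the first example below does with setMapOutputKeyClass.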

Usage

From source file:com.elixir.hadoop.Chromo.FragmentCoverage.java

License:Apache License

public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "position");
    job.setJarByClass(FragmentCoverage.class);

    job.setMapperClass(CoverageMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setNumReduceTasks(5);
    job.setMapOutputKeyClass(com.elixir.hadoop.Chromo.SecondrySort.IntPair.class);
    //job.setSpeculativeExecution(true);
    job.setPartitionerClass(ChromoPartitioner.class);
    job.setGroupingComparatorClass(com.elixir.hadoop.Chromo.SecondrySort.FirstGroupingComparator.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);

    job.setOutputValueClass(IntWritable.class);
    //   job.setOutputFormatClass(Text.class);

    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.elixir.hadoop.FragmentCoverage.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "position");
    job.setJarByClass(FragmentCoverage.class);
    job.setMapperClass(CoverageMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.elixir.hadoop.OddEven.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "oddeven");
    job.setJarByClass(OddEven.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.elixir.hadoop.Word.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.elixir.hadoop.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ema.hadoop.bestclient.BestClient.java

public static void main(String[] args) throws Exception {

    if (args.length != 4) {
        System.err.println("Usage: BestClient <input path> <output path> <date start> <date end>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(BestClient.class);
    job.setJobName("Best client job");

    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.setStrings("dates", args[2], args[3]);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(BCMapper.class);
    job.setReducerClass(BCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ema.hadoop.wordcount.WordCount.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(WordCount.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper.class);
    job.setReducerClass(WCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ema.hadoop.wordcount.WordCount_cache.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    // First we write the stop word list
    // it could also be a file manually loaded into HDFS

    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/stop_words.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    for (String w : stopwords) {
        br.write(w + "\n");
    }

    br.close();
    hdfs.close();

    Job job = Job.getInstance();
    job.addCacheFile(new Path("hdfs://localhost:9000/user/student/stop_words.txt").toUri());

    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java

License:Apache License

@Override
public void handle(Job conf) throws Exception {
    HbaseConfiguration hConf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);

    // Validate the output HBase table name
    String tableName = hConf.getOutputHBaseTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "The HBase output table name <" + HbaseConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the HBase field names and set the column family names
    String hbaseFieldNames = hConf.getOutputHBaseFieldNames();
    this.vParamTargetFamilyNames(hbaseFieldNames, hConf);
    hConf.setOutputHBaseFamilyNames(this.getHBaseFamilyNames(hbaseFieldNames));

    // Validate the row key rule
    String rowKeyRule = hConf.getOutputHBaseRowKeyRule();
    if (null == rowKeyRule || rowKeyRule.trim().length() <= 0) {
        String meg = "The HBase row key rule <" + HbaseConfiguration.OUTPUT_ROWKEY_RULE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the HFile maximum file size
    long hfileMaxfilesize = hConf.getOutputHBaseHFileMaxfilesize();
    if (hfileMaxfilesize <= 0) {
        String meg = "The HFile maximum file size <" + HbaseConfiguration.OUTPUT_HFILE_MAXFILESIZE
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the memstore flush size (threshold for flushing to HDFS)
    long memstoreFlushSize = hConf.getOutputHBaseMemstoreFlushSize();
    if (memstoreFlushSize <= 0) {
        String meg = "The memstore flush size <"
                + HbaseConfiguration.OUTPUT_MEMSTORE_FLUSHSIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the column family block size
    int colmunBlocksize = hConf.getOutputHBaseColmunBlocksize();
    if (colmunBlocksize <= 0) {
        String meg = "The column block size <" + HbaseConfiguration.OUTPUT_COLMUN_BLOCKSIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the column maximum version count
    int colmunMaxversion = hConf.getOutputHBaseColmunMaxversion();
    if (colmunMaxversion <= 0) {
        String meg = "The column maximum version <" + HbaseConfiguration.OUTPUT_COLMUN_MAXVERSION + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the column minimum version count
    int colmunMinversion = hConf.getOutputHBaseColmunMinversion();
    if (colmunMinversion <= 0) {
        String meg = "The column minimum version <" + HbaseConfiguration.OUTPUT_COLMUN_MINVERSION + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the commit buffer length
    int commitBufferLength = hConf.getOutputHBaseBufferLength();
    if (commitBufferLength <= 0) {
        String meg = "The commit buffer length <" + HbaseConfiguration.OUTPUT_SET_COMMIT_BUFFERLENGTH
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the HBase WAL flag
    int walFlag = hConf.getOutputHBaseSetWalFlags();
    if (!(walFlag == -1 || (walFlag >= 0 && walFlag <= 4))) {
        String meg = "The WAL flag <" + HbaseConfiguration.OUTPUT_SET_WAL_FLAG
                + "> must be -1 or within [0-4].";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the target HBase table
    if (!validateTable(hConf)) {
        String errorInfo = "HBase output table, validate Execption!";
        MRLog.error(LOG, errorInfo);
        throw new Exception(errorInfo);
    }

    conf.setOutputFormatClass(HbaseOutputFormat.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    conf.setReducerClass(DBReducer.class);

    // Print the table descriptor
    printTableDesc(tableName, hConf.getConf());
}

From source file:com.ery.hadoop.mrddx.hive.HiveOutputFormat.java

License:Apache License

@Override
public void handle(Job conf) throws Exception {
    /**
     * Validate the output configuration.
     */
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());
    // Row separator of the Hive output file
    String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "The row separator <" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Field separator of the Hive output file
    String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "The field separator <" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    boolean para = hconf.getOutputHiveCompress();
    // Compression codec (see HDFSUtils.CompressCodec)
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR] The compression codec <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC
                + "> is not supported.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Target path of the MR output
    String outTargetpath = hconf.getOutputTargetFilePath();
    hconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG,
                "The MR output target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH + "> is not set.");
    }

    // Hive connection URL
    String hiveUrl = hconf.getOutPutHiveConfigUrl();
    if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
        String meg = "[MR ERROR] The Hive connection URL <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL
                + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Hive connection user name
    String hiveUser = hconf.getOutPutHiveConfigUser();
    if (null == hiveUser || hiveUser.trim().length() <= 0) {
        LOG.warn("[MR WARN] The Hive connection user <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + "> is not set.");
    }

    // Hive connection password
    String hivePwd = hconf.getOutPutHiveConfigPassword();
    if (null == hivePwd || hivePwd.trim().length() <= 0) {
        LOG.warn("[MR WARN] The Hive connection password <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + "> is not set.");
    }

    // Hive table name
    String tableName = hconf.getOutputHiveTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR] The Hive table name <" + HiveConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Hive partition fields
    String partitionField[] = hconf.getOutputHivePartitionField();
    if (null != partitionField && partitionField.length > 0) {
        // Output field names
        String[] outputFieldName = hconf.getOutputFieldNames();
        if (null == outputFieldName || outputFieldName.length <= 0) {
            String meg = "The output field names <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + "> are not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        for (int i = 0; i < partitionField.length; i++) {
            boolean isExist = false;
            for (String s : outputFieldName) {
                if (s.equals(partitionField[i])) {
                    isExist = true;
                    break;
                }
            }

            if (!isExist) {
                String meg = "" + partitionField[i] + "<"
                        + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD + ">?<"
                        + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + "";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }

        String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
        if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
            String meg = "<" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + ">.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
        if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
            String meg = "???<" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + ">.";
            MRLog.warn(LOG, meg);
        }

        long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
        if (orderOutputFileMaxCount == 0) {
            String meg = "?<" + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                    + ">0 -1(??).";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
    }

    // Hive DDL statement to execute
    String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
    if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
        LOG.warn("[MR WARN] The Hive DDL HQL <" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + "> is not set.");
    }
    }

    try {
        executeDDLHQL(hconf);
        MRLog.info(LOG, "execute ddl hive sql success!");
    } catch (SQLException e) {
        MRLog.error(LOG, "execute ddl hive sql error!");
        e.printStackTrace();
    }

    conf.setReduceSpeculativeExecution(false);
    conf.setOutputFormatClass(HiveOutputFormat.class);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    if (null != partitionField && partitionField.length > 0) {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBPartitionReducer.class);
    } else {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBReducer.class);
    }
}