List of usage examples for org.apache.hadoop.mapreduce Job setOutputValueClass
public void setOutputValueClass(Class<?> theClass) throws IllegalStateException
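Before the project-specific examples below, here is a minimal driver sketch showing where setOutputValueClass fits in a typical job setup. It assumes a hypothetical WordCountMapper/WordCountReducer pair emitting Text keys and IntWritable values; the class names, job name, and argument handling are placeholders, not taken from any of the source files listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetOutputValueClassExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setOutputValueClass example");
        job.setJarByClass(SetOutputValueClassExample.class);
        job.setMapperClass(WordCountMapper.class);   // hypothetical mapper
        job.setReducerClass(WordCountReducer.class); // hypothetical reducer
        // Declare the job's final output types; the value class set here must match
        // what the reducer writes (and, if setMapOutputValueClass is not called,
        // what the mapper writes as well).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

If the map output types differ from the final output types, set them separately with setMapOutputKeyClass/setMapOutputValueClass, as several of the examples below do.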
From source file:com.ema.hadoop.wordcount.WordCount_cache.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    // First we write the stop word list; it could also be a file manually loaded into HDFS.
    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/stop_words.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    for (String w : stopwords) {
        br.write(w + "\n");
    }
    br.close();
    hdfs.close();

    Job job = Job.getInstance();
    job.addCacheFile(new Path("hdfs://localhost:9000/user/student/stop_words.txt").toUri());
    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    HbaseConfiguration hConf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);

    // The target HBase table name must be configured.
    String tableName = hConf.getOutputHBaseTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "HBase output table <" + HbaseConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Output field names and the column families derived from them.
    String hbaseFieldNames = hConf.getOutputHBaseFieldNames();
    this.vParamTargetFamilyNames(hbaseFieldNames, hConf);
    hConf.setOutputHBaseFamilyNames(this.getHBaseFamilyNames(hbaseFieldNames));

    // The row key rule must be configured.
    String rowKeyRule = hConf.getOutputHBaseRowKeyRule();
    if (null == rowKeyRule || rowKeyRule.trim().length() <= 0) {
        String meg = "Row key rule <" + HbaseConfiguration.OUTPUT_ROWKEY_RULE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The HFile maximum file size must be positive.
    long hfileMaxfilesize = hConf.getOutputHBaseHFileMaxfilesize();
    if (hfileMaxfilesize <= 0) {
        String meg = "HFile max file size <" + HbaseConfiguration.OUTPUT_HFILE_MAXFILESIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The memstore flush size (flush to HDFS) must be positive.
    long memstoreFlushSize = hConf.getOutputHBaseMemstoreFlushSize();
    if (memstoreFlushSize <= 0) {
        String meg = "Memstore flush size <" + HbaseConfiguration.OUTPUT_MEMSTORE_FLUSHSIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The column block size must be positive.
    int colmunBlocksize = hConf.getOutputHBaseColmunBlocksize();
    if (colmunBlocksize <= 0) {
        String meg = "Column block size <" + HbaseConfiguration.OUTPUT_COLMUN_BLOCKSIZE + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The column max version must be positive.
    int colmunMaxversion = hConf.getOutputHBaseColmunMaxversion();
    if (colmunMaxversion <= 0) {
        String meg = "Column max version <" + HbaseConfiguration.OUTPUT_COLMUN_MAXVERSION + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The column min version must be positive.
    int colmunMinversion = hConf.getOutputHBaseColmunMinversion();
    if (colmunMinversion <= 0) {
        String meg = "Column min version <" + HbaseConfiguration.OUTPUT_COLMUN_MINVERSION + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The commit buffer length must be positive.
    int commitBufferLength = hConf.getOutputHBaseBufferLength();
    if (commitBufferLength <= 0) {
        String meg = "Commit buffer length <" + HbaseConfiguration.OUTPUT_SET_COMMIT_BUFFERLENGTH + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The HBase WAL flag must be -1 or in the range [0-4].
    int walFlag = hConf.getOutputHBaseSetWalFlags();
    if (!(walFlag == -1 || (walFlag >= 0 && walFlag <= 4))) {
        String meg = "WAL flag <" + HbaseConfiguration.OUTPUT_SET_WAL_FLAG + "> must be -1 or in the range [0-4].";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the target table.
    if (!validateTable(hConf)) {
        String errorInfo = "HBase output table, validate Exception!";
        MRLog.error(LOG, errorInfo);
        throw new Exception(errorInfo);
    }

    conf.setOutputFormatClass(HbaseOutputFormat.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    conf.setReducerClass(DBReducer.class);

    // Print the table description.
    printTableDesc(tableName, hConf.getConf());
}
From source file:com.ery.hadoop.mrddx.hive.HiveOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());

    // The row separator for the output file must be configured.
    String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "Row separator <" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // The field separator for the output file must be configured.
    String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "Field separator <" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    boolean para = hconf.getOutputHiveCompress();

    // When compression is enabled, the codec must be one of HDFSUtils.CompressCodec.
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR]Compression codec <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC + "> is not supported.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Target path of the MR output (optional).
    String outTargetpath = hconf.getOutputTargetFilePath();
    hconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG, "MR target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH + "> is not set.");
    }

    // The Hive connection URL must be configured.
    String hiveUrl = hconf.getOutPutHiveConfigUrl();
    if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive connection URL <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Hive user name (optional).
    String hiveUser = hconf.getOutPutHiveConfigUser();
    if (null == hiveUser || hiveUser.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive user <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + "> is not set.");
    }

    // Hive password (optional).
    String hivePwd = hconf.getOutPutHiveConfigPassword();
    if (null == hivePwd || hivePwd.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive password <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + "> is not set.");
    }

    // The target Hive table name must be configured.
    String tableName = hconf.getOutputHiveTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive table name <" + HiveConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Partition fields, if configured, must be a subset of the output field names.
    String partitionField[] = hconf.getOutputHivePartitionField();
    if (null != partitionField && partitionField.length > 0) {
        String[] outputFieldName = hconf.getOutputFieldNames();
        if (null == outputFieldName || outputFieldName.length <= 0) {
            String meg = "Output field names <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + "> are not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        for (int i = 0; i < partitionField.length; i++) {
            boolean isExist = false;
            for (String s : outputFieldName) {
                if (s.equals(partitionField[i])) {
                    isExist = true;
                    break;
                }
            }
            if (!isExist) {
                String meg = "Partition field " + partitionField[i] + " <" + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD
                        + "> is not among the output field names <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + ">.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }

        String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
        if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
            String meg = "Order temp path <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
        if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
            String meg = "Order file name prefix <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
            MRLog.warn(LOG, meg);
        }

        long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
        if (orderOutputFileMaxCount == 0) {
            String meg = "Order file max count <" + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                    + "> must be greater than 0, or -1 (unlimited).";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
    }

    // DDL HQL to execute before the job (optional).
    String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
    if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive DDL <" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + "> is not set.");
    }
    try {
        executeDDLHQL(hconf);
        MRLog.info(LOG, "execute ddl hive sql success!");
    } catch (SQLException e) {
        MRLog.error(LOG, "execute ddl hive sql error!");
        e.printStackTrace();
    }

    conf.setReduceSpeculativeExecution(false);
    conf.setOutputFormatClass(HiveOutputFormat.class);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    if (null != partitionField && partitionField.length > 0) {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBPartitionReducer.class);
    } else {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBReducer.class);
    }
}
From source file:com.ery.hadoop.mrddx.hive.HiveSequenceFileOutputFormat.java
License:Apache License
@Override
public void handle(Job conf) throws Exception {
    super.handle(conf);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormatClass(HiveSequenceFileOutputFormat.class);
}
From source file:com.example.bigtable.sample.CellCounter.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    String reportSeparatorString = (args.length > 2) ? args[2] : ":";
    conf.set("ReportSeparator", reportSeparatorString);
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(CellCounter.class);
    Scan scan = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setReducerClass(IntSumReducer.class);
    return job;
}
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");
    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("to run this jar at least 2 parameters are necessary: \"" + job.getJar()
                + " input_files output_directory\"");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    // When you are not running any Reducer, use job.setNumReduceTasks(0);
    logger.info("number of reduce tasks is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}
From source file:com.examples.ch03.ParseWeblogs_Ex_1.java
public int run(String[] args) throws Exception {
    Path inputPath = new Path("apache_clf.txt");
    Path outputPath = new Path("output");
    Configuration conf = getConf();
    Job weblogJob = Job.getInstance(conf);
    weblogJob.setJobName("Weblog Transformer");
    weblogJob.setJarByClass(getClass());
    weblogJob.setNumReduceTasks(0);
    weblogJob.setMapperClass(CLFMapper_Ex_1.class);
    weblogJob.setMapOutputKeyClass(Text.class);
    weblogJob.setMapOutputValueClass(Text.class);
    weblogJob.setOutputKeyClass(Text.class);
    weblogJob.setOutputValueClass(Text.class);
    weblogJob.setInputFormatClass(TextInputFormat.class);
    weblogJob.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(weblogJob, inputPath);
    FileOutputFormat.setOutputPath(weblogJob, outputPath);

    if (weblogJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.fanlehai.hadoop.join.CompositeJoin.java
License:Apache License
/**
 * The main driver for the join program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException
 *             When there is a communication problem with the job tracker.
 */
@SuppressWarnings("rawtypes")
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("join");
    job.setJarByClass(CompositeJoin.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    Class<? extends InputFormat> inputFormatClass = KeyValueTextInputFormat.class; // SequenceFileInputFormat.class
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = Text.class; // BytesWritable.class
    Class<? extends Writable> outputValueClass = Text.class; // TupleWritable.class
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    String strOut = otherArgs.remove(otherArgs.size() - 1);
    FileSystem.get(new Configuration()).delete(new Path(strOut), true);

    FileOutputFormat.setOutputPath(job, new Path(strOut));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);

    job.setMapperClass(MapComposite.class);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }

    FileSystem.get(new Configuration()).delete(new Path(args[1]), true);

    Job job = Job.getInstance(super.getConf(), "AvroWordCount");
    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("AvroWordCount");

    // We call setOutputKeySchema first so we can override the configuration
    // parameters it sets
    AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.flipkart.fdp.migration.distcp.core.MirrorDistCPDriver.java
License:Apache License
private Job createJob(Configuration configuration) throws Exception {
    System.out.println("Initializing BlueShift v 2.0...");
    System.out.println("Configuration: " + dcmConfig.toString());

    Job job = Job.getInstance(configuration, "BlueShift v 2.0 - " + dcmConfig.getBatchName());

    job.setJarByClass(MirrorDistCPDriver.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MirrorMapper.class);
    job.setReducerClass(MirrorReducer.class);

    job.setInputFormatClass(MirrorFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, stateManager.getReportPath());

    job.setNumReduceTasks(configuration.getInt("mapreduce.reduce.tasks", 1));

    System.out.println("Job Initialization Complete, The status of the Mirror job will be written to: "
            + stateManager.getReportPath());
    return job;
}