Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce Job.setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the map output data.
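The examples below all follow the same pattern: declare the mapper's intermediate key/value classes when they differ from the job's final output classes. If setMapOutputKeyClass is never called, the map output key class defaults to the class set with setOutputKeyClass, and the call throws IllegalStateException if the job has already been submitted. Here is a minimal sketch of that pattern; WordCountDriver, WordCountMapper and SumToLongReducer are hypothetical classes (the mapper is assumed to emit <Text, IntWritable> and the reducer <Text, LongWritable>), and imports are omitted as in the examples below.

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCountDriver.class);        // hypothetical driver class

job.setMapperClass(WordCountMapper.class);       // hypothetical mapper: emits <Text, IntWritable>
job.setReducerClass(SumToLongReducer.class);     // hypothetical reducer: emits <Text, LongWritable>

// The intermediate (map output) types differ from the final output types below,
// so they must be declared explicitly; otherwise they default to the classes
// set by setOutputKeyClass()/setOutputValueClass().
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);

// Final (reducer output) types.
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);

FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);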

Usage

From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceColorCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    org.apache.log4j.BasicConfigurator.configure();

    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    // RECORDSERVICE:
    // To read from a table instead of a path, comment out
    // FileInputFormat.setInputPaths() and instead use:
    //FileInputFormat.setInputPaths(job, new Path(args[0]));
    RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users");

    // RECORDSERVICE:
    // Use the RecordService version of the AvroKeyInputFormat
    job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class);
    //job.setInputFormatClass(AvroKeyInputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver.java

@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();

    if (args.length < 6) {
        /*System.out.println("hadoop jar HBASEBulkLoad.jar "
         + "com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver"
         + " <inputpath> <outputpath> <hbaseTable> <hbaseColumnFamily"
         + " \"<hbaseColumns (delimiter seperated)>\" <column delimiter>");*/
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    String hbaseTab = args[2];
    String hbaseColumnFamily = args[3];
    String hbaseColumns = args[4];
    String hbaseColumnSeperator = args[5];
    config.set(HBASEBulkLoadConstants.HBASE_TABLE_KEY, hbaseTab.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_FAMILY_KEY, hbaseColumnFamily);
    config.set(HBASEBulkLoadConstants.HBASE_COLUMNS_KEY, hbaseColumns.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_SEPERATOR_KEY, hbaseColumnSeperator);
    System.out.println(2);
    Job job = Job.getInstance(config, this.getClass().getName() + "-" + hbaseTab);
    HBaseConfiguration.addHbaseResources(config);

    job.setInputFormatClass(TextInputFormat.class);
    job.setJarByClass(HBASEBulkLoadDriver.class);
    job.setMapperClass(HBASEBulkLoadKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);

    Connection connection = ConnectionFactory.createConnection(config);
    Table hTab = connection.getTable(TableName.valueOf(hbaseTab));

    FileSystem.get(getConf()).delete(new Path(args[1]), true);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //job.setOutputFormatClass(HFileOutputFormat2.class);
    TableMapReduceUtil.initTableReducerJob(hTab.getName().getNameAsString(), null, job);
    //job.setNumReduceTasks(0);
    TableMapReduceUtil.addDependencyJars(job);
    HFileOutputFormat2.configureIncrementalLoadMap(job, hTab);

    int exitCode = job.waitForCompletion(true) ? HBASEBulkLoadConstants.SUCCESS
            : HBASEBulkLoadConstants.FAILURE;
    System.out.println(8);
    if (HBASEBulkLoadConstants.SUCCESS == exitCode) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
        loader.doBulkLoad(new Path(args[1]), (HTable) hTab);
        connection.close();
    }
    return exitCode;
}

From source file:com.cloudera.sa.securewordcount.SecureWordCountDriver.java

@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();

    if (args.length < 2) {

        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Job job = Job.getInstance(config, this.getClass().getName() + "-wordcount");
    job.setJarByClass(SecureWordCountDriver.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java

License:Apache License

public void testDateSplits() throws Exception {
    Statement s = connection.createStatement();
    final String DATE_TABLE = "datetable";
    final String COL = "foo";
    try {
        try {
            // delete the table if it already exists.
            s.executeUpdate("DROP TABLE " + DATE_TABLE);
        } catch (SQLException e) {
            // Ignored; proceed regardless of whether we deleted the table;
            // it may have simply not existed.
        }

        // Create the table.
        s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')");

        // commit this tx.
        connection.commit();

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.getLocal(conf);
        fs.delete(new Path(OUT_DIR), true);

        // now do a dd import
        Job job = new Job(conf);
        job.setMapperClass(ValMapper.class);
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(DateCol.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(DateCol.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);
        job.getConfiguration().setInt("mapreduce.map.tasks", 2);
        FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
        DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, (String) null, (String) null);
        DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);

        boolean ret = job.waitForCompletion(true);
        assertTrue("job failed", ret);

        // Check to see that we imported as much as we thought we did.
        assertEquals("Did not get all the records", 4, job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue());
    } finally {
        s.close();
    }
}

From source file:com.cloudera.sqoop.mapreduce.MergeJob.java

License:Apache License

public boolean runMergeJob() throws IOException {
    Configuration conf = options.getConf();
    Job job = new Job(conf);

    String userClassName = options.getClassName();
    if (null == userClassName) {
        // Shouldn't get here.
        throw new IOException("Record class name not specified with " + "--class-name.");
    }

    // Set the external jar to use for the job.
    String existingJar = options.getExistingJarName();
    if (existingJar != null) {
        // User explicitly identified a jar path.
        LOG.debug("Setting job jar to user-specified jar: " + existingJar);
        job.getConfiguration().set("mapred.jar", existingJar);
    } else {
        // Infer it from the location of the specified class, if it's on the
        // classpath.
        try {
            Class<? extends Object> userClass = conf.getClassByName(userClassName);
            if (null != userClass) {
                String userJar = Jars.getJarPathForClass(userClass);
                LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar);
                job.getConfiguration().set("mapred.jar", userJar);
            } else {
                LOG.warn("Specified class " + userClassName + " is not in a jar. "
                        + "MapReduce may not find the class");
            }
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    }

    try {
        Path oldPath = new Path(options.getMergeOldPath());
        Path newPath = new Path(options.getMergeNewPath());

        Configuration jobConf = job.getConfiguration();
        FileSystem fs = FileSystem.get(jobConf);
        oldPath = oldPath.makeQualified(fs);
        newPath = newPath.makeQualified(fs);

        FileInputFormat.addInputPath(job, oldPath);
        FileInputFormat.addInputPath(job, newPath);

        jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString());
        jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString());
        jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol());
        jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName);

        FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir()));

        if (ExportJobBase.isSequenceFiles(jobConf, newPath)) {
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapperClass(MergeRecordMapper.class);
        } else {
            job.setMapperClass(MergeTextMapper.class);
            job.setOutputFormatClass(RawKeyTextOutputFormat.class);
        }

        jobConf.set("mapred.output.key.class", userClassName);
        job.setOutputValueClass(NullWritable.class);

        job.setReducerClass(MergeReducer.class);

        // Set the intermediate data types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MergeRecord.class);

        // Make sure Sqoop and anything else we need is on the classpath.
        cacheJars(job, null);
        return this.runJob(job);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }
}

From source file:com.cloudera.test.UseHCat.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Get the input and output table names as arguments
    String inputTableName = args[0];
    String outputTableName = args[1];
    // Assume the default database
    String dbName = null;

    Job job = new Job(conf, "UseHCat");
    HCatInputFormat.setInput(job, dbName, inputTableName);
    job.setJarByClass(UseHCat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // An HCatalog record as input
    job.setInputFormatClass(HCatInputFormat.class);

    // Mapper emits a string as key and an integer as value
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Ignore the key for the reducer output; emitting an HCatalog record as value
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setOutputFormatClass(HCatOutputFormat.class);

    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatOutputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing:" + s);
    HCatOutputFormat.setSchema(job, s);
    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.cloudera.traffic.AveragerRunner.java

License:Apache License

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = new Job(conf);
    job.setJarByClass(AveragerRunner.class);
    job.setMapperClass(AveragerMapper.class);
    job.setReducerClass(AveragerReducer.class);
    job.setCombinerClass(AveragerReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AverageWritable.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
}

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {

    Job job = new Job(new Configuration(conf));
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

/**
 * Create a map and reduce Hadoop job.  Does not set the name on the job.
 * @param inputPath The input {@link org.apache.hadoop.fs.Path}
 * @param outputPath The output {@link org.apache.hadoop.fs.Path}
 * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat}
 * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use
 * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class.  If the Mapper is a no-op,
 *                  this value may be null
 * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class.  If the Mapper is a no-op,
 *                    this value may be null
 * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use
 * @param reducerKey The reducer key class.
 * @param reducerValue The reducer value class.
 * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}.
 * @param conf The {@link org.apache.hadoop.conf.Configuration} to use.
 * @return The {@link org.apache.hadoop.mapreduce.Job}.
 * @throws IOException if there is a problem with the IO.
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
 * org.apache.hadoop.conf.Configuration)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    //    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayJob.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub

    Configuration conf = ConfigurationUtil.loginAuthentication(args, SEPCIFIC_CONFIG_NAME, getConf());

    // Get the statistics date from the job arguments
    String statDate = DateUtil.getFilterDate(args);
    if (statDate == null) {
        System.exit(1);
    }

    conf.set(STAT_DAY, statDate);

    // Create the job
    Job job = Job.getInstance(conf, JOB_NAME + ":" + statDate);
    job.setJarByClass(ScoreInfoDayJob.class);
    String scoreInfoInput = conf.get(SCORE_INFO_INPUT_PATH);
    Path scoreInfoPath = new Path(scoreInfoInput);

    String acctPhoneMapInfoInput = conf.get(ACCT_PHONE_MAP_INPUT_PATH);
    Path accPhoneMapInfoPath = new Path(acctPhoneMapInfoInput);

    // Add the score-info input if the path exists
    if (FileSystemUtil.exists(scoreInfoPath)) {
        MultipleInputs.addInputPath(job, scoreInfoPath, SequenceFileInputFormat.class,
                ScoreInfoDayMapper.class);
        logger.info("SocreInfoPath is " + scoreInfoInput);
    } else {
        logger.error("Path [{}] not exist!", scoreInfoInput);
    }

    // Optionally add the account-to-phone mapping input (currently disabled)
    //        if (FileSystemUtil.exists(accPhoneMapInfoPath)) {
    //            MultipleInputs.addInputPath(job, accPhoneMapInfoPath, TextInputFormat.class,
    //                    AcctPhoneMapper.class);
    //            logger.info("AccPhoneMapInfoPath is " + acctPhoneMapInfoInput);
    //        } else {
    //            logger.error("Path [{}] not exist!", acctPhoneMapInfoInput);
    //        }
    // Configure map output types, reducer count, and output format
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ScoreInfo.class);
    job.setNumReduceTasks(conf.getInt(REDUCE_NUMBER, 40));
    job.setOutputFormatClass(NullOutputFormat.class);

    //        TableMapReduceUtil.initTableReducerJob(HBaseTableSchema.USER_INFO_TABLE2,
    //                ScoreInfoDayReducer.class, job);

    return (job.waitForCompletion(true) ? 0 : 1);
}