Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the map output data.
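The project examples below show this call in context; as a quick orientation, here is a minimal, self-contained sketch of the typical call site. The class name, mapper/reducer placeholders, and paths are hypothetical, not taken from any of the examples that follow. The key point is that the class passed to setMapOutputKeyClass must match the key type the mapper actually emits, and it only needs to be set explicitly when that type differs from the final (job) output key class.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetMapOutputKeyClassExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "map output key class example");
        job.setJarByClass(SetMapOutputKeyClassExample.class);

        // Hypothetical mapper/reducer classes; substitute your own implementations.
        // job.setMapperClass(MyTokenizingMapper.class);
        // job.setReducerClass(MySumReducer.class);

        // Intermediate (map output) types: must match what the mapper writes to the context.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Final (reducer) output types; these may differ from the map output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path(args[0]));
        TextOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}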

Usage

From source file:co.nubetech.hiho.job.DBQueryInputJob.java

License:Apache License

public void runJobs(Configuration conf, int jobCounter) throws IOException {

    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
        logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " +
    // conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS,
    // conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
        throw new IllegalArgumentException("Wrong value of output strategy. Please correct");
    }
    if (os != OutputStrategyEnum.AVRO) {
        switch (os) {

        case DUMP: {
            // job.setMapperClass(DBImportMapper.class);
            break;
        }
        /*
         * case AVRO: { job.setMapperClass(DBInputAvroMapper.class); //
         * need avro in cp // job.setJarByClass(Schema.class); // need
         * jackson which is needed by avro - ugly! //
         * job.setJarByClass(ObjectMapper.class);
         * job.setMapOutputKeyClass(NullWritable.class);
         * job.setMapOutputValueClass(AvroValue.class);
         * job.setOutputKeyClass(NullWritable.class);
         * job.setOutputValueClass(AvroValue.class);
         * job.setOutputFormatClass(AvroOutputFormat.class);
         * 
         * AvroOutputFormat.setOutputPath(job, new
         * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; }
         */
        case DELIMITED: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        case JSON: {
            // job.setMapperClass(DBImportJsonMapper.class);
            // job.setJarByClass(ObjectMapper.class);
            break;
        }
        default: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        }

        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        job.setNumReduceTasks(0);

        try {
            // job.setJarByClass(Class.forName(conf.get(
            // org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
            logger.debug("OUTPUT format class is " + job.getOutputFormatClass());

            /*
             * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
             * ReflectionUtils.newInstance(job.getOutputFormatClass(),
             * job.getConfiguration()); output.checkOutputSpecs(job);
             */
            logger.debug("Class is " + ReflectionUtils
                    .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
            job.waitForCompletion(false);
            if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
                generateHiveScript(conf, job, jobCounter);
                generatePigScript(conf, job);
            }

        }
        /*
         * catch (HIHOException h) { h.printStackTrace(); }
         */
        catch (HIHOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // avro to be handled differently, thanks to all the incompatibilities
    // in the apis.
    else {
        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        // co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(job,
        // inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        JobConf jobConf = new JobConf(conf);

        try {
            GenericDBWritable queryWritable = getDBWritable(jobConf);
            Schema pair = DBMapper.getPairSchema(queryWritable.getColumns());

            AvroJob.setMapOutputSchema(jobConf, pair);
            GenericRecordAvroOutputFormat.setOutputPath(jobConf,
                    new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));

            co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(jobConf, inputQuery,
                    inputBoundingQuery, params);
            jobConf.setInputFormat(co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.class);
            jobConf.setMapperClass(DBInputAvroMapper.class);
            jobConf.setMapOutputKeyClass(NullWritable.class);
            jobConf.setMapOutputValueClass(AvroValue.class);
            jobConf.setOutputKeyClass(NullWritable.class);
            jobConf.setOutputValueClass(Text.class);
            jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class);
            jobConf.setJarByClass(DBQueryInputJob.class);
            jobConf.setStrings("io.serializations",
                    "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization,org.apache.avro.mapred.AvroSerialization");
            jobConf.setNumReduceTasks(0);
            /*
             * jobConf.setOutputFormat(org.apache.hadoop.mapred.
             * SequenceFileOutputFormat.class);
             * org.apache.hadoop.mapred.SequenceFileOutputFormat
             * .setOutputPath(jobConf, new
             * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
             */
            JobClient.runJob(jobConf);
        } catch (Throwable e) {
            e.printStackTrace();
        }

    }

}

From source file:co.nubetech.hiho.job.ExportDelimitedToDB.java

License:Apache License

public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(DelimitedLoadMapper.class);
    job.setJarByClass(DelimitedLoadMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputFormatClass(DBOutputFormat.class);
    int ret = 0;

    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}

From source file:co.nubetech.hiho.job.ExportToFTPServer.java

License:Apache License

@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    Job job = new Job(conf);
    job.setMapperClass(TokenCounterMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(FTPTextOutputFormat.class);
    FTPTextOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setJarByClass(ExportToFTPServer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);

    int ret = 0;

    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}

From source file:co.nubetech.hiho.job.ExportToMySQLDB.java

License:Apache License

@Override
public int run(String[] args) throws IOException {

    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(MySQLLoadDataMapper.class);
    job.setJarByClass(MySQLLoadDataMapper.class);
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    // verify required properties are loaded
    logger.debug(conf.get(DBConfiguration.URL_PROPERTY));
    logger.debug(conf.get(DBConfiguration.USERNAME_PROPERTY));
    logger.debug(conf.get(DBConfiguration.PASSWORD_PROPERTY));

    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}

From source file:co.nubetech.hiho.job.ExportToOracleDb.java

License:Apache License

@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    for (int i = 0; i < args.length; i++) {
        logger.debug("Remaining arguments are" + " " + args[i]);
    }
    populateConfiguration(args, conf);

    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    job.setJobName("OracleLoading");
    job.setMapperClass(OracleLoadMapper.class);
    job.setJarByClass(ExportToOracleDb.class);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));

    try {
        // we first create the external table definition
        String query = conf.get(HIHOConf.EXTERNAL_TABLE_DML);
        // create table if user has specified
        if (query != null) {
            this.runQuery(query, conf);
        }
    } catch (HIHOException e1) {

        e1.printStackTrace();
    }

    // verify required properties are loaded

    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;
    try {

        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    // run alter table query and add locations
    try {
        this.runQuery(getAlterTableDML(new Path(inputPath), conf), conf);
    } catch (HIHOException e1) {

        e1.printStackTrace();
    }
    return ret;
}

From source file:co.nubetech.hiho.job.sf.ExportSalesForceJob.java

License:Apache License

@Override
public int run(String[] arg0) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(arg0, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("SaleForceLoading");
    job.setMapperClass(SalesForceLoadMapper.class);
    job.setJarByClass(SalesForceLoadMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    // NLineInputFormat.setNumLinesPerSplit(job, 10);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;

    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}

From source file:co.nubetech.hiho.merge.MergeJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    Configuration conf = getConf();
    conf.set(HIHOConf.MERGE_OLD_PATH, oldPath);
    conf.set(HIHOConf.MERGE_NEW_PATH, newPath);

    Job job = new Job(conf);
    job.setJobName("Merge job");
    job.setJarByClass(MergeJob.class);

    if (mergeBy.equals("key")) {
        job.setMapperClass(MergeKeyMapper.class);
        job.setReducerClass(MergeKeyReducer.class);

    } else if (mergeBy.equals("value")) {
        job.setMapperClass(MergeValueMapper.class);
        job.setReducerClass(MergeValueReducer.class);
    }

    job.setInputFormatClass(inputFormatClass);
    DelimitedTextInputFormat.setProperties(job, delimiter, column);
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setMapOutputValueClass(HihoValue.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    FileInputFormat.setInputPaths(job, oldPath + "," + newPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsOld = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
            totalRecordsNew = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue();
            badRecords = counters.findCounter(MergeRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(MergeRecordCounter.OUTPUT).getValue();
            logger.info("Total old records read are: " + totalRecordsOld);
            logger.info("Total new records read are: " + totalRecordsNew);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return 0;
}

From source file:co.nubetech.hiho.similarity.ngram.NGramJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("NGram job");
    job.setJarByClass(NGramJob.class);

    Class inputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat");
    Class outputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat");
    // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
    // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
    Class inputKeyClass = Class.forName("org.apache.hadoop.io.Text");
    Class inputValueClass = Class.forName("org.apache.hadoop.io.Text");
    Class outputKeyClass = Class.forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class outputValueClass = Class.forName("org.apache.hadoop.io.IntWritable");

    job.setMapperClass(NGramMapper.class);
    job.setReducerClass(NGramReducer.class);

    job.setInputFormatClass(inputFormatClass);
    job.setMapOutputKeyClass(inputKeyClass);
    job.setMapOutputValueClass(inputValueClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    job.setOutputFormatClass(outputFormatClass);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path("outputOfNGramJob"));

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}

From source file:co.nubetech.hiho.similarity.ngram.ScoreJob.java

License:Apache License

@Override
public int run(String[] arg0) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("Score job");
    job.setJarByClass(ScoreJob.class);

    Class inputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat");
    Class outputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat");
    // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
    // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
    Class inputKeyClass = Class.forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class inputValueClass = Class.forName("org.apache.hadoop.io.IntWritable");
    Class outputKeyClass = Class.forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class outputValueClass = Class.forName("org.apache.hadoop.io.LongWritable");

    job.setMapperClass(ScoreMapper.class);
    job.setReducerClass(ScoreReducer.class);

    job.setInputFormatClass(inputFormatClass);
    job.setMapOutputKeyClass(inputKeyClass);
    job.setMapOutputValueClass(inputValueClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    job.setOutputFormatClass(outputFormatClass);

    FileInputFormat.setInputPaths(job, "outputOfNGramJob");
    FileOutputFormat.setOutputPath(job, new Path("outputOfScoreJob"));

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}

From source file:com.aerospike.hadoop.examples.aggregateintinput.AggregateIntInput.java

License:Apache License

public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    final Job job = new Job(conf, "AerospikeAggregateIntInput");

    log.info("run starting on bin " + binName);

    job.setJarByClass(AggregateIntInput.class);
    job.setInputFormatClass(AerospikeInputFormat.class);
    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    // job.setCombinerClass(Reduce.class); // no combiner
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[0]));

    int status = job.waitForCompletion(true) ? 0 : 1;
    log.info("run finished, status=" + status);
    return status;
}