Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
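
Before the examples below, here is a minimal, self-contained driver sketch (the class name and the input/output path arguments are illustrative, not taken from any of the source files that follow) showing where setOutputFormatClass fits in a typical job setup:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetOutputFormatExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "set-output-format-example");
        job.setJarByClass(SetOutputFormatExample.class);

        // Identity pass-through: the default Mapper and Reducer copy (LongWritable, Text) records.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Set the OutputFormat for the job; throws IllegalStateException
        // if the job has already been submitted.
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}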

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

/**
 * Sets the configurations used for outputs.
 */
private void setOutputsIfNeeded(Job job) {
    Map<String, OutputFormatProvider> outputFormatProviders = context.getOutputFormatProviders();
    LOG.debug("Using as output for MapReduce Job: {}", outputFormatProviders.keySet());

    if (outputFormatProviders.isEmpty()) {
        // user is not going through our APIs to add output; leave the job's output format to user
        return;
    } else if (outputFormatProviders.size() == 1) {
        // If only one output is configured through the context, then set it as the root OutputFormat
        Map.Entry<String, OutputFormatProvider> next = outputFormatProviders.entrySet().iterator().next();
        OutputFormatProvider outputFormatProvider = next.getValue();
        ConfigurationUtil.setAll(outputFormatProvider.getOutputFormatConfiguration(), job.getConfiguration());
        job.getConfiguration().set(Job.OUTPUT_FORMAT_CLASS_ATTR,
                outputFormatProvider.getOutputFormatClassName());
        return;
    }
    // multiple output formats configured via the context. We should use a RecordWriter that doesn't support writing
    // as the root output format in this case to disallow writing directly on the context
    MultipleOutputsMainOutputWrapper.setRootOutputFormat(job, UnsupportedOutputFormat.class.getName(),
            new HashMap<String, String>());
    job.setOutputFormatClass(MultipleOutputsMainOutputWrapper.class);

    for (Map.Entry<String, OutputFormatProvider> entry : outputFormatProviders.entrySet()) {
        String outputName = entry.getKey();
        OutputFormatProvider outputFormatProvider = entry.getValue();

        String outputFormatClassName = outputFormatProvider.getOutputFormatClassName();
        if (outputFormatClassName == null) {
            throw new IllegalArgumentException(
                    "Output '" + outputName + "' provided null as the output format");
        }

        Map<String, String> outputConfig = outputFormatProvider.getOutputFormatConfiguration();
        MultipleOutputs.addNamedOutput(job, outputName, outputFormatClassName, job.getOutputKeyClass(),
                job.getOutputValueClass(), outputConfig);

    }
}

From source file:co.cask.cdap.template.etl.batch.sink.DBSink.java

License:Apache License

@Override
public void prepareRun(BatchSinkContext context) {
    LOG.debug("tableName = {}; pluginType = {}; pluginName = {}; connectionString = {}; columns = {}",
            dbSinkConfig.tableName, dbSinkConfig.jdbcPluginType, dbSinkConfig.jdbcPluginName,
            dbSinkConfig.connectionString, dbSinkConfig.columns);

    Job job = context.getHadoopJob();
    Configuration hConf = job.getConfiguration();

    // Load the plugin class to make sure it is available.
    Class<? extends Driver> driverClass = context.loadPluginClass(getJDBCPluginId());
    if (dbSinkConfig.user == null && dbSinkConfig.password == null) {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSinkConfig.connectionString);
    } else {
        DBConfiguration.configureDB(hConf, driverClass.getName(), dbSinkConfig.connectionString,
                dbSinkConfig.user, dbSinkConfig.password);
    }
    List<String> fields = Lists.newArrayList(Splitter.on(",").omitEmptyStrings().split(dbSinkConfig.columns));
    try {
        ETLDBOutputFormat.setOutput(job, dbSinkConfig.tableName, fields.toArray(new String[fields.size()]));
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
    job.setOutputFormatClass(ETLDBOutputFormat.class);
}

From source file:co.nubetech.apache.hadoop.DBOutputFormat.java

License:Apache License

private static DBConfiguration setOutput(Job job, String tableName) throws IOException {
    job.setOutputFormatClass(DBOutputFormat.class);
    // job.setReduceSpeculativeExecution(false);

    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());

    dbConf.setOutputTableName(tableName);
    return dbConf;
}

From source file:co.nubetech.hiho.dedup.DedupJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }

    job.setMapOutputKeyClass(HihoTuple.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}

From source file:co.nubetech.hiho.job.DBQueryInputJob.java

License:Apache License

public void runJobs(Configuration conf, int jobCounter) throws IOException {

    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
        logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " +
    // conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS,
    // conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
        throw new IllegalArgumentException("Wrong value of output strategy. Please correct");
    }
    if (os != OutputStrategyEnum.AVRO) {
        switch (os) {

        case DUMP: {
            // job.setMapperClass(DBImportMapper.class);
            break;
        }
        /*
         * case AVRO: { job.setMapperClass(DBInputAvroMapper.class); //
         * need avro in cp // job.setJarByClass(Schema.class); // need
         * jackson which is needed by avro - ugly! //
         * job.setJarByClass(ObjectMapper.class);
         * job.setMapOutputKeyClass(NullWritable.class);
         * job.setMapOutputValueClass(AvroValue.class);
         * job.setOutputKeyClass(NullWritable.class);
         * job.setOutputValueClass(AvroValue.class);
         * job.setOutputFormatClass(AvroOutputFormat.class);
         * 
         * AvroOutputFormat.setOutputPath(job, new
         * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; }
         */
        case DELIMITED: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        case JSON: {
            // job.setMapperClass(DBImportJsonMapper.class);
            // job.setJarByClass(ObjectMapper.class);
            break;
        }
        default: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        }

        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        job.setNumReduceTasks(0);

        try {
            // job.setJarByClass(Class.forName(conf.get(
            // org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
            logger.debug("OUTPUT format class is " + job.getOutputFormatClass());

            /*
             * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
             * ReflectionUtils.newInstance(job.getOutputFormatClass(),
             * job.getConfiguration()); output.checkOutputSpecs(job);
             */
            logger.debug("Class is " + ReflectionUtils
                    .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
            job.waitForCompletion(false);
            if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
                generateHiveScript(conf, job, jobCounter);
                generatePigScript(conf, job);
            }

        }
        /*
         * catch (HIHOException h) { h.printStackTrace(); }
         */
        catch (HIHOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // avro to be handled differently, thanks to all the incompatibilities
    // in the apis.
    else {
        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        // co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(job,
        // inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        JobConf jobConf = new JobConf(conf);

        try {
            GenericDBWritable queryWritable = getDBWritable(jobConf);
            Schema pair = DBMapper.getPairSchema(queryWritable.getColumns());

            AvroJob.setMapOutputSchema(jobConf, pair);
            GenericRecordAvroOutputFormat.setOutputPath(jobConf,
                    new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));

            co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(jobConf, inputQuery,
                    inputBoundingQuery, params);
            jobConf.setInputFormat(co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.class);
            jobConf.setMapperClass(DBInputAvroMapper.class);
            jobConf.setMapOutputKeyClass(NullWritable.class);
            jobConf.setMapOutputValueClass(AvroValue.class);
            jobConf.setOutputKeyClass(NullWritable.class);
            jobConf.setOutputValueClass(Text.class);
            jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class);
            jobConf.setJarByClass(DBQueryInputJob.class);
            jobConf.setStrings("io.serializations",
                    "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization,org.apache.avro.mapred.AvroSerialization");
            jobConf.setNumReduceTasks(0);
            /*
             * jobConf.setOutputFormat(org.apache.hadoop.mapred.
             * SequenceFileOutputFormat.class);
             * org.apache.hadoop.mapred.SequenceFileOutputFormat
             * .setOutputPath(jobConf, new
             * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
             */
            JobClient.runJob(jobConf);
        } catch (Throwable e) {
            e.printStackTrace();
        }

    }

}

From source file:co.nubetech.hiho.job.ExportDelimitedToDB.java

License:Apache License

public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(DelimitedLoadMapper.class);
    job.setJarByClass(DelimitedLoadMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputFormatClass(DBOutputFormat.class);
    int ret = 0;

    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}

From source file:co.nubetech.hiho.job.ExportToFTPServer.java

License:Apache License

@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    Job job = new Job(conf);
    job.setMapperClass(TokenCounterMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(FTPTextOutputFormat.class);
    FTPTextOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setJarByClass(ExportToFTPServer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);

    int ret = 0;

    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}

From source file:co.nubetech.hiho.job.ExportToMySQLDB.java

License:Apache License

@Override
public int run(String[] args) throws IOException {

    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(MySQLLoadDataMapper.class);
    job.setJarByClass(MySQLLoadDataMapper.class);
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    // verify required properties are loaded
    logger.debug(conf.get(DBConfiguration.URL_PROPERTY));
    logger.debug(conf.get(DBConfiguration.USERNAME_PROPERTY));
    logger.debug(conf.get(DBConfiguration.PASSWORD_PROPERTY));

    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}

From source file:co.nubetech.hiho.job.ExportToOracleDb.java

License:Apache License

@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    for (int i = 0; i < args.length; i++) {
        logger.debug("Remaining arguments are" + " " + args[i]);
    }
    populateConfiguration(args, conf);

    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    job.setJobName("OracleLoading");
    job.setMapperClass(OracleLoadMapper.class);
    job.setJarByClass(ExportToOracleDb.class);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));

    try {
        // we first create the external table definition
        String query = conf.get(HIHOConf.EXTERNAL_TABLE_DML);
        // create table if user has specified
        if (query != null) {
            this.runQuery(query, conf);
        }
    } catch (HIHOException e1) {

        e1.printStackTrace();
    }

    // verify required properties are loaded

    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;
    try {

        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    // run alter table query and add locations
    try {
        this.runQuery(getAlterTableDML(new Path(inputPath), conf), conf);
    } catch (HIHOException e1) {

        e1.printStackTrace();
    }
    return ret;
}

From source file:co.nubetech.hiho.job.sf.ExportSalesForceJob.java

License:Apache License

@Override
public int run(String[] arg0) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(arg0, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("SaleForceLoading");
    job.setMapperClass(SalesForceLoadMapper.class);
    job.setJarByClass(SalesForceLoadMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    // NLineInputFormat.setNumLinesPerSplit(job, 10);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;

    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}