Example usage for org.apache.hadoop.mapreduce Job Job

List of usage examples for org.apache.hadoop.mapreduce Job Job

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job Job.

Prototype

Job(JobConf conf) throws IOException 

Source Link

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java

License:Apache License

private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    Job job = new Job(context.getConfiguration());
    FileOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }/*from w ww . j a va  2 s . c  o  m*/

    Path jobOutputPath = createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    FileOutputFormat.setOutputPath(job, jobOutputPath);

    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}

From source file:co.nubetech.hiho.dedup.DedupJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);/*from   w w  w  .java  2 s . c  o m*/
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }

    job.setMapOutputKeyClass(HihoTuple.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}

From source file:co.nubetech.hiho.dedup.TestDelimitedTextInputFormat.java

License:Apache License

@Test
public void testSetProperties() throws IOException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    DelimitedTextInputFormat.setProperties(job, ",", 1);
    assertEquals(",", job.getConfiguration().get(DelimitedTextInputFormat.DELIMITER_CONF));
    assertEquals("1", job.getConfiguration().get(DelimitedTextInputFormat.COLUMN_CONF));
}

From source file:co.nubetech.hiho.job.DBQueryInputJob.java

License:Apache License

public void runJobs(Configuration conf, int jobCounter) throws IOException {

    try {//  www .j  av  a2s  . com
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
        logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " +
    // conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS,
    // conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
        throw new IllegalArgumentException("Wrong value of output strategy. Please correct");
    }
    if (os != OutputStrategyEnum.AVRO) {
        switch (os) {

        case DUMP: {
            // job.setMapperClass(DBImportMapper.class);
            break;
        }
        /*
         * case AVRO: { job.setMapperClass(DBInputAvroMapper.class); //
         * need avro in cp // job.setJarByClass(Schema.class); // need
         * jackson which is needed by avro - ugly! //
         * job.setJarByClass(ObjectMapper.class);
         * job.setMapOutputKeyClass(NullWritable.class);
         * job.setMapOutputValueClass(AvroValue.class);
         * job.setOutputKeyClass(NullWritable.class);
         * job.setOutputValueClass(AvroValue.class);
         * job.setOutputFormatClass(AvroOutputFormat.class);
         * 
         * AvroOutputFormat.setOutputPath(job, new
         * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; }
         */
        case DELIMITED: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        }
        case JSON: {
            // job.setMapperClass(DBImportJsonMapper.class);
            // job.setJarByClass(ObjectMapper.class);
            break;
        }
        default: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        }

        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        job.setNumReduceTasks(0);

        try {
            // job.setJarByClass(Class.forName(conf.get(
            // org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
            logger.debug("OUTPUT format class is " + job.getOutputFormatClass());

            /*
             * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
             * ReflectionUtils.newInstance(job.getOutputFormatClass(),
             * job.getConfiguration()); output.checkOutputSpecs(job);
             */
            logger.debug("Class is " + ReflectionUtils
                    .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
            job.waitForCompletion(false);
            if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
                generateHiveScript(conf, job, jobCounter);
                generatePigScript(conf, job);
            }

        }
        /*
         * catch (HIHOException h) { h.printStackTrace(); }
         */
        catch (Exception e) {
            e.printStackTrace();
        } catch (HIHOException e) {
            e.printStackTrace();
        }
    }
    // avro to be handled differently, thanks to all the incompatibilities
    // in the apis.
    else {
        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        // co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(job,
        // inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        JobConf jobConf = new JobConf(conf);

        try {
            GenericDBWritable queryWritable = getDBWritable(jobConf);
            Schema pair = DBMapper.getPairSchema(queryWritable.getColumns());

            AvroJob.setMapOutputSchema(jobConf, pair);
            GenericRecordAvroOutputFormat.setOutputPath(jobConf,
                    new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));

            co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(jobConf, inputQuery,
                    inputBoundingQuery, params);
            jobConf.setInputFormat(co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.class);
            jobConf.setMapperClass(DBInputAvroMapper.class);
            jobConf.setMapOutputKeyClass(NullWritable.class);
            jobConf.setMapOutputValueClass(AvroValue.class);
            jobConf.setOutputKeyClass(NullWritable.class);
            jobConf.setOutputValueClass(Text.class);
            jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class);
            jobConf.setJarByClass(DBQueryInputJob.class);
            jobConf.setStrings("io.serializations",
                    "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization,org.apache.avro.mapred.AvroSerialization");
            jobConf.setNumReduceTasks(0);
            /*
             * jobConf.setOutputFormat(org.apache.hadoop.mapred.
             * SequenceFileOutputFormat.class);
             * org.apache.hadoop.mapred.SequenceFileOutputFormat
             * .setOutputPath(jobConf, new
             * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
             */
            JobClient.runJob(jobConf);
        } catch (Throwable e) {
            e.printStackTrace();
        }

    }

}

From source file:co.nubetech.hiho.job.ExportDelimitedToDB.java

License:Apache License

public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(DelimitedLoadMapper.class);
    job.setJarByClass(DelimitedLoadMapper.class);
    job.setNumReduceTasks(0);//from ww w  .  ja  v a  2  s  .com

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputFormatClass(DBOutputFormat.class);
    int ret = 0;

    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}

From source file:co.nubetech.hiho.job.ExportToDB.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args, conf);//from w  w w.  ja v  a  2 s  . co  m
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    job.setJobName("HihoDBExport");

    job.setMapperClass(GenericDBLoadDataMapper.class);
    job.setJarByClass(ExportToDB.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    GenericDBOutputFormat.setOutput(job, tableName, columnNames);

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}

From source file:co.nubetech.hiho.job.ExportToFTPServer.java

License:Apache License

@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    populateConfiguration(args, conf);/*from  w ww .  j a v  a2  s.  c  o  m*/
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    Job job = new Job(conf);
    job.setMapperClass(TokenCounterMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(FTPTextOutputFormat.class);
    FTPTextOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setJarByClass(ExportToFTPServer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);

    int ret = 0;

    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}

From source file:co.nubetech.hiho.job.ExportToMySQLDB.java

License:Apache License

@Override
public int run(String[] args) throws IOException {

    Configuration conf = getConf();
    populateConfiguration(args, conf);/* www .  jav a 2  s  . com*/
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(MySQLLoadDataMapper.class);
    job.setJarByClass(MySQLLoadDataMapper.class);
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    // verify required properties are loaded
    logger.debug(conf.get(DBConfiguration.URL_PROPERTY));
    logger.debug(conf.get(DBConfiguration.USERNAME_PROPERTY));
    logger.debug(conf.get(DBConfiguration.PASSWORD_PROPERTY));

    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}

From source file:co.nubetech.hiho.job.ExportToOracleDb.java

License:Apache License

@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }//  w w w . jav  a  2 s  . c om

    for (int i = 0; i < args.length; i++) {
        logger.debug("Remaining arguments are" + " " + args[i]);
    }
    populateConfiguration(args, conf);

    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    job.setJobName("OracleLoading");
    job.setMapperClass(OracleLoadMapper.class);
    job.setJarByClass(ExportToOracleDb.class);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));

    try {
        // we first create the external table definition
        String query = conf.get(HIHOConf.EXTERNAL_TABLE_DML);
        // create table if user has specified
        if (query != null) {
            this.runQuery(query, conf);
        }
    } catch (HIHOException e1) {

        e1.printStackTrace();
    }

    // verify required properties are loaded

    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;
    try {

        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    // run alter table query and add locations
    try {
        this.runQuery(getAlterTableDML(new Path(inputPath), conf), conf);
    } catch (HIHOException e1) {

        e1.printStackTrace();
    }
    return ret;
}

From source file:co.nubetech.hiho.job.sf.ExportSalesForceJob.java

License:Apache License

@Override
public int run(String[] arg0) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(arg0, conf);/*from   ww w  .ja va 2  s  . co m*/
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("SaleForceLoading");
    job.setMapperClass(SalesForceLoadMapper.class);
    job.setJarByClass(SalesForceLoadMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    // NLineInputFormat.setNumLinesPerSplit(job, 10);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;

    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;

}