List of usage examples for org.apache.hadoop.mapreduce.Job#getConfiguration()
public Configuration getConfiguration()
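getConfiguration() returns the live Configuration backing the job: values set through it before submission are visible to the tasks, whereas the Configuration passed to the constructor is copied at construction time. A minimal sketch before the examples below (the driver class and property key are illustrative, not taken from any source file listed here):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        // Job copies the Configuration it is handed...
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example");

        // ...so post-construction settings must go through getConfiguration().
        job.getConfiguration().set("example.custom.key", "value");
        job.getConfiguration().setInt("mapreduce.job.maps", 4);

        // Reading back confirms the value landed on the job's own copy.
        System.out.println(job.getConfiguration().get("example.custom.key"));
    }
}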
From source file: co.nubetech.hiho.job.DBQueryInputJob.java
License: Apache License

public void runJobs(Configuration conf, int jobCounter) throws IOException {
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
        logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // Set the number of map tasks on the job's live Configuration.
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
        throw new IllegalArgumentException("Wrong value of output strategy. Please correct");
    }

    if (os != OutputStrategyEnum.AVRO) {
        switch (os) {
        case DUMP: {
            // job.setMapperClass(DBImportMapper.class);
            break;
        }
        case DELIMITED: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);
            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break; // guard against falling through into the JSON case
        }
        case JSON: {
            // job.setMapperClass(DBImportJsonMapper.class);
            break;
        }
        default: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);
            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        }

        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        job.setNumReduceTasks(0);

        try {
            logger.debug("OUTPUT format class is " + job.getOutputFormatClass());
            logger.debug("Class is " + ReflectionUtils
                    .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
            job.waitForCompletion(false);
            if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
                generateHiveScript(conf, job, jobCounter);
                generatePigScript(conf, job);
            }
        } catch (Exception e) {
            // HIHOException is covered here too; a second catch clause after
            // Exception would be unreachable.
            e.printStackTrace();
        }
    } else {
        // Avro is handled differently, through the old mapred API, because of
        // incompatibilities between the two MapReduce APIs.
        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        JobConf jobConf = new JobConf(conf);
        try {
            GenericDBWritable queryWritable = getDBWritable(jobConf);
            Schema pair = DBMapper.getPairSchema(queryWritable.getColumns());
            AvroJob.setMapOutputSchema(jobConf, pair);
            GenericRecordAvroOutputFormat.setOutputPath(jobConf, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(jobConf, inputQuery,
                    inputBoundingQuery, params);
            jobConf.setInputFormat(co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.class);
            jobConf.setMapperClass(DBInputAvroMapper.class);
            jobConf.setMapOutputKeyClass(NullWritable.class);
            jobConf.setMapOutputValueClass(AvroValue.class);
            jobConf.setOutputKeyClass(NullWritable.class);
            jobConf.setOutputValueClass(Text.class);
            jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class);
            jobConf.setJarByClass(DBQueryInputJob.class);
            jobConf.setStrings("io.serializations",
                    "org.apache.hadoop.io.serializer.JavaSerialization,"
                            + "org.apache.hadoop.io.serializer.WritableSerialization,"
                            + "org.apache.avro.mapred.AvroSerialization");
            jobConf.setNumReduceTasks(0);
            JobClient.runJob(jobConf);
        } catch (Throwable e) {
            e.printStackTrace();
        }
    }
}
From source file: co.nubetech.hiho.job.ExportToDB.java
License: Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    job.setJobName("HihoDBExport");

    job.setMapperClass(GenericDBLoadDataMapper.class);
    job.setJarByClass(ExportToDB.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    GenericDBOutputFormat.setOutput(job, tableName, columnNames);

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file: co.nubetech.hiho.job.ExportToOracleDb.java
License: Apache License

@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    for (int i = 0; i < args.length; i++) {
        logger.debug("Remaining arguments are" + " " + args[i]);
    }
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    job.setJobName("OracleLoading");
    job.setMapperClass(OracleLoadMapper.class);
    job.setJarByClass(ExportToOracleDb.class);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));

    try {
        // First create the external table definition, if the user specified one.
        String query = conf.get(HIHOConf.EXTERNAL_TABLE_DML);
        if (query != null) {
            this.runQuery(query, conf);
        }
    } catch (HIHOException e1) {
        e1.printStackTrace();
    }

    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Run the ALTER TABLE query to add the generated file locations.
    try {
        this.runQuery(getAlterTableDML(new Path(inputPath), conf), conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
    }
    return ret;
}
From source file: co.nubetech.hiho.mapreduce.lib.db.apache.DBOutputFormat.java
License: Apache License

private static DBConfiguration setOutput(Job job, JobConf jobConf, String tableName) throws IOException {
    job.setOutputFormatClass(DBOutputFormat.class);
    jobConf.setReduceSpeculativeExecution(false);
    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
    dbConf.setOutputTableName(tableName);
    return dbConf;
}
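For context, a hedged sketch of how a caller might seed the Configuration that this helper later wraps in DBConfiguration. configureDB() exists with this signature on the stock org.apache.hadoop.mapreduce.lib.db.DBConfiguration; whether hiho's copied class mirrors it is an assumption, and the driver class, URL, and credentials below are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;

public class DBOutputSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "db-export");
        // configureDB() writes the driver/URL/credentials into the job's
        // Configuration, where new DBConfiguration(job.getConfiguration())
        // picks them up later.
        DBConfiguration.configureDB(job.getConfiguration(),
                "com.mysql.jdbc.Driver",            // JDBC driver (placeholder)
                "jdbc:mysql://localhost/testdb",    // connection URL (placeholder)
                "user", "password");                // credentials (placeholders)
    }
}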
From source file: co.nubetech.hiho.mapreduce.lib.db.DBQueryInputFormat.java
License: Apache License

/**
 * setInput() takes a custom query and a separate "bounding query" to use
 * instead of the custom "count query" used by DBInputFormat.
 */
public static void setInput(Job job, String inputQuery, String inputBoundingQuery, ArrayList params)
        throws IOException {
    DBInputFormat.setInput(job, GenericDBWritable.class, inputQuery, "");
    if (inputBoundingQuery != null) {
        job.getConfiguration().set(DBConfiguration.INPUT_BOUNDING_QUERY, inputBoundingQuery);
    }
    if (params != null) {
        DefaultStringifier<ArrayList> stringifier =
                new DefaultStringifier<ArrayList>(job.getConfiguration(), ArrayList.class);
        job.getConfiguration().set(HIHOConf.QUERY_PARAMS, stringifier.toString(params));
        logger.debug("Converted params and saved them into config");
    }
    job.setInputFormatClass(DBQueryInputFormat.class);
}
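A hedged sketch of how a driver might call this helper. The table and column names are placeholders; the bounding query conventionally returns the min and max of the split column so the input can be partitioned across mappers, and passing null for params means the input query carries no positional parameters:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import co.nubetech.hiho.mapreduce.lib.db.DBQueryInputFormat;

public class DBQueryInputSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "db-import");
        // setInput() stores both queries on job.getConfiguration() for the
        // input format to read at split-computation time.
        DBQueryInputFormat.setInput(job,
                "SELECT id, name FROM employee",          // input query (placeholder)
                "SELECT MIN(id), MAX(id) FROM employee",  // bounding query (placeholder)
                null);                                    // no positional params
    }
}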
From source file: co.nubetech.hiho.mapreduce.lib.db.GenericDBOutputFormat.java
License: Apache License

public static void setOutput(Job job, String tableName, String columnNames) throws IOException {
    job.setOutputFormatClass(GenericDBOutputFormat.class);
    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
    dbConf.setOutputTableName(tableName);
    dbConf.setOutputFieldNames(columnNames);

    String dbDriver = job.getConfiguration().get(DBConfiguration.DRIVER_CLASS_PROPERTY);
    String connString = job.getConfiguration().get(DBConfiguration.URL_PROPERTY);
    String username = job.getConfiguration().get(DBConfiguration.USERNAME_PROPERTY);
    String password = job.getConfiguration().get(DBConfiguration.PASSWORD_PROPERTY);

    Connection conn;
    PreparedStatement stmt;
    try {
        Class.forName(dbDriver).newInstance();
        conn = DriverManager.getConnection(connString, username, password);
        // Query the table metadata to build column info, then stash it as
        // JSON in the job's Configuration for use on the task side.
        String query = "select " + columnNames + " from " + tableName;
        stmt = conn.prepareStatement(query);
        ResultSetMetaData meta = stmt.getMetaData();
        ArrayList<ColumnInfo> columnInfo = populateColumnInfo(meta);
        String jsonString = getJsonStringOfColumnInfo(columnInfo);
        job.getConfiguration().set(HIHOConf.COLUMN_INFO, jsonString);
        logger.debug("columnInfo is: " + job.getConfiguration().get(HIHOConf.COLUMN_INFO));
        stmt.close();
        conn.close();
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }
}
From source file: co.nubetech.hiho.merge.MergeJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    Configuration conf = getConf();
    conf.set(HIHOConf.MERGE_OLD_PATH, oldPath);
    conf.set(HIHOConf.MERGE_NEW_PATH, newPath);

    Job job = new Job(conf);
    job.setJobName("Merge job");
    job.setJarByClass(MergeJob.class);

    if (mergeBy.equals("key")) {
        job.setMapperClass(MergeKeyMapper.class);
        job.setReducerClass(MergeKeyReducer.class);
    } else if (mergeBy.equals("value")) {
        job.setMapperClass(MergeValueMapper.class);
        job.setReducerClass(MergeValueReducer.class);
    }

    job.setInputFormatClass(inputFormatClass);
    DelimitedTextInputFormat.setProperties(job, delimiter, column);
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setMapOutputValueClass(HihoValue.class);
    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);

    FileInputFormat.setInputPaths(job, oldPath + "," + newPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsOld = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
            totalRecordsNew = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue();
            badRecords = counters.findCounter(MergeRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(MergeRecordCounter.OUTPUT).getValue();
            logger.info("Total old records read are: " + totalRecordsOld);
            logger.info("Total new records read are: " + totalRecordsNew);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
From source file: com.abel.hwfs.custom.output.SetSizeDBOutputFormat.java
License: Apache License

private static MyDBConfiguration setOutput(Job job, String tableName) throws IOException {
    job.setOutputFormatClass(SetSizeDBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    MyDBConfiguration dbConf = new MyDBConfiguration(job.getConfiguration());
    dbConf.setOutputTableName(tableName);
    return dbConf;
}
From source file: com.acme.io.JsonLoader.java
License: Apache License

/**
 * Get a schema for the data to be loaded.
 * @param location Location as returned by
 * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
 * @param job The {@link Job} object - this should be used only to obtain
 * cluster properties through {@link Job#getConfiguration()} and not to
 * set/query any runtime job information.
 * @return schema for the data to be loaded. This schema should represent
 * all tuples of the returned data. If the schema is unknown or it is
 * not possible to return a schema that represents all returned data,
 * then null should be returned. The schema should not be affected by
 * pushProjection, i.e. getSchema should always return the original schema
 * even after pushProjection.
 * @throws IOException if an exception occurs while determining the schema
 */
public ResourceSchema getSchema(String location, Job job) throws IOException {
    // Open the schema file and read the schema.
    // Get an HDFS handle.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    DataInputStream in = fs.open(new Path(location + "/_schema"));
    String line = in.readLine();
    in.close();

    // Parse the schema.
    ResourceSchema s = new ResourceSchema(Utils.getSchemaFromString(line));
    if (s == null) {
        throw new IOException("Unable to parse schema found in file " + location + "/_schema");
    }

    // Now that we have determined the schema, store it in our
    // UDFContext properties object so we have it when we need it on the
    // backend.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfcSignature });
    p.setProperty("pig.jsonloader.schema", line);

    return s;
}
From source file: com.acme.io.JsonStorage.java
License: Apache License

/**
 * Store the schema of the data being written.
 * @param schema Schema to be recorded
 * @param location Location as returned by
 * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
 * @param job The {@link Job} object - this should be used only to obtain
 * cluster properties through {@link Job#getConfiguration()} and not to
 * set/query any runtime job information.
 * @throws IOException
 */
public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException {
    // Store the schema in a side file in the same directory. MapReduce
    // does not include files starting with "_" when reading data for a job.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    DataOutputStream out = fs.create(new Path(location + "/_schema"));
    out.writeBytes(schema.toString());
    out.writeByte('\n');
    out.close();
}