Example usage for org.apache.hadoop.mapreduce Job getConfiguration

List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getConfiguration.

Prototype

public Configuration getConfiguration() 

Document

Return the configuration for the job.
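
Before the per-file examples, a minimal sketch (not taken from the source files below) of the typical pattern: obtain the job's live Configuration via getConfiguration() and set properties on it before submission. The class name and the property values here are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "example-job");

        // getConfiguration() returns the Configuration backing this job;
        // properties set on it are applied when the job is submitted.
        Configuration jobConf = job.getConfiguration();
        jobConf.setInt("mapreduce.map.tasks", 2);
        jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    }
}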

Usage

From source file:com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java

License:Apache License

public void testDateSplits() throws Exception {
    Statement s = connection.createStatement();
    final String DATE_TABLE = "datetable";
    final String COL = "foo";
    try {
        try {
            // delete the table if it already exists.
            s.executeUpdate("DROP TABLE " + DATE_TABLE);
        } catch (SQLException e) {
            // Ignored; proceed regardless of whether we deleted the table;
            // it may have simply not existed.
        }

        // Create the table.
        s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')");

        // commit this tx.
        connection.commit();

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.getLocal(conf);
        fs.delete(new Path(OUT_DIR), true);

        // now do a dd import
        Job job = new Job(conf);
        job.setMapperClass(ValMapper.class);
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(DateCol.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(DateCol.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);
        job.getConfiguration().setInt("mapreduce.map.tasks", 2);
        FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
        DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, (String) null, (String) null);
        DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);

        boolean ret = job.waitForCompletion(true);
        assertTrue("job failed", ret);

        // Check to see that we imported as much as we thought we did.
        assertEquals("Did not get all the records", 4, job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue());
    } finally {
        s.close();
    }
}

From source file:com.cloudera.sqoop.mapreduce.HBaseImportJob.java

License:Apache License

@Override
protected void configureOutputFormat(Job job, String tableName, String tableClassName)
        throws ClassNotFoundException, IOException {

    // Use the DelegatingOutputFormat with the HBasePutProcessor.
    job.setOutputFormatClass(getOutputFormatClass());

    Configuration conf = job.getConfiguration();
    conf.setClass("sqoop.output.delegate.field.map.processor.class", HBasePutProcessor.class,
            FieldMapProcessor.class);

    // Set the HBase parameters (table, column family, row key):
    conf.set(HBasePutProcessor.TABLE_NAME_KEY, options.getHBaseTable());
    conf.set(HBasePutProcessor.COL_FAMILY_KEY, options.getHBaseColFamily());

    // What column of the input becomes the row key?
    String rowKeyCol = options.getHBaseRowKeyColumn();
    if (null == rowKeyCol) {
        // User didn't explicitly set one. If there's a split-by column set,
        // use that.
        rowKeyCol = options.getSplitByCol();
    }

    if (null == rowKeyCol) {
        // No split-by column is explicitly set.
        // If the table has a primary key, use that.
        ConnManager manager = getContext().getConnManager();
        rowKeyCol = manager.getPrimaryKey(tableName);
    }

    if (null == rowKeyCol) {
        // Give up here if this is still unset.
        throw new IOException("Could not determine the row-key column. "
                + "Use --hbase-row-key to specify the input column that " + "names each row.");
    }

    conf.set(HBasePutProcessor.ROW_KEY_COLUMN_KEY, rowKeyCol);
}

From source file:com.cloudera.sqoop.mapreduce.HBaseImportJob.java

License:Apache License

@Override
/** Create the target HBase table before running the job. */
protected void jobSetup(Job job) throws IOException, ImportException {
    Configuration conf = job.getConfiguration();
    String tableName = conf.get(HBasePutProcessor.TABLE_NAME_KEY);
    String familyName = conf.get(HBasePutProcessor.COL_FAMILY_KEY);

    if (null == tableName) {
        throw new ImportException("Import to HBase error: Table name not specified");
    }

    if (null == familyName) {
        throw new ImportException("Import to HBase error: Column family not specified");
    }

    // Add HBase configuration files to this conf object.
    HBaseConfiguration.addHbaseResources(conf);

    HBaseAdmin admin = new HBaseAdmin(conf);

    // Check to see if the table exists.
    HTableDescriptor tableDesc = new HTableDescriptor(tableName);
    byte[] familyBytes = Bytes.toBytes(familyName);
    HColumnDescriptor colDesc = new HColumnDescriptor(familyBytes);
    if (!admin.tableExists(tableName)) {
        if (options.getCreateHBaseTable()) {
            // Create the table.
            LOG.info("Creating missing HBase table " + tableName);
            tableDesc.addFamily(colDesc);
            admin.createTable(tableDesc);
        } else {
            LOG.warn("Could not find HBase table " + tableName);
            LOG.warn("This job may fail. Either explicitly create the table,");
            LOG.warn("or re-run with --hbase-create-table.");
        }
    } else if (!tableDesc.hasFamily(familyBytes)) {
        if (options.getCreateHBaseTable()) {
            // Create the column family.
            LOG.info("Creating missing column family " + familyName);
            admin.disableTable(tableName);
            admin.addColumn(tableName, colDesc);
            admin.enableTable(tableName);
        } else {
            LOG.warn("Could not find column family " + familyName + " in table " + tableName);
            LOG.warn("This job may fail. Either create the column family,");
            LOG.warn("or re-run with --hbase-create-table.");
        }
    }

    // Make sure HBase libraries are shipped as part of the job.
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(conf, HTable.class);

    super.jobSetup(job);
}

From source file:com.cloudera.sqoop.mapreduce.MergeJob.java

License:Apache License

public boolean runMergeJob() throws IOException {
    Configuration conf = options.getConf();
    Job job = new Job(conf);

    String userClassName = options.getClassName();
    if (null == userClassName) {
        // Shouldn't get here.
        throw new IOException("Record class name not specified with " + "--class-name.");
    }

    // Set the external jar to use for the job.
    String existingJar = options.getExistingJarName();
    if (existingJar != null) {
        // User explicitly identified a jar path.
        LOG.debug("Setting job jar to user-specified jar: " + existingJar);
        job.getConfiguration().set("mapred.jar", existingJar);
    } else {
        // Infer it from the location of the specified class, if it's on the
        // classpath.
        try {
            Class<? extends Object> userClass = conf.getClassByName(userClassName);
            if (null != userClass) {
                String userJar = Jars.getJarPathForClass(userClass);
                LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar);
                job.getConfiguration().set("mapred.jar", userJar);
            } else {
                LOG.warn("Specified class " + userClassName + " is not in a jar. "
                        + "MapReduce may not find the class");
            }
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    }

    try {
        Path oldPath = new Path(options.getMergeOldPath());
        Path newPath = new Path(options.getMergeNewPath());

        Configuration jobConf = job.getConfiguration();
        FileSystem fs = FileSystem.get(jobConf);
        oldPath = oldPath.makeQualified(fs);
        newPath = newPath.makeQualified(fs);

        FileInputFormat.addInputPath(job, oldPath);
        FileInputFormat.addInputPath(job, newPath);

        jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString());
        jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString());
        jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol());
        jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName);

        FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir()));

        if (ExportJobBase.isSequenceFiles(jobConf, newPath)) {
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapperClass(MergeRecordMapper.class);
        } else {
            job.setMapperClass(MergeTextMapper.class);
            job.setOutputFormatClass(RawKeyTextOutputFormat.class);
        }

        jobConf.set("mapred.output.key.class", userClassName);
        job.setOutputValueClass(NullWritable.class);

        job.setReducerClass(MergeReducer.class);

        // Set the intermediate data types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MergeRecord.class);

        // Make sure Sqoop and anything else we need is on the classpath.
        cacheJars(job, null);
        return this.runJob(job);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }
}

From source file:com.cloudera.sqoop.mapreduce.MySQLDumpImportJob.java

License:Apache License

/**
 * Configure the inputformat to use for the job.
 */
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
        throws ClassNotFoundException, IOException {

    if (null == tableName) {
        LOG.error("mysqldump-based import cannot support free-form query imports.");
        LOG.error("Do not use --direct and --query together for MySQL.");
        throw new IOException("null tableName for MySQLDumpImportJob.");
    }

    ConnManager mgr = getContext().getConnManager();
    String username = options.getUsername();
    if (null == username || username.length() == 0) {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString());
    } else {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString(),
                username, options.getPassword());
    }

    String[] colNames = options.getColumns();
    if (null == colNames) {
        colNames = mgr.getColumnNames(tableName);
    }

    String[] sqlColNames = null;
    if (null != colNames) {
        sqlColNames = new String[colNames.length];
        for (int i = 0; i < colNames.length; i++) {
            sqlColNames[i] = mgr.escapeColName(colNames[i]);
        }
    }

    // It's ok if the where clause is null in DBInputFormat.setInput.
    String whereClause = options.getWhereClause();

    // We can't set the class properly in here, because we may not have the
    // jar loaded in this JVM. So we start by calling setInput() with
    // DBWritable and then overriding the string manually.

    // Note that mysqldump also does *not* want a quoted table name.
    DataDrivenDBInputFormat.setInput(job, DBWritable.class, tableName, whereClause,
            mgr.escapeColName(splitByCol), sqlColNames);

    Configuration conf = job.getConfiguration();
    conf.setInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY, options.getOutputFieldDelim());
    conf.setInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY, options.getOutputRecordDelim());
    conf.setInt(MySQLUtils.OUTPUT_ENCLOSED_BY_KEY, options.getOutputEnclosedBy());
    conf.setInt(MySQLUtils.OUTPUT_ESCAPED_BY_KEY, options.getOutputEscapedBy());
    conf.setBoolean(MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY, options.isOutputEncloseRequired());
    String[] extraArgs = options.getExtraArgs();
    if (null != extraArgs) {
        conf.setStrings(MySQLUtils.EXTRA_ARGS_KEY, extraArgs);
    }

    LOG.debug("Using InputFormat: " + inputFormatClass);
    job.setInputFormatClass(getInputFormatClass());
}

From source file:com.cloudera.sqoop.mapreduce.MySQLExportJob.java

License:Apache License

@Override
/**
 * Configure the inputformat to use for the job.
 */
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
        throws ClassNotFoundException, IOException {

    // Configure the delimiters, etc.
    Configuration conf = job.getConfiguration();
    conf.setInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY, options.getOutputFieldDelim());
    conf.setInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY, options.getOutputRecordDelim());
    conf.setInt(MySQLUtils.OUTPUT_ENCLOSED_BY_KEY, options.getOutputEnclosedBy());
    conf.setInt(MySQLUtils.OUTPUT_ESCAPED_BY_KEY, options.getOutputEscapedBy());
    conf.setBoolean(MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY, options.isOutputEncloseRequired());
    String[] extraArgs = options.getExtraArgs();
    if (null != extraArgs) {
        conf.setStrings(MySQLUtils.EXTRA_ARGS_KEY, extraArgs);
    }

    ConnManager mgr = context.getConnManager();
    String username = options.getUsername();
    if (null == username || username.length() == 0) {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString());
    } else {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString(),
                username, options.getPassword());
    }

    String[] colNames = options.getColumns();
    if (null == colNames) {
        colNames = mgr.getColumnNames(tableName);
    }

    String[] sqlColNames = null;
    if (null != colNames) {
        sqlColNames = new String[colNames.length];
        for (int i = 0; i < colNames.length; i++) {
            sqlColNames[i] = mgr.escapeColName(colNames[i]);
        }
    }

    // Note that mysqldump also does *not* want a quoted table name.
    DataDrivenDBInputFormat.setInput(job, DBWritable.class, tableName, null, null, sqlColNames);

    // Configure the actual InputFormat to use. 
    super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}

From source file:com.cloudera.sqoop.shims.Apache22HadoopShim.java

License:Apache License

@Override
public void setJobNumMaps(Job job, int numMapTasks) {
    job.getConfiguration().setInt(JobContext.NUM_MAPS, numMapTasks);
}

From source file:com.cloudera.sqoop.shims.CDH3Shim.java

License:Apache License

@Override
public void setJobNumMaps(Job job, int numMapTasks) {
    job.getConfiguration().setInt("mapred.map.tasks", numMapTasks);
}

From source file:com.cloudera.sqoop.shims.CDH3Shim.java

License:Apache License

@Override
public void setJobMapSpeculativeExecution(Job job, boolean isEnabled) {
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", isEnabled);
}

From source file:com.cloudera.sqoop.shims.CDH3Shim.java

License:Apache License

@Override
public void setJobReduceSpeculativeExecution(Job job, boolean isEnabled) {
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", isEnabled);
}