List of usage examples for org.apache.hadoop.mapreduce.Job.getConfiguration()
public Configuration getConfiguration()
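Before the examples, a minimal sketch of the typical pattern: getConfiguration() returns the live Configuration backing the Job, so properties set on it before submission are shipped with the job and visible to its tasks. This is an illustrative sketch only; the job name and the "example.custom.key" property are placeholders, not taken from the examples below, and Job.getInstance is the newer factory where the older examples construct Job directly with new Job(conf).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        // Create a Job and fetch its backing Configuration.
        Job job = Job.getInstance(new Configuration(), "configuration-sketch");
        Configuration conf = job.getConfiguration();

        // Properties set here are serialized with the job and visible to mappers/reducers.
        conf.setInt("mapreduce.map.tasks", 2);          // hint at the number of map tasks
        conf.set("example.custom.key", "some-value");   // illustrative application-specific key

        // getConfiguration() returns the same object on every call,
        // so later reads see the updates made above.
        System.out.println(conf.get("example.custom.key"));
    }
}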
From source file: com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java
License: Apache License

public void testDateSplits() throws Exception {
    Statement s = connection.createStatement();
    final String DATE_TABLE = "datetable";
    final String COL = "foo";
    try {
        try {
            // delete the table if it already exists.
            s.executeUpdate("DROP TABLE " + DATE_TABLE);
        } catch (SQLException e) {
            // Ignored; proceed regardless of whether we deleted the table;
            // it may have simply not existed.
        }

        // Create the table.
        s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')");

        // commit this tx.
        connection.commit();

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.getLocal(conf);
        fs.delete(new Path(OUT_DIR), true);

        // now do a data-driven import.
        Job job = new Job(conf);
        job.setMapperClass(ValMapper.class);
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(DateCol.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(DateCol.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);
        job.getConfiguration().setInt("mapreduce.map.tasks", 2);
        FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));

        DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL,
            (String) null, (String) null);
        DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);

        boolean ret = job.waitForCompletion(true);
        assertTrue("job failed", ret);

        // Check to see that we imported as much as we thought we did.
        assertEquals("Did not get all the records", 4,
            job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS")
                .getValue());
    } finally {
        s.close();
    }
}
From source file: com.cloudera.sqoop.mapreduce.HBaseImportJob.java
License: Apache License

@Override
protected void configureOutputFormat(Job job, String tableName, String tableClassName)
        throws ClassNotFoundException, IOException {
    // Use the DelegatingOutputFormat with the HBasePutProcessor.
    job.setOutputFormatClass(getOutputFormatClass());

    Configuration conf = job.getConfiguration();
    conf.setClass("sqoop.output.delegate.field.map.processor.class",
        HBasePutProcessor.class, FieldMapProcessor.class);

    // Set the HBase parameters (table, column family, row key):
    conf.set(HBasePutProcessor.TABLE_NAME_KEY, options.getHBaseTable());
    conf.set(HBasePutProcessor.COL_FAMILY_KEY, options.getHBaseColFamily());

    // What column of the input becomes the row key?
    String rowKeyCol = options.getHBaseRowKeyColumn();
    if (null == rowKeyCol) {
        // User didn't explicitly set one. If there's a split-by column set, use that.
        rowKeyCol = options.getSplitByCol();
    }
    if (null == rowKeyCol) {
        // No split-by column is explicitly set.
        // If the table has a primary key, use that.
        ConnManager manager = getContext().getConnManager();
        rowKeyCol = manager.getPrimaryKey(tableName);
    }
    if (null == rowKeyCol) {
        // Give up here if this is still unset.
        throw new IOException("Could not determine the row-key column. "
            + "Use --hbase-row-key to specify the input column that "
            + "names each row.");
    }

    conf.set(HBasePutProcessor.ROW_KEY_COLUMN_KEY, rowKeyCol);
}
From source file: com.cloudera.sqoop.mapreduce.HBaseImportJob.java
License: Apache License

@Override
/** Create the target HBase table before running the job. */
protected void jobSetup(Job job) throws IOException, ImportException {
    Configuration conf = job.getConfiguration();
    String tableName = conf.get(HBasePutProcessor.TABLE_NAME_KEY);
    String familyName = conf.get(HBasePutProcessor.COL_FAMILY_KEY);

    if (null == tableName) {
        throw new ImportException("Import to HBase error: Table name not specified");
    }
    if (null == familyName) {
        throw new ImportException("Import to HBase error: Column family not specified");
    }

    // Add HBase configuration files to this conf object.
    HBaseConfiguration.addHbaseResources(conf);
    HBaseAdmin admin = new HBaseAdmin(conf);

    // Check to see if the table exists.
    HTableDescriptor tableDesc = new HTableDescriptor(tableName);
    byte[] familyBytes = Bytes.toBytes(familyName);
    HColumnDescriptor colDesc = new HColumnDescriptor(familyBytes);
    if (!admin.tableExists(tableName)) {
        if (options.getCreateHBaseTable()) {
            // Create the table.
            LOG.info("Creating missing HBase table " + tableName);
            tableDesc.addFamily(colDesc);
            admin.createTable(tableDesc);
        } else {
            LOG.warn("Could not find HBase table " + tableName);
            LOG.warn("This job may fail. Either explicitly create the table,");
            LOG.warn("or re-run with --hbase-create-table.");
        }
    } else if (!tableDesc.hasFamily(familyBytes)) {
        if (options.getCreateHBaseTable()) {
            // Create the column family.
            LOG.info("Creating missing column family " + familyName);
            admin.disableTable(tableName);
            admin.addColumn(tableName, colDesc);
            admin.enableTable(tableName);
        } else {
            LOG.warn("Could not find column family " + familyName + " in table " + tableName);
            LOG.warn("This job may fail. Either create the column family,");
            LOG.warn("or re-run with --hbase-create-table.");
        }
    }

    // Make sure HBase libraries are shipped as part of the job.
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(conf, HTable.class);

    super.jobSetup(job);
}
From source file: com.cloudera.sqoop.mapreduce.MergeJob.java
License: Apache License

public boolean runMergeJob() throws IOException {
    Configuration conf = options.getConf();
    Job job = new Job(conf);

    String userClassName = options.getClassName();
    if (null == userClassName) {
        // Shouldn't get here.
        throw new IOException("Record class name not specified with --class-name.");
    }

    // Set the external jar to use for the job.
    String existingJar = options.getExistingJarName();
    if (existingJar != null) {
        // User explicitly identified a jar path.
        LOG.debug("Setting job jar to user-specified jar: " + existingJar);
        job.getConfiguration().set("mapred.jar", existingJar);
    } else {
        // Infer it from the location of the specified class, if it's on the classpath.
        try {
            Class<? extends Object> userClass = conf.getClassByName(userClassName);
            if (null != userClass) {
                String userJar = Jars.getJarPathForClass(userClass);
                LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar);
                job.getConfiguration().set("mapred.jar", userJar);
            } else {
                LOG.warn("Specified class " + userClassName + " is not in a jar. "
                    + "MapReduce may not find the class");
            }
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    }

    try {
        Path oldPath = new Path(options.getMergeOldPath());
        Path newPath = new Path(options.getMergeNewPath());

        Configuration jobConf = job.getConfiguration();
        FileSystem fs = FileSystem.get(jobConf);
        oldPath = oldPath.makeQualified(fs);
        newPath = newPath.makeQualified(fs);

        FileInputFormat.addInputPath(job, oldPath);
        FileInputFormat.addInputPath(job, newPath);

        jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString());
        jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString());
        jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol());
        jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName);

        FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir()));

        if (ExportJobBase.isSequenceFiles(jobConf, newPath)) {
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapperClass(MergeRecordMapper.class);
        } else {
            job.setMapperClass(MergeTextMapper.class);
            job.setOutputFormatClass(RawKeyTextOutputFormat.class);
        }

        jobConf.set("mapred.output.key.class", userClassName);
        job.setOutputValueClass(NullWritable.class);

        job.setReducerClass(MergeReducer.class);

        // Set the intermediate data types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MergeRecord.class);

        // Make sure Sqoop and anything else we need is on the classpath.
        cacheJars(job, null);
        return this.runJob(job);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }
}
From source file: com.cloudera.sqoop.mapreduce.MySQLDumpImportJob.java
License: Apache License

/**
 * Configure the inputformat to use for the job.
 */
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
        throws ClassNotFoundException, IOException {
    if (null == tableName) {
        LOG.error("mysqldump-based import cannot support free-form query imports.");
        LOG.error("Do not use --direct and --query together for MySQL.");
        throw new IOException("null tableName for MySQLDumpImportJob.");
    }

    ConnManager mgr = getContext().getConnManager();
    String username = options.getUsername();
    if (null == username || username.length() == 0) {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(),
            options.getConnectString());
    } else {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(),
            options.getConnectString(), username, options.getPassword());
    }

    String[] colNames = options.getColumns();
    if (null == colNames) {
        colNames = mgr.getColumnNames(tableName);
    }

    String[] sqlColNames = null;
    if (null != colNames) {
        sqlColNames = new String[colNames.length];
        for (int i = 0; i < colNames.length; i++) {
            sqlColNames[i] = mgr.escapeColName(colNames[i]);
        }
    }

    // It's ok if the where clause is null in DBInputFormat.setInput.
    String whereClause = options.getWhereClause();

    // We can't set the class properly in here, because we may not have the
    // jar loaded in this JVM. So we start by calling setInput() with
    // DBWritable and then overriding the string manually.

    // Note that mysqldump also does *not* want a quoted table name.
    DataDrivenDBInputFormat.setInput(job, DBWritable.class, tableName, whereClause,
        mgr.escapeColName(splitByCol), sqlColNames);

    Configuration conf = job.getConfiguration();
    conf.setInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY, options.getOutputFieldDelim());
    conf.setInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY, options.getOutputRecordDelim());
    conf.setInt(MySQLUtils.OUTPUT_ENCLOSED_BY_KEY, options.getOutputEnclosedBy());
    conf.setInt(MySQLUtils.OUTPUT_ESCAPED_BY_KEY, options.getOutputEscapedBy());
    conf.setBoolean(MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY, options.isOutputEncloseRequired());
    String[] extraArgs = options.getExtraArgs();
    if (null != extraArgs) {
        conf.setStrings(MySQLUtils.EXTRA_ARGS_KEY, extraArgs);
    }

    LOG.debug("Using InputFormat: " + inputFormatClass);
    job.setInputFormatClass(getInputFormatClass());
}
From source file: com.cloudera.sqoop.mapreduce.MySQLExportJob.java
License: Apache License

@Override
/**
 * Configure the inputformat to use for the job.
 */
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
        throws ClassNotFoundException, IOException {
    // Configure the delimiters, etc.
    Configuration conf = job.getConfiguration();
    conf.setInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY, options.getOutputFieldDelim());
    conf.setInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY, options.getOutputRecordDelim());
    conf.setInt(MySQLUtils.OUTPUT_ENCLOSED_BY_KEY, options.getOutputEnclosedBy());
    conf.setInt(MySQLUtils.OUTPUT_ESCAPED_BY_KEY, options.getOutputEscapedBy());
    conf.setBoolean(MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY, options.isOutputEncloseRequired());
    String[] extraArgs = options.getExtraArgs();
    if (null != extraArgs) {
        conf.setStrings(MySQLUtils.EXTRA_ARGS_KEY, extraArgs);
    }

    ConnManager mgr = context.getConnManager();
    String username = options.getUsername();
    if (null == username || username.length() == 0) {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(),
            options.getConnectString());
    } else {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(),
            options.getConnectString(), username, options.getPassword());
    }

    String[] colNames = options.getColumns();
    if (null == colNames) {
        colNames = mgr.getColumnNames(tableName);
    }

    String[] sqlColNames = null;
    if (null != colNames) {
        sqlColNames = new String[colNames.length];
        for (int i = 0; i < colNames.length; i++) {
            sqlColNames[i] = mgr.escapeColName(colNames[i]);
        }
    }

    // Note that mysqldump also does *not* want a quoted table name.
    DataDrivenDBInputFormat.setInput(job, DBWritable.class, tableName, null, null, sqlColNames);

    // Configure the actual InputFormat to use.
    super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
From source file: com.cloudera.sqoop.shims.Apache22HadoopShim.java
License: Apache License

@Override
public void setJobNumMaps(Job job, int numMapTasks) {
    job.getConfiguration().setInt(JobContext.NUM_MAPS, numMapTasks);
}
From source file: com.cloudera.sqoop.shims.CDH3Shim.java
License: Apache License

@Override
public void setJobNumMaps(Job job, int numMapTasks) {
    job.getConfiguration().setInt("mapred.map.tasks", numMapTasks);
}
From source file: com.cloudera.sqoop.shims.CDH3Shim.java
License: Apache License

@Override
public void setJobMapSpeculativeExecution(Job job, boolean isEnabled) {
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", isEnabled);
}
From source file: com.cloudera.sqoop.shims.CDH3Shim.java
License: Apache License

@Override
public void setJobReduceSpeculativeExecution(Job job, boolean isEnabled) {
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", isEnabled);
}