List of usage examples for org.apache.hadoop.mapreduce Job setWorkingDirectory
public void setWorkingDirectory(Path dir) throws IOException
From source file:org.apache.hcatalog.hbase.ImportSequenceFile.java
License:Apache License
/** * Method to run the Importer MapReduce Job. Normally will be called by another MR job * during OutputCommitter.commitJob()./*from w w w. j a v a2 s . c o m*/ * @param parentContext JobContext of the parent job * @param tableName name of table to bulk load data into * @param InputDir path of SequenceFile formatted data to read * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported * @return */ static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) { Configuration parentConf = parentContext.getConfiguration(); Configuration conf = new Configuration(); for (Map.Entry<String, String> el : parentConf) { if (el.getKey().startsWith("hbase.")) conf.set(el.getKey(), el.getValue()); if (el.getKey().startsWith("mapred.cache.archives")) conf.set(el.getKey(), el.getValue()); } //Inherit jar dependencies added to distributed cache loaded by parent job conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", "")); conf.set("mapreduce.job.cache.archives.visibilities", parentConf.get("mapreduce.job.cache.archives.visibilities", "")); //Temporary fix until hbase security is ready //We need the written HFile to be world readable so //hbase regionserver user has the privileges to perform a hdfs move if (parentConf.getBoolean("hadoop.security.authorization", false)) { FsPermission.setUMask(conf, FsPermission.valueOf("----------")); } conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false); boolean localMode = "local".equals(conf.get("mapred.job.tracker")); boolean success = false; try { FileSystem fs = FileSystem.get(parentConf); Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR); if (!fs.mkdirs(workDir)) throw new IOException("Importer work directory already exists: " + workDir); Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode); job.setWorkingDirectory(workDir); job.getCredentials().addAll(parentContext.getCredentials()); success = job.waitForCompletion(true); fs.delete(workDir, true); //We only cleanup on success because failure might've been caused by existence of target directory if (localMode && success) { new ImporterOutputFormat().getOutputCommitter( org.apache.hadoop.mapred.HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID())) .commitJob(job); } } catch (InterruptedException e) { LOG.error("ImportSequenceFile Failed", e); } catch (ClassNotFoundException e) { LOG.error("ImportSequenceFile Failed", e); } catch (IOException e) { LOG.error("ImportSequenceFile Failed", e); } return success; }
From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java
License:Apache License
@Test public void importSequenceFileTest() throws IOException, ClassNotFoundException, InterruptedException { String testName = "importSequenceFileTest"; Path methodTestDir = new Path(getTestDir(), testName); LOG.info("starting: " + testName); String tableName = newTableName(testName).toLowerCase(); String familyName = "my_family"; byte[] familyNameBytes = Bytes.toBytes(familyName); //include hbase config in conf file Configuration conf = new Configuration(allConf); //create table createTable(tableName, new String[] { familyName }); String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos", "3,english:three,spanish:tres" }; // input/output settings Path inputPath = new Path(methodTestDir, "mr_input"); getFileSystem().mkdirs(inputPath);/*from w w w .jav a 2 s .co m*/ FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); for (String line : data) os.write(Bytes.toBytes(line + "\n")); os.close(); Path interPath = new Path(methodTestDir, "inter"); Path scratchPath = new Path(methodTestDir, "scratch"); //create job Job job = new Job(conf, testName); job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); job.setJarByClass(this.getClass()); job.setMapperClass(MapWrite.class); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.setInputPaths(job, inputPath); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, interPath); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(Put.class); job.setNumReduceTasks(0); assertTrue(job.waitForCompletion(true)); job = new Job(new Configuration(allConf), testName + "_importer"); assertTrue(ImportSequenceFile.runJob(job, tableName, interPath, scratchPath)); //verify HTable table = new HTable(conf, tableName); Scan scan = new Scan(); scan.addFamily(familyNameBytes); ResultScanner scanner = table.getScanner(scan); int index = 0; for (Result result : scanner) { String vals[] = data[index].toString().split(","); for (int i = 1; i < vals.length; i++) { String pair[] = vals[i].split(":"); assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); } index++; } //test if load count is the same assertEquals(data.length, index); //test if scratch directory was erased assertFalse(FileSystem.get(job.getConfiguration()).exists(scratchPath)); }
From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java
License:Apache License
@Test public void bulkModeHCatOutputFormatTest() throws Exception { String testName = "bulkModeHCatOutputFormatTest"; Path methodTestDir = new Path(getTestDir(), testName); LOG.info("starting: " + testName); String databaseName = testName.toLowerCase(); String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); String tableName = newTableName(testName).toLowerCase(); String familyName = "my_family"; byte[] familyNameBytes = Bytes.toBytes(familyName); //include hbase config in conf file Configuration conf = new Configuration(allConf); conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + "(key int, english string, spanish string) STORED BY " + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); String data[] = { "1,english:ONE,spanish:UNO", "2,english:TWO,spanish:DOS", "3,english:THREE,spanish:TRES" }; // input/output settings Path inputPath = new Path(methodTestDir, "mr_input"); getFileSystem().mkdirs(inputPath);/*from ww w .j a v a 2 s . co m*/ //create multiple files so we can test with multiple mappers for (int i = 0; i < data.length; i++) { FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt")); os.write(Bytes.toBytes(data[i] + "\n")); os.close(); } //create job Job job = new Job(conf, testName); job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); job.setJarByClass(this.getClass()); job.setMapperClass(MapHCatWrite.class); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.setInputPaths(job, inputPath); job.setOutputFormatClass(HCatOutputFormat.class); OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); HCatOutputFormat.setOutput(job, outputJobInfo); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(HCatRecord.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(HCatRecord.class); job.setNumReduceTasks(0); assertTrue(job.waitForCompletion(true)); RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); try { TableSnapshot snapshot = rm.createSnapshot(databaseName + "." + tableName); for (String el : snapshot.getColumnFamilies()) { assertEquals(1, snapshot.getRevision(el)); } } finally { rm.close(); } //verify HTable table = new HTable(conf, databaseName + "." + tableName); Scan scan = new Scan(); scan.addFamily(familyNameBytes); ResultScanner scanner = table.getScanner(scan); int index = 0; for (Result result : scanner) { String vals[] = data[index].toString().split(","); for (int i = 1; i < vals.length; i++) { String pair[] = vals[i].split(":"); assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp()); } index++; } //test if load count is the same assertEquals(data.length, index); }
From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java
License:Apache License
@Test public void bulkModeHCatOutputFormatTestWithDefaultDB() throws Exception { String testName = "bulkModeHCatOutputFormatTestWithDefaultDB"; Path methodTestDir = new Path(getTestDir(), testName); String databaseName = "default"; String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); String tableName = newTableName(testName).toLowerCase(); String familyName = "my_family"; byte[] familyNameBytes = Bytes.toBytes(familyName); //include hbase config in conf file Configuration conf = new Configuration(allConf); conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + "(key int, english string, spanish string) STORED BY " + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); String data[] = { "1,english:ONE,spanish:UNO", "2,english:TWO,spanish:DOS", "3,english:THREE,spanish:TRES" }; // input/output settings Path inputPath = new Path(methodTestDir, "mr_input"); getFileSystem().mkdirs(inputPath);/* ww w. j av a 2s . co m*/ FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); for (String line : data) os.write(Bytes.toBytes(line + "\n")); os.close(); //create job Job job = new Job(conf, testName); job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); job.setJarByClass(this.getClass()); job.setMapperClass(MapHCatWrite.class); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.setInputPaths(job, inputPath); job.setOutputFormatClass(HCatOutputFormat.class); OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); HCatOutputFormat.setOutput(job, outputJobInfo); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(HCatRecord.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(HCatRecord.class); job.setNumReduceTasks(0); assertTrue(job.waitForCompletion(true)); //verify HTable table = new HTable(conf, tableName); Scan scan = new Scan(); scan.addFamily(familyNameBytes); ResultScanner scanner = table.getScanner(scan); int index = 0; for (Result result : scanner) { String vals[] = data[index].toString().split(","); for (int i = 1; i < vals.length; i++) { String pair[] = vals[i].split(":"); assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); } index++; } //test if load count is the same assertEquals(data.length, index); }
From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java
License:Apache License
private Job configureJob(String jobName, Configuration conf, Path workingDir, Class<? extends Mapper> mapperClass, OutputJobInfo outputJobInfo, Path inputPath) throws IOException { Job job = new Job(conf, jobName); job.setWorkingDirectory(workingDir); job.setJarByClass(this.getClass()); job.setMapperClass(mapperClass);// w w w . j ava 2s . com job.setInputFormatClass(TextInputFormat.class); TextInputFormat.setInputPaths(job, inputPath); job.setOutputFormatClass(HCatOutputFormat.class); HCatOutputFormat.setOutput(job, outputJobInfo); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(HCatRecord.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(HCatRecord.class); job.setNumReduceTasks(0); return job; }
From source file:org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.java
License:Apache License
private Job configureJob(String jobName, Configuration conf, Path workingDir, Class<? extends Mapper> mapperClass, OutputJobInfo outputJobInfo, Path inputPath) throws IOException { Job job = new Job(conf, jobName); job.setWorkingDirectory(workingDir); job.setJarByClass(this.getClass()); job.setMapperClass(mapperClass);//from w ww. j a v a2s . c om job.setInputFormatClass(TextInputFormat.class); TextInputFormat.setInputPaths(job, inputPath); job.setOutputFormatClass(HCatOutputFormat.class); HCatOutputFormat.setOutput(job, outputJobInfo); String txnString = job.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY); //Test passing in same OutputJobInfo multiple times and verify 1 transaction is created String jobString = job.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO); outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString); Job job2 = new Job(conf); HCatOutputFormat.setOutput(job2, outputJobInfo); assertEquals(txnString, job2.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(HCatRecord.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(HCatRecord.class); job.setNumReduceTasks(0); return job; }
From source file:org.apache.hive.hcatalog.hbase.TestHiveHBaseTableOutputFormat.java
License:Apache License
private Job configureJob(String jobName, Configuration conf, Path workingDir, Class<? extends Mapper> mapperClass, OutputJobInfo outputJobInfo, Path inputPath) throws IOException { try {/*from www .j a va 2 s . c o m*/ //now setting the schema HiveConf hiveConf = HCatUtil.getHiveConf(conf); HiveMetaStoreClient client = HCatUtil.getHiveClient(hiveConf); Table table = client.getTable(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName()); StorageDescriptor tblSD = table.getSd(); if (tblSD == null) { throw new HCatException("Cannot construct partition info from an empty storage descriptor."); } HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(tblSD.getCols())); outputJobInfo.setOutputSchema(tableSchema); } catch (Exception e) { if (e instanceof HCatException) { throw (HCatException) e; } else { throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); } } conf.set(HBaseSerDe.HBASE_TABLE_NAME, outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName()); conf.set(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName()); conf.set(TableOutputFormat.OUTPUT_TABLE, outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName()); conf.set(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName", outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName()); conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); Job job = new Job(conf, jobName); job.setWorkingDirectory(workingDir); job.setJarByClass(this.getClass()); job.setMapperClass(mapperClass); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.setInputPaths(job, inputPath); //job.setOutputFormatClass(HiveHBaseTableOutputFormat.class); job.setOutputFormatClass(HCatOutputFormat.class); HCatOutputFormat.setOutput(job, outputJobInfo); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(HCatRecord.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(HCatRecord.class); job.setNumReduceTasks(0); return job; }