Example usage for org.apache.hadoop.mapreduce Job setWorkingDirectory

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setWorkingDirectory.

Prototype

public void setWorkingDirectory(Path dir) throws IOException 

Document

Set the current working directory for the default file system.
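
Before the full examples under Usage, here is a minimal, hedged sketch of the call in isolation. The job name and the /tmp/setworkingdirectory_example path are illustrative placeholders, not values taken from the examples below; the pattern, building a Job from a Configuration and pointing its working directory at a Path on the default file system, mirrors what the snippets that follow do before configuring mapper, formats, and key/value classes.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class SetWorkingDirectoryExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // Job.getInstance is the current factory method; the examples below use the
        // older "new Job(conf, name)" constructor, which works the same way here.
        Job job = Job.getInstance(conf, "setWorkingDirectory-example");

        // Relative paths used by the job are resolved against this directory on the
        // default file system; the path below is a hypothetical scratch location.
        job.setWorkingDirectory(new Path("/tmp/setworkingdirectory_example/mr_work"));

        // ... set mapper, input/output formats, and key/value classes as in the
        // examples under Usage, then submit with job.waitForCompletion(true) ...

        System.out.println("Working directory: " + job.getWorkingDirectory());
    }
}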

Usage

From source file: org.apache.hcatalog.hbase.ImportSequenceFile.java

License: Apache License

/**
 * Method to run the Importer MapReduce Job. Normally will be called by another MR job
 * during OutputCommitter.commitJob().
 * @param parentContext JobContext of the parent job
 * @param tableName name of table to bulk load data into
 * @param InputDir path of SequenceFile formatted data to read
 * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported
 * @return true if the Importer job completed successfully, false otherwise
 */
static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) {
    Configuration parentConf = parentContext.getConfiguration();
    Configuration conf = new Configuration();
    for (Map.Entry<String, String> el : parentConf) {
        if (el.getKey().startsWith("hbase."))
            conf.set(el.getKey(), el.getValue());
        if (el.getKey().startsWith("mapred.cache.archives"))
            conf.set(el.getKey(), el.getValue());
    }

    //Inherit jar dependencies added to distributed cache loaded by parent job
    conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", ""));
    conf.set("mapreduce.job.cache.archives.visibilities",
            parentConf.get("mapreduce.job.cache.archives.visibilities", ""));

    //Temporary fix until hbase security is ready
    //We need the written HFile to be world readable so
    //hbase regionserver user has the privileges to perform a hdfs move
    if (parentConf.getBoolean("hadoop.security.authorization", false)) {
        FsPermission.setUMask(conf, FsPermission.valueOf("----------"));
    }

    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false);

    boolean localMode = "local".equals(conf.get("mapred.job.tracker"));

    boolean success = false;
    try {
        FileSystem fs = FileSystem.get(parentConf);
        Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR);
        if (!fs.mkdirs(workDir))
            throw new IOException("Importer work directory already exists: " + workDir);
        Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode);
        job.setWorkingDirectory(workDir);
        job.getCredentials().addAll(parentContext.getCredentials());
        success = job.waitForCompletion(true);
        fs.delete(workDir, true);
        //We only cleanup on success because failure might've been caused by existence of target directory
        if (localMode && success) {
            new ImporterOutputFormat().getOutputCommitter(
                    org.apache.hadoop.mapred.HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID()))
                    .commitJob(job);
        }
    } catch (InterruptedException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (ClassNotFoundException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (IOException e) {
        LOG.error("ImportSequenceFile Failed", e);
    }
    return success;
}

From source file: org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java

License: Apache License

@Test
public void importSequenceFileTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "importSequenceFileTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    //create table
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();
    Path interPath = new Path(methodTestDir, "inter");
    Path scratchPath = new Path(methodTestDir, "scratch");

    //create job
    Job job = new Job(conf, testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWrite.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, interPath);

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);

    job.setNumReduceTasks(0);
    assertTrue(job.waitForCompletion(true));

    job = new Job(new Configuration(allConf), testName + "_importer");
    assertTrue(ImportSequenceFile.runJob(job, tableName, interPath, scratchPath));

    //verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    //test if load count is the same
    assertEquals(data.length, index);
    //test if scratch directory was erased
    assertFalse(FileSystem.get(job.getConfiguration()).exists(scratchPath));
}

From source file: org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java

License: Apache License

@Test
public void bulkModeHCatOutputFormatTest() throws Exception {
    String testName = "bulkModeHCatOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String databaseName = testName.toLowerCase();
    String dbDir = new Path(methodTestDir, "DB_" + testName).toString();
    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'";
    String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
            + "(key int, english string, spanish string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + "TBLPROPERTIES ('"
            + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + "'hbase.columns.mapping'=':key,"
            + familyName + ":english," + familyName + ":spanish')";

    assertEquals(0, hcatDriver.run(dbquery).getResponseCode());
    assertEquals(0, hcatDriver.run(tableQuery).getResponseCode());

    String data[] = { "1,english:ONE,spanish:UNO", "2,english:TWO,spanish:DOS",
            "3,english:THREE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);
    //create multiple files so we can test with multiple mappers
    for (int i = 0; i < data.length; i++) {
        FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt"));
        os.write(Bytes.toBytes(data[i] + "\n"));
        os.close();
    }

    //create job
    Job job = new Job(conf, testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapHCatWrite.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormatClass(HCatOutputFormat.class);
    OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null);
    HCatOutputFormat.setOutput(job, outputJobInfo);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    assertTrue(job.waitForCompletion(true));
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        TableSnapshot snapshot = rm.createSnapshot(databaseName + "." + tableName);
        for (String el : snapshot.getColumnFamilies()) {
            assertEquals(1, snapshot.getRevision(el));
        }
    } finally {
        rm.close();
    }

    //verify
    HTable table = new HTable(conf, databaseName + "." + tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
            assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp());
        }
        index++;
    }
    //test if load count is the same
    assertEquals(data.length, index);
}

From source file: org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java

License: Apache License

@Test
public void bulkModeHCatOutputFormatTestWithDefaultDB() throws Exception {
    String testName = "bulkModeHCatOutputFormatTestWithDefaultDB";
    Path methodTestDir = new Path(getTestDir(), testName);

    String databaseName = "default";
    String dbDir = new Path(methodTestDir, "DB_" + testName).toString();
    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'";
    String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
            + "(key int, english string, spanish string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + "TBLPROPERTIES ('"
            + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + "'hbase.columns.mapping'=':key,"
            + familyName + ":english," + familyName + ":spanish')";

    assertEquals(0, hcatDriver.run(dbquery).getResponseCode());
    assertEquals(0, hcatDriver.run(tableQuery).getResponseCode());

    String data[] = { "1,english:ONE,spanish:UNO", "2,english:TWO,spanish:DOS",
            "3,english:THREE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();

    //create job
    Job job = new Job(conf, testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapHCatWrite.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormatClass(HCatOutputFormat.class);
    OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null);
    HCatOutputFormat.setOutput(job, outputJobInfo);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    assertTrue(job.waitForCompletion(true));

    //verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    //test if load count is the same
    assertEquals(data.length, index);
}

From source file: org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java

License: Apache License

private Job configureJob(String jobName, Configuration conf, Path workingDir,
        Class<? extends Mapper> mapperClass, OutputJobInfo outputJobInfo, Path inputPath) throws IOException {
    Job job = new Job(conf, jobName);
    job.setWorkingDirectory(workingDir);
    job.setJarByClass(this.getClass());
    job.setMapperClass(mapperClass);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(HCatOutputFormat.class);
    HCatOutputFormat.setOutput(job, outputJobInfo);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);
    return job;
}

From source file: org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.java

License: Apache License

private Job configureJob(String jobName, Configuration conf, Path workingDir,
        Class<? extends Mapper> mapperClass, OutputJobInfo outputJobInfo, Path inputPath) throws IOException {
    Job job = new Job(conf, jobName);
    job.setWorkingDirectory(workingDir);
    job.setJarByClass(this.getClass());
    job.setMapperClass(mapperClass);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(HCatOutputFormat.class);
    HCatOutputFormat.setOutput(job, outputJobInfo);
    String txnString = job.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY);
    //Test passing in same OutputJobInfo multiple times and verify 1 transaction is created
    String jobString = job.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString);
    Job job2 = new Job(conf);
    HCatOutputFormat.setOutput(job2, outputJobInfo);
    assertEquals(txnString, job2.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY));
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);
    return job;
}

From source file: org.apache.hive.hcatalog.hbase.TestHiveHBaseTableOutputFormat.java

License: Apache License

private Job configureJob(String jobName, Configuration conf, Path workingDir,
        Class<? extends Mapper> mapperClass, OutputJobInfo outputJobInfo, Path inputPath) throws IOException {

    try {
        //now setting the schema
        HiveConf hiveConf = HCatUtil.getHiveConf(conf);
        HiveMetaStoreClient client = HCatUtil.getHiveClient(hiveConf);
        Table table = client.getTable(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());
        StorageDescriptor tblSD = table.getSd();
        if (tblSD == null) {
            throw new HCatException("Cannot construct partition info from an empty storage descriptor.");
        }
        HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(tblSD.getCols()));
        outputJobInfo.setOutputSchema(tableSchema);
    } catch (Exception e) {
        if (e instanceof HCatException) {
            throw (HCatException) e;
        } else {
            throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e);
        }
    }
    conf.set(HBaseSerDe.HBASE_TABLE_NAME, outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName());
    conf.set(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
            outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName());
    conf.set(TableOutputFormat.OUTPUT_TABLE,
            outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName());
    conf.set(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName",
            outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName());
    conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));

    Job job = new Job(conf, jobName);
    job.setWorkingDirectory(workingDir);
    job.setJarByClass(this.getClass());
    job.setMapperClass(mapperClass);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, inputPath);
    //job.setOutputFormatClass(HiveHBaseTableOutputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    HCatOutputFormat.setOutput(job, outputJobInfo);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);
    return job;
}