Example usage for org.apache.hadoop.mapreduce Job addArchiveToClassPath

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job addArchiveToClassPath.

Prototype

public void addArchiveToClassPath(Path archive) throws IOException 

Document

Add an archive path to the current set of classpath entries.
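
A minimal, self-contained sketch of the call. The HDFS path, job name, and class name are placeholders, not taken from the example below; the archive is assumed to already exist on HDFS.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class AddArchiveExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "archive-classpath-example");

        // Register an archive already on HDFS (placeholder path) so it is
        // distributed to the tasks and placed on their classpath.
        job.addArchiveToClassPath(new Path("/tmp/libs/my-dependency.jar"));
    }
}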

Usage

From source file: org.schedoscope.export.jdbc.JdbcExportJob.java

License: Apache License
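
This method, from Schedoscope's JDBC export job, configures a MapReduce job that reads a Hive table through HCatalog and writes it to a JDBC database. Its last step copies the JDBC driver jar to HDFS and registers it with addArchiveToClassPath so the driver class is available on the task classpath.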

private Job configure() throws Exception {

    Configuration conf = getConfiguration();
    conf = configureHiveMetaStore(conf);
    conf = configureKerberos(conf);
    conf = configureAnonFields(conf);

    Job job = Job.getInstance(conf, "JDBCExport: " + inputDatabase + "." + inputTable);

    job.setJarByClass(JdbcExportJob.class);
    job.setMapperClass(JdbcExportMapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(numReducer);

    // Read the input Hive table via HCatalog, applying a partition filter if one is set.
    if (inputFilter == null || inputFilter.trim().equals("")) {
        HCatInputFormat.setInput(job, inputDatabase, inputTable);
    } else {
        HCatInputFormat.setInput(job, inputDatabase, inputTable, inputFilter);
    }

    Schema outputSchema = SchemaFactory.getSchema(dbConnectionString, job.getConfiguration());
    HCatSchema hcatInputSchema = HCatInputFormat.getTableSchema(job.getConfiguration());

    String[] columnNames = SchemaUtils.getColumnNamesFromHcatSchema(hcatInputSchema, outputSchema);
    String[] columnTypes = SchemaUtils.getColumnTypesFromHcatSchema(hcatInputSchema, outputSchema,
            ImmutableSet.copyOf(anonFields));

    String outputTable = inputDatabase + "_" + inputTable;

    JdbcOutputFormat.setOutput(job.getConfiguration(), dbConnectionString, dbUser, dbPassword, outputTable,
            inputFilter, numReducer, commitSize, storageEngine, distributeBy, columnNames, columnTypes);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(JdbcOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(JdbcOutputWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(JdbcOutputWritable.class);

    // Locate the jar containing the JDBC driver class and the jar containing this job.
    Class<?> clazz = Class.forName(outputSchema.getDriverName());
    String jarFile = ClassUtil.findContainingJar(clazz);
    String jarSelf = ClassUtil.findContainingJar(JdbcExportJob.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    String tmpDir = job.getConfiguration().get("hadoop.tmp.dir");

    // Ship the driver jar to the cluster only if it is not already part of the
    // job's own jar. Build the HDFS target path after the null checks so a
    // missing hadoop.tmp.dir cannot leak into the path as the string "null".
    if (jarFile != null && jarSelf != null && tmpDir != null && !jarFile.equals(jarSelf)) {
        Path hdfsDir = new Path(
                tmpDir + "/" + new Path(jarFile).getName() + "." + RandomStringUtils.randomNumeric(20));
        LOG.info("copy " + LOCAL_PATH_PREFIX + jarFile + " to " + tmpDir);
        fs.copyFromLocalFile(false, true, new Path(LOCAL_PATH_PREFIX + jarFile), hdfsDir);
        LOG.info("add " + hdfsDir + " to distributed cache");
        job.addArchiveToClassPath(hdfsDir);
    }

    return job;
}
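
Note the design choice at the end: the driver jar is copied to a randomized path under hadoop.tmp.dir, and only when it is not already packaged in the job's own jar, since tasks could otherwise fail to load the driver class returned by outputSchema.getDriverName(). Copying to HDFS first matters because addArchiveToClassPath takes a path on the cluster's filesystem, not a local file.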