Usage examples for org.apache.hadoop.mapreduce.Job#addArchiveToClassPath
public void addArchiveToClassPath(Path archive) throws IOException
From source file: org.schedoscope.export.jdbc.JdbcExportJob.java
License: Apache License
/**
 * Creates and configures the MapReduce job that exports an HCatalog table via JDBC.
 *
 * <p>The method:
 * <ol>
 *   <li>derives the job configuration from {@code getConfiguration()} and passes it through
 *       {@code configureHiveMetaStore}, {@code configureKerberos} and
 *       {@code configureAnonFields};</li>
 *   <li>sets {@link HCatInputFormat} as input (with {@code inputFilter} when it is non-blank)
 *       and {@link JdbcOutputFormat} as output;</li>
 *   <li>copies the jar that contains the driver class named by the output schema to a
 *       uniquely named path below {@code hadoop.tmp.dir} and adds it to the job's class path,
 *       unless that jar is the same jar that contains this job class.</li>
 * </ol>
 *
 * @return the configured, not yet submitted job
 * @throws Exception if configuration, schema lookup, driver class loading or the jar copy fails
 */
private Job configure() throws Exception {
    Configuration conf = getConfiguration();
    conf = configureHiveMetaStore(conf);
    conf = configureKerberos(conf);
    conf = configureAnonFields(conf);

    Job job = Job.getInstance(conf, "JDBCExport: " + inputDatabase + "." + inputTable);

    job.setJarByClass(JdbcExportJob.class);
    job.setMapperClass(JdbcExportMapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(numReducer);

    // A missing or blank filter means the whole table is read.
    if (inputFilter == null || inputFilter.trim().isEmpty()) {
        HCatInputFormat.setInput(job, inputDatabase, inputTable);
    } else {
        HCatInputFormat.setInput(job, inputDatabase, inputTable, inputFilter);
    }

    Schema outputSchema = SchemaFactory.getSchema(dbConnectionString, job.getConfiguration());
    HCatSchema hcatInputSchema = HCatInputFormat.getTableSchema(job.getConfiguration());

    String[] columnNames = SchemaUtils.getColumnNamesFromHcatSchema(hcatInputSchema, outputSchema);
    String[] columnTypes = SchemaUtils.getColumnTypesFromHcatSchema(hcatInputSchema, outputSchema,
            ImmutableSet.copyOf(anonFields));

    String outputTable = inputDatabase + "_" + inputTable;

    JdbcOutputFormat.setOutput(job.getConfiguration(), dbConnectionString, dbUser, dbPassword,
            outputTable, inputFilter, numReducer, commitSize, storageEngine, distributeBy,
            columnNames, columnTypes);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(JdbcOutputFormat.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(JdbcOutputWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(JdbcOutputWritable.class);

    Class<?> clazz = Class.forName(outputSchema.getDriverName());
    String jarFile = ClassUtil.findContainingJar(clazz);
    String jarSelf = ClassUtil.findContainingJar(JdbcExportJob.class);
    String tmpDir = job.getConfiguration().get("hadoop.tmp.dir");

    // Only ship the driver jar when it is known, lives outside the job jar and a tmp dir is
    // configured. The target path is built inside the guard because new Path(null) throws
    // IllegalArgumentException, which previously made the null checks unreachable.
    if (jarFile != null && jarSelf != null && tmpDir != null && !jarFile.equals(jarSelf)) {
        FileSystem fs = FileSystem.get(job.getConfiguration());
        // Random numeric suffix keeps concurrently running exports from overwriting each other.
        Path hdfsDir = new Path(
                tmpDir + "/" + new Path(jarFile).getName() + "." + RandomStringUtils.randomNumeric(20));

        LOG.info("copy " + LOCAL_PATH_PREFIX + jarFile + " to " + tmpDir);
        fs.copyFromLocalFile(false, true, new Path(LOCAL_PATH_PREFIX + jarFile), hdfsDir);

        LOG.info("add " + hdfsDir + " to distributed cache");
        job.addArchiveToClassPath(hdfsDir);
    }

    return job;
}