List of usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf(Configuration conf)
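Before the full examples, a minimal sketch of the pattern they all share (the class and job names here are placeholders, not taken from any of the projects below): build a Configuration, then hand it to the JobConf constructor.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfConstructionSketch {
    public static void main(String[] args) {
        // Configuration loads *-default.xml and *-site.xml from the classpath.
        Configuration conf = new Configuration();
        // JobConf copies those settings and adds the old-API job controls.
        JobConf jobConf = new JobConf(conf);
        jobConf.setJobName("example-job");
        System.out.println("Job name: " + jobConf.getJobName());
    }
}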
From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java
License:Apache License
public AbstractHadoopJob(String name, Props props) {
    this.props = props;
    this.jobName = name;
    conf = new Configuration();
    jobconf = new JobConf(conf);
    jobconf.setJobName(name);
    visualizer = props.getBoolean("mr.listener.visualizer", false);
    if (visualizer) {
        jobStatsFileName = props.getString("azkaban.job.attachment.file");
    }
}
From source file:azkaban.jobtype.SecurePigWrapper.java
License:Apache License
public static void main(final String[] args) throws IOException, InterruptedException {
    final Logger logger = Logger.getRootLogger();
    final Properties p = System.getProperties();
    final Configuration conf = new Configuration();

    SecurityUtils.getProxiedUser(p, logger, conf).doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {
            prefetchToken();
            org.apache.pig.Main.main(args);
            return null;
        }

        // For Pig jobs that need to do extra communication with the
        // JobTracker, it's necessary to pre-fetch a token and include it in
        // the credentials cache
        private void prefetchToken() throws InterruptedException, IOException {
            String shouldPrefetch = p.getProperty(OBTAIN_BINARY_TOKEN);
            if (shouldPrefetch != null && shouldPrefetch.equals("true")) {
                logger.info("Pre-fetching token");

                Job job = new Job(conf, "totally phony, extremely fake, not real job");
                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching: Got new JobClient: " + jc);
                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("hi"));
                job.getCredentials().addToken(new Text("howdy"), mrdt);

                File temp = File.createTempFile("mr-azkaban", ".token");
                temp.deleteOnExit();

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(temp);
                    dos = new DataOutputStream(fos);
                    job.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        dos.close();
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }

                logger.info("Setting " + MAPREDUCE_JOB_CREDENTIALS_BINARY + " to " + temp.getAbsolutePath());
                System.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY, temp.getAbsolutePath());
            } else {
                logger.info("Not pre-fetching token");
            }
        }
    });
}
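On Java 7 and later, the close-in-finally boilerplate in prefetchToken() above could be written with try-with-resources; a sketch of just that step, reusing the temp and job variables already defined in the example:

// Sketch only; temp and job are the variables from prefetchToken() above.
try (FileOutputStream fos = new FileOutputStream(temp);
        DataOutputStream dos = new DataOutputStream(fos)) {
    job.getCredentials().writeTokenStorageToStream(dos);
}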
From source file:azkaban.security.commons.SecurityUtils.java
License:Apache License
public static synchronized void prefetchToken(final File tokenFile, final Props p, final Logger logger)
        throws InterruptedException, IOException {
    final Configuration conf = new Configuration();
    logger.info("Getting proxy user for " + p.getString(TO_PROXY));
    logger.info("Getting proxy user for " + p.toString());

    getProxiedUser(p.toProperties(), logger, conf).doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {
            getToken(p);
            return null;
        }

        private void getToken(Props p) throws InterruptedException, IOException {
            String shouldPrefetch = p.getString(OBTAIN_BINARY_TOKEN);
            if (shouldPrefetch != null && shouldPrefetch.equals("true")) {
                logger.info("Pre-fetching token");

                logger.info("Pre-fetching fs token");
                FileSystem fs = FileSystem.get(conf);
                Token<?> fsToken = fs.getDelegationToken(p.getString("user.to.proxy"));
                logger.info("Created token: " + fsToken.toString());

                Job job = new Job(conf, "totally phony, extremely fake, not real job");
                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching job token: Got new JobClient: " + jc);
                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("hi"));
                logger.info("Created token: " + mrdt.toString());

                job.getCredentials().addToken(new Text("howdy"), mrdt);
                job.getCredentials().addToken(fsToken.getService(), fsToken);

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(tokenFile);
                    dos = new DataOutputStream(fos);
                    job.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        dos.close();
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }

                logger.info("Loading hadoop tokens into " + tokenFile.getAbsolutePath());
                p.put("HadoopTokenFileLoc", tokenFile.getAbsolutePath());
            } else {
                logger.info("Not pre-fetching token");
            }
        }
    });
}
From source file:azkaban.security.HadoopSecurityManager_H_1_0.java
License:Apache License
@Override
public synchronized void prefetchToken(final File tokenFile, final String userToProxy, final Logger logger)
        throws HadoopSecurityManagerException {
    logger.info("Getting hadoop tokens for " + userToProxy);

    try {
        getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                getToken(userToProxy);
                return null;
            }

            private void getToken(String userToProxy)
                    throws InterruptedException, IOException, HadoopSecurityManagerException {
                FileSystem fs = FileSystem.get(conf);
                // check if we get the correct FS, and most importantly, the conf
                logger.info("Getting DFS token from " + fs.getCanonicalServiceName() + fs.getUri());
                Token<?> fsToken = fs.getDelegationToken(userToProxy);
                if (fsToken == null) {
                    logger.error("Failed to fetch DFS token for ");
                    throw new HadoopSecurityManagerException("Failed to fetch DFS token for " + userToProxy);
                }
                logger.info("Created DFS token: " + fsToken.toString());
                logger.info("Token kind: " + fsToken.getKind());
                logger.info("Token id: " + fsToken.getIdentifier());
                logger.info("Token service: " + fsToken.getService());

                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching JT token: Got new JobClient: " + jc);

                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("mr token"));
                if (mrdt == null) {
                    logger.error("Failed to fetch JT token for ");
                    throw new HadoopSecurityManagerException("Failed to fetch JT token for " + userToProxy);
                }
                logger.info("Created JT token: " + mrdt.toString());
                logger.info("Token kind: " + mrdt.getKind());
                logger.info("Token id: " + mrdt.getIdentifier());
                logger.info("Token service: " + mrdt.getService());

                jc.getCredentials().addToken(mrdt.getService(), mrdt);
                jc.getCredentials().addToken(fsToken.getService(), fsToken);

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(tokenFile);
                    dos = new DataOutputStream(fos);
                    jc.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        dos.close();
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }
                // stash them to cancel after use.
                logger.info("Tokens loaded in " + tokenFile.getAbsolutePath());
            }
        });
    } catch (Exception e) {
        e.printStackTrace();
        throw new HadoopSecurityManagerException(
                "Failed to get hadoop tokens! " + e.getMessage() + e.getCause());
    }
}
From source file:azkaban.security.HadoopSecurityManager_H_1_0.java
License:Apache License
private void cancelMRJobTrackerToken(final Token<? extends TokenIdentifier> t, String userToProxy)
        throws HadoopSecurityManagerException {
    try {
        getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
            @SuppressWarnings("unchecked")
            @Override
            public Void run() throws Exception {
                cancelToken((Token<DelegationTokenIdentifier>) t);
                return null;
            }

            private void cancelToken(Token<DelegationTokenIdentifier> jt)
                    throws IOException, InterruptedException {
                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                jobClient.cancelDelegationToken(jt);
            }
        });
    } catch (Exception e) {
        e.printStackTrace();
        throw new HadoopSecurityManagerException("Failed to cancel Token. " + e.getMessage() + e.getCause());
    }
}
From source file:azkaban.security.HadoopSecurityManager_H_2_0.java
License:Apache License
@Override
public synchronized void prefetchToken(final File tokenFile, final String userToProxy, final Logger logger)
        throws HadoopSecurityManagerException {
    logger.info("Getting hadoop tokens for " + userToProxy);

    try {
        getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                getToken(userToProxy);
                return null;
            }

            private void getToken(String userToProxy)
                    throws InterruptedException, IOException, HadoopSecurityManagerException {
                FileSystem fs = FileSystem.get(conf);
                // check if we get the correct FS, and most importantly, the conf
                logger.info("Getting DFS token from " + fs.getCanonicalServiceName() + fs.getUri());
                Token<?> fsToken = fs.getDelegationToken(userToProxy);
                if (fsToken == null) {
                    logger.error("Failed to fetch DFS token for ");
                    throw new HadoopSecurityManagerException("Failed to fetch DFS token for " + userToProxy);
                }
                logger.info("Created DFS token: " + fsToken.toString());
                logger.info("Token kind: " + fsToken.getKind());
                logger.info("Token id: " + fsToken.getIdentifier());
                logger.info("Token service: " + fsToken.getService());

                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching JT token: Got new JobClient: " + jc);

                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("mr token"));
                if (mrdt == null) {
                    logger.error("Failed to fetch JT token for ");
                    throw new HadoopSecurityManagerException("Failed to fetch JT token for " + userToProxy);
                }
                logger.info("Created JT token: " + mrdt.toString());
                logger.info("Token kind: " + mrdt.getKind());
                logger.info("Token id: " + mrdt.getIdentifier());
                logger.info("Token service: " + mrdt.getService());

                jc.getCredentials().addToken(mrdt.getService(), mrdt);
                jc.getCredentials().addToken(fsToken.getService(), fsToken);

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(tokenFile);
                    dos = new DataOutputStream(fos);
                    jc.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        try {
                            dos.close();
                        } catch (Throwable t) {
                            // best effort
                            logger.error("encountered exception while closing DataOutputStream of the tokenFile", t);
                        }
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }
                // stash them to cancel after use.
                logger.info("Tokens loaded in " + tokenFile.getAbsolutePath());
            }
        });
    } catch (Exception e) {
        throw new HadoopSecurityManagerException(
                "Failed to get hadoop tokens! " + e.getMessage() + e.getCause());
    }
}
From source file:azkaban.security.HadoopSecurityManager_H_2_0.java
License:Apache License
private void cancelMRJobTrackerToken(final Token<? extends TokenIdentifier> t, String userToProxy)
        throws HadoopSecurityManagerException {
    try {
        getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
            @SuppressWarnings("unchecked")
            @Override
            public Void run() throws Exception {
                cancelToken((Token<DelegationTokenIdentifier>) t);
                return null;
            }

            private void cancelToken(Token<DelegationTokenIdentifier> jt)
                    throws IOException, InterruptedException {
                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                jobClient.cancelDelegationToken(jt);
            }
        });
    } catch (Exception e) {
        throw new HadoopSecurityManagerException("Failed to cancel token. " + e.getMessage() + e.getCause(), e);
    }
}
From source file:babel.prep.corpus.CorpusGenerator.java
License:Apache License
/**
 * Configures a map-only dataset generation job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, boolean xmlOut) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("create " + (xmlOut ? "xml formatted" : "") + " dataset from " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(CorpusGenMapper.class);
    job.setOutputFormat(xmlOut ? MultipleXMLLangFileOutputFormat.class : MultipleLangFileOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, CORPUS_SUBDIR),
            "corpus." + (xmlOut ? PARAM_XML + "." : "") + getCurTimeStamp());
    m_fs.delete(outDir, true);
    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}
From source file:babel.prep.datedcorpus.DatedCorpusGenerator.java
License:Apache License
/**
 * Configures a dated dataset generation job (map and reduce).
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("create dated dataset from " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(DatedCorpusGenMapper.class);
    job.setReducerClass(DatedCorpusGenReducer.class);

    job.setMapOutputValueClass(PageVersion.class);
    job.setOutputFormat(DatedLangFilesOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, CORPUS_SUBDIR), "datedcorpus." + getCurTimeStamp());
    m_fs.delete(outDir, true);
    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}
From source file:babel.prep.langid.LangIdentifier.java
License:Apache License
/**
 * Configures a map-only language id job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, String referrer) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("identify languages for pages in " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(LangIdMapper.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.langid." + getCurTimeStamp());
    m_fs.delete(outDir, true);
    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    job.set(JOB_PROP_JOB_REFERRER, referrer);

    return job;
}
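The three babel createJobConf(...) methods above only configure a JobConf; they do not submit it. Presumably the caller runs the returned job through the old mapred API, along the following lines (a sketch, not taken from the babel sources; the CorpusGenerator signature and the crawlDir/pagesSubDir variables are assumed):

// Hypothetical caller; crawlDir and pagesSubDir are assumed to come from the tool's arguments.
JobConf job = createJobConf(crawlDir, pagesSubDir, false);
// runJob() submits the configured job and blocks; it throws IOException if the job fails.
RunningJob running = JobClient.runJob(job);
System.out.println("Finished job " + running.getID());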