Example usage for org.apache.hadoop.mapred JobConf JobConf

List of usage examples for org.apache.hadoop.mapred JobConf JobConf

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf JobConf.

Prototype

public JobConf(boolean loadDefaults) 

Source Link

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.

Usage

From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java

License:Apache License

public AbstractHadoopJob(String name, Props props) {
    this.props = props;
    this.jobName = name;
    conf = new Configuration();
    jobconf = new JobConf(conf);
    jobconf.setJobName(name);/*from  w w  w  .  j  a v  a  2s . c  om*/

    visualizer = props.getBoolean("mr.listener.visualizer", false) == true;
    if (visualizer == true) {
        jobStatsFileName = props.getString("azkaban.job.attachment.file");
    }
}

From source file:azkaban.jobtype.SecurePigWrapper.java

License:Apache License

public static void main(final String[] args) throws IOException, InterruptedException {
    final Logger logger = Logger.getRootLogger();
    final Properties p = System.getProperties();
    final Configuration conf = new Configuration();

    SecurityUtils.getProxiedUser(p, logger, conf).doAs(new PrivilegedExceptionAction<Void>() {
        @Override/*from w  w  w .  j  a v a 2s. c  o m*/
        public Void run() throws Exception {
            prefetchToken();
            org.apache.pig.Main.main(args);
            return null;
        }

        // For Pig jobs that need to do extra communication with the
        // JobTracker, it's necessary to pre-fetch a token and include it in
        // the credentials cache
        private void prefetchToken() throws InterruptedException, IOException {
            String shouldPrefetch = p.getProperty(OBTAIN_BINARY_TOKEN);
            if (shouldPrefetch != null && shouldPrefetch.equals("true")) {
                logger.info("Pre-fetching token");
                Job job = new Job(conf, "totally phony, extremely fake, not real job");

                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching: Got new JobClient: " + jc);
                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("hi"));
                job.getCredentials().addToken(new Text("howdy"), mrdt);

                File temp = File.createTempFile("mr-azkaban", ".token");
                temp.deleteOnExit();

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(temp);
                    dos = new DataOutputStream(fos);
                    job.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        dos.close();
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }
                logger.info("Setting " + MAPREDUCE_JOB_CREDENTIALS_BINARY + " to " + temp.getAbsolutePath());
                System.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY, temp.getAbsolutePath());
            } else {
                logger.info("Not pre-fetching token");
            }
        }
    });
}

From source file:azkaban.security.commons.SecurityUtils.java

License:Apache License

public static synchronized void prefetchToken(final File tokenFile, final Props p, final Logger logger)
        throws InterruptedException, IOException {

    final Configuration conf = new Configuration();
    logger.info("Getting proxy user for " + p.getString(TO_PROXY));
    logger.info("Getting proxy user for " + p.toString());

    getProxiedUser(p.toProperties(), logger, conf).doAs(new PrivilegedExceptionAction<Void>() {
        @Override/*w ww.  j a  v a  2 s.com*/
        public Void run() throws Exception {
            getToken(p);
            return null;
        }

        private void getToken(Props p) throws InterruptedException, IOException {
            String shouldPrefetch = p.getString(OBTAIN_BINARY_TOKEN);
            if (shouldPrefetch != null && shouldPrefetch.equals("true")) {
                logger.info("Pre-fetching token");

                logger.info("Pre-fetching fs token");
                FileSystem fs = FileSystem.get(conf);
                Token<?> fsToken = fs.getDelegationToken(p.getString("user.to.proxy"));
                logger.info("Created token: " + fsToken.toString());

                Job job = new Job(conf, "totally phony, extremely fake, not real job");
                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching job token: Got new JobClient: " + jc);
                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("hi"));
                logger.info("Created token: " + mrdt.toString());

                job.getCredentials().addToken(new Text("howdy"), mrdt);
                job.getCredentials().addToken(fsToken.getService(), fsToken);

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(tokenFile);
                    dos = new DataOutputStream(fos);
                    job.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        dos.close();
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }
                logger.info("Loading hadoop tokens into " + tokenFile.getAbsolutePath());
                p.put("HadoopTokenFileLoc", tokenFile.getAbsolutePath());
            } else {
                logger.info("Not pre-fetching token");
            }
        }
    });
}

From source file:azkaban.security.HadoopSecurityManager_H_1_0.java

License:Apache License

@Override
public synchronized void prefetchToken(final File tokenFile, final String userToProxy, final Logger logger)
        throws HadoopSecurityManagerException {

    logger.info("Getting hadoop tokens for " + userToProxy);

    try {//ww  w  . ja v  a  2s. co  m
        getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                getToken(userToProxy);
                return null;
            }

            private void getToken(String userToProxy)
                    throws InterruptedException, IOException, HadoopSecurityManagerException {

                FileSystem fs = FileSystem.get(conf);
                // check if we get the correct FS, and most importantly, the conf
                logger.info("Getting DFS token from " + fs.getCanonicalServiceName() + fs.getUri());
                Token<?> fsToken = fs.getDelegationToken(userToProxy);
                if (fsToken == null) {
                    logger.error("Failed to fetch DFS token for ");
                    throw new HadoopSecurityManagerException("Failed to fetch DFS token for " + userToProxy);
                }
                logger.info("Created DFS token: " + fsToken.toString());
                logger.info("Token kind: " + fsToken.getKind());
                logger.info("Token id: " + fsToken.getIdentifier());
                logger.info("Token service: " + fsToken.getService());

                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching JT token: Got new JobClient: " + jc);

                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("mr token"));
                if (mrdt == null) {
                    logger.error("Failed to fetch JT token for ");
                    throw new HadoopSecurityManagerException("Failed to fetch JT token for " + userToProxy);
                }
                logger.info("Created JT token: " + mrdt.toString());
                logger.info("Token kind: " + mrdt.getKind());
                logger.info("Token id: " + mrdt.getIdentifier());
                logger.info("Token service: " + mrdt.getService());

                jc.getCredentials().addToken(mrdt.getService(), mrdt);
                jc.getCredentials().addToken(fsToken.getService(), fsToken);

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(tokenFile);
                    dos = new DataOutputStream(fos);
                    jc.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        dos.close();
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }
                // stash them to cancel after use.
                logger.info("Tokens loaded in " + tokenFile.getAbsolutePath());
            }
        });
    } catch (Exception e) {
        e.printStackTrace();
        throw new HadoopSecurityManagerException(
                "Failed to get hadoop tokens! " + e.getMessage() + e.getCause());

    }
}

From source file:azkaban.security.HadoopSecurityManager_H_1_0.java

License:Apache License

private void cancelMRJobTrackerToken(final Token<? extends TokenIdentifier> t, String userToProxy)
        throws HadoopSecurityManagerException {
    try {/* ww w  .  ja  va 2s .c o m*/
        getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
            @SuppressWarnings("unchecked")
            @Override
            public Void run() throws Exception {
                cancelToken((Token<DelegationTokenIdentifier>) t);
                return null;
            }

            private void cancelToken(Token<DelegationTokenIdentifier> jt)
                    throws IOException, InterruptedException {
                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                jobClient.cancelDelegationToken(jt);
            }
        });
    } catch (Exception e) {
        e.printStackTrace();
        throw new HadoopSecurityManagerException("Failed to cancel Token. " + e.getMessage() + e.getCause());
    }
}

From source file:azkaban.security.HadoopSecurityManager_H_2_0.java

License:Apache License

@Override
public synchronized void prefetchToken(final File tokenFile, final String userToProxy, final Logger logger)
        throws HadoopSecurityManagerException {

    logger.info("Getting hadoop tokens for " + userToProxy);

    try {//from   w  w  w.j  av a2 s  .  c  om
        getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                getToken(userToProxy);
                return null;
            }

            private void getToken(String userToProxy)
                    throws InterruptedException, IOException, HadoopSecurityManagerException {

                FileSystem fs = FileSystem.get(conf);
                // check if we get the correct FS, and most importantly, the conf
                logger.info("Getting DFS token from " + fs.getCanonicalServiceName() + fs.getUri());
                Token<?> fsToken = fs.getDelegationToken(userToProxy);
                if (fsToken == null) {
                    logger.error("Failed to fetch DFS token for ");
                    throw new HadoopSecurityManagerException("Failed to fetch DFS token for " + userToProxy);
                }
                logger.info("Created DFS token: " + fsToken.toString());
                logger.info("Token kind: " + fsToken.getKind());
                logger.info("Token id: " + fsToken.getIdentifier());
                logger.info("Token service: " + fsToken.getService());

                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                logger.info("Pre-fetching JT token: Got new JobClient: " + jc);

                Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(new Text("mr token"));
                if (mrdt == null) {
                    logger.error("Failed to fetch JT token for ");
                    throw new HadoopSecurityManagerException("Failed to fetch JT token for " + userToProxy);
                }
                logger.info("Created JT token: " + mrdt.toString());
                logger.info("Token kind: " + mrdt.getKind());
                logger.info("Token id: " + mrdt.getIdentifier());
                logger.info("Token service: " + mrdt.getService());

                jc.getCredentials().addToken(mrdt.getService(), mrdt);
                jc.getCredentials().addToken(fsToken.getService(), fsToken);

                FileOutputStream fos = null;
                DataOutputStream dos = null;
                try {
                    fos = new FileOutputStream(tokenFile);
                    dos = new DataOutputStream(fos);
                    jc.getCredentials().writeTokenStorageToStream(dos);
                } finally {
                    if (dos != null) {
                        try {
                            dos.close();
                        } catch (Throwable t) {
                            // best effort
                            logger.error(
                                    "encountered exception while closing DataOutputStream of the tokenFile", t);
                        }
                    }
                    if (fos != null) {
                        fos.close();
                    }
                }
                // stash them to cancel after use.
                logger.info("Tokens loaded in " + tokenFile.getAbsolutePath());
            }
        });
    } catch (Exception e) {
        throw new HadoopSecurityManagerException(
                "Failed to get hadoop tokens! " + e.getMessage() + e.getCause());

    }
}

From source file:azkaban.security.HadoopSecurityManager_H_2_0.java

License:Apache License

private void cancelMRJobTrackerToken(final Token<? extends TokenIdentifier> t, String userToProxy)
        throws HadoopSecurityManagerException {
    try {// w  w w .  ja v a  2s .  c o m
        getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
            @SuppressWarnings("unchecked")
            @Override
            public Void run() throws Exception {
                cancelToken((Token<DelegationTokenIdentifier>) t);
                return null;
            }

            private void cancelToken(Token<DelegationTokenIdentifier> jt)
                    throws IOException, InterruptedException {
                JobConf jc = new JobConf(conf);
                JobClient jobClient = new JobClient(jc);
                jobClient.cancelDelegationToken(jt);
            }
        });
    } catch (Exception e) {
        throw new HadoopSecurityManagerException("Failed to cancel token. " + e.getMessage() + e.getCause(), e);
    }
}

From source file:babel.prep.corpus.CorpusGenerator.java

License:Apache License

/**
 * Configures a map-only dataset generation job.
 *///from  w  ww . j  av  a  2  s . c  om
protected JobConf createJobConf(String crawlDir, String pagesSubDir, boolean xmlOut) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("create " + (xmlOut ? "xml formatted" : "") + " dataset from " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(CorpusGenMapper.class);
    job.setOutputFormat(xmlOut ? MultipleXMLLangFileOutputFormat.class : MultipleLangFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, CORPUS_SUBDIR),
            "corpus." + (xmlOut ? PARAM_XML + "." : "") + getCurTimeStamp());
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}

From source file:babel.prep.datedcorpus.DatedCorpusGenerator.java

License:Apache License

/**
 * Configures a map-only dataset generation job.
 *//*from  w  ww. j  ava  2  s .c  o  m*/
protected JobConf createJobConf(String crawlDir, String pagesSubDir) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("create dated dataset from " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(DatedCorpusGenMapper.class);
    job.setReducerClass(DatedCorpusGenReducer.class);

    job.setMapOutputValueClass(PageVersion.class);
    job.setOutputFormat(DatedLangFilesOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, CORPUS_SUBDIR), "datedcorpus." + getCurTimeStamp());
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}

From source file:babel.prep.langid.LangIdentifier.java

License:Apache License

/**
 * Configures a map-only language id job.
 *///from  ww  w. j  a va  2 s .c  om
protected JobConf createJobConf(String crawlDir, String pagesSubDir, String referrer) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("identify languages for pages in " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(LangIdMapper.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.langid." + getCurTimeStamp());
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    job.set(JOB_PROP_JOB_REFERRER, referrer);

    return job;
}