List of usage examples for org.apache.hadoop.mapred JobConf setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
Parameters: name - the property name to set; value - the boolean value of the property.
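Before the project examples below, here is a minimal, self-contained sketch of the call itself. The property name "my.example.flag" is made up for illustration; setBoolean stores the flag in the job configuration and getBoolean reads it back, returning the supplied default when the key is absent.

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanExample {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // store a boolean flag under a hypothetical property name
    conf.setBoolean("my.example.flag", true);
    // read it back; the second argument is the default returned when the key is missing
    boolean flag = conf.getBoolean("my.example.flag", false);
    System.out.println("my.example.flag = " + flag);
  }
}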
From source file:org.apache.nutch.scoring.webgraph.WebGraph.java
License:Apache License
/**
 * Creates the three different WebGraph databases, Outlinks, Inlinks, and
 * Node. If a current WebGraph exists then it is updated; if it doesn't exist
 * then a new WebGraph database is created.
 *
 * @param webGraphDb The WebGraph to create or update.
 * @param segments The array of segments used to update the WebGraph. Newer
 *          segments and fetch times will overwrite older segments.
 * @param normalize whether to use URLNormalizers on URL's in the segment
 * @param filter whether to use URLFilters on URL's in the segment
 *
 * @throws IOException If an error occurs while processing the WebGraph.
 */
public void createWebGraph(Path webGraphDb, Path[] segments, boolean normalize, boolean filter)
    throws IOException {

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  if (LOG.isInfoEnabled()) {
    LOG.info("WebGraphDb: starting at " + sdf.format(start));
    LOG.info("WebGraphDb: webgraphdb: " + webGraphDb);
    LOG.info("WebGraphDb: URL normalize: " + normalize);
    LOG.info("WebGraphDb: URL filter: " + filter);
  }

  Configuration conf = getConf();
  FileSystem fs = FileSystem.get(conf);

  // lock an existing webgraphdb to prevent multiple simultaneous updates
  Path lock = new Path(webGraphDb, LOCK_NAME);
  if (!fs.exists(webGraphDb)) {
    fs.mkdirs(webGraphDb);
  }
  LockUtil.createLockFile(fs, lock, false);

  // outlink and temp outlink database paths
  Path outlinkDb = new Path(webGraphDb, OUTLINK_DIR);
  Path oldOutlinkDb = new Path(webGraphDb, OLD_OUTLINK_DIR);
  if (!fs.exists(outlinkDb)) {
    fs.mkdirs(outlinkDb);
  }
  Path tempOutlinkDb = new Path(outlinkDb + "-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf outlinkJob = new NutchJob(conf);
  outlinkJob.setJobName("Outlinkdb: " + outlinkDb);

  boolean deleteGone = conf.getBoolean("link.delete.gone", false);
  boolean preserveBackup = conf.getBoolean("db.preserve.backup", true);
  if (deleteGone) {
    LOG.info("OutlinkDb: deleting gone links");
  }

  // get the parse data and crawl fetch data for all segments
  if (segments != null) {
    for (int i = 0; i < segments.length; i++) {
      Path parseData = new Path(segments[i], ParseData.DIR_NAME);
      if (fs.exists(parseData)) {
        LOG.info("OutlinkDb: adding input: " + parseData);
        FileInputFormat.addInputPath(outlinkJob, parseData);
      }
      if (deleteGone) {
        Path crawlFetch = new Path(segments[i], CrawlDatum.FETCH_DIR_NAME);
        if (fs.exists(crawlFetch)) {
          LOG.info("OutlinkDb: adding input: " + crawlFetch);
          FileInputFormat.addInputPath(outlinkJob, crawlFetch);
        }
      }
    }
  }

  // add the existing webgraph
  LOG.info("OutlinkDb: adding input: " + outlinkDb);
  FileInputFormat.addInputPath(outlinkJob, outlinkDb);

  outlinkJob.setBoolean(OutlinkDb.URL_NORMALIZING, normalize);
  outlinkJob.setBoolean(OutlinkDb.URL_FILTERING, filter);

  outlinkJob.setInputFormat(SequenceFileInputFormat.class);
  outlinkJob.setMapperClass(OutlinkDb.class);
  outlinkJob.setReducerClass(OutlinkDb.class);
  outlinkJob.setMapOutputKeyClass(Text.class);
  outlinkJob.setMapOutputValueClass(NutchWritable.class);
  outlinkJob.setOutputKeyClass(Text.class);
  outlinkJob.setOutputValueClass(LinkDatum.class);
  FileOutputFormat.setOutputPath(outlinkJob, tempOutlinkDb);
  outlinkJob.setOutputFormat(MapFileOutputFormat.class);
  outlinkJob.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  // run the outlinkdb job and replace any old outlinkdb with the new one
  try {
    LOG.info("OutlinkDb: running");
    JobClient.runJob(outlinkJob);
    LOG.info("OutlinkDb: installing " + outlinkDb);
    FSUtils.replace(fs, oldOutlinkDb, outlinkDb, true);
    FSUtils.replace(fs, outlinkDb, tempOutlinkDb, true);
    if (!preserveBackup && fs.exists(oldOutlinkDb))
      fs.delete(oldOutlinkDb, true);
    LOG.info("OutlinkDb: finished");
  } catch (IOException e) {
    // remove lock file and temporary directory if an error occurs
    LockUtil.removeLockFile(fs, lock);
    if (fs.exists(tempOutlinkDb)) {
      fs.delete(tempOutlinkDb, true);
    }
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }

  // inlink and temp link database paths
  Path inlinkDb = new Path(webGraphDb, INLINK_DIR);
  Path tempInlinkDb = new Path(inlinkDb + "-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf inlinkJob = new NutchJob(conf);
  inlinkJob.setJobName("Inlinkdb " + inlinkDb);
  LOG.info("InlinkDb: adding input: " + outlinkDb);
  FileInputFormat.addInputPath(inlinkJob, outlinkDb);
  inlinkJob.setInputFormat(SequenceFileInputFormat.class);
  inlinkJob.setMapperClass(InlinkDb.class);
  inlinkJob.setMapOutputKeyClass(Text.class);
  inlinkJob.setMapOutputValueClass(LinkDatum.class);
  inlinkJob.setOutputKeyClass(Text.class);
  inlinkJob.setOutputValueClass(LinkDatum.class);
  FileOutputFormat.setOutputPath(inlinkJob, tempInlinkDb);
  inlinkJob.setOutputFormat(MapFileOutputFormat.class);
  inlinkJob.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  try {
    // run the inlink job and replace any old db with the new one
    LOG.info("InlinkDb: running");
    JobClient.runJob(inlinkJob);
    LOG.info("InlinkDb: installing " + inlinkDb);
    FSUtils.replace(fs, inlinkDb, tempInlinkDb, true);
    LOG.info("InlinkDb: finished");
  } catch (IOException e) {
    // remove lock file and temporary directory if an error occurs
    LockUtil.removeLockFile(fs, lock);
    if (fs.exists(tempInlinkDb)) {
      fs.delete(tempInlinkDb, true);
    }
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }

  // node and temp node database paths
  Path nodeDb = new Path(webGraphDb, NODE_DIR);
  Path tempNodeDb = new Path(nodeDb + "-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf nodeJob = new NutchJob(conf);
  nodeJob.setJobName("NodeDb " + nodeDb);
  LOG.info("NodeDb: adding input: " + outlinkDb);
  LOG.info("NodeDb: adding input: " + inlinkDb);
  FileInputFormat.addInputPath(nodeJob, outlinkDb);
  FileInputFormat.addInputPath(nodeJob, inlinkDb);
  nodeJob.setInputFormat(SequenceFileInputFormat.class);
  nodeJob.setReducerClass(NodeDb.class);
  nodeJob.setMapOutputKeyClass(Text.class);
  nodeJob.setMapOutputValueClass(LinkDatum.class);
  nodeJob.setOutputKeyClass(Text.class);
  nodeJob.setOutputValueClass(Node.class);
  FileOutputFormat.setOutputPath(nodeJob, tempNodeDb);
  nodeJob.setOutputFormat(MapFileOutputFormat.class);
  nodeJob.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  try {
    // run the node job and replace old nodedb with new
    LOG.info("NodeDb: running");
    JobClient.runJob(nodeJob);
    LOG.info("NodeDb: installing " + nodeDb);
    FSUtils.replace(fs, nodeDb, tempNodeDb, true);
    LOG.info("NodeDb: finished");
  } catch (IOException e) {
    // remove lock file and temporary directory if an error occurs
    LockUtil.removeLockFile(fs, lock);
    if (fs.exists(tempNodeDb)) {
      fs.delete(tempNodeDb, true);
    }
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }

  // remove the lock file for the webgraph
  LockUtil.removeLockFile(fs, lock);

  long end = System.currentTimeMillis();
  LOG.info("WebGraphDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
From source file:org.apache.nutch.segment.SegmentMerger.java
License:Apache License
public void merge(Path out, Path[] segs, boolean filter, boolean normalize, long slice) throws Exception {
  String segmentName = Generator.generateSegmentName();
  if (LOG.isInfoEnabled()) {
    LOG.info("Merging " + segs.length + " segments to " + out + "/" + segmentName);
  }
  JobConf job = new NutchJob(getConf());
  job.setJobName("mergesegs " + out + "/" + segmentName);
  job.setBoolean("segment.merger.filter", filter);
  job.setBoolean("segment.merger.normalizer", normalize);
  job.setLong("segment.merger.slice", slice);
  job.set("segment.merger.segmentName", segmentName);
  FileSystem fs = FileSystem.get(getConf());
  // prepare the minimal common set of input dirs
  boolean g = true;
  boolean f = true;
  boolean p = true;
  boolean c = true;
  boolean pd = true;
  boolean pt = true;
  for (int i = 0; i < segs.length; i++) {
    if (!fs.exists(segs[i])) {
      if (LOG.isWarnEnabled()) {
        LOG.warn("Input dir " + segs[i] + " doesn't exist, skipping.");
      }
      segs[i] = null;
      continue;
    }
    if (LOG.isInfoEnabled()) {
      LOG.info("SegmentMerger: adding " + segs[i]);
    }
    Path cDir = new Path(segs[i], Content.DIR_NAME);
    Path gDir = new Path(segs[i], CrawlDatum.GENERATE_DIR_NAME);
    Path fDir = new Path(segs[i], CrawlDatum.FETCH_DIR_NAME);
    Path pDir = new Path(segs[i], CrawlDatum.PARSE_DIR_NAME);
    Path pdDir = new Path(segs[i], ParseData.DIR_NAME);
    Path ptDir = new Path(segs[i], ParseText.DIR_NAME);
    c = c && fs.exists(cDir);
    g = g && fs.exists(gDir);
    f = f && fs.exists(fDir);
    p = p && fs.exists(pDir);
    pd = pd && fs.exists(pdDir);
    pt = pt && fs.exists(ptDir);
  }
  StringBuffer sb = new StringBuffer();
  if (c) sb.append(" " + Content.DIR_NAME);
  if (g) sb.append(" " + CrawlDatum.GENERATE_DIR_NAME);
  if (f) sb.append(" " + CrawlDatum.FETCH_DIR_NAME);
  if (p) sb.append(" " + CrawlDatum.PARSE_DIR_NAME);
  if (pd) sb.append(" " + ParseData.DIR_NAME);
  if (pt) sb.append(" " + ParseText.DIR_NAME);
  if (LOG.isInfoEnabled()) {
    LOG.info("SegmentMerger: using segment data from:" + sb.toString());
  }
  for (int i = 0; i < segs.length; i++) {
    if (segs[i] == null) continue;
    if (g) {
      Path gDir = new Path(segs[i], CrawlDatum.GENERATE_DIR_NAME);
      FileInputFormat.addInputPath(job, gDir);
    }
    if (c) {
      Path cDir = new Path(segs[i], Content.DIR_NAME);
      FileInputFormat.addInputPath(job, cDir);
    }
    if (f) {
      Path fDir = new Path(segs[i], CrawlDatum.FETCH_DIR_NAME);
      FileInputFormat.addInputPath(job, fDir);
    }
    if (p) {
      Path pDir = new Path(segs[i], CrawlDatum.PARSE_DIR_NAME);
      FileInputFormat.addInputPath(job, pDir);
    }
    if (pd) {
      Path pdDir = new Path(segs[i], ParseData.DIR_NAME);
      FileInputFormat.addInputPath(job, pdDir);
    }
    if (pt) {
      Path ptDir = new Path(segs[i], ParseText.DIR_NAME);
      FileInputFormat.addInputPath(job, ptDir);
    }
  }
  job.setInputFormat(ObjectInputFormat.class);
  job.setMapperClass(SegmentMerger.class);
  job.setReducerClass(SegmentMerger.class);
  FileOutputFormat.setOutputPath(job, out);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(MetaWrapper.class);
  job.setOutputFormat(SegmentOutputFormat.class);
  setConf(job);
  JobClient.runJob(job);
}
From source file:org.apache.nutch.segment.SegmentReader.java
License:Apache License
private JobConf createJobConf() {
  JobConf job = new NutchJob(getConf());
  job.setBoolean("segment.reader.co", this.co);
  job.setBoolean("segment.reader.fe", this.fe);
  job.setBoolean("segment.reader.ge", this.ge);
  job.setBoolean("segment.reader.pa", this.pa);
  job.setBoolean("segment.reader.pd", this.pd);
  job.setBoolean("segment.reader.pt", this.pt);
  return job;
}
From source file:org.apache.nutch.tools.compat.CrawlDbConverter.java
License:Apache License
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.err.println("Usage: CrawlDbConverter <oldDb> <newDb> [-withMetadata]");
    System.err.println("\toldDb\tname of the crawldb that uses UTF8 class.");
    System.err.println("\tnewDb\tname of the output crawldb that will use Text class.");
    System.err.println("\twithMetadata\tconvert also all metadata keys that use UTF8 to Text.");
    return -1;
  }
  JobConf job = new NutchJob(getConf());
  FileSystem fs = FileSystem.get(getConf());
  Path oldDb = new Path(args[0], CrawlDb.CURRENT_NAME);
  Path newDb = new Path(oldDb, Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  if (!fs.exists(oldDb)) {
    LOG.fatal("Old db doesn't exist in '" + args[0] + "'");
    return -1;
  }
  boolean withMetadata = false;
  if (args.length > 2 && args[2].equalsIgnoreCase("-withMetadata"))
    withMetadata = true;
  job.setBoolean(CONVERT_META_KEY, withMetadata);
  job.setInputPath(oldDb);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(CrawlDbConverter.class);
  job.setOutputFormat(MapFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(CrawlDatum.class);
  job.setOutputPath(newDb);
  try {
    JobClient.runJob(job);
    CrawlDb.install(job, new Path(args[1]));
    return 0;
  } catch (Exception e) {
    LOG.fatal("Error: " + StringUtils.stringifyException(e));
    return -1;
  }
}
From source file:org.apache.nutch.tools.FreeGenerator.java
License:Apache License
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: FreeGenerator <inputDir> <segmentsDir> [-filter] [-normalize]");
    System.err.println("\tinputDir\tinput directory containing one or more input files.");
    System.err.println("\t\tEach text file contains a list of URLs, one URL per line");
    System.err.println("\tsegmentsDir\toutput directory, where new segment will be created");
    System.err.println("\t-filter\trun current URLFilters on input URLs");
    System.err.println("\t-normalize\trun current URLNormalizers on input URLs");
    return -1;
  }
  boolean filter = false;
  boolean normalize = false;
  if (args.length > 2) {
    for (int i = 2; i < args.length; i++) {
      if (args[i].equals("-filter")) {
        filter = true;
      } else if (args[i].equals("-normalize")) {
        normalize = true;
      } else {
        LOG.error("Unknown argument: " + args[i] + ", exiting ...");
        return -1;
      }
    }
  }

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("FreeGenerator: starting at " + sdf.format(start));

  JobConf job = new NutchJob(getConf());
  job.setBoolean(FILTER_KEY, filter);
  job.setBoolean(NORMALIZE_KEY, normalize);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  job.setInputFormat(TextInputFormat.class);
  job.setMapperClass(FG.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Generator.SelectorEntry.class);
  job.setPartitionerClass(URLPartitioner.class);
  job.setReducerClass(FG.class);
  String segName = Generator.generateSegmentName();
  job.setNumReduceTasks(job.getNumMapTasks());
  job.setOutputFormat(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(CrawlDatum.class);
  job.setOutputKeyComparatorClass(Generator.HashComparator.class);
  FileOutputFormat.setOutputPath(job, new Path(args[1], new Path(segName, CrawlDatum.GENERATE_DIR_NAME)));
  try {
    JobClient.runJob(job);
  } catch (Exception e) {
    LOG.error("FAILED: " + StringUtils.stringifyException(e));
    return -1;
  }
  long end = System.currentTimeMillis();
  LOG.info("FreeGenerator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
  return 0;
}
From source file:org.apache.oozie.action.hadoop.JavaActionExecutor.java
License:Apache License
@SuppressWarnings("unchecked") JobConf createLauncherConf(FileSystem actionFs, Context context, WorkflowAction action, Element actionXml, Configuration actionConf) throws ActionExecutorException { try {//from w ww . ja va 2 s.c o m // app path could be a file Path appPathRoot = new Path(context.getWorkflow().getAppPath()); if (actionFs.isFile(appPathRoot)) { appPathRoot = appPathRoot.getParent(); } // launcher job configuration JobConf launcherJobConf = createBaseHadoopConf(context, actionXml); // cancel delegation token on a launcher job which stays alive till child job(s) finishes // otherwise (in mapred action), doesn't cancel not to disturb running child job launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", true); setupLauncherConf(launcherJobConf, actionXml, appPathRoot, context); String launcherTag = null; // Extracting tag and appending action name to maintain the uniqueness. if (context.getVar(ActionStartXCommand.OOZIE_ACTION_YARN_TAG) != null) { launcherTag = context.getVar(ActionStartXCommand.OOZIE_ACTION_YARN_TAG); } else { //Keeping it to maintain backward compatibly with test cases. launcherTag = action.getId(); } // Properties for when a launcher job's AM gets restarted if (ConfigurationService.getBoolean(HADOOP_YARN_KILL_CHILD_JOBS_ON_AMRESTART)) { // launcher time filter is required to prune the search of launcher tag. // Setting coordinator action nominal time as launcher time as it child job cannot launch before nominal // time. Workflow created time is good enough when workflow is running independently or workflow is // rerunning from failed node. long launcherTime = System.currentTimeMillis(); String coordActionNominalTime = context.getProtoActionConf() .get(CoordActionStartXCommand.OOZIE_COORD_ACTION_NOMINAL_TIME); if (coordActionNominalTime != null) { launcherTime = Long.parseLong(coordActionNominalTime); } else if (context.getWorkflow().getCreatedTime() != null) { launcherTime = context.getWorkflow().getCreatedTime().getTime(); } LauncherMapperHelper.setupYarnRestartHandling(launcherJobConf, actionConf, launcherTag, launcherTime); } else { LOG.info(MessageFormat.format("{0} is set to false, not setting YARN restart properties", HADOOP_YARN_KILL_CHILD_JOBS_ON_AMRESTART)); } String actionShareLibProperty = actionConf.get(ACTION_SHARELIB_FOR + getType()); if (actionShareLibProperty != null) { launcherJobConf.set(ACTION_SHARELIB_FOR + getType(), actionShareLibProperty); } setLibFilesArchives(context, actionXml, appPathRoot, launcherJobConf); String jobName = launcherJobConf.get(HADOOP_JOB_NAME); if (jobName == null || jobName.isEmpty()) { jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow().getAppName(), action.getName(), context.getWorkflow().getId()); launcherJobConf.setJobName(jobName); } // Inject Oozie job information if enabled. 
injectJobInfo(launcherJobConf, actionConf, context, action); injectLauncherCallback(context, launcherJobConf); String jobId = context.getWorkflow().getId(); String actionId = action.getId(); Path actionDir = context.getActionDir(); String recoveryId = context.getRecoveryId(); // Getting the prepare XML from the action XML Namespace ns = actionXml.getNamespace(); Element prepareElement = actionXml.getChild("prepare", ns); String prepareXML = ""; if (prepareElement != null) { if (prepareElement.getChildren().size() > 0) { prepareXML = XmlUtils.prettyPrint(prepareElement).toString().trim(); } } LauncherMapperHelper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf, prepareXML); // Set the launcher Main Class LauncherMapperHelper.setupMainClass(launcherJobConf, getLauncherMain(launcherJobConf, actionXml)); LauncherMapperHelper.setupLauncherURIHandlerConf(launcherJobConf); LauncherMapperHelper.setupMaxOutputData(launcherJobConf, maxActionOutputLen); LauncherMapperHelper.setupMaxExternalStatsSize(launcherJobConf, maxExternalStatsSize); LauncherMapperHelper.setupMaxFSGlob(launcherJobConf, maxFSGlobMax); List<Element> list = actionXml.getChildren("arg", ns); String[] args = new String[list.size()]; for (int i = 0; i < list.size(); i++) { args[i] = list.get(i).getTextTrim(); } LauncherMapperHelper.setupMainArguments(launcherJobConf, args); // Make mapred.child.java.opts and mapreduce.map.java.opts equal, but give values from the latter priority; also append // <java-opt> and <java-opts> and give those highest priority StringBuilder opts = new StringBuilder(launcherJobConf.get(HADOOP_CHILD_JAVA_OPTS, "")); if (launcherJobConf.get(HADOOP_MAP_JAVA_OPTS) != null) { opts.append(" ").append(launcherJobConf.get(HADOOP_MAP_JAVA_OPTS)); } List<Element> javaopts = actionXml.getChildren("java-opt", ns); for (Element opt : javaopts) { opts.append(" ").append(opt.getTextTrim()); } Element opt = actionXml.getChild("java-opts", ns); if (opt != null) { opts.append(" ").append(opt.getTextTrim()); } launcherJobConf.set(HADOOP_CHILD_JAVA_OPTS, opts.toString().trim()); launcherJobConf.set(HADOOP_MAP_JAVA_OPTS, opts.toString().trim()); // setting for uber mode if (launcherJobConf.getBoolean(HADOOP_YARN_UBER_MODE, false)) { if (checkPropertiesToDisableUber(launcherJobConf)) { launcherJobConf.setBoolean(HADOOP_YARN_UBER_MODE, false); } else { updateConfForUberMode(launcherJobConf); } } updateConfForJavaTmpDir(launcherJobConf); injectLauncherTimelineServiceEnabled(launcherJobConf, actionConf); // properties from action that are needed by the launcher (e.g. QUEUE NAME, ACLs) // maybe we should add queue to the WF schema, below job-tracker actionConfToLauncherConf(actionConf, launcherJobConf); return launcherJobConf; } catch (Exception ex) { throw convertException(ex); } }
From source file:org.apache.oozie.action.hadoop.LauncherMapperHelper.java
License:Apache License
public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
    String recoveryId, Configuration actionConf, String prepareXML) throws IOException, HadoopAccessorException {

  launcherConf.setMapperClass(LauncherMapper.class);
  launcherConf.setSpeculativeExecution(false);
  launcherConf.setNumMapTasks(1);
  launcherConf.setNumReduceTasks(0);

  launcherConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
  launcherConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);
  launcherConf.set(LauncherMapper.OOZIE_ACTION_DIR_PATH, actionDir.toString());
  launcherConf.set(LauncherMapper.OOZIE_ACTION_RECOVERY_ID, recoveryId);
  launcherConf.set(LauncherMapper.ACTION_PREPARE_XML, prepareXML);

  actionConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
  actionConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);

  if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", false)) {
    List<String> purgedEntries = new ArrayList<String>();
    Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
    for (String entry : entries) {
      if (entry.contains("#")) {
        purgedEntries.add(entry);
      }
    }
    actionConf.setStrings("mapreduce.job.cache.files", purgedEntries.toArray(new String[purgedEntries.size()]));
    launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
  }

  FileSystem fs = Services.get().get(HadoopAccessorService.class)
      .createFileSystem(launcherConf.get("user.name"), actionDir.toUri(), launcherConf);
  fs.mkdirs(actionDir);

  OutputStream os = fs.create(new Path(actionDir, LauncherMapper.ACTION_CONF_XML));
  try {
    actionConf.writeXml(os);
  } finally {
    IOUtils.closeSafely(os);
  }

  launcherConf.setInputFormat(OozieLauncherInputFormat.class);
  launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
}
From source file:org.apache.oozie.action.hadoop.PipesMain.java
License:Apache License
@Override
protected void addActionConf(JobConf jobConf, Configuration actionConf) {
  String value = actionConf.get("oozie.pipes.map");
  if (value != null) {
    jobConf.setBoolean("hadoop.pipes.java.mapper", true);
    jobConf.set("mapred.mapper.class", value);
  }
  value = actionConf.get("oozie.pipes.reduce");
  if (value != null) {
    jobConf.setBoolean("hadoop.pipes.java.reducer", true);
    jobConf.set("mapred.reducer.class", value);
  }
  value = actionConf.get("oozie.pipes.inputformat");
  if (value != null) {
    jobConf.setBoolean("hadoop.pipes.java.recordreader", true);
    jobConf.set("mapred.input.format.class", value);
  }
  value = actionConf.get("oozie.pipes.partitioner");
  if (value != null) {
    jobConf.set("mapred.partitioner.class", value);
  }
  value = actionConf.get("oozie.pipes.writer");
  if (value != null) {
    jobConf.setBoolean("hadoop.pipes.java.recordwriter", true);
    jobConf.set("mapred.output.format.class", value);
  }
  value = actionConf.get("oozie.pipes.program");
  if (value != null) {
    jobConf.set("hadoop.pipes.executable", value);
    if (value.contains("#")) {
      DistributedCache.createSymlink(jobConf);
    }
  }
  super.addActionConf(jobConf, actionConf);
}
From source file:org.apache.oozie.service.HadoopAccessorService.java
License:Apache License
/**
 * Creates a JobConf using the site configuration for the specified hostname:port.
 * <p>
 * If the specified hostname:port is not defined it falls back to the '*' site
 * configuration if available. If the '*' site configuration is not available,
 * the JobConf has all Hadoop defaults.
 *
 * @param hostPort hostname:port to lookup Hadoop site configuration.
 * @return a JobConf with the corresponding site configuration for hostPort.
 */
public JobConf createJobConf(String hostPort) {
  JobConf jobConf = new JobConf(getCachedConf());
  XConfiguration.copy(getConfiguration(hostPort), jobConf);
  jobConf.setBoolean(OOZIE_HADOOP_ACCESSOR_SERVICE_CREATED, true);
  return jobConf;
}
From source file:org.apache.parquet.hadoop.codec.CodecConfigTest.java
License:Apache License
public void shouldUseHadoopFlagToSetCodec(String codecClassStr, CompressionCodecName expectedCodec)
    throws IOException {
  // Test mapreduce API
  Job job = new Job();
  Configuration conf = job.getConfiguration();
  conf.setBoolean("mapred.output.compress", true);
  conf.set("mapred.output.compression.codec", codecClassStr);
  TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf,
      new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
  Assert.assertEquals(expectedCodec, CodecConfig.from(task).getCodec());

  // Test mapred API
  JobConf jobConf = new JobConf();
  jobConf.setBoolean("mapred.output.compress", true);
  jobConf.set("mapred.output.compression.codec", codecClassStr);
  Assert.assertEquals(CodecConfig.from(jobConf).getCodec(), expectedCodec);
}