List of usage examples for org.apache.hadoop.mapred JobConf setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
Parameters: name - the property name to set; value - the boolean value of the property.
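Before the project examples below, here is a minimal, self-contained sketch of the call itself. The property name "my.example.flag" is made up for illustration; setBoolean stores the flag in the job configuration and getBoolean reads it back, returning the supplied default when the key is absent.

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanExample {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // store a boolean flag under a hypothetical property name
    conf.setBoolean("my.example.flag", true);
    // read it back; the second argument is the default returned when the key is missing
    boolean flag = conf.getBoolean("my.example.flag", false);
    System.out.println("my.example.flag = " + flag);
  }
}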
From source file:org.apache.nutch.scoring.webgraph.WebGraph.java
License:Apache License
/**
 * Creates the three different WebGraph databases, Outlinks, Inlinks, and
 * Node. If a current WebGraph exists then it is updated; if it doesn't exist
 * then a new WebGraph database is created.
 *
 * @param webGraphDb The WebGraph to create or update.
 * @param segments The array of segments used to update the WebGraph. Newer
 *          segments and fetch times will overwrite older segments.
 * @param normalize whether to use URLNormalizers on URL's in the segment
 * @param filter whether to use URLFilters on URL's in the segment
 *
 * @throws IOException If an error occurs while processing the WebGraph.
 */
public void createWebGraph(Path webGraphDb, Path[] segments, boolean normalize, boolean filter)
    throws IOException {

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  if (LOG.isInfoEnabled()) {
    LOG.info("WebGraphDb: starting at " + sdf.format(start));
    LOG.info("WebGraphDb: webgraphdb: " + webGraphDb);
    LOG.info("WebGraphDb: URL normalize: " + normalize);
    LOG.info("WebGraphDb: URL filter: " + filter);
  }

  Configuration conf = getConf();
  FileSystem fs = FileSystem.get(conf);

  // lock an existing webgraphdb to prevent multiple simultaneous updates
  Path lock = new Path(webGraphDb, LOCK_NAME);
  if (!fs.exists(webGraphDb)) {
    fs.mkdirs(webGraphDb);
  }
  LockUtil.createLockFile(fs, lock, false);

  // outlink and temp outlink database paths
  Path outlinkDb = new Path(webGraphDb, OUTLINK_DIR);
  Path oldOutlinkDb = new Path(webGraphDb, OLD_OUTLINK_DIR);
  if (!fs.exists(outlinkDb)) {
    fs.mkdirs(outlinkDb);
  }
  Path tempOutlinkDb = new Path(outlinkDb + "-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf outlinkJob = new NutchJob(conf);
  outlinkJob.setJobName("Outlinkdb: " + outlinkDb);

  boolean deleteGone = conf.getBoolean("link.delete.gone", false);
  boolean preserveBackup = conf.getBoolean("db.preserve.backup", true);
  if (deleteGone) {
    LOG.info("OutlinkDb: deleting gone links");
  }

  // get the parse data and crawl fetch data for all segments
  if (segments != null) {
    for (int i = 0; i < segments.length; i++) {
      Path parseData = new Path(segments[i], ParseData.DIR_NAME);
      if (fs.exists(parseData)) {
        LOG.info("OutlinkDb: adding input: " + parseData);
        FileInputFormat.addInputPath(outlinkJob, parseData);
      }
      if (deleteGone) {
        Path crawlFetch = new Path(segments[i], CrawlDatum.FETCH_DIR_NAME);
        if (fs.exists(crawlFetch)) {
          LOG.info("OutlinkDb: adding input: " + crawlFetch);
          FileInputFormat.addInputPath(outlinkJob, crawlFetch);
        }
      }
    }
  }

  // add the existing webgraph
  LOG.info("OutlinkDb: adding input: " + outlinkDb);
  FileInputFormat.addInputPath(outlinkJob, outlinkDb);

  outlinkJob.setBoolean(OutlinkDb.URL_NORMALIZING, normalize);
  outlinkJob.setBoolean(OutlinkDb.URL_FILTERING, filter);

  outlinkJob.setInputFormat(SequenceFileInputFormat.class);
  outlinkJob.setMapperClass(OutlinkDb.class);
  outlinkJob.setReducerClass(OutlinkDb.class);
  outlinkJob.setMapOutputKeyClass(Text.class);
  outlinkJob.setMapOutputValueClass(NutchWritable.class);
  outlinkJob.setOutputKeyClass(Text.class);
  outlinkJob.setOutputValueClass(LinkDatum.class);
  FileOutputFormat.setOutputPath(outlinkJob, tempOutlinkDb);
  outlinkJob.setOutputFormat(MapFileOutputFormat.class);
  outlinkJob.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  // run the outlinkdb job and replace any old outlinkdb with the new one
  try {
    LOG.info("OutlinkDb: running");
    JobClient.runJob(outlinkJob);
    LOG.info("OutlinkDb: installing " + outlinkDb);
    FSUtils.replace(fs, oldOutlinkDb, outlinkDb, true);
    FSUtils.replace(fs, outlinkDb, tempOutlinkDb, true);
    if (!preserveBackup && fs.exists(oldOutlinkDb))
      fs.delete(oldOutlinkDb, true);
    LOG.info("OutlinkDb: finished");
  } catch (IOException e) {
    // remove lock file and temporary directory if an error occurs
    LockUtil.removeLockFile(fs, lock);
    if (fs.exists(tempOutlinkDb)) {
      fs.delete(tempOutlinkDb, true);
    }
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }

  // inlink and temp link database paths
  Path inlinkDb = new Path(webGraphDb, INLINK_DIR);
  Path tempInlinkDb = new Path(inlinkDb + "-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf inlinkJob = new NutchJob(conf);
  inlinkJob.setJobName("Inlinkdb " + inlinkDb);
  LOG.info("InlinkDb: adding input: " + outlinkDb);
  FileInputFormat.addInputPath(inlinkJob, outlinkDb);
  inlinkJob.setInputFormat(SequenceFileInputFormat.class);
  inlinkJob.setMapperClass(InlinkDb.class);
  inlinkJob.setMapOutputKeyClass(Text.class);
  inlinkJob.setMapOutputValueClass(LinkDatum.class);
  inlinkJob.setOutputKeyClass(Text.class);
  inlinkJob.setOutputValueClass(LinkDatum.class);
  FileOutputFormat.setOutputPath(inlinkJob, tempInlinkDb);
  inlinkJob.setOutputFormat(MapFileOutputFormat.class);
  inlinkJob.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  try {
    // run the inlink job and replace any old db with the new one
    LOG.info("InlinkDb: running");
    JobClient.runJob(inlinkJob);
    LOG.info("InlinkDb: installing " + inlinkDb);
    FSUtils.replace(fs, inlinkDb, tempInlinkDb, true);
    LOG.info("InlinkDb: finished");
  } catch (IOException e) {
    // remove lock file and temporary directory if an error occurs
    LockUtil.removeLockFile(fs, lock);
    if (fs.exists(tempInlinkDb)) {
      fs.delete(tempInlinkDb, true);
    }
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }

  // node and temp node database paths
  Path nodeDb = new Path(webGraphDb, NODE_DIR);
  Path tempNodeDb = new Path(nodeDb + "-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf nodeJob = new NutchJob(conf);
  nodeJob.setJobName("NodeDb " + nodeDb);
  LOG.info("NodeDb: adding input: " + outlinkDb);
  LOG.info("NodeDb: adding input: " + inlinkDb);
  FileInputFormat.addInputPath(nodeJob, outlinkDb);
  FileInputFormat.addInputPath(nodeJob, inlinkDb);
  nodeJob.setInputFormat(SequenceFileInputFormat.class);
  nodeJob.setReducerClass(NodeDb.class);
  nodeJob.setMapOutputKeyClass(Text.class);
  nodeJob.setMapOutputValueClass(LinkDatum.class);
  nodeJob.setOutputKeyClass(Text.class);
  nodeJob.setOutputValueClass(Node.class);
  FileOutputFormat.setOutputPath(nodeJob, tempNodeDb);
  nodeJob.setOutputFormat(MapFileOutputFormat.class);
  nodeJob.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  try {
    // run the node job and replace old nodedb with new
    LOG.info("NodeDb: running");
    JobClient.runJob(nodeJob);
    LOG.info("NodeDb: installing " + nodeDb);
    FSUtils.replace(fs, nodeDb, tempNodeDb, true);
    LOG.info("NodeDb: finished");
  } catch (IOException e) {
    // remove lock file and temporary directory if an error occurs
    LockUtil.removeLockFile(fs, lock);
    if (fs.exists(tempNodeDb)) {
      fs.delete(tempNodeDb, true);
    }
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }

  // remove the lock file for the webgraph
  LockUtil.removeLockFile(fs, lock);

  long end = System.currentTimeMillis();
  LOG.info("WebGraphDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
From source file:org.apache.nutch.segment.SegmentMerger.java
License:Apache License
public void merge(Path out, Path[] segs, boolean filter, boolean normalize, long slice) throws Exception {
  String segmentName = Generator.generateSegmentName();
  if (LOG.isInfoEnabled()) {
    LOG.info("Merging " + segs.length + " segments to " + out + "/" + segmentName);
  }
  JobConf job = new NutchJob(getConf());
  job.setJobName("mergesegs " + out + "/" + segmentName);
  job.setBoolean("segment.merger.filter", filter);
  job.setBoolean("segment.merger.normalizer", normalize);
  job.setLong("segment.merger.slice", slice);
  job.set("segment.merger.segmentName", segmentName);
  FileSystem fs = FileSystem.get(getConf());
  // prepare the minimal common set of input dirs
  boolean g = true;
  boolean f = true;
  boolean p = true;
  boolean c = true;
  boolean pd = true;
  boolean pt = true;
  for (int i = 0; i < segs.length; i++) {
    if (!fs.exists(segs[i])) {
      if (LOG.isWarnEnabled()) {
        LOG.warn("Input dir " + segs[i] + " doesn't exist, skipping.");
      }
      segs[i] = null;
      continue;
    }
    if (LOG.isInfoEnabled()) {
      LOG.info("SegmentMerger: adding " + segs[i]);
    }
    Path cDir = new Path(segs[i], Content.DIR_NAME);
    Path gDir = new Path(segs[i], CrawlDatum.GENERATE_DIR_NAME);
    Path fDir = new Path(segs[i], CrawlDatum.FETCH_DIR_NAME);
    Path pDir = new Path(segs[i], CrawlDatum.PARSE_DIR_NAME);
    Path pdDir = new Path(segs[i], ParseData.DIR_NAME);
    Path ptDir = new Path(segs[i], ParseText.DIR_NAME);
    c = c && fs.exists(cDir);
    g = g && fs.exists(gDir);
    f = f && fs.exists(fDir);
    p = p && fs.exists(pDir);
    pd = pd && fs.exists(pdDir);
    pt = pt && fs.exists(ptDir);
  }
  StringBuffer sb = new StringBuffer();
  if (c) sb.append(" " + Content.DIR_NAME);
  if (g) sb.append(" " + CrawlDatum.GENERATE_DIR_NAME);
  if (f) sb.append(" " + CrawlDatum.FETCH_DIR_NAME);
  if (p) sb.append(" " + CrawlDatum.PARSE_DIR_NAME);
  if (pd) sb.append(" " + ParseData.DIR_NAME);
  if (pt) sb.append(" " + ParseText.DIR_NAME);
  if (LOG.isInfoEnabled()) {
    LOG.info("SegmentMerger: using segment data from:" + sb.toString());
  }
  for (int i = 0; i < segs.length; i++) {
    if (segs[i] == null) continue;
    if (g) {
      Path gDir = new Path(segs[i], CrawlDatum.GENERATE_DIR_NAME);
      FileInputFormat.addInputPath(job, gDir);
    }
    if (c) {
      Path cDir = new Path(segs[i], Content.DIR_NAME);
      FileInputFormat.addInputPath(job, cDir);
    }
    if (f) {
      Path fDir = new Path(segs[i], CrawlDatum.FETCH_DIR_NAME);
      FileInputFormat.addInputPath(job, fDir);
    }
    if (p) {
      Path pDir = new Path(segs[i], CrawlDatum.PARSE_DIR_NAME);
      FileInputFormat.addInputPath(job, pDir);
    }
    if (pd) {
      Path pdDir = new Path(segs[i], ParseData.DIR_NAME);
      FileInputFormat.addInputPath(job, pdDir);
    }
    if (pt) {
      Path ptDir = new Path(segs[i], ParseText.DIR_NAME);
      FileInputFormat.addInputPath(job, ptDir);
    }
  }
  job.setInputFormat(ObjectInputFormat.class);
  job.setMapperClass(SegmentMerger.class);
  job.setReducerClass(SegmentMerger.class);
  FileOutputFormat.setOutputPath(job, out);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(MetaWrapper.class);
  job.setOutputFormat(SegmentOutputFormat.class);
  setConf(job);
  JobClient.runJob(job);
}
From source file:org.apache.nutch.segment.SegmentReader.java
License:Apache License
private JobConf createJobConf() {
  JobConf job = new NutchJob(getConf());
  job.setBoolean("segment.reader.co", this.co);
  job.setBoolean("segment.reader.fe", this.fe);
  job.setBoolean("segment.reader.ge", this.ge);
  job.setBoolean("segment.reader.pa", this.pa);
  job.setBoolean("segment.reader.pd", this.pd);
  job.setBoolean("segment.reader.pt", this.pt);
  return job;
}
From source file:org.apache.nutch.tools.compat.CrawlDbConverter.java
License:Apache License
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.err.println("Usage: CrawlDbConverter <oldDb> <newDb> [-withMetadata]");
    System.err.println("\toldDb\tname of the crawldb that uses UTF8 class.");
    System.err.println("\tnewDb\tname of the output crawldb that will use Text class.");
    System.err.println("\twithMetadata\tconvert also all metadata keys that use UTF8 to Text.");
    return -1;
  }
  JobConf job = new NutchJob(getConf());
  FileSystem fs = FileSystem.get(getConf());
  Path oldDb = new Path(args[0], CrawlDb.CURRENT_NAME);
  Path newDb = new Path(oldDb, Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  if (!fs.exists(oldDb)) {
    LOG.fatal("Old db doesn't exist in '" + args[0] + "'");
    return -1;
  }
  boolean withMetadata = false;
  if (args.length > 2 && args[2].equalsIgnoreCase("-withMetadata"))
    withMetadata = true;
  job.setBoolean(CONVERT_META_KEY, withMetadata);
  job.setInputPath(oldDb);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(CrawlDbConverter.class);
  job.setOutputFormat(MapFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(CrawlDatum.class);
  job.setOutputPath(newDb);
  try {
    JobClient.runJob(job);
    CrawlDb.install(job, new Path(args[1]));
    return 0;
  } catch (Exception e) {
    LOG.fatal("Error: " + StringUtils.stringifyException(e));
    return -1;
  }
}
From source file:org.apache.nutch.tools.FreeGenerator.java
License:Apache License
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: FreeGenerator <inputDir> <segmentsDir> [-filter] [-normalize]");
    System.err.println("\tinputDir\tinput directory containing one or more input files.");
    System.err.println("\t\tEach text file contains a list of URLs, one URL per line");
    System.err.println("\tsegmentsDir\toutput directory, where new segment will be created");
    System.err.println("\t-filter\trun current URLFilters on input URLs");
    System.err.println("\t-normalize\trun current URLNormalizers on input URLs");
    return -1;
  }
  boolean filter = false;
  boolean normalize = false;
  if (args.length > 2) {
    for (int i = 2; i < args.length; i++) {
      if (args[i].equals("-filter")) {
        filter = true;
      } else if (args[i].equals("-normalize")) {
        normalize = true;
      } else {
        LOG.error("Unknown argument: " + args[i] + ", exiting ...");
        return -1;
      }
    }
  }

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("FreeGenerator: starting at " + sdf.format(start));

  JobConf job = new NutchJob(getConf());
  job.setBoolean(FILTER_KEY, filter);
  job.setBoolean(NORMALIZE_KEY, normalize);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  job.setInputFormat(TextInputFormat.class);
  job.setMapperClass(FG.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Generator.SelectorEntry.class);
  job.setPartitionerClass(URLPartitioner.class);
  job.setReducerClass(FG.class);
  String segName = Generator.generateSegmentName();
  job.setNumReduceTasks(job.getNumMapTasks());
  job.setOutputFormat(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(CrawlDatum.class);
  job.setOutputKeyComparatorClass(Generator.HashComparator.class);
  FileOutputFormat.setOutputPath(job, new Path(args[1], new Path(segName, CrawlDatum.GENERATE_DIR_NAME)));
  try {
    JobClient.runJob(job);
  } catch (Exception e) {
    LOG.error("FAILED: " + StringUtils.stringifyException(e));
    return -1;
  }
  long end = System.currentTimeMillis();
  LOG.info("FreeGenerator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
  return 0;
}
From source file:org.apache.oozie.action.hadoop.JavaActionExecutor.java
License:Apache License
@SuppressWarnings("unchecked") JobConf createLauncherConf(FileSystem actionFs, Context context, WorkflowAction action, Element actionXml, Configuration actionConf) throws ActionExecutorException { try {//from w ww . ja va 2 s.c o m // app path could be a file Path appPathRoot = new Path(context.getWorkflow().getAppPath()); if (actionFs.isFile(appPathRoot)) { appPathRoot = appPathRoot.getParent(); } // launcher job configuration JobConf launcherJobConf = createBaseHadoopConf(context, actionXml); // cancel delegation token on a launcher job which stays alive till child job(s) finishes // otherwise (in mapred action), doesn't cancel not to disturb running child job launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", true); setupLauncherConf(launcherJobConf, actionXml, appPathRoot, context); String launcherTag = null; // Extracting tag and appending action name to maintain the uniqueness. if (context.getVar(ActionStartXCommand.OOZIE_ACTION_YARN_TAG) != null) { launcherTag = context.getVar(ActionStartXCommand.OOZIE_ACTION_YARN_TAG); } else { //Keeping it to maintain backward compatibly with test cases. launcherTag = action.getId(); } // Properties for when a launcher job's AM gets restarted if (ConfigurationService.getBoolean(HADOOP_YARN_KILL_CHILD_JOBS_ON_AMRESTART)) { // launcher time filter is required to prune the search of launcher tag. // Setting coordinator action nominal time as launcher time as it child job cannot launch before nominal // time. Workflow created time is good enough when workflow is running independently or workflow is // rerunning from failed node. long launcherTime = System.currentTimeMillis(); String coordActionNominalTime = context.getProtoActionConf() .get(CoordActionStartXCommand.OOZIE_COORD_ACTION_NOMINAL_TIME); if (coordActionNominalTime != null) { launcherTime = Long.parseLong(coordActionNominalTime); } else if (context.getWorkflow().getCreatedTime() != null) { launcherTime = context.getWorkflow().getCreatedTime().getTime(); } LauncherMapperHelper.setupYarnRestartHandling(launcherJobConf, actionConf, launcherTag, launcherTime); } else { LOG.info(MessageFormat.format("{0} is set to false, not setting YARN restart properties", HADOOP_YARN_KILL_CHILD_JOBS_ON_AMRESTART)); } String actionShareLibProperty = actionConf.get(ACTION_SHARELIB_FOR + getType()); if (actionShareLibProperty != null) { launcherJobConf.set(ACTION_SHARELIB_FOR + getType(), actionShareLibProperty); } setLibFilesArchives(context, actionXml, appPathRoot, launcherJobConf); String jobName = launcherJobConf.get(HADOOP_JOB_NAME); if (jobName == null || jobName.isEmpty()) { jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow().getAppName(), action.getName(), context.getWorkflow().getId()); launcherJobConf.setJobName(jobName); } // Inject Oozie job information if enabled. 
injectJobInfo(launcherJobConf, actionConf, context, action); injectLauncherCallback(context, launcherJobConf); String jobId = context.getWorkflow().getId(); String actionId = action.getId(); Path actionDir = context.getActionDir(); String recoveryId = context.getRecoveryId(); // Getting the prepare XML from the action XML Namespace ns = actionXml.getNamespace(); Element prepareElement = actionXml.getChild("prepare", ns); String prepareXML = ""; if (prepareElement != null) { if (prepareElement.getChildren().size() > 0) { prepareXML = XmlUtils.prettyPrint(prepareElement).toString().trim(); } } LauncherMapperHelper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf, prepareXML); // Set the launcher Main Class LauncherMapperHelper.setupMainClass(launcherJobConf, getLauncherMain(launcherJobConf, actionXml)); LauncherMapperHelper.setupLauncherURIHandlerConf(launcherJobConf); LauncherMapperHelper.setupMaxOutputData(launcherJobConf, maxActionOutputLen); LauncherMapperHelper.setupMaxExternalStatsSize(launcherJobConf, maxExternalStatsSize); LauncherMapperHelper.setupMaxFSGlob(launcherJobConf, maxFSGlobMax); List<Element> list = actionXml.getChildren("arg", ns); String[] args = new String[list.size()]; for (int i = 0; i < list.size(); i++) { args[i] = list.get(i).getTextTrim(); } LauncherMapperHelper.setupMainArguments(launcherJobConf, args); // Make mapred.child.java.opts and mapreduce.map.java.opts equal, but give values from the latter priority; also append // <java-opt> and <java-opts> and give those highest priority StringBuilder opts = new StringBuilder(launcherJobConf.get(HADOOP_CHILD_JAVA_OPTS, "")); if (launcherJobConf.get(HADOOP_MAP_JAVA_OPTS) != null) { opts.append(" ").append(launcherJobConf.get(HADOOP_MAP_JAVA_OPTS)); } List<Element> javaopts = actionXml.getChildren("java-opt", ns); for (Element opt : javaopts) { opts.append(" ").append(opt.getTextTrim()); } Element opt = actionXml.getChild("java-opts", ns); if (opt != null) { opts.append(" ").append(opt.getTextTrim()); } launcherJobConf.set(HADOOP_CHILD_JAVA_OPTS, opts.toString().trim()); launcherJobConf.set(HADOOP_MAP_JAVA_OPTS, opts.toString().trim()); // setting for uber mode if (launcherJobConf.getBoolean(HADOOP_YARN_UBER_MODE, false)) { if (checkPropertiesToDisableUber(launcherJobConf)) { launcherJobConf.setBoolean(HADOOP_YARN_UBER_MODE, false); } else { updateConfForUberMode(launcherJobConf); } } updateConfForJavaTmpDir(launcherJobConf); injectLauncherTimelineServiceEnabled(launcherJobConf, actionConf); // properties from action that are needed by the launcher (e.g. QUEUE NAME, ACLs) // maybe we should add queue to the WF schema, below job-tracker actionConfToLauncherConf(actionConf, launcherJobConf); return launcherJobConf; } catch (Exception ex) { throw convertException(ex); } }
From source file:org.apache.oozie.action.hadoop.LauncherMapperHelper.java
License:Apache License
public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
    String recoveryId, Configuration actionConf, String prepareXML) throws IOException, HadoopAccessorException {

  launcherConf.setMapperClass(LauncherMapper.class);
  launcherConf.setSpeculativeExecution(false);
  launcherConf.setNumMapTasks(1);
  launcherConf.setNumReduceTasks(0);

  launcherConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
  launcherConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);
  launcherConf.set(LauncherMapper.OOZIE_ACTION_DIR_PATH, actionDir.toString());
  launcherConf.set(LauncherMapper.OOZIE_ACTION_RECOVERY_ID, recoveryId);
  launcherConf.set(LauncherMapper.ACTION_PREPARE_XML, prepareXML);

  actionConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
  actionConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);

  if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", false)) {
    List<String> purgedEntries = new ArrayList<String>();
    Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
    for (String entry : entries) {
      if (entry.contains("#")) {
        purgedEntries.add(entry);
      }
    }
    actionConf.setStrings("mapreduce.job.cache.files", purgedEntries.toArray(new String[purgedEntries.size()]));
    launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
  }

  FileSystem fs = Services.get().get(HadoopAccessorService.class)
      .createFileSystem(launcherConf.get("user.name"), actionDir.toUri(), launcherConf);
  fs.mkdirs(actionDir);

  OutputStream os = fs.create(new Path(actionDir, LauncherMapper.ACTION_CONF_XML));
  try {
    actionConf.writeXml(os);
  } finally {
    IOUtils.closeSafely(os);
  }

  launcherConf.setInputFormat(OozieLauncherInputFormat.class);
  launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
}
From source file:org.apache.oozie.action.hadoop.PipesMain.java
License:Apache License
@Override
protected void addActionConf(JobConf jobConf, Configuration actionConf) {
  String value = actionConf.get("oozie.pipes.map");
  if (value != null) {
    jobConf.setBoolean("hadoop.pipes.java.mapper", true);
    jobConf.set("mapred.mapper.class", value);
  }
  value = actionConf.get("oozie.pipes.reduce");
  if (value != null) {
    jobConf.setBoolean("hadoop.pipes.java.reducer", true);
    jobConf.set("mapred.reducer.class", value);
  }
  value = actionConf.get("oozie.pipes.inputformat");
  if (value != null) {
    jobConf.setBoolean("hadoop.pipes.java.recordreader", true);
    jobConf.set("mapred.input.format.class", value);
  }
  value = actionConf.get("oozie.pipes.partitioner");
  if (value != null) {
    jobConf.set("mapred.partitioner.class", value);
  }
  value = actionConf.get("oozie.pipes.writer");
  if (value != null) {
    jobConf.setBoolean("hadoop.pipes.java.recordwriter", true);
    jobConf.set("mapred.output.format.class", value);
  }
  value = actionConf.get("oozie.pipes.program");
  if (value != null) {
    jobConf.set("hadoop.pipes.executable", value);
    if (value.contains("#")) {
      DistributedCache.createSymlink(jobConf);
    }
  }
  super.addActionConf(jobConf, actionConf);
}
From source file:org.apache.oozie.service.HadoopAccessorService.java
License:Apache License
/**
 * Creates a JobConf using the site configuration for the specified hostname:port.
 * <p>
 * If the specified hostname:port is not defined it falls back to the '*' site
 * configuration if available. If the '*' site configuration is not available,
 * the JobConf has all Hadoop defaults.
 *
 * @param hostPort hostname:port to lookup Hadoop site configuration.
 * @return a JobConf with the corresponding site configuration for hostPort.
 */
public JobConf createJobConf(String hostPort) {
  JobConf jobConf = new JobConf(getCachedConf());
  XConfiguration.copy(getConfiguration(hostPort), jobConf);
  jobConf.setBoolean(OOZIE_HADOOP_ACCESSOR_SERVICE_CREATED, true);
  return jobConf;
}
From source file:org.apache.parquet.hadoop.codec.CodecConfigTest.java
License:Apache License
public void shouldUseHadoopFlagToSetCodec(String codecClassStr, CompressionCodecName expectedCodec)
    throws IOException {
  // Test mapreduce API
  Job job = new Job();
  Configuration conf = job.getConfiguration();
  conf.setBoolean("mapred.output.compress", true);
  conf.set("mapred.output.compression.codec", codecClassStr);
  TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf,
      new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
  Assert.assertEquals(expectedCodec, CodecConfig.from(task).getCodec());

  // Test mapred API
  JobConf jobConf = new JobConf();
  jobConf.setBoolean("mapred.output.compress", true);
  jobConf.set("mapred.output.compression.codec", codecClassStr);
  Assert.assertEquals(CodecConfig.from(jobConf).getCodec(), expectedCodec);
}