Usage examples for org.apache.hadoop.mapred.JobConf.set
public void set(String name, String value)
Sets the value of the name property.
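For orientation, here is a minimal, self-contained sketch of the call before the collected examples below; the class name and the property used are illustrative only and do not come from any of the listed source files:

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // set(name, value) stores the value under the given property name,
        // overriding anything loaded from the *-site.xml configuration files
        conf.set("mapreduce.job.reduces", "4");
        // the value can be read back, with a fallback default, via get()
        System.out.println(conf.get("mapreduce.job.reduces", "1"));
    }
}

The examples that follow show the same call used to configure test miniclusters, tune job parameters, and pass application-specific properties to tasks.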
From source file:etl.cmd.test.XTestCase.java
License:Apache License
private JobConf createDFSConfig() throws UnknownHostException {
    JobConf conf = new JobConf();
    conf.set("dfs.block.access.token.enable", "false");
    conf.set("dfs.permissions", "true");
    conf.set("hadoop.security.authentication", "simple");

    // Doing this because Hadoop 1.x does not support '*' and
    // Hadoop 0.23.x does not process the wildcard if the value is '*,127.0.0.1'
    StringBuilder sb = new StringBuilder();
    sb.append("127.0.0.1,localhost");
    for (InetAddress i : InetAddress.getAllByName(InetAddress.getLocalHost().getHostName())) {
        sb.append(",").append(i.getCanonicalHostName());
    }
    conf.set("hadoop.proxyuser." + getOozieUser() + ".hosts", sb.toString());
    conf.set("hadoop.proxyuser." + getOozieUser() + ".groups", getTestGroup());

    conf.set("mapred.tasktracker.map.tasks.maximum", "4");
    conf.set("mapred.tasktracker.reduce.tasks.maximum", "4");

    conf.set("hadoop.tmp.dir", "target/test-data" + "/minicluster");

    // Scheduler properties required for YARN CapacityScheduler to work
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    // Required to prevent deadlocks with YARN CapacityScheduler
    conf.set("yarn.scheduler.capacity.maximum-am-resource-percent", "0.5");

    return conf;
}
From source file:etl.cmd.test.XTestCase.java
License:Apache License
/**
 * Returns a JobConf preconfigured to talk with the test cluster/minicluster.
 * @return a JobConf preconfigured to talk with the test cluster/minicluster.
 */
protected JobConf createJobConf() throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("mapreduce.jobtracker.address", getJobTrackerUri());
    jobConf.set("fs.defaultFS", getNameNodeUri());
    return jobConf;
}
From source file:eu.larkc.iris.Main.java
License:Apache License
private JobConf setupJob(Configuration conf) {
    JobConf jobConf = new JobConf(conf, Main.class);

    // run the job here.

    /* REAL CLUSTER */
    jobConf.set("dfs.blocksize", "536870912");
    jobConf.set("dfs.namenode.handler.count", "40");
    //jobConf.set("dfs.replication", "1");
    jobConf.set("mapreduce.reduce.shuffle.parallelcopies", "10");
    jobConf.set("mapreduce.task.io.sort.factor", "100");
    jobConf.set("mapreduce.task.io.sort.mb", "1024");
    jobConf.set("io.file.buffer.size", "131072");
    jobConf.set("mapred.child.java.opts", "-Xmx2560m");
    jobConf.set("mapred.child.ulimit", "4194304");
    jobConf.set("mapred.min.split.size", "536870912");
    jobConf.set("mapreduce.input.fileinputformat.split.minsize", "536870912");
    jobConf.set("mapreduce.reduce.merge.inmem.threshold", "0");

    /* compression settings
    jobConf.set("mapreduce.map.output.compress", "false");
    jobConf.set("mapreduce.output.fileoutputformat.compress", "true");
    jobConf.set("mapreduce.output.fileoutputformat.compression.type", "BLOCK");
    */

    // !!!IMPORTANT, if not set: Caused by: java.io.FileNotFoundException: File does not exist:
    // hdfs://ec2-50-19-191-200.compute-1.amazonaws.com:8020/user/root/lubm/facts/lubm50/data
    jobConf.setBoolean("mapred.input.dir.recursive", true);

    jobConf.set("cascading.serialization.tokens",
            "130=eu.larkc.iris.storage.IRIWritable,131=eu.larkc.iris.storage.StringTermWritable");
    defaultConfiguration.flowProperties.put("cascading.serialization.tokens",
            "130=eu.larkc.iris.storage.IRIWritable,131=eu.larkc.iris.storage.StringTermWritable");

    /*
    if (System.getProperty("log4j.logger") != null)
        defaultConfiguration.flowProperties.put("log4j.logger", System.getProperty("log4j.logger"));
    */

    //jobConf.set("mapred.min.split.size", "134217728");
    //jobConf.set("mapred.child.java.opts", "-Xms64m -Xmx512m");

    jobConf.setMapSpeculativeExecution(false);
    jobConf.setReduceSpeculativeExecution(false);

    //FIXME
    //jobConf.setNumMapTasks(8);
    jobConf.setNumReduceTasks(32);

    FlowConnector.setDebugLevel(defaultConfiguration.flowProperties, DebugLevel.VERBOSE);
    MultiMapReducePlanner.setJobConf(defaultConfiguration.flowProperties, jobConf);
    //Flow.setJobPollingInterval(defaultConfiguration.flowProperties, 500);

    return jobConf;
}
From source file:eu.larkc.iris.storage.FactsTap.java
License:Apache License
@Override
public void sourceInit(JobConf jobConf) throws IOException {
    // a hack for MultiInputFormat to see that there is a child format
    FileInputFormat.setInputPaths(jobConf, getPath());

    jobConf.set(IFactsConfiguration.FACTS_CONFIGURATION_CLASS, factsConfigurationClass);
    if (isSource()) {
        StringBuilder sb = new StringBuilder();
        if (atom != null) {
            sb.append(atom.getPredicate().getPredicateSymbol());
        }
        if (predicates != null && predicates.length > 0) {
            for (IPredicate predicate : predicates) {
                if (sb.length() > 0) {
                    sb.append(",");
                }
                sb.append(predicate.getPredicateSymbol());
            }
        }
        jobConf.set(IFactsConfiguration.PREDICATE_FILTER, sb.toString());
    }
    IFactsConfiguration factsConfiguration = FactsConfigurationFactory.getFactsConfiguration(jobConf);
    factsConfiguration.setSourceStorageId(storageId);
    //RdfFactsConfiguration.configure(conf, rdf2GoImpl, serverURL, repositoryID);
    super.sourceInit(jobConf);
}
From source file:eu.larkc.iris.storage.FactsTap.java
License:Apache License
@Override
public void sinkInit(JobConf jobConf) throws IOException {
    if (!isSink())
        return;
    jobConf.set(IFactsConfiguration.FACTS_CONFIGURATION_CLASS, factsConfigurationClass);
    IFactsConfiguration factsConfiguration = FactsConfigurationFactory.getFactsConfiguration(jobConf);
    factsConfiguration.setSinkStorageId(storageId);
    //RdfFactsConfiguration.configure(conf, rdf2GoImpl, serverURL, repositoryID);
    super.sinkInit(jobConf);
}
From source file:eu.scape_project.tb.chutney.ChutneyDriver.java
License:Apache License
/**
 * This method sets up and runs the job on Hadoop
 * @param args The passed through command line arguments
 */
public int run(String[] args) {
    CommandLineParser parser = new PosixParser();
    Options options = new Options();
    options.addOption("n", "jobname", true, "name to assign to the hadoop job");
    options.addOption("i", "inputlist", true,
            "text file containing list of input files (ensure no trailing carriage returns)");
    options.addOption("t", "jobtype", true,
            "type of job; CLJ (command line job), TSJ (Taverna Server job), TCL (Taverna command line job), "
                    + "XML (XML defined command line job), XWR (XML workflow report)");
    options.addOption("x", "xmlcode", true, "xml definition of job to run for XML jobs");
    options.addOption("h", "help", false, "help text");

    JobConf conf = new JobConf(ChutneyDriver.class);

    String input = null;
    String xmlcode = null;

    CommandLine com;
    try {
        com = parser.parse(options, args);
        if (com.hasOption("help")) {
            throw (new ParseException(""));
        }

        String jobName = Settings.JOB_NAME + "default";
        if (com.hasOption("jobname")) {
            //set the job name to something better than the default
            jobName = Settings.JOB_NAME + com.getOptionValue("jobname");
        }
        conf.setJobName(jobName);

        JobType jobType = JobType.CommandLineJob;
        if (com.hasOption("jobtype")) {
            String value = com.getOptionValue("jobtype").toUpperCase();
            if (value.equals(CommandLineJob.getShortJobType())) {
                jobType = CommandLineJob.getJobType();
            } else if (value.equals(TavernaCommandLineJob.getShortJobType())) {
                jobType = TavernaCommandLineJob.getJobType();
            } else if (value.equals(TavernaServerJob.getShortJobType())) {
                jobType = TavernaServerJob.getJobType();
            } else if (value.equals(XMLCommandLineJob.getShortJobType())) {
                jobType = XMLCommandLineJob.getJobType();
            } else if (value.equals(XMLWorkflowReport.getShortJobType())) {
                jobType = XMLWorkflowReport.getJobType();
            }
        }
        System.out.println("JobType: " + jobType.toString());
        conf.set(Settings.JOBTYPE_CONF_SETTING, jobType.toString());

        if (com.hasOption("xmlcode")) {
            //jobType == JobType.XMLCommandLineJob
            xmlcode = com.getOptionValue("xmlcode");
            //if it is a local file get the full path
            if (new File(xmlcode).exists())
                xmlcode = new File(xmlcode).getAbsolutePath();
            conf.set(Settings.XMLCODE_CONF_SETTING, xmlcode);
        }
        if ((jobType == JobType.XMLCommandLineJob) & (xmlcode == null)) {
            //i.e. no code specified
            System.out.println("No XML code specified on the command line");
            return -1;
        }

        if (com.hasOption("inputlist")) {
            input = com.getOptionValue("inputlist");
        }
        if (input.equals(null)) {
            System.out.println("no input given");
            return -2;
        }
    } catch (ParseException e) {
        HelpFormatter help = new HelpFormatter();
        help.printHelp("hadoop jar TavernaHadoopWrapper.jar", options);
        return -1;
    }

    //using matchbox it may take a while to process the jobs
    //set a longer timeout than the default (10 mins)
    //six hours should be more than enough :/ MMM*SS*MS
    //QAJob testing for 9 tests on ANJO files can take ~4.5hrs+
    conf.set("mapred.task.timeout", Integer.toString(360 * 60 * 1000));

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(conf.getJobName()));

    //set the mapper to this class' mapper
    conf.setMapperClass(Chutney.class);
    //we don't want to reduce
    //conf.setReducerClass(Reducer.class);

    //this input format should split the input by one line per map by default
    conf.setInputFormat(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);

    //sets how the output is written, cf. OutputFormat
    //we can use NullOutputFormat if we are writing our own output
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    //this sets maximum jvm reuse
    conf.set("mapred.job.reuse.jvm.num.tasks", "-1");

    //we only want one reduce task
    conf.setNumReduceTasks(1);

    try {
        JobClient.runJob(conf);
    } catch (IOException ioe) {
        ioe.printStackTrace();
        return -1;
    }

    return 0;
}
From source file:eu.stratosphere.hadoopcompatibility.HadoopConfiguration.java
License:Apache License
public static void mergeHadoopConf(JobConf jobConf) {
    org.apache.hadoop.conf.Configuration hadoopConf = DistributedFileSystem.getHadoopConfiguration();
    for (Map.Entry<String, String> e : hadoopConf) {
        jobConf.set(e.getKey(), e.getValue());
    }
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.utils.HadoopUtils.java
License:Apache License
/**
 * Merge HadoopConfiguration into JobConf. This is necessary for the HDFS configuration.
 */
public static void mergeHadoopConf(JobConf jobConf) {
    org.apache.hadoop.conf.Configuration hadoopConf = DistributedFileSystem.getHadoopConfiguration();
    for (Map.Entry<String, String> e : hadoopConf) {
        jobConf.set(e.getKey(), e.getValue());
    }
}
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static void setDGenInstallDir(JobConf conf, String dgenNodePath) {
    conf.set("mapred.myriad.dgen.node.path", dgenNodePath);
}
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static void setStage(JobConf conf, String stage) {
    conf.set("mapred.myriad.dgen.stage", stage);
}