Usage examples for org.apache.hadoop.mapred.JobConf.set
public void set(String name, String value)
Sets the value of the name property.
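For orientation, here is a minimal, self-contained sketch of the call before the collected examples below; the class name and the property used are illustrative only and do not come from any of the listed source files:

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // set(name, value) stores the value under the given property name,
        // overriding anything loaded from the *-site.xml configuration files
        conf.set("mapreduce.job.reduces", "4");
        // the value can be read back, with a fallback default, via get()
        System.out.println(conf.get("mapreduce.job.reduces", "1"));
    }
}

The examples that follow show the same call used to configure test miniclusters, tune job parameters, and pass application-specific properties to tasks.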
From source file:etl.cmd.test.XTestCase.java
License:Apache License
private JobConf createDFSConfig() throws UnknownHostException {
    JobConf conf = new JobConf();
    conf.set("dfs.block.access.token.enable", "false");
    conf.set("dfs.permissions", "true");
    conf.set("hadoop.security.authentication", "simple");

    // Doing this because Hadoop 1.x does not support '*' and
    // Hadoop 0.23.x does not process the wildcard if the value is '*,127.0.0.1'
    StringBuilder sb = new StringBuilder();
    sb.append("127.0.0.1,localhost");
    for (InetAddress i : InetAddress.getAllByName(InetAddress.getLocalHost().getHostName())) {
        sb.append(",").append(i.getCanonicalHostName());
    }
    conf.set("hadoop.proxyuser." + getOozieUser() + ".hosts", sb.toString());
    conf.set("hadoop.proxyuser." + getOozieUser() + ".groups", getTestGroup());

    conf.set("mapred.tasktracker.map.tasks.maximum", "4");
    conf.set("mapred.tasktracker.reduce.tasks.maximum", "4");

    conf.set("hadoop.tmp.dir", "target/test-data" + "/minicluster");

    // Scheduler properties required for YARN CapacityScheduler to work
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    // Required to prevent deadlocks with YARN CapacityScheduler
    conf.set("yarn.scheduler.capacity.maximum-am-resource-percent", "0.5");

    return conf;
}
From source file:etl.cmd.test.XTestCase.java
License:Apache License
/**
 * Returns a JobConf preconfigured to talk with the test cluster/minicluster.
 * @return a JobConf preconfigured to talk with the test cluster/minicluster.
 */
protected JobConf createJobConf() throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("mapreduce.jobtracker.address", getJobTrackerUri());
    jobConf.set("fs.defaultFS", getNameNodeUri());
    return jobConf;
}
From source file:eu.larkc.iris.Main.java
License:Apache License
private JobConf setupJob(Configuration conf) {
    JobConf jobConf = new JobConf(conf, Main.class);

    // run the job here.

    /* REAL CLUSTER */
    jobConf.set("dfs.blocksize", "536870912");
    jobConf.set("dfs.namenode.handler.count", "40");
    //jobConf.set("dfs.replication", "1");
    jobConf.set("mapreduce.reduce.shuffle.parallelcopies", "10");
    jobConf.set("mapreduce.task.io.sort.factor", "100");
    jobConf.set("mapreduce.task.io.sort.mb", "1024");
    jobConf.set("io.file.buffer.size", "131072");
    jobConf.set("mapred.child.java.opts", "-Xmx2560m");
    jobConf.set("mapred.child.ulimit", "4194304");
    jobConf.set("mapred.min.split.size", "536870912");
    jobConf.set("mapreduce.input.fileinputformat.split.minsize", "536870912");
    jobConf.set("mapreduce.reduce.merge.inmem.threshold", "0");

    /* compression settings
    jobConf.set("mapreduce.map.output.compress", "false");
    jobConf.set("mapreduce.output.fileoutputformat.compress", "true");
    jobConf.set("mapreduce.output.fileoutputformat.compression.type", "BLOCK");
    */

    // !!!IMPORTANT, if not set: Caused by: java.io.FileNotFoundException: File does not exist:
    // hdfs://ec2-50-19-191-200.compute-1.amazonaws.com:8020/user/root/lubm/facts/lubm50/data
    jobConf.setBoolean("mapred.input.dir.recursive", true);

    jobConf.set("cascading.serialization.tokens",
            "130=eu.larkc.iris.storage.IRIWritable,131=eu.larkc.iris.storage.StringTermWritable");
    defaultConfiguration.flowProperties.put("cascading.serialization.tokens",
            "130=eu.larkc.iris.storage.IRIWritable,131=eu.larkc.iris.storage.StringTermWritable");

    /*
    if (System.getProperty("log4j.logger") != null)
        defaultConfiguration.flowProperties.put("log4j.logger", System.getProperty("log4j.logger"));
    */

    //jobConf.set("mapred.min.split.size", "134217728");
    //jobConf.set("mapred.child.java.opts", "-Xms64m -Xmx512m");

    jobConf.setMapSpeculativeExecution(false);
    jobConf.setReduceSpeculativeExecution(false);

    //FIXME
    //jobConf.setNumMapTasks(8);
    jobConf.setNumReduceTasks(32);

    FlowConnector.setDebugLevel(defaultConfiguration.flowProperties, DebugLevel.VERBOSE);
    MultiMapReducePlanner.setJobConf(defaultConfiguration.flowProperties, jobConf);
    //Flow.setJobPollingInterval(defaultConfiguration.flowProperties, 500);

    return jobConf;
}
From source file:eu.larkc.iris.storage.FactsTap.java
License:Apache License
@Override
public void sourceInit(JobConf jobConf) throws IOException {
    // a hack for MultiInputFormat to see that there is a child format
    FileInputFormat.setInputPaths(jobConf, getPath());

    jobConf.set(IFactsConfiguration.FACTS_CONFIGURATION_CLASS, factsConfigurationClass);
    if (isSource()) {
        StringBuilder sb = new StringBuilder();
        if (atom != null) {
            sb.append(atom.getPredicate().getPredicateSymbol());
        }
        if (predicates != null && predicates.length > 0) {
            for (IPredicate predicate : predicates) {
                if (sb.length() > 0) {
                    sb.append(",");
                }
                sb.append(predicate.getPredicateSymbol());
            }
        }
        jobConf.set(IFactsConfiguration.PREDICATE_FILTER, sb.toString());
    }
    IFactsConfiguration factsConfiguration = FactsConfigurationFactory.getFactsConfiguration(jobConf);
    factsConfiguration.setSourceStorageId(storageId);
    //RdfFactsConfiguration.configure(conf, rdf2GoImpl, serverURL, repositoryID);
    super.sourceInit(jobConf);
}
From source file:eu.larkc.iris.storage.FactsTap.java
License:Apache License
@Override
public void sinkInit(JobConf jobConf) throws IOException {
    if (!isSink())
        return;
    jobConf.set(IFactsConfiguration.FACTS_CONFIGURATION_CLASS, factsConfigurationClass);
    IFactsConfiguration factsConfiguration = FactsConfigurationFactory.getFactsConfiguration(jobConf);
    factsConfiguration.setSinkStorageId(storageId);
    //RdfFactsConfiguration.configure(conf, rdf2GoImpl, serverURL, repositoryID);
    super.sinkInit(jobConf);
}
From source file:eu.scape_project.tb.chutney.ChutneyDriver.java
License:Apache License
/**
 * This method sets up and runs the job on Hadoop
 * @param args The passed through command line arguments
 */
public int run(String[] args) {
    CommandLineParser parser = new PosixParser();
    Options options = new Options();
    options.addOption("n", "jobname", true, "name to assign to the hadoop job");
    options.addOption("i", "inputlist", true,
            "text file containing list of input files (ensure no trailing carriage returns)");
    options.addOption("t", "jobtype", true,
            "type of job; CLJ (command line job), TSJ (Taverna Server job), TCL (Taverna command line job), "
                    + "XML (XML defined command line job), XWR (XML workflow report)");
    options.addOption("x", "xmlcode", true, "xml definition of job to run for XML jobs");
    options.addOption("h", "help", false, "help text");

    JobConf conf = new JobConf(ChutneyDriver.class);

    String input = null;
    String xmlcode = null;

    CommandLine com;
    try {
        com = parser.parse(options, args);
        if (com.hasOption("help")) {
            throw (new ParseException(""));
        }

        String jobName = Settings.JOB_NAME + "default";
        if (com.hasOption("jobname")) {
            //set the job name to something better than the default
            jobName = Settings.JOB_NAME + com.getOptionValue("jobname");
        }
        conf.setJobName(jobName);

        JobType jobType = JobType.CommandLineJob;
        if (com.hasOption("jobtype")) {
            String value = com.getOptionValue("jobtype").toUpperCase();
            if (value.equals(CommandLineJob.getShortJobType())) {
                jobType = CommandLineJob.getJobType();
            } else if (value.equals(TavernaCommandLineJob.getShortJobType())) {
                jobType = TavernaCommandLineJob.getJobType();
            } else if (value.equals(TavernaServerJob.getShortJobType())) {
                jobType = TavernaServerJob.getJobType();
            } else if (value.equals(XMLCommandLineJob.getShortJobType())) {
                jobType = XMLCommandLineJob.getJobType();
            } else if (value.equals(XMLWorkflowReport.getShortJobType())) {
                jobType = XMLWorkflowReport.getJobType();
            }
        }
        System.out.println("JobType: " + jobType.toString());
        conf.set(Settings.JOBTYPE_CONF_SETTING, jobType.toString());

        if (com.hasOption("xmlcode")) {
            //jobType == JobType.XMLCommandLineJob
            xmlcode = com.getOptionValue("xmlcode");
            //if it is a local file get the full path
            if (new File(xmlcode).exists())
                xmlcode = new File(xmlcode).getAbsolutePath();
            conf.set(Settings.XMLCODE_CONF_SETTING, xmlcode);
        }
        if ((jobType == JobType.XMLCommandLineJob) & (xmlcode == null)) {
            //i.e. no code specified
            System.out.println("No XML code specified on the command line");
            return -1;
        }

        if (com.hasOption("inputlist")) {
            input = com.getOptionValue("inputlist");
        }
        if (input.equals(null)) {
            System.out.println("no input given");
            return -2;
        }
    } catch (ParseException e) {
        HelpFormatter help = new HelpFormatter();
        help.printHelp("hadoop jar TavernaHadoopWrapper.jar", options);
        return -1;
    }

    //using matchbox it may take a while to process the jobs
    //set a longer timeout than the default (10 mins)
    //six hours should be more than enough :/ MMM*SS*MS
    //QAJob testing for 9 tests on ANJO files can take ~4.5hrs+
    conf.set("mapred.task.timeout", Integer.toString(360 * 60 * 1000));

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(conf.getJobName()));

    //set the mapper to this class' mapper
    conf.setMapperClass(Chutney.class);
    //we don't want to reduce
    //conf.setReducerClass(Reducer.class);

    //this input format should split the input by one line per map by default
    conf.setInputFormat(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);

    //sets how the output is written, cf. OutputFormat
    //we can use NullOutputFormat if we are writing our own output
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    //this sets maximum jvm reuse
    conf.set("mapred.job.reuse.jvm.num.tasks", "-1");

    //we only want one reduce task
    conf.setNumReduceTasks(1);

    try {
        JobClient.runJob(conf);
    } catch (IOException ioe) {
        ioe.printStackTrace();
        return -1;
    }

    return 0;
}
From source file:eu.stratosphere.hadoopcompatibility.HadoopConfiguration.java
License:Apache License
public static void mergeHadoopConf(JobConf jobConf) {
    org.apache.hadoop.conf.Configuration hadoopConf = DistributedFileSystem.getHadoopConfiguration();
    for (Map.Entry<String, String> e : hadoopConf) {
        jobConf.set(e.getKey(), e.getValue());
    }
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.utils.HadoopUtils.java
License:Apache License
/**
 * Merge HadoopConfiguration into JobConf. This is necessary for the HDFS configuration.
 */
public static void mergeHadoopConf(JobConf jobConf) {
    org.apache.hadoop.conf.Configuration hadoopConf = DistributedFileSystem.getHadoopConfiguration();
    for (Map.Entry<String, String> e : hadoopConf) {
        jobConf.set(e.getKey(), e.getValue());
    }
}
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static void setDGenInstallDir(JobConf conf, String dgenNodePath) {
    conf.set("mapred.myriad.dgen.node.path", dgenNodePath);
}
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static void setStage(JobConf conf, String stage) {
    conf.set("mapred.myriad.dgen.stage", stage);
}