List of usage examples for org.apache.hadoop.mapred JobConf addResource
public void addResource(String name)
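addResource is inherited from org.apache.hadoop.conf.Configuration and is overloaded to accept a classpath resource name, a filesystem Path, or a URL; all three forms appear in the examples below. The following is a minimal sketch of those overloads, not taken from any of the source files; the configuration file locations are placeholders.

import java.io.File;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class AddResourceSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        // Overload listed above: resolve the resource by name on the classpath
        conf.addResource("core-site.xml");
        // Overload taking a Path: load a specific file from the filesystem
        conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));
        // Overload taking a URL
        conf.addResource(new File("/etc/hadoop/conf/mapred-site.xml").toURI().toURL());
        // Values from later resources override earlier ones (unless marked final)
        System.out.println(conf.get("fs.defaultFS"));
    }
}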
From source file:com.cloudera.recordservice.tests.ClusterController.java
License:Apache License
/**
 * This method populates a JobConf with the information in the HadoopConfDir.
 */
public JobConf populateJobConf(JobConf conf) throws MalformedURLException {
    File[] files = new File(clusterConfiguration_.getHadoopConfDir()).listFiles();
    for (File file : files) {
        if (file.getName().endsWith(".xml")) {
            conf.addResource(file.getAbsoluteFile().toURI().toURL());
        }
    }
    String[] bs = clusterConfiguration_.getHadoopConfDir().split("/");
    String newPath = "/";
    for (int i = 0; i < bs.length - 1; i++) {
        newPath += bs[i] + "/";
    }
    newPath += "recordservice-conf/recordservice-site.xml";
    conf.addResource(new File(newPath).getAbsoluteFile().toURI().toURL());
    return conf;
}
From source file:com.example.hadoop.hdfs.test.HdfsClient.java
License:Open Source License
public static JobConf config() {
    JobConf conf = new JobConf(HdfsClient.class);
    conf.setJobName("HdfsClient");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");
    return conf;
}
From source file:com.example.hadoop.mapreduce.test.MapReduceTest.java
License:Open Source License
public static void main(String[] args) throws IOException {
    String input = HDFS_PATH + "/input/README.txt";
    String input2 = HDFS_PATH + "/input/README2.txt";
    String output = HDFS_PATH + "/test/output";
    // Remove the output directory if it already exists, otherwise the job will fail
    if (HdfsClient.exists(output)) {
        HdfsClient.rm(output);
    }
    JobConf conf = new JobConf(MapReduceTest.class);
    conf.setJobName("MapReduceTest");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");
    // Mapper output types
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    // Reducer output types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    // Mapper
    conf.setMapperClass(MapperTest.class);
    // Combiner: runs on mapper output before it reaches the reducer
    conf.setCombinerClass(ReducerTest.class);
    // Reducer
    conf.setReducerClass(ReducerTest.class);
    // Input/output formats
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // Input and output paths
    FileInputFormat.setInputPaths(conf, new Path[] { new Path(input), new Path(input2) });
    FileOutputFormat.setOutputPath(conf, new Path(output));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezExecutionEngine.java
License:Apache License
@SuppressWarnings("deprecation") private void init(Properties properties) throws ExecException { //First set the ssh socket factory setSSHFactory();//from ww w . jav a 2s. c o m String cluster = null; String nameNode = null; // We need to build a configuration object first in the manner described below // and then get back a properties object to inspect the JOB_TRACKER_LOCATION // and FILE_SYSTEM_LOCATION. The reason to do this is if we looked only at // the existing properties object, we may not get the right settings. So we want // to read the configurations in the order specified below and only then look // for JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. // Hadoop by default specifies two resources, loaded in-order from the classpath: // 1. hadoop-default.xml : Read-only defaults for hadoop. // 2. hadoop-site.xml: Site-specific configuration for a given hadoop installation. // Now add the settings from "properties" object to override any existing properties // All of the above is accomplished in the method call below JobConf jc = null; if (this.pigContext.getExecType() == ExecType.TEZ) { // Check existence of user provided configs String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs"); if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) { jc = new JobConf(ConfigurationUtil.toConfiguration(properties)); } else { // Check existence of hadoop-site.xml or core-site.xml in classpath // if user provided confs are not being used Configuration testConf = new Configuration(); ClassLoader cl = testConf.getClassLoader(); URL hadoop_site = cl.getResource(HADOOP_SITE); URL core_site = cl.getResource(CORE_SITE); if (hadoop_site == null && core_site == null) { throw new ExecException( "Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)." 
+ " If you plan to use local mode, please put -x local option in command line", 4010); } jc = new JobConf(); } jc.addResource("pig-cluster-hadoop-site.xml"); jc.addResource(YARN_SITE); /* // Trick to invoke static initializer of DistributedFileSystem to add hdfs-default.xml // into configuration new DistributedFileSystem(); */ //the method below alters the properties object by overriding the //hadoop properties with the values from properties and recomputing //the properties recomputeProperties(jc, properties); } else { // If we are running in local mode we dont read the hadoop conf file if (properties.getProperty("mapreduce.framework.name") == null) { properties.setProperty("mapreduce.framework.name", "local"); } properties.setProperty(JOB_TRACKER_LOCATION, LOCAL); properties.setProperty(FILE_SYSTEM_LOCATION, "file:///"); properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///"); jc = new JobConf(false); jc.addResource("core-default.xml"); jc.addResource("mapred-default.xml"); jc.addResource("yarn-default.xml"); recomputeProperties(jc, properties); } cluster = jc.get(JOB_TRACKER_LOCATION); nameNode = jc.get(FILE_SYSTEM_LOCATION); if (nameNode == null) nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION); if (cluster != null && cluster.length() > 0) { if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) { cluster = cluster + ":50020"; } properties.setProperty(JOB_TRACKER_LOCATION, cluster); } if (nameNode != null && nameNode.length() > 0) { if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) { nameNode = nameNode + ":8020"; } properties.setProperty(FILE_SYSTEM_LOCATION, nameNode); } log.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode)); // constructor sets DEFAULT_REPLICATION_FACTOR_KEY ds = new HDataStorage(properties); if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) { log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION)); } // Set job-specific configuration knobs jobConf = jc; }
From source file:com.uber.hoodie.common.HoodieMergeOnReadTestUtils.java
License:Apache License
private static void setPropsForInputFormat(HoodieRealtimeInputFormat inputFormat, JobConf jobConf,
        Schema schema, String basePath) {
    List<Schema.Field> fields = schema.getFields();
    String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(","));
    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
    Configuration conf = HoodieTestUtils.getDefaultHadoopConf();

    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    jobConf.set("partition_columns", "datestr");
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    conf.set("partition_columns", "datestr");

    inputFormat.setConf(conf);
    jobConf.addResource(conf);
}
From source file:dataload.LogFetchJobTracker.java
License:Apache License
/**
 * Sets up a link between this class and the JobTracker through the XML configuration
 * files, and creates the connection to the database.
 * @param url
 * @param user
 * @param pwd
 * @param coreSiteXml
 * @param hdfsSiteXml
 * @param mapredSiteXml
 */
public void setupClientAndDatabase(String url, String user, String pwd, String coreSiteXml,
        String hdfsSiteXml, String mapredSiteXml) {
    try {
        JobConf conf = new JobConf();
        conf.addResource(new Path(coreSiteXml));
        conf.addResource(new Path(hdfsSiteXml));
        conf.addResource(new Path(mapredSiteXml));
        client = new JobClient(conf);
        connection = DriverManager.getConnection(url, user, pwd);
    } catch (SQLException e) {
        e.printStackTrace();
        System.err.println("Error: " + e.getMessage());
    } catch (IOException e) {
        e.printStackTrace();
        System.err.println("Error: " + e.getMessage());
    }
}
From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java
License:Apache License
private JobConf loadJobConfiguration(RunningJob job) throws InternalException {
    // Try the normal job file first
    try {
        JobConf conf = new JobConf();
        Path jobFile = new Path(job.getJobFile());
        FileSystem fs = jobFile.getFileSystem(new Configuration());
        conf.addResource(fs.open(jobFile));
        return conf;
    } catch (IOException ex) {
    } catch (IllegalArgumentException ex) {
    }

    // Hadoop 0.20 only
    return new JobConf(org.apache.hadoop.mapred.JobTracker.getLocalJobFilePath(job.getID()));

    /*
    // Try to retrieve configuration from history
    // Hadoop 0.21 only!
    try {
        Method m = JobTracker.class.getMethod("getLocalJobFilePath", JobID.class);
        String jobFile = m.invoke(null, job.getID());
        return new JobConf(jobFile);
    } catch (NoSuchMethodException ex) {
    } catch (SecurityException ex) {
    }

    // Try to retrieve configuration from history (0.21 only)
    try {
        Method getHistoryUrl = job.getClass().getMethod("getHistoryUrl");
        Path historyPath = new Path(getHistoryUrl.invoke(job));
        Path historyDir = historyPath.getParent();
        Class jobHistoryClass = Class.forName("org.apache.hadoop.mapreduce.jobhistory.JobHistory");
        Method getConfFile = jobHistoryClass.getMethod("getConfFile", Path.class, JobID.class);
        Path jobFile = getConfFile.invoke(null, historyDir, job.getID());
        return new JobConf(jobFile);
    } catch (IOException ex) {
    } catch (IllegalArgumentException ex) {
        // Thrown for empty string in Path
        // This should only be temporary
    }

    return null;
    */
}
From source file:edu.uci.ics.hyracks.imru.dataflow.Hdtest.java
License:Apache License
public static JobSpecification createJob() throws Exception {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(4096);

    String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf";
    String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0";

    JobConf conf = new JobConf();
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    conf.setInputFormat(TextInputFormat.class);

    RecordDescriptor recordDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits,
            new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() {
                @Override
                public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
                    return new IKeyValueParser<LongWritable, Text>() {
                        TupleWriter tupleWriter;

                        @Override
                        public void open(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter = new TupleWriter(ctx, writer, 1);
                        }

                        @Override
                        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                                throws HyracksDataException {
                            try {
                                tupleWriter.write(value.getBytes(), 0, value.getLength());
                                tupleWriter.finishField();
                                tupleWriter.finishTuple();
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                        }

                        @Override
                        public void close(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter.close();
                        }
                    };
                }
            });
    // createPartitionConstraint(spec, readOperator, new String[] {"NC0"});
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" });

    IOperatorDescriptor writer = new HDFSOD(spec, null, null, null);
    // createPartitionConstraint(spec, writer, outSplits);
    spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
From source file:edu.uci.ics.hyracks.imru.jobgen.IMRUJobFactory.java
License:Apache License
public JobConf getConf() throws IOException {
    JobConf conf = new JobConf();
    conf.addResource(new Path(confFactory.hadoopConfPath + "/core-site.xml"));
    conf.addResource(new Path(confFactory.hadoopConfPath + "/mapred-site.xml"));
    conf.addResource(new Path(confFactory.hadoopConfPath + "/hdfs-site.xml"));
    return conf;
}
From source file:graphbuilding.GenomixDriver.java
License:Apache License
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
        throws IOException {
    JobConf conf = new JobConf(GenomixDriver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }

    conf.setJobName("Genomix Graph Building");
    conf.setMapperClass(GenomixMapper.class);
    conf.setReducerClass(GenomixReducer.class);
    conf.setCombinerClass(GenomixCombiner.class);

    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(KmerCountValue.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Kmer.class);
    conf.setOutputValueClass(KmerCountValue.class);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setNumReduceTasks(numReducers);

    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
}