Example usage for org.apache.hadoop.mapred JobConf addResource

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf#addResource, collected from open-source projects.

Prototype

public void addResource(String name) 

Document

Add a configuration resource. Besides the addResource(String name) form shown in the prototype, the examples below also call the addResource(Path), addResource(URL), addResource(InputStream), and addResource(Configuration) overloads that JobConf inherits from org.apache.hadoop.conf.Configuration.
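
Before the project examples, here is a minimal, self-contained sketch of the basic pattern. The resource name and file path used in it are illustrative assumptions rather than values taken from any of the examples; resources added later override values loaded by earlier ones.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class AddResourceSketch {
    public static JobConf buildConf() {
        JobConf conf = new JobConf();
        // String overload from the prototype above: the name is resolved on the classpath
        conf.addResource("my-cluster-site.xml");
        // Path overload: the file is read directly from the local filesystem
        conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
        return conf;
    }
}

The project examples below follow the same pattern, differing mainly in whether they resolve configuration files through the classpath, a Path, a URL, an InputStream, or another Configuration object.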

Usage

From source file:com.cloudera.recordservice.tests.ClusterController.java

License:Apache License

/**
 * This method populates a JobConf with the information in the HadoopConfDir
 */
public JobConf populateJobConf(JobConf conf) throws MalformedURLException {
    File[] files = new File(clusterConfiguration_.getHadoopConfDir()).listFiles();
    for (File file : files) {
        if (file.getName().endsWith(".xml")) {
            conf.addResource(file.getAbsoluteFile().toURI().toURL());
        }
    }
    String[] bs = clusterConfiguration_.getHadoopConfDir().split("/");
    String newPath = "/";
    for (int i = 0; i < bs.length - 1; i++) {
        newPath += bs[i] + "/";
    }
    newPath += "recordservice-conf/recordservice-site.xml";
    conf.addResource(new File(newPath).getAbsoluteFile().toURI().toURL());
    return conf;
}

From source file:com.example.hadoop.hdfs.test.HdfsClient.java

License:Open Source License

public static JobConf config() {
    JobConf conf = new JobConf(HdfsClient.class);
    conf.setJobName("HdfsClient");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");
    return conf;
}

From source file:com.example.hadoop.mapreduce.test.MapReduceTest.java

License:Open Source License

public static void main(String[] args) throws IOException {
    String input = HDFS_PATH + "/input/README.txt";
    String input2 = HDFS_PATH + "/input/README2.txt";
    String output = HDFS_PATH + "/test/output";

    // delete the output directory before running the MapReduce job if it already exists
    if (HdfsClient.exists(output)) {
        HdfsClient.rm(output);
    }

    JobConf conf = new JobConf(MapReduceTest.class);
    conf.setJobName("MapReduceTest");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");

    // mapper
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    // reducer
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // mapper
    conf.setMapperClass(MapperTest.class);
    // the combiner runs locally on the mapper output before it is sent to the reducer
    conf.setCombinerClass(ReducerTest.class);
    // reducer
    conf.setReducerClass(ReducerTest.class);

    // MapReduce input and output formats
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // MapReduce input paths
    FileInputFormat.setInputPaths(conf, new Path[] { new Path(input), new Path(input2) });
    // MapReduce output path
    FileOutputFormat.setOutputPath(conf, new Path(output));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezExecutionEngine.java

License:Apache License

@SuppressWarnings("deprecation")
private void init(Properties properties) throws ExecException {
    //First set the ssh socket factory
    setSSHFactory();

    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described below
    // and then get back a properties object to inspect the JOB_TRACKER_LOCATION
    // and FILE_SYSTEM_LOCATION. The reason to do this is if we looked only at
    // the existing properties object, we may not get the right settings. So we want
    // to read the configurations in the order specified below and only then look
    // for JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION.

    // Hadoop by default specifies two resources, loaded in-order from the classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml: Site-specific configuration for a given hadoop installation.
    // Now add the settings from "properties" object to override any existing properties
    // All of the above is accomplished in the method call below

    JobConf jc = null;
    if (this.pigContext.getExecType() == ExecType.TEZ) {
        // Check existence of user provided configs
        String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs");
        if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) {
            jc = new JobConf(ConfigurationUtil.toConfiguration(properties));
        } else {
            // Check existence of hadoop-site.xml or core-site.xml in classpath
            // if user provided confs are not being used
            Configuration testConf = new Configuration();
            ClassLoader cl = testConf.getClassLoader();
            URL hadoop_site = cl.getResource(HADOOP_SITE);
            URL core_site = cl.getResource(CORE_SITE);

            if (hadoop_site == null && core_site == null) {
                throw new ExecException(
                        "Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)."
                                + " If you plan to use local mode, please put -x local option in command line",
                        4010);
            }
            jc = new JobConf();
        }
        jc.addResource("pig-cluster-hadoop-site.xml");
        jc.addResource(YARN_SITE);

        /*
        // Trick to invoke static initializer of DistributedFileSystem to add hdfs-default.xml 
        // into configuration
        new DistributedFileSystem();
        */
        //the method below alters the properties object by overriding the
        //hadoop properties with the values from properties and recomputing
        //the properties
        recomputeProperties(jc, properties);
    } else {
        // If we are running in local mode we don't read the hadoop conf file
        if (properties.getProperty("mapreduce.framework.name") == null) {
            properties.setProperty("mapreduce.framework.name", "local");
        }
        properties.setProperty(JOB_TRACKER_LOCATION, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");

        jc = new JobConf(false);
        jc.addResource("core-default.xml");
        jc.addResource("mapred-default.xml");
        jc.addResource("yarn-default.xml");
        recomputeProperties(jc, properties);
    }

    cluster = jc.get(JOB_TRACKER_LOCATION);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null)
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(JOB_TRACKER_LOCATION, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    log.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode));
    // constructor sets DEFAULT_REPLICATION_FACTOR_KEY
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION));
    }

    // Set job-specific configuration knobs
    jobConf = jc;
}

From source file:com.uber.hoodie.common.HoodieMergeOnReadTestUtils.java

License:Apache License

private static void setPropsForInputFormat(HoodieRealtimeInputFormat inputFormat, JobConf jobConf,
        Schema schema, String basePath) {
    List<Schema.Field> fields = schema.getFields();
    String names = fields.stream().map(f -> f.name().toString()).collect(Collectors.joining(","));
    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
    Configuration conf = HoodieTestUtils.getDefaultHadoopConf();
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    jobConf.set("partition_columns", "datestr");
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    conf.set("partition_columns", "datestr");
    inputFormat.setConf(conf);
    jobConf.addResource(conf);
}

From source file:dataload.LogFetchJobTracker.java

License:Apache License

/**
 * Method to setup a link between the class and the JobTracker through the XML files and to create the link to the database
 * @param url
 * @param user
 * @param pwd
 * @param coreSiteXml
 * @param hdfsSiteXml
 * @param mapredSiteXml
 */
public void setupClientAndDatabase(String url, String user, String pwd, String coreSiteXml, String hdfsSiteXml,
        String mapredSiteXml) {
    try {
        JobConf conf = new JobConf();

        conf.addResource(new Path(coreSiteXml));
        conf.addResource(new Path(hdfsSiteXml));
        conf.addResource(new Path(mapredSiteXml));

        client = new JobClient(conf);
        connection = DriverManager.getConnection(url, user, pwd);

    } catch (SQLException e) {
        e.printStackTrace();
        System.err.println("Error: " + e.getMessage());
    } catch (IOException e) {
        e.printStackTrace();
        System.err.println("Error: " + e.getMessage());
    }
}

From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java

License:Apache License

private JobConf loadJobConfiguration(RunningJob job) throws InternalException {
    // Try normal job file
    try {
        JobConf conf = new JobConf();
        Path jobFile = new Path(job.getJobFile());
        FileSystem fs = jobFile.getFileSystem(new Configuration());
        conf.addResource(fs.open(jobFile));

        return conf;
    } catch (IOException ex) {
        // ignore and fall back to the local job file path below
    } catch (IllegalArgumentException ex) {
        // e.g. an empty job file path; fall back below as well
    }

    // Hadoop 0.20 only
    return new JobConf(org.apache.hadoop.mapred.JobTracker.getLocalJobFilePath(job.getID()));

    /*
    // Try to retrieve configuration from history
    // Hadoop 0.21 only!
    try
    {
       Method m = JobTracker.class.getMethod("getLocalJobFilePath", JobID.class);
       String jobFile = m.invoke(null, job.getID());
       return new JobConf(jobFile);
    }
    catch (NoSuchMethodException ex)
    {
    }
    catch (SecurityException ex)
    {
    }
            
    // Try to retrieve configuration from history (0.21 only)
    try
    {
       Method getHistoryUrl = job.getClass().getMethod("getHistoryUrl");
               
       Path historyPath = new Path(getHistoryUrl.invoke(job));
       Path historyDir = historyPath.getParent();
               
       Class jobHistoryClass = Class.forName(
       "org.apache.hadoop.mapreduce.jobhistory.JobHistory");
       Method getConfFile = jobHistoryClass.getMethod(
       "getConfFile", Path.class, JobID.class);
               
       Path jobFile = getConfFile.invoke(null, historyDir, job.getID());
               
       return new JobConf(jobFile);
    }
    catch (IOException ex)
    {
    }
    catch (IllegalArgumentException ex)
    {
       // Thrown for empty string in Path
       // This should only be temporary
    }
            
    return null;
    */
}

From source file:edu.uci.ics.hyracks.imru.dataflow.Hdtest.java

License:Apache License

public static JobSpecification createJob() throws Exception {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(4096);

    String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf";
    String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0";
    JobConf conf = new JobConf();
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    conf.setInputFormat(TextInputFormat.class);
    RecordDescriptor recordDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits,
            new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() {
                @Override
                public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
                    return new IKeyValueParser<LongWritable, Text>() {
                        TupleWriter tupleWriter;

                        @Override
                        public void open(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter = new TupleWriter(ctx, writer, 1);
                        }

                        @Override
                        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                                throws HyracksDataException {
                            try {
                                tupleWriter.write(value.getBytes(), 0, value.getLength());
                                tupleWriter.finishField();
                                tupleWriter.finishTuple();
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                        }

                        @Override
                        public void close(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter.close();
                        }
                    };
                }

            });

    // createPartitionConstraint(spec, readOperator, new String[] {"NC0"});
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" });

    IOperatorDescriptor writer = new HDFSOD(spec, null, null, null);
    // createPartitionConstraint(spec, writer, outSplits);

    spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0);

    spec.addRoot(writer);
    return spec;
}

From source file:edu.uci.ics.hyracks.imru.jobgen.IMRUJobFactory.java

License:Apache License

public JobConf getConf() throws IOException {
    JobConf conf = new JobConf();
    conf.addResource(new Path(confFactory.hadoopConfPath + "/core-site.xml"));
    conf.addResource(new Path(confFactory.hadoopConfPath + "/mapred-site.xml"));
    conf.addResource(new Path(confFactory.hadoopConfPath + "/hdfs-site.xml"));
    return conf;
}

From source file:graphbuilding.GenomixDriver.java

License:Apache License

public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
        throws IOException {

    JobConf conf = new JobConf(GenomixDriver.class);
    conf.setInt("sizeKmer", sizeKmer);

    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }

    conf.setJobName("Genomix Graph Building");
    conf.setMapperClass(GenomixMapper.class);
    conf.setReducerClass(GenomixReducer.class);
    conf.setCombinerClass(GenomixCombiner.class);

    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(KmerCountValue.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Kmer.class);
    conf.setOutputValueClass(KmerCountValue.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setNumReduceTasks(numReducers);

    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
}