List of usage examples for org.apache.hadoop.mapred JobConf addResource
public void addResource(String name)
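addResource is inherited from org.apache.hadoop.conf.Configuration and is overloaded to accept a classpath resource name, a filesystem Path, or a URL; all three forms appear in the examples below. The following is a minimal sketch of those overloads, not taken from any of the source files; the configuration file locations are placeholders.

import java.io.File;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class AddResourceSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        // Overload listed above: resolve the resource by name on the classpath
        conf.addResource("core-site.xml");
        // Overload taking a Path: load a specific file from the filesystem
        conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));
        // Overload taking a URL
        conf.addResource(new File("/etc/hadoop/conf/mapred-site.xml").toURI().toURL());
        // Values from later resources override earlier ones (unless marked final)
        System.out.println(conf.get("fs.defaultFS"));
    }
}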
From source file:com.cloudera.recordservice.tests.ClusterController.java
License:Apache License
/**
 * This method populates a JobConf with the information in the HadoopConfDir.
 */
public JobConf populateJobConf(JobConf conf) throws MalformedURLException {
    File[] files = new File(clusterConfiguration_.getHadoopConfDir()).listFiles();
    for (File file : files) {
        if (file.getName().endsWith(".xml")) {
            conf.addResource(file.getAbsoluteFile().toURI().toURL());
        }
    }
    String[] bs = clusterConfiguration_.getHadoopConfDir().split("/");
    String newPath = "/";
    for (int i = 0; i < bs.length - 1; i++) {
        newPath += bs[i] + "/";
    }
    newPath += "recordservice-conf/recordservice-site.xml";
    conf.addResource(new File(newPath).getAbsoluteFile().toURI().toURL());
    return conf;
}
From source file:com.example.hadoop.hdfs.test.HdfsClient.java
License:Open Source License
public static JobConf config() {
    JobConf conf = new JobConf(HdfsClient.class);
    conf.setJobName("HdfsClient");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");
    return conf;
}
From source file:com.example.hadoop.mapreduce.test.MapReduceTest.java
License:Open Source License
public static void main(String[] args) throws IOException {
    String input = HDFS_PATH + "/input/README.txt";
    String input2 = HDFS_PATH + "/input/README2.txt";
    String output = HDFS_PATH + "/test/output";
    // Remove the output directory if it already exists, otherwise the job will fail
    if (HdfsClient.exists(output)) {
        HdfsClient.rm(output);
    }
    JobConf conf = new JobConf(MapReduceTest.class);
    conf.setJobName("MapReduceTest");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");
    // Mapper output types
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    // Reducer output types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    // Mapper
    conf.setMapperClass(MapperTest.class);
    // Combiner: runs on mapper output before it reaches the reducer
    conf.setCombinerClass(ReducerTest.class);
    // Reducer
    conf.setReducerClass(ReducerTest.class);
    // Input/output formats
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // Input and output paths
    FileInputFormat.setInputPaths(conf, new Path[] { new Path(input), new Path(input2) });
    FileOutputFormat.setOutputPath(conf, new Path(output));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezExecutionEngine.java
License:Apache License
@SuppressWarnings("deprecation") private void init(Properties properties) throws ExecException { //First set the ssh socket factory setSSHFactory();//from ww w . jav a 2s. c o m String cluster = null; String nameNode = null; // We need to build a configuration object first in the manner described below // and then get back a properties object to inspect the JOB_TRACKER_LOCATION // and FILE_SYSTEM_LOCATION. The reason to do this is if we looked only at // the existing properties object, we may not get the right settings. So we want // to read the configurations in the order specified below and only then look // for JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. // Hadoop by default specifies two resources, loaded in-order from the classpath: // 1. hadoop-default.xml : Read-only defaults for hadoop. // 2. hadoop-site.xml: Site-specific configuration for a given hadoop installation. // Now add the settings from "properties" object to override any existing properties // All of the above is accomplished in the method call below JobConf jc = null; if (this.pigContext.getExecType() == ExecType.TEZ) { // Check existence of user provided configs String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs"); if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) { jc = new JobConf(ConfigurationUtil.toConfiguration(properties)); } else { // Check existence of hadoop-site.xml or core-site.xml in classpath // if user provided confs are not being used Configuration testConf = new Configuration(); ClassLoader cl = testConf.getClassLoader(); URL hadoop_site = cl.getResource(HADOOP_SITE); URL core_site = cl.getResource(CORE_SITE); if (hadoop_site == null && core_site == null) { throw new ExecException( "Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)." 
+ " If you plan to use local mode, please put -x local option in command line", 4010); } jc = new JobConf(); } jc.addResource("pig-cluster-hadoop-site.xml"); jc.addResource(YARN_SITE); /* // Trick to invoke static initializer of DistributedFileSystem to add hdfs-default.xml // into configuration new DistributedFileSystem(); */ //the method below alters the properties object by overriding the //hadoop properties with the values from properties and recomputing //the properties recomputeProperties(jc, properties); } else { // If we are running in local mode we dont read the hadoop conf file if (properties.getProperty("mapreduce.framework.name") == null) { properties.setProperty("mapreduce.framework.name", "local"); } properties.setProperty(JOB_TRACKER_LOCATION, LOCAL); properties.setProperty(FILE_SYSTEM_LOCATION, "file:///"); properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///"); jc = new JobConf(false); jc.addResource("core-default.xml"); jc.addResource("mapred-default.xml"); jc.addResource("yarn-default.xml"); recomputeProperties(jc, properties); } cluster = jc.get(JOB_TRACKER_LOCATION); nameNode = jc.get(FILE_SYSTEM_LOCATION); if (nameNode == null) nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION); if (cluster != null && cluster.length() > 0) { if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) { cluster = cluster + ":50020"; } properties.setProperty(JOB_TRACKER_LOCATION, cluster); } if (nameNode != null && nameNode.length() > 0) { if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) { nameNode = nameNode + ":8020"; } properties.setProperty(FILE_SYSTEM_LOCATION, nameNode); } log.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode)); // constructor sets DEFAULT_REPLICATION_FACTOR_KEY ds = new HDataStorage(properties); if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) { log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION)); } // Set job-specific configuration knobs jobConf = jc; }
From source file:com.uber.hoodie.common.HoodieMergeOnReadTestUtils.java
License:Apache License
private static void setPropsForInputFormat(HoodieRealtimeInputFormat inputFormat, JobConf jobConf,
        Schema schema, String basePath) {
    List<Schema.Field> fields = schema.getFields();
    String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(","));
    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
    Configuration conf = HoodieTestUtils.getDefaultHadoopConf();

    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    jobConf.set("partition_columns", "datestr");
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    conf.set("partition_columns", "datestr");

    inputFormat.setConf(conf);
    jobConf.addResource(conf);
}
From source file:dataload.LogFetchJobTracker.java
License:Apache License
/**
 * Sets up a link between this class and the JobTracker through the XML configuration
 * files, and creates the connection to the database.
 * @param url
 * @param user
 * @param pwd
 * @param coreSiteXml
 * @param hdfsSiteXml
 * @param mapredSiteXml
 */
public void setupClientAndDatabase(String url, String user, String pwd, String coreSiteXml,
        String hdfsSiteXml, String mapredSiteXml) {
    try {
        JobConf conf = new JobConf();
        conf.addResource(new Path(coreSiteXml));
        conf.addResource(new Path(hdfsSiteXml));
        conf.addResource(new Path(mapredSiteXml));
        client = new JobClient(conf);
        connection = DriverManager.getConnection(url, user, pwd);
    } catch (SQLException e) {
        e.printStackTrace();
        System.err.println("Error: " + e.getMessage());
    } catch (IOException e) {
        e.printStackTrace();
        System.err.println("Error: " + e.getMessage());
    }
}
From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java
License:Apache License
private JobConf loadJobConfiguration(RunningJob job) throws InternalException {
    // Try the normal job file first
    try {
        JobConf conf = new JobConf();
        Path jobFile = new Path(job.getJobFile());
        FileSystem fs = jobFile.getFileSystem(new Configuration());
        conf.addResource(fs.open(jobFile));
        return conf;
    } catch (IOException ex) {
    } catch (IllegalArgumentException ex) {
    }

    // Hadoop 0.20 only
    return new JobConf(org.apache.hadoop.mapred.JobTracker.getLocalJobFilePath(job.getID()));

    /*
    // Try to retrieve configuration from history
    // Hadoop 0.21 only!
    try {
        Method m = JobTracker.class.getMethod("getLocalJobFilePath", JobID.class);
        String jobFile = m.invoke(null, job.getID());
        return new JobConf(jobFile);
    } catch (NoSuchMethodException ex) {
    } catch (SecurityException ex) {
    }

    // Try to retrieve configuration from history (0.21 only)
    try {
        Method getHistoryUrl = job.getClass().getMethod("getHistoryUrl");
        Path historyPath = new Path(getHistoryUrl.invoke(job));
        Path historyDir = historyPath.getParent();
        Class jobHistoryClass = Class.forName("org.apache.hadoop.mapreduce.jobhistory.JobHistory");
        Method getConfFile = jobHistoryClass.getMethod("getConfFile", Path.class, JobID.class);
        Path jobFile = getConfFile.invoke(null, historyDir, job.getID());
        return new JobConf(jobFile);
    } catch (IOException ex) {
    } catch (IllegalArgumentException ex) {
        // Thrown for empty string in Path
        // This should only be temporary
    }

    return null;
    */
}
From source file:edu.uci.ics.hyracks.imru.dataflow.Hdtest.java
License:Apache License
public static JobSpecification createJob() throws Exception {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(4096);

    String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf";
    String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0";

    JobConf conf = new JobConf();
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    conf.setInputFormat(TextInputFormat.class);

    RecordDescriptor recordDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits,
            new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() {
                @Override
                public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
                    return new IKeyValueParser<LongWritable, Text>() {
                        TupleWriter tupleWriter;

                        @Override
                        public void open(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter = new TupleWriter(ctx, writer, 1);
                        }

                        @Override
                        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                                throws HyracksDataException {
                            try {
                                tupleWriter.write(value.getBytes(), 0, value.getLength());
                                tupleWriter.finishField();
                                tupleWriter.finishTuple();
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                        }

                        @Override
                        public void close(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter.close();
                        }
                    };
                }
            });
    // createPartitionConstraint(spec, readOperator, new String[] {"NC0"});
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" });

    IOperatorDescriptor writer = new HDFSOD(spec, null, null, null);
    // createPartitionConstraint(spec, writer, outSplits);
    spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
From source file:edu.uci.ics.hyracks.imru.jobgen.IMRUJobFactory.java
License:Apache License
public JobConf getConf() throws IOException {
    JobConf conf = new JobConf();
    conf.addResource(new Path(confFactory.hadoopConfPath + "/core-site.xml"));
    conf.addResource(new Path(confFactory.hadoopConfPath + "/mapred-site.xml"));
    conf.addResource(new Path(confFactory.hadoopConfPath + "/hdfs-site.xml"));
    return conf;
}
From source file:graphbuilding.GenomixDriver.java
License:Apache License
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
        throws IOException {
    JobConf conf = new JobConf(GenomixDriver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }

    conf.setJobName("Genomix Graph Building");
    conf.setMapperClass(GenomixMapper.class);
    conf.setReducerClass(GenomixReducer.class);
    conf.setCombinerClass(GenomixCombiner.class);

    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(KmerCountValue.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Kmer.class);
    conf.setOutputValueClass(KmerCountValue.class);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setNumReduceTasks(numReducers);

    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
}