List of usage examples for org.apache.hadoop.mapred.JobConf.addResource
public void addResource(String name)
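Before the project-specific examples below, a minimal sketch of the basic pattern. JobConf inherits addResource from Configuration, which accepts either a classpath resource name or a Path to a local file; the file name my-cluster-site.xml and the directory /etc/hadoop/conf here are illustrative placeholders, not values taken from the examples.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class AddResourceSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // By name: the resource is looked up on the classpath.
        conf.addResource("my-cluster-site.xml");          // hypothetical resource name

        // By Path: an explicit location on the local file system.
        String confDir = "/etc/hadoop/conf";              // hypothetical conf directory
        conf.addResource(new Path(confDir + "/core-site.xml"));

        // Resources added later override earlier ones for non-final properties.
        System.out.println(conf.get("fs.defaultFS"));
    }
}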
From source file:org.apache.hyracks.imru.dataflow.Hdtest.java
License:Apache License
public static JobSpecification createJob() throws Exception {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(4096);
    String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf";
    String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0";
    JobConf conf = new JobConf();
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    conf.setInputFormat(TextInputFormat.class);
    RecordDescriptor recordDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits,
            new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() {
                @Override
                public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
                    return new IKeyValueParser<LongWritable, Text>() {
                        TupleWriter tupleWriter;

                        @Override
                        public void open(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter = new TupleWriter(ctx, writer, 1);
                        }

                        @Override
                        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                                throws HyracksDataException {
                            try {
                                tupleWriter.write(value.getBytes(), 0, value.getLength());
                                tupleWriter.finishField();
                                tupleWriter.finishTuple();
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                        }

                        @Override
                        public void close(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter.close();
                        }
                    };
                }
            });
    // createPartitionConstraint(spec, readOperator, new String[] {"NC0"});
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" });
    IOperatorDescriptor writer = new HDFSOD(spec, null, null, null);
    // createPartitionConstraint(spec, writer, outSplits);
    spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
From source file:org.apache.oozie.action.hadoop.LauncherMainTester.java
License:Apache License
private static JobConf createSleepMapperReducerJobConf() {
    JobConf jConf = new JobConf(true);
    jConf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
    jConf.setMapperClass(SleepMapperReducerForTest.class);
    jConf.setReducerClass(SleepMapperReducerForTest.class);
    jConf.setOutputKeyClass(Text.class);
    jConf.setOutputValueClass(IntWritable.class);
    jConf.setInputFormat(TextInputFormat.class);
    jConf.setOutputFormat(TextOutputFormat.class);
    jConf.setNumReduceTasks(1);
    jConf.set(SleepMapperReducerForTest.SLEEP_TIME_MILLIS_KEY, "60000");
    return jConf;
}
From source file:org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.java
License:Apache License
public JobConf getLocalConf() {
    JobConf jc = new JobConf(false);

    jc.addResource(CORE_DEFAULT_SITE);
    jc.addResource(MAPRED_DEFAULT_SITE);
    jc.addResource(YARN_DEFAULT_SITE);

    return jc;
}
From source file:org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.java
License:Apache License
public JobConf getExecConf(Properties properties) throws ExecException {
    JobConf jc = null;
    // Check existence of user provided configs
    String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs");
    if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) {
        jc = new JobConf(ConfigurationUtil.toConfiguration(properties));
    } else {
        // Check existence of hadoop-site.xml or core-site.xml in
        // classpath if user provided confs are not being used
        Configuration testConf = new Configuration();
        ClassLoader cl = testConf.getClassLoader();
        URL hadoop_site = cl.getResource(HADOOP_SITE);
        URL core_site = cl.getResource(CORE_SITE);

        if (hadoop_site == null && core_site == null) {
            throw new ExecException(
                    "Cannot find hadoop configurations in classpath "
                            + "(neither hadoop-site.xml nor core-site.xml was found in the classpath)."
                            + " If you plan to use local mode, please put -x local option in command line",
                    4010);
        }
        jc = new JobConf();
    }
    jc.addResource("pig-cluster-hadoop-site.xml");
    jc.addResource(YARN_SITE);
    return jc;
}
From source file:org.apache.pig.backend.hadoop.executionengine.MRExecutionEngine.java
License:Apache License
@SuppressWarnings({ "deprecation", "resource" }) private void init(Properties properties) throws ExecException { // First set the ssh socket factory setSSHFactory();//from www .j a v a2 s. c o m String cluster = null; String nameNode = null; // We need to build a configuration object first in the manner described // below // and then get back a properties object to inspect the // JOB_TRACKER_LOCATION // and FILE_SYSTEM_LOCATION. The reason to do this is if we looked only // at // the existing properties object, we may not get the right settings. So // we want // to read the configurations in the order specified below and only then // look // for JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. // Hadoop by default specifies two resources, loaded in-order from the // classpath: // 1. hadoop-default.xml : Read-only defaults for hadoop. // 2. hadoop-site.xml: Site-specific configuration for a given hadoop // installation. // Now add the settings from "properties" object to override any // existing properties // All of the above is accomplished in the method call below JobConf jc = null; if (!this.pigContext.getExecType().isLocal()) { // Check existence of user provided configs String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs"); if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) { jc = new JobConf(ConfigurationUtil.toConfiguration(properties)); } else { // Check existence of hadoop-site.xml or core-site.xml in // classpath // if user provided confs are not being used Configuration testConf = new Configuration(); ClassLoader cl = testConf.getClassLoader(); URL hadoop_site = cl.getResource(HADOOP_SITE); URL core_site = cl.getResource(CORE_SITE); if (hadoop_site == null && core_site == null) { throw new ExecException( "Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)." 
+ " If you plan to use local mode, please put -x local option in command line", 4010); } jc = new JobConf(); } jc.addResource("pig-cluster-hadoop-site.xml"); jc.addResource(YARN_SITE); // Trick to invoke static initializer of DistributedFileSystem to // add hdfs-default.xml // into configuration new DistributedFileSystem(); // the method below alters the properties object by overriding the // hadoop properties with the values from properties and recomputing // the properties recomputeProperties(jc, properties); } else { // If we are running in local mode we dont read the hadoop conf file if (properties.getProperty("mapreduce.framework.name") == null) { properties.setProperty("mapreduce.framework.name", "local"); } properties.setProperty(JOB_TRACKER_LOCATION, LOCAL); properties.setProperty(FILE_SYSTEM_LOCATION, "file:///"); properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///"); jc = new JobConf(false); jc.addResource("core-default.xml"); jc.addResource("mapred-default.xml"); jc.addResource("yarn-default.xml"); recomputeProperties(jc, properties); } cluster = jc.get(JOB_TRACKER_LOCATION); nameNode = jc.get(FILE_SYSTEM_LOCATION); if (nameNode == null) nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION); if (cluster != null && cluster.length() > 0) { if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) { cluster = cluster + ":50020"; } properties.setProperty(JOB_TRACKER_LOCATION, cluster); } if (nameNode != null && nameNode.length() > 0) { if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) { nameNode = nameNode + ":8020"; } properties.setProperty(FILE_SYSTEM_LOCATION, nameNode); } log.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode)); // constructor sets DEFAULT_REPLICATION_FACTOR_KEY ds = new HDataStorage(properties); if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) { log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION)); } // Set job-specific configuration knobs jobConf = jc; }
From source file:org.apache.pig.backend.hadoop.executionengine.tez.TezExecutionEngine.java
License:Apache License
@Override
public JobConf getExecConf(Properties properties) throws ExecException {
    JobConf jc = super.getExecConf(properties);
    jc.addResource(TezConfiguration.TEZ_SITE_XML);
    return jc;
}
From source file:org.apache.vxquery.xtest.MiniDFS.java
License:Apache License
public void startHDFS() throws IOException {
    FileSystem lfs = FileSystem.getLocal(new Configuration());
    JobConf conf = new JobConf();
    String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
    Path hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
    if (!lfs.exists(hdfs_conf)) {
        PATH_TO_HADOOP_CONF = "vxquery-xtest/src/test/resources/hadoop/conf";
        hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
        if (!lfs.exists(hdfs_conf)) {
            PATH_TO_HADOOP_CONF = "../vxquery-xtest/src/test/resources/hadoop/conf";
            hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
        }
    }
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    int numDataNodes = 1;
    int nameNodePort = 40000;

    // cleanup artifacts created on the local file system
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    MiniDFSCluster.Builder build = new MiniDFSCluster.Builder(conf);
    build.nameNodePort(nameNodePort);
    build.nameNodeHttpPort(nameNodePort + 34);
    build.numDataNodes(numDataNodes);
    build.checkExitOnShutdown(true);
    build.startupOption(StartupOption.REGULAR);
    build.format(true);
    build.waitSafeMode(true);
    dfsCluster = build.build();

    FileSystem dfs = FileSystem.get(conf);
    String DATA_PATH = "src/test/resources/TestSources/ghcnd";
    Path src = new Path(DATA_PATH);
    if (!lfs.exists(src)) {
        DATA_PATH = "vxquery-xtest/src/test/resources/TestSources/ghcnd";
        src = new Path(DATA_PATH);
        if (!lfs.exists(src)) {
            DATA_PATH = "../vxquery-xtest/src/test/resources/TestSources/ghcnd";
            src = new Path(DATA_PATH);
        }
    }
    dfs.mkdirs(new Path("/tmp"));
    Path dest = new Path("/tmp/vxquery-hdfs-test");
    dfs.copyFromLocalFile(src, dest);
    if (dfs.exists(dest)) {
        System.err.println("Test files copied to HDFS successfully");
    }
}
From source file:org.archive.jbs.Parse.java
License:Apache License
/**
 * Command-line driver. Runs the Parse as a Hadoop job.
 */
public static void main(String args[]) throws Exception {
    JobConf conf = new JobConf(Parse.class);

    // Load the default set of config properties, including the
    // essential properties needed by the bits of Nutch that we are
    // still using. These properties can still be over-ridden by
    // command-line args.
    conf.addResource("conf-parse.xml");

    int result = ToolRunner.run(conf, new Parse(), args);

    System.exit(result);
}
From source file:org.deeplearning4j.hadoop.util.HdfsUtils.java
License:Apache License
/**
 * Adapted from
 * http://terrier.org/docs/v3.5/javadoc/org/terrier/utility/io/HadoopUtility.html#saveClassPathToJob%28org.apache.hadoop.mapred.JobConf%29
 * @param jobConf
 * @throws IOException
 */
public static List<Path> saveClassPathToJob(JobConf jobConf) throws Exception {
    String hdfs = getHost(jobConf);
    HdfsLock lock = new HdfsLock(hdfs);
    String hdfs2 = getHdfs(jobConf);
    if (jobConf.get(HDFS_HOST) != null) {
        if (lock.isLocked()) {
            List<Path> ret = lock.getPaths();
            StringBuffer files = new StringBuffer();
            StringBuffer classPath = new StringBuffer();
            for (Path path : ret) {
                files.append(hdfs2 + path.toString());
                files.append(",");
                classPath.append(hdfs2 + path.toString());
                classPath.append(":");
                jobConf.addResource(path.toUri().toURL());
            }
            String classPathToSet = classPath.toString().substring(0, classPath.lastIndexOf(":"));
            String filesToSet = files.toString().substring(0, files.lastIndexOf(","));
            log.info("Setting class path " + classPathToSet);
            log.info("Using files " + filesToSet);
            jobConf.set("mapred.cache.files", filesToSet);
            jobConf.set("mapred.job.classpath.files", classPathToSet);
            return ret;
        }
    }

    List<Path> paths = new ArrayList<Path>();
    log.info("Copying classpath to job");
    final String[] jars = findJarFiles(new String[] { System.getenv().get("CLASSPATH"),
            System.getProperty("java.class.path"), System.getProperty("surefire.test.class.path") });
    final FileSystem defFS = FileSystem.get(jobConf);
    int numFilesWritten = 0;
    for (String jarFile : jars) {
        // class path issues
        if (jarFile.contains("hadoop-client")) {
            log.info("Skipping hadoop-client");
            continue;
        } else if (jarFile.contains("mapreduce-run")) {
            log.info("Skipping map reduce run");
            continue;
        }

        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = makeFile(jobConf, filename);
        log.info("Uploading " + jarFile + " to " + tmpJarFilePath.toString());
        try {
            defFS.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
            jobConf.addResource(tmpJarFilePath);
            paths.add(tmpJarFilePath);
            numFilesWritten++;
        } catch (Exception e) {
            for (Path path : paths) {
                if (defFS.exists(path))
                    defFS.delete(path, true);
            }
            lock.close();
            log.error(String.format("Exception writing to hdfs; rolling back %d jar files ", numFilesWritten), e);
            throw new IOException("Couldn't write jar file " + jarFile);
        }
    }

    try {
        lock.create(paths);
    } catch (KeeperException.SessionExpiredException e) {
        lock = new HdfsLock(hdfs);
        lock.create(paths);
    }
    lock.close();

    // Resolve any differences by removing clashing names in the files
    // (archives are removed from files)
    Set<Path> remove = new HashSet<Path>();
    for (Path path : paths) {
        boolean exists = false;
        try {
            exists = defFS.exists(path);
        } catch (IllegalArgumentException e) {
            exists = false;
        }
        if (!exists)
            remove.add(path);
    }
    paths.removeAll(remove);
    return paths;
}
From source file:org.lamapacos.preprocessor.filter.SegmentFilter.java
License:Apache License
@Override
public void configure(JobConf job) {
    setConf(job);
    // conf file
    job.addResource("lamapacos-preprocessor.xml");
    RegexsFilter.setPatterns(job);
    try {
        this.fs = FileSystem.get(getConf());
    } catch (IOException e) {
        LOG.error("IOException:", e);
    }
}