List of usage examples for the org.apache.hadoop.hdfs.DistributedFileSystem no-argument constructor
public DistributedFileSystem()
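All of the examples below follow the same two-step pattern: construct the client with the no-argument constructor, then wire it to a cluster with initialize(URI, Configuration) before use. A minimal sketch of that pattern, assuming a hypothetical NameNode at hdfs://namenode:8020:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class DfsConstructorSketch {
    public static void main(String[] args) throws Exception {
        // The no-arg constructor creates an uninitialized client; it must be
        // bound to a cluster with initialize(URI, Configuration) before use.
        DistributedFileSystem dfs = new DistributedFileSystem();
        dfs.initialize(new URI("hdfs://namenode:8020"), new Configuration());
        try {
            System.out.println("Working directory: " + dfs.getWorkingDirectory());
        } finally {
            dfs.close(); // release the underlying client connection
        }
    }
}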
From source file: org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.java
License: Apache License
private void init(Properties properties) throws ExecException {
    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described
    // below and then get back a properties object to inspect the
    // JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. The reason to do this
    // is if we looked only at the existing properties object, we may not
    // get the right settings. So we want to read the configurations in the
    // order specified below and only then look for JOB_TRACKER_LOCATION and
    // FILE_SYSTEM_LOCATION.
    // Hadoop by default specifies two resources, loaded in-order from the
    // classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml: Site-specific configuration for a given hadoop
    //    installation.
    // Now add the settings from the "properties" object to override any
    // existing properties. All of the above is accomplished in the method
    // call below.
    JobConf jc = null;
    if (!this.pigContext.getExecType().isLocal()) {
        jc = getExecConf(properties);

        // Trick to invoke static initializer of DistributedFileSystem to
        // add hdfs-default.xml into configuration
        new DistributedFileSystem();
    } else {
        // If we are running in local mode we don't read the hadoop conf file
        if (properties.getProperty(MRConfiguration.FRAMEWORK_NAME) == null) {
            properties.setProperty(MRConfiguration.FRAMEWORK_NAME, LOCAL);
        }
        properties.setProperty(MRConfiguration.JOB_TRACKER, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");
        jc = getLocalConf();
    }

    // The method below alters the properties object by overriding the
    // hadoop properties with the values from properties and recomputing
    // the properties.
    Utils.recomputeProperties(jc, properties);

    cluster = jc.get(MRConfiguration.JOB_TRACKER);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null) {
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);
    }

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(MRConfiguration.JOB_TRACKER, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    LOG.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode));
    // Constructor sets DEFAULT_REPLICATION_FACTOR_KEY.
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        LOG.info("Connecting to map-reduce job tracker at: " + jc.get(MRConfiguration.JOB_TRACKER));
    }
}
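Note that the bare new DistributedFileSystem() above is used purely for its side effect: as the source comment explains, loading the class runs its static initialization, which registers hdfs-default.xml as a default Configuration resource. A minimal sketch of that idiom, assuming the HDFS jars are on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class HdfsDefaultsTrick {
    public static void main(String[] args) {
        // Loading DistributedFileSystem runs its static initializer, which
        // registers hdfs-default.xml as a default Configuration resource.
        // The instance itself is discarded immediately.
        new DistributedFileSystem();

        // Configuration objects created afterwards resolve HDFS keys;
        // dfs.replication is typically "3" from hdfs-default.xml.
        Configuration conf = new Configuration();
        System.out.println("dfs.replication = " + conf.get("dfs.replication"));
    }
}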
From source file: org.apache.pig.backend.hadoop.executionengine.MRExecutionEngine.java
License: Apache License
@SuppressWarnings({ "deprecation", "resource" })
private void init(Properties properties) throws ExecException {
    // First set the ssh socket factory.
    setSSHFactory();

    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described
    // below and then get back a properties object to inspect the
    // JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. The reason to do this
    // is if we looked only at the existing properties object, we may not
    // get the right settings. So we want to read the configurations in the
    // order specified below and only then look for JOB_TRACKER_LOCATION and
    // FILE_SYSTEM_LOCATION.
    // Hadoop by default specifies two resources, loaded in-order from the
    // classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml: Site-specific configuration for a given hadoop
    //    installation.
    // Now add the settings from the "properties" object to override any
    // existing properties. All of the above is accomplished in the method
    // call below.
    JobConf jc = null;
    if (!this.pigContext.getExecType().isLocal()) {
        // Check existence of user provided configs
        String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs");
        if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) {
            jc = new JobConf(ConfigurationUtil.toConfiguration(properties));
        } else {
            // Check existence of hadoop-site.xml or core-site.xml in the
            // classpath if user provided confs are not being used.
            Configuration testConf = new Configuration();
            ClassLoader cl = testConf.getClassLoader();
            URL hadoop_site = cl.getResource(HADOOP_SITE);
            URL core_site = cl.getResource(CORE_SITE);

            if (hadoop_site == null && core_site == null) {
                throw new ExecException(
                        "Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)."
                                + " If you plan to use local mode, please put -x local option in command line",
                        4010);
            }
            jc = new JobConf();
        }
        jc.addResource("pig-cluster-hadoop-site.xml");
        jc.addResource(YARN_SITE);

        // Trick to invoke static initializer of DistributedFileSystem to
        // add hdfs-default.xml into configuration.
        new DistributedFileSystem();

        // The method below alters the properties object by overriding the
        // hadoop properties with the values from properties and recomputing
        // the properties.
        recomputeProperties(jc, properties);
    } else {
        // If we are running in local mode we don't read the hadoop conf file.
        if (properties.getProperty("mapreduce.framework.name") == null) {
            properties.setProperty("mapreduce.framework.name", "local");
        }
        properties.setProperty(JOB_TRACKER_LOCATION, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");

        jc = new JobConf(false);
        jc.addResource("core-default.xml");
        jc.addResource("mapred-default.xml");
        jc.addResource("yarn-default.xml");
        recomputeProperties(jc, properties);
    }

    cluster = jc.get(JOB_TRACKER_LOCATION);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null) {
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);
    }

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(JOB_TRACKER_LOCATION, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    log.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode));
    // Constructor sets DEFAULT_REPLICATION_FACTOR_KEY.
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION));
    }

    // Set job-specific configuration knobs.
    jobConf = jc;
}
From source file: org.commoncrawl.service.pagerank.slave.PageRankUtils.java
License: Open Source License
private static FileSystem buildDistributionOutputStreamVector(boolean useSequenceFile, String fileNamePrefix,
        File localOutputPath, String remoteOutputPath, int myNodeIndex, int nodeCount,
        Vector<PRValueOutputStream> outputStreamVector) {

    Configuration conf = new Configuration(CrawlEnvironment.getHadoopConfig());
    conf.setInt("dfs.socket.timeout", 240000);
    conf.setInt("io.file.buffer.size", 4096 * 20);

    DistributedFileSystem hdfs = new DistributedFileSystem();

    try {
        hdfs.initialize(FileSystem.getDefaultUri(conf), conf);

        for (int i = 0; i < nodeCount; ++i) {
            // create output filename
            String fileName = fileNamePrefix + "-" + NUMBER_FORMAT.format(i);
            // remote path for this stream
            Path remotePath = new Path(remoteOutputPath, fileName);
            // remove any existing file
            CrawlEnvironment.getDefaultFileSystem().delete(remotePath, false);

            if (useSequenceFile) {
                // recreate it ...
                outputStreamVector.add(new PRSequenceFileOutputStream(conf,
                        CrawlEnvironment.getDefaultFileSystem(), remotePath));
            } else {
                // recreate it ...
                outputStreamVector.add(
                        new PROldValueOutputStream(CrawlEnvironment.getDefaultFileSystem(), remotePath));
            }
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        // Close any streams that were already created ...
        for (PRValueOutputStream streamInfo : outputStreamVector) {
            try {
                if (streamInfo != null) {
                    streamInfo.close(true);
                }
            } catch (IOException e2) {
                LOG.error(CCStringUtils.stringifyException(e2));
            }
        }
        // ... then discard them, after iteration is finished.
        outputStreamVector.clear();
    }
    return hdfs;
}
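This example initializes the client against the cluster's default URI rather than a hard-coded address, and returns the handle so the caller can close it later. A minimal sketch of that initialize-against-default-URI pattern, assuming the cluster configuration (fs.defaultFS, or fs.default.name in older releases) is on the classpath:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class DefaultUriInit {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Tuning knobs mirrored from the example above.
        conf.setInt("dfs.socket.timeout", 240000);
        conf.setInt("io.file.buffer.size", 4096 * 20);

        DistributedFileSystem hdfs = new DistributedFileSystem();
        // FileSystem.getDefaultUri(conf) reads the default filesystem URI
        // from the configuration.
        hdfs.initialize(FileSystem.getDefaultUri(conf), conf);
        try {
            System.out.println("Connected to: " + hdfs.getUri());
        } finally {
            hdfs.close();
        }
    }
}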
From source file: voldemort.store.readonly.mr.utils.HadoopUtils.java
License: Apache License
public static FileSystem getFileSystem(String hdfsUrl, boolean isLocal) throws IOException {
    // Initialize fs
    FileSystem fs;
    if (isLocal) {
        fs = FileSystem.getLocal(new Configuration());
    } else {
        fs = new DistributedFileSystem();
        try {
            fs.initialize(new URI(hdfsUrl), new Configuration());
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
    }
    return fs;
}
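A possible call site for this helper, sketched under the assumption of a hypothetical NameNode URL; note that in local mode the code above ignores the hdfsUrl argument entirely:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import voldemort.store.readonly.mr.utils.HadoopUtils;

public class GetFileSystemDemo {
    public static void main(String[] args) throws Exception {
        // Remote HDFS; the URL is illustrative.
        FileSystem hdfs = HadoopUtils.getFileSystem("hdfs://namenode:8020", false);
        try {
            System.out.println(hdfs.exists(new Path("/data")));
        } finally {
            hdfs.close();
        }

        // Local filesystem, e.g. for tests; the URL argument is ignored.
        FileSystem local = HadoopUtils.getFileSystem("ignored", true);
        local.close();
    }
}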