List of usage examples for the org.apache.hadoop.hdfs.DistributedFileSystem no-argument constructor
public DistributedFileSystem()
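All of the examples below follow the same two-step pattern: construct the client with the no-argument constructor, then wire it to a cluster with initialize(URI, Configuration) before use. A minimal sketch of that pattern, assuming a hypothetical NameNode at hdfs://namenode:8020:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class DfsConstructorSketch {
    public static void main(String[] args) throws Exception {
        // The no-arg constructor creates an uninitialized client; it must be
        // bound to a cluster with initialize(URI, Configuration) before use.
        DistributedFileSystem dfs = new DistributedFileSystem();
        dfs.initialize(new URI("hdfs://namenode:8020"), new Configuration());
        try {
            System.out.println("Working directory: " + dfs.getWorkingDirectory());
        } finally {
            dfs.close(); // release the underlying client connection
        }
    }
}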
From source file: org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.java
License: Apache License
private void init(Properties properties) throws ExecException {
    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described
    // below and then get back a properties object to inspect the
    // JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. The reason to do this
    // is if we looked only at the existing properties object, we may not
    // get the right settings. So we want to read the configurations in the
    // order specified below and only then look for JOB_TRACKER_LOCATION and
    // FILE_SYSTEM_LOCATION.
    // Hadoop by default specifies two resources, loaded in-order from the
    // classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml: Site-specific configuration for a given hadoop
    //    installation.
    // Now add the settings from the "properties" object to override any
    // existing properties. All of the above is accomplished in the method
    // call below.
    JobConf jc = null;
    if (!this.pigContext.getExecType().isLocal()) {
        jc = getExecConf(properties);

        // Trick to invoke static initializer of DistributedFileSystem to
        // add hdfs-default.xml into configuration
        new DistributedFileSystem();
    } else {
        // If we are running in local mode we don't read the hadoop conf file
        if (properties.getProperty(MRConfiguration.FRAMEWORK_NAME) == null) {
            properties.setProperty(MRConfiguration.FRAMEWORK_NAME, LOCAL);
        }
        properties.setProperty(MRConfiguration.JOB_TRACKER, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");
        jc = getLocalConf();
    }

    // The method below alters the properties object by overriding the
    // hadoop properties with the values from properties and recomputing
    // the properties.
    Utils.recomputeProperties(jc, properties);

    cluster = jc.get(MRConfiguration.JOB_TRACKER);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null) {
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);
    }

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(MRConfiguration.JOB_TRACKER, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    LOG.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode));
    // Constructor sets DEFAULT_REPLICATION_FACTOR_KEY.
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        LOG.info("Connecting to map-reduce job tracker at: " + jc.get(MRConfiguration.JOB_TRACKER));
    }
}
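Note that the bare new DistributedFileSystem() above is used purely for its side effect: as the source comment explains, loading the class runs its static initialization, which registers hdfs-default.xml as a default Configuration resource. A minimal sketch of that idiom, assuming the HDFS jars are on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class HdfsDefaultsTrick {
    public static void main(String[] args) {
        // Loading DistributedFileSystem runs its static initializer, which
        // registers hdfs-default.xml as a default Configuration resource.
        // The instance itself is discarded immediately.
        new DistributedFileSystem();

        // Configuration objects created afterwards resolve HDFS keys;
        // dfs.replication is typically "3" from hdfs-default.xml.
        Configuration conf = new Configuration();
        System.out.println("dfs.replication = " + conf.get("dfs.replication"));
    }
}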
From source file: org.apache.pig.backend.hadoop.executionengine.MRExecutionEngine.java
License: Apache License
@SuppressWarnings({ "deprecation", "resource" })
private void init(Properties properties) throws ExecException {
    // First set the ssh socket factory.
    setSSHFactory();

    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described
    // below and then get back a properties object to inspect the
    // JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. The reason to do this
    // is if we looked only at the existing properties object, we may not
    // get the right settings. So we want to read the configurations in the
    // order specified below and only then look for JOB_TRACKER_LOCATION and
    // FILE_SYSTEM_LOCATION.
    // Hadoop by default specifies two resources, loaded in-order from the
    // classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml: Site-specific configuration for a given hadoop
    //    installation.
    // Now add the settings from the "properties" object to override any
    // existing properties. All of the above is accomplished in the method
    // call below.
    JobConf jc = null;
    if (!this.pigContext.getExecType().isLocal()) {
        // Check existence of user provided configs
        String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs");
        if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) {
            jc = new JobConf(ConfigurationUtil.toConfiguration(properties));
        } else {
            // Check existence of hadoop-site.xml or core-site.xml in the
            // classpath if user provided confs are not being used.
            Configuration testConf = new Configuration();
            ClassLoader cl = testConf.getClassLoader();
            URL hadoop_site = cl.getResource(HADOOP_SITE);
            URL core_site = cl.getResource(CORE_SITE);

            if (hadoop_site == null && core_site == null) {
                throw new ExecException(
                        "Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)."
                                + " If you plan to use local mode, please put -x local option in command line",
                        4010);
            }
            jc = new JobConf();
        }
        jc.addResource("pig-cluster-hadoop-site.xml");
        jc.addResource(YARN_SITE);

        // Trick to invoke static initializer of DistributedFileSystem to
        // add hdfs-default.xml into configuration.
        new DistributedFileSystem();

        // The method below alters the properties object by overriding the
        // hadoop properties with the values from properties and recomputing
        // the properties.
        recomputeProperties(jc, properties);
    } else {
        // If we are running in local mode we don't read the hadoop conf file.
        if (properties.getProperty("mapreduce.framework.name") == null) {
            properties.setProperty("mapreduce.framework.name", "local");
        }
        properties.setProperty(JOB_TRACKER_LOCATION, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");

        jc = new JobConf(false);
        jc.addResource("core-default.xml");
        jc.addResource("mapred-default.xml");
        jc.addResource("yarn-default.xml");
        recomputeProperties(jc, properties);
    }

    cluster = jc.get(JOB_TRACKER_LOCATION);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null) {
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);
    }

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(JOB_TRACKER_LOCATION, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    log.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode));
    // Constructor sets DEFAULT_REPLICATION_FACTOR_KEY.
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION));
    }

    // Set job-specific configuration knobs.
    jobConf = jc;
}
From source file: org.commoncrawl.service.pagerank.slave.PageRankUtils.java
License: Open Source License
private static FileSystem buildDistributionOutputStreamVector(boolean useSequenceFile, String fileNamePrefix,
        File localOutputPath, String remoteOutputPath, int myNodeIndex, int nodeCount,
        Vector<PRValueOutputStream> outputStreamVector) {

    Configuration conf = new Configuration(CrawlEnvironment.getHadoopConfig());
    conf.setInt("dfs.socket.timeout", 240000);
    conf.setInt("io.file.buffer.size", 4096 * 20);

    DistributedFileSystem hdfs = new DistributedFileSystem();

    try {
        hdfs.initialize(FileSystem.getDefaultUri(conf), conf);

        for (int i = 0; i < nodeCount; ++i) {
            // create output filename
            String fileName = fileNamePrefix + "-" + NUMBER_FORMAT.format(i);
            // remote path for this stream
            Path remotePath = new Path(remoteOutputPath, fileName);
            // remove any existing file
            CrawlEnvironment.getDefaultFileSystem().delete(remotePath, false);

            if (useSequenceFile) {
                // recreate it ...
                outputStreamVector.add(new PRSequenceFileOutputStream(conf,
                        CrawlEnvironment.getDefaultFileSystem(), remotePath));
            } else {
                // recreate it ...
                outputStreamVector.add(
                        new PROldValueOutputStream(CrawlEnvironment.getDefaultFileSystem(), remotePath));
            }
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        // Close any streams that were already created ...
        for (PRValueOutputStream streamInfo : outputStreamVector) {
            try {
                if (streamInfo != null) {
                    streamInfo.close(true);
                }
            } catch (IOException e2) {
                LOG.error(CCStringUtils.stringifyException(e2));
            }
        }
        // ... then discard them, after iteration is finished.
        outputStreamVector.clear();
    }
    return hdfs;
}
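This example initializes the client against the cluster's default URI rather than a hard-coded address, and returns the handle so the caller can close it later. A minimal sketch of that initialize-against-default-URI pattern, assuming the cluster configuration (fs.defaultFS, or fs.default.name in older releases) is on the classpath:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class DefaultUriInit {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Tuning knobs mirrored from the example above.
        conf.setInt("dfs.socket.timeout", 240000);
        conf.setInt("io.file.buffer.size", 4096 * 20);

        DistributedFileSystem hdfs = new DistributedFileSystem();
        // FileSystem.getDefaultUri(conf) reads the default filesystem URI
        // from the configuration.
        hdfs.initialize(FileSystem.getDefaultUri(conf), conf);
        try {
            System.out.println("Connected to: " + hdfs.getUri());
        } finally {
            hdfs.close();
        }
    }
}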
From source file: voldemort.store.readonly.mr.utils.HadoopUtils.java
License: Apache License
public static FileSystem getFileSystem(String hdfsUrl, boolean isLocal) throws IOException {
    // Initialize fs
    FileSystem fs;
    if (isLocal) {
        fs = FileSystem.getLocal(new Configuration());
    } else {
        fs = new DistributedFileSystem();
        try {
            fs.initialize(new URI(hdfsUrl), new Configuration());
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
    }
    return fs;
}
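A possible call site for this helper, sketched under the assumption of a hypothetical NameNode URL; note that in local mode the code above ignores the hdfsUrl argument entirely:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import voldemort.store.readonly.mr.utils.HadoopUtils;

public class GetFileSystemDemo {
    public static void main(String[] args) throws Exception {
        // Remote HDFS; the URL is illustrative.
        FileSystem hdfs = HadoopUtils.getFileSystem("hdfs://namenode:8020", false);
        try {
            System.out.println(hdfs.exists(new Path("/data")));
        } finally {
            hdfs.close();
        }

        // Local filesystem, e.g. for tests; the URL argument is ignored.
        FileSystem local = HadoopUtils.getFileSystem("ignored", true);
        local.close();
    }
}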