Example usage for org.apache.hadoop.hdfs DistributedFileSystem DistributedFileSystem

Introduction

On this page you can find example usages of the org.apache.hadoop.hdfs DistributedFileSystem() constructor.

Prototype

public DistributedFileSystem() 
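The no-argument constructor creates an uninitialized client: nothing connects to HDFS until initialize(URI, Configuration) is called, which is why the examples below either call initialize() explicitly or invoke the constructor only for its class-loading side effect. A minimal sketch of the construct-then-initialize pattern (the namenode URI is a placeholder):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class DfsInitSketch {
    public static void main(String[] args) throws Exception {
        // The bare constructor returns an unconnected client ...
        DistributedFileSystem dfs = new DistributedFileSystem();
        // ... so it must be initialized against a namenode URI before use.
        dfs.initialize(new URI("hdfs://namenode.example.com:8020"), new Configuration());
        try {
            System.out.println(dfs.exists(new Path("/")));
        } finally {
            dfs.close();
        }
    }
}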

Usage

From source file: org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.java

License: Apache License

private void init(Properties properties) throws ExecException {
    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described
    // below and then get back a properties object to inspect the
    // JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. The reason to do this
    // is if we looked only at the existing properties object, we may not
    // get the right settings. So we want to read the configurations in the
    // order specified below and only then look for JOB_TRACKER_LOCATION and
    // FILE_SYSTEM_LOCATION.

    // Hadoop by default specifies two resources, loaded in-order from the
    // classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml: Site-specific configuration for a given hadoop
    // installation.
    // Now add the settings from the "properties" object to override any
    // existing properties. All of the above is accomplished in the method
    // call below.

    JobConf jc = null;
    if (!this.pigContext.getExecType().isLocal()) {
        jc = getExecConf(properties);

        // Trick to invoke static initializer of DistributedFileSystem to
        // add hdfs-default.xml into configuration
        new DistributedFileSystem();
    } else {
        // If we are running in local mode we don't read the hadoop conf file
        if (properties.getProperty(MRConfiguration.FRAMEWORK_NAME) == null) {
            properties.setProperty(MRConfiguration.FRAMEWORK_NAME, LOCAL);
        }
        properties.setProperty(MRConfiguration.JOB_TRACKER, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");

        jc = getLocalConf();
    }

    // the method below alters the properties object by overriding the
    // hadoop properties with the values from properties and recomputing
    // the properties
    Utils.recomputeProperties(jc, properties);

    cluster = jc.get(MRConfiguration.JOB_TRACKER);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null) {
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);
    }

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(MRConfiguration.JOB_TRACKER, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    LOG.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode));
    // constructor sets DEFAULT_REPLICATION_FACTOR_KEY
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        LOG.info("Connecting to map-reduce job tracker at: " + jc.get(MRConfiguration.JOB_TRACKER));
    }
}
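The notable idiom above is the bare new DistributedFileSystem() call: the instance is discarded, and the constructor is invoked only so that class loading runs the static initializer, which registers hdfs-default.xml (and hdfs-site.xml) as default resources for Configuration. A stripped-down sketch of the same trick, assuming dfs.replication is a key defined in hdfs-default.xml:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class StaticInitTrick {
    public static void main(String[] args) {
        // Force class loading; the static initializer registers the HDFS
        // default resources for all Configuration instances.
        new DistributedFileSystem();

        Configuration conf = new Configuration();
        // dfs.replication is read from hdfs-default.xml once registered.
        System.out.println(conf.get("dfs.replication"));
    }
}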

From source file: org.apache.pig.backend.hadoop.executionengine.MRExecutionEngine.java

License: Apache License

@SuppressWarnings({ "deprecation", "resource" })
private void init(Properties properties) throws ExecException {
    // First set the ssh socket factory
    setSSHFactory();

    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described
    // below and then get back a properties object to inspect the
    // JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. The reason to do this
    // is if we looked only at the existing properties object, we may not
    // get the right settings. So we want to read the configurations in the
    // order specified below and only then look for JOB_TRACKER_LOCATION and
    // FILE_SYSTEM_LOCATION.

    // Hadoop by default specifies two resources, loaded in-order from the
    // classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml: Site-specific configuration for a given hadoop
    // installation.
    // Now add the settings from the "properties" object to override any
    // existing properties. All of the above is accomplished in the method
    // call below.

    JobConf jc = null;
    if (!this.pigContext.getExecType().isLocal()) {
        // Check existence of user provided configs
        String isHadoopConfigsOverriden = properties.getProperty("pig.use.overriden.hadoop.configs");
        if (isHadoopConfigsOverriden != null && isHadoopConfigsOverriden.equals("true")) {
            jc = new JobConf(ConfigurationUtil.toConfiguration(properties));
        } else {
            // Check existence of hadoop-site.xml or core-site.xml in the
            // classpath if user-provided confs are not being used
            Configuration testConf = new Configuration();
            ClassLoader cl = testConf.getClassLoader();
            URL hadoop_site = cl.getResource(HADOOP_SITE);
            URL core_site = cl.getResource(CORE_SITE);

            if (hadoop_site == null && core_site == null) {
                throw new ExecException(
                        "Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)."
                                + " If you plan to use local mode, please put -x local option in command line",
                        4010);
            }
            jc = new JobConf();
        }
        jc.addResource("pig-cluster-hadoop-site.xml");
        jc.addResource(YARN_SITE);

        // Trick to invoke static initializer of DistributedFileSystem to
        // add hdfs-default.xml into configuration
        new DistributedFileSystem();

        // the method below alters the properties object by overriding the
        // hadoop properties with the values from properties and recomputing
        // the properties
        recomputeProperties(jc, properties);
    } else {
        // If we are running in local mode we don't read the hadoop conf file
        if (properties.getProperty("mapreduce.framework.name") == null) {
            properties.setProperty("mapreduce.framework.name", "local");
        }
        properties.setProperty(JOB_TRACKER_LOCATION, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");

        jc = new JobConf(false);
        jc.addResource("core-default.xml");
        jc.addResource("mapred-default.xml");
        jc.addResource("yarn-default.xml");
        recomputeProperties(jc, properties);
    }

    cluster = jc.get(JOB_TRACKER_LOCATION);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null) {
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);
    }

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(JOB_TRACKER_LOCATION, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    log.info("Connecting to hadoop file system at: " + (nameNode == null ? LOCAL : nameNode));
    // constructor sets DEFAULT_REPLICATION_FACTOR_KEY
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION));
    }

    // Set job-specific configuration knobs
    jobConf = jc;
}
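Compared with the HExecutionEngine variant, this version first probes the classpath for hadoop-site.xml or core-site.xml through the Configuration's class loader and fails fast when neither is found. A minimal sketch of that probe, assuming HADOOP_SITE and CORE_SITE resolve to the standard resource names "hadoop-site.xml" and "core-site.xml":

import java.net.URL;
import org.apache.hadoop.conf.Configuration;

public class SiteConfigProbe {
    public static void main(String[] args) {
        ClassLoader cl = new Configuration().getClassLoader();
        URL hadoopSite = cl.getResource("hadoop-site.xml");
        URL coreSite = cl.getResource("core-site.xml");
        if (hadoopSite == null && coreSite == null) {
            // No site configuration visible: a cluster connection would be
            // misconfigured, so fail fast instead of proceeding.
            throw new IllegalStateException("Neither hadoop-site.xml nor core-site.xml is on the classpath");
        }
        System.out.println("Found: " + (hadoopSite != null ? hadoopSite : coreSite));
    }
}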

From source file: org.commoncrawl.service.pagerank.slave.PageRankUtils.java

License: Open Source License

private static FileSystem buildDistributionOutputStreamVector(boolean useSequenceFile, String fileNamePrefix,
        File localOutputPath, String remoteOutputPath, int myNodeIndex, int nodeCount,
        Vector<PRValueOutputStream> outputStreamVector) {

    Configuration conf = new Configuration(CrawlEnvironment.getHadoopConfig());

    conf.setInt("dfs.socket.timeout", 240000);
    conf.setInt("io.file.buffer.size", 4096 * 20);

    DistributedFileSystem hdfs = new DistributedFileSystem();

    try {

        hdfs.initialize(FileSystem.getDefaultUri(conf), conf);

        for (int i = 0; i < nodeCount; ++i) {

            // create output filename 
            String fileName = fileNamePrefix + "-" + NUMBER_FORMAT.format(i);
            // create stream (local or remote stream, depending on i) 
            // remote path 
            Path remotePath = new Path(remoteOutputPath, fileName);
            // remove file
            CrawlEnvironment.getDefaultFileSystem().delete(remotePath, false);
            if (useSequenceFile) {
                // recreate it ... 
                outputStreamVector.add(new PRSequenceFileOutputStream(conf,
                        CrawlEnvironment.getDefaultFileSystem(), remotePath));
            } else {
                // recreate it ... 
                outputStreamVector
                        .add(new PROldValueOutputStream(CrawlEnvironment.getDefaultFileSystem(), remotePath));
            }
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        for (PRValueOutputStream streamInfo : outputStreamVector) {
            try {
                if (streamInfo != null) {
                    streamInfo.close(true);
                }
            } catch (IOException e2) {
                LOG.error(CCStringUtils.stringifyException(e2));
            }
        }
        // Clear once all streams are closed; clearing inside the loop
        // would modify the vector while it is being iterated.
        outputStreamVector.clear();
    }

    return hdfs;
}
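Unlike the Pig examples, this code actually uses the constructed instance: it calls initialize() against the cluster's default URI and tunes socket timeout and buffer size on a private Configuration copy, leaving the process-wide defaults untouched. A condensed sketch of that pattern (the timeout and buffer values are illustrative, taken from the code above):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class TunedHdfsClient {
    public static DistributedFileSystem open(Configuration base) throws IOException {
        // Copy the configuration so the tuned values stay local to this client.
        Configuration conf = new Configuration(base);
        conf.setInt("dfs.socket.timeout", 240000);     // data-transfer timeout
        conf.setInt("io.file.buffer.size", 4096 * 20); // larger stream buffer

        DistributedFileSystem hdfs = new DistributedFileSystem();
        hdfs.initialize(FileSystem.getDefaultUri(conf), conf);
        return hdfs;
    }
}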

From source file: voldemort.store.readonly.mr.utils.HadoopUtils.java

License: Apache License

public static FileSystem getFileSystem(String hdfsUrl, boolean isLocal) throws IOException {
    // Initialize fs
    FileSystem fs;
    if (isLocal) {
        fs = FileSystem.getLocal(new Configuration());
    } else {
        fs = new DistributedFileSystem();
        try {
            fs.initialize(new URI(hdfsUrl), new Configuration());
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
    }
    return fs;
}
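A caller would use this helper like so (a sketch; the namenode URL is a placeholder):

// Hypothetical caller; the hdfs:// URL is a placeholder.
FileSystem fs = HadoopUtils.getFileSystem("hdfs://namenode.example.com:8020", false);
try {
    System.out.println("Connected to " + fs.getUri());
} finally {
    fs.close();
}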