Example usage for org.apache.hadoop.fs FileSystem getUri

List of usage examples for org.apache.hadoop.fs FileSystem getUri

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem getUri.

Prototype

public abstract URI getUri();

Document

Returns a URI which identifies this FileSystem.
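
As a quick orientation before the project examples below, here is a minimal, illustrative sketch of calling getUri(); the configuration, class name, and paths are assumptions, not taken from any of the projects listed on this page.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // The URI identifies this FileSystem, e.g. hdfs://namenode:8020 or file:///
        URI fsUri = fs.getUri();
        System.out.println("FileSystem URI: " + fsUri);

        // A common pattern: use the URI to build a fully qualified path on that filesystem.
        Path qualified = new Path(fsUri.toString(), "/tmp/example");
        System.out.println("Qualified path: " + qualified);
    }
}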

Usage

From source file:org.gbif.ocurrence.index.solr.SolrOutputFormat.java

License:Apache License

public static void setupSolrHomeCache(File solrHome, Configuration jobConf) throws IOException {
    if (solrHome == null || !(solrHome.exists() && solrHome.isDirectory())) {
        throw new IOException("Invalid solr.home: " + solrHome);
    }
    File tmpZip = File.createTempFile("solr", "zip");
    createZip(solrHome, tmpZip);
    // Make a reasonably unique name for the zip file in the distributed cache
    // to avoid collisions if multiple jobs are running.
    String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
    jobConf.set(ZIP_NAME, hdfsZipName);

    Path zipPath = new Path("/tmp", getZipName(jobConf));
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(new Path(tmpZip.toString()), zipPath);
    final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + getZipName(jobConf));

    DistributedCache.addCacheArchive(baseZipUrl, jobConf);
    LOG.info("Set Solr cache: " + Arrays.asList(DistributedCache.getCacheArchives(jobConf)));
    // Actually send the path for the configuration zip file
    jobConf.set(SETUP_OK, zipPath.toString());
}
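
A note on the getUri() call in this example: resolving the zip path against fs.getUri() turns the bare /tmp path into a fully qualified URI on the job's default filesystem (roughly of the form hdfs://namenode:8020/tmp/<uuid>.solr.zip#<uuid>.solr.zip), and the #fragment is the symlink name the distributed cache creates in each task's working directory, so tasks can find the Solr home zip under a stable name.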

From source file:org.hdl.caffe.yarn.app.Client.java

License:Apache License

/**
 * Main run function for the client
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress="
                + node.getHttpAddress() + ", nodeRackName=" + node.getRackName() + ", nodeNumContainers="
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();

    long maxMem = appResponse.getMaximumResourceCapability().getMemorySize();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    Set<String> tags = new HashSet<String>();
    appContext.setApplicationTags(tags);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    CaffeAmContainer CaffeAmContainer = new CaffeAmContainer(this);

    // Copy the application jar to the filesystem
    FileSystem fs = FileSystem.get(conf);
    String dstJarPath = copyLocalFileToDfs(fs, appId.toString(), appMasterJar, CaffeContainer.SERVER_JAR_PATH);
    CaffeAmContainer.addToLocalResources(fs, new Path(dstJarPath), CaffeAmContainer.APPMASTER_JAR_PATH,
            localResources);

    Map<String, String> env = CaffeAmContainer.setJavaEnv(conf);
    env.put("LD_LIBRARY_PATH",
            "/root/CaffeOnSpark/caffe-public/distribute/lib:/root/CaffeOnSpark/caffe-distri/distribute/lib");

    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }

    StringBuilder command = CaffeAmContainer.makeCommands(amMemory, appMasterMainClass, containerMemory,
            containerVirtualCores, processorNum, dstJarPath, containerRetryOptions, train, solver, feature,
            label, model, output, connection);

    LOG.info("AppMaster command: " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = YarnClientUtils.getRmPrincipal(conf);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    appContext.setQueue(amQueue);

    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);
    handleSignal(appId);
    return monitorApplication(appId);

}

From source file:org.kiji.bento.BentoHBaseTestingUtility.java

License:Apache License

/**
 * Start a mini dfs cluster. We override this method in our child class so we can
 * disable formatting the filesystem between runs and so we can pass configuration options for
 * the namenode port and namenode ui address.
 *
 * @param servers How many DNs to start.
 * @param hosts hostnames DNs to run on.
 * @throws Exception If an error occurs when starting up the cluster.
 * @see {@link #shutdownMiniDFSCluster()}
 * @return The mini dfs cluster created.
 */
@Override
public MiniDFSCluster startMiniDFSCluster(int servers, final String[] hosts) throws Exception {
    // Check that there is not already a cluster running
    isRunningCluster();

    // We have to set this property as it is used by MiniCluster
    System.setProperty("test.build.data", mClusterTestDir.toString());

    // Some tests also do this:
    //  System.getProperty("test.cache.data", "build/test/cache");
    // It's also deprecated
    System.setProperty("test.cache.data", mClusterTestDir.toString());

    // Use configuration provided values for the namenode port and namenode ui port, or use
    // accepted defaults.
    Configuration conf = getConfiguration();
    int nameNodePort = FileSystem.get(conf).getUri().getPort();
    int nameNodeUiPort = getPortFromConfiguration("dfs.http.address", 50070);
    MiniDFSCluster dfsCluster = null;
    MiniDFSCluster.Builder options = new MiniDFSCluster.Builder(conf).nameNodePort(nameNodePort)
            .nameNodeHttpPort(nameNodeUiPort).numDataNodes(servers).manageNameDfsDirs(true)
            .manageDataDfsDirs(true).hosts(hosts);

    // Ok, now we can start. First try it without reformatting.
    try {
        LOG.debug("Attempting to use existing cluster storage.");
        dfsCluster = options.format(false).build();
    } catch (InconsistentFSStateException e) {
        LOG.debug("Couldn't use existing storage. Attempting to format and try again.");
        dfsCluster = options.format(true).build();
    }

    // Set this just-started cluster as our filesystem.
    FileSystem fs = dfsCluster.getFileSystem();
    conf.set("fs.defaultFS", fs.getUri().toString());
    // Do old style too just to be safe.
    conf.set("fs.default.name", fs.getUri().toString());

    // Wait for the cluster to be totally up
    dfsCluster.waitClusterUp();

    // Save the dfsCluster in the private field of the parent class.
    setField(HBaseTestingUtility.class, this, "dfsCluster", dfsCluster);

    return dfsCluster;
}
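
The reason the example above copies fs.getUri() into fs.defaultFS is that later FileSystem.get(conf) calls then resolve to the just-started cluster rather than the local filesystem. A small illustrative sketch of that pattern, using the local filesystem as a stand-in (the chosen filesystem and variable names are assumptions):

    // Sketch: make a FileSystem the default for a Configuration by copying its URI
    // into fs.defaultFS, then confirm FileSystem.get() resolves to it.
    Configuration conf = new Configuration();
    FileSystem someFs = FileSystem.getLocal(conf); // stand-in for dfsCluster.getFileSystem()
    conf.set("fs.defaultFS", someFs.getUri().toString());

    FileSystem resolved = FileSystem.get(conf);
    System.out.println("Default FS is now: " + resolved.getUri());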

From source file:org.kiji.mapreduce.IntegrationTestTableMapReducer.java

License:Apache License

@Test
public void testTableMapReducer() throws Exception {
    final Configuration conf = createConfiguration();
    final FileSystem fs = FileSystem.get(conf);

    final KijiURI uri = getKijiURI();
    final Kiji kiji = Kiji.Factory.open(uri, conf);
    try {
        final int nregions = 16;
        final KijiTableLayout tableLayout = KijiTableLayout.newLayout(KijiMRTestLayouts.getTestLayout());
        final String tableName = tableLayout.getName();
        kiji.createTable(tableName, tableLayout, nregions);

        final KijiTable table = kiji.openTable(tableName);
        try {
            {
                final KijiTableWriter writer = table.openTableWriter();
                for (int i = 0; i < 10; ++i) {
                    writer.put(table.getEntityId("row-" + i), "primitives", "int", i % 3);
                }
                writer.close();
            }

            final Path output = new Path(fs.getUri().toString(), "/table-mr-output");

            final MapReduceJob mrjob = KijiGatherJobBuilder.create().withConf(conf)
                    .withGatherer(SimpleTableMapReducer.TableMapper.class)
                    .withReducer(SimpleTableMapReducer.TableReducer.class).withInputTable(table)
                    .withOutput(new HFileMapReduceJobOutput(table, output, 16)).build();
            if (!mrjob.run()) {
                Assert.fail("Map/Reduce job failed");
            }

        } finally {
            table.close();
        }

    } finally {
        kiji.release();
    }
}

From source file:org.kitesdk.data.spi.filesystem.FileSystemDatasetRepository.java

License:Apache License

@Override
public boolean delete(String namespace, String name) {
    Preconditions.checkNotNull(namespace, "Namespace cannot be null");
    Preconditions.checkNotNull(name, "Dataset name cannot be null");

    LOG.debug("Deleting dataset:{}", name);

    DatasetDescriptor descriptor;
    try {
        descriptor = metadataProvider.load(namespace, name);
    } catch (DatasetNotFoundException ex) {
        return false;
    }

    // don't care about the return value here -- if it already doesn't exist
    // we still need to delete the data directory
    boolean changed = metadataProvider.delete(namespace, name);

    Path dataLocation = new Path(descriptor.getLocation());
    FileSystem dataFS = fsForPath(dataLocation, conf);

    if (fs.getUri().equals(dataFS.getUri())) {
        // the data location is on the right FS, so cleanlyDelete will work
        changed |= FileSystemUtil.cleanlyDelete(fs, rootDirectory, dataLocation);
    } else {
        try {
            if (dataFS.exists(dataLocation)) {
                if (dataFS.delete(dataLocation, true)) {
                    changed = true;
                } else {
                    throw new IOException(
                            "Failed to delete dataset name:" + name + " location:" + dataLocation);
                }
            }
        } catch (IOException e) {
            throw new DatasetIOException("Internal failure when removing location:" + dataLocation, e);
        }
    }
    return changed;
}
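
Design note: the fs.getUri().equals(dataFS.getUri()) check above only treats two FileSystem instances as the same filesystem when their URIs match exactly (scheme, authority, port). The compareFs example from org.openflamingo.uploader.util.FileSystemUtils further down this page shows a more tolerant comparison that canonicalizes hostnames before comparing.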

From source file:org.kitesdk.minicluster.HBaseService.java

License:Apache License

/**
 * Configure the HBase cluster before launching it
 *
 * @param config
 *          already created Hadoop configuration we'll further configure for
 *          HDFS
 * @param zkClientPort
 *          The client port zookeeper is listening on
 * @param hdfsFs
 *          The HDFS FileSystem this HBase cluster will run on top of
 * @param bindIP
 *          The IP Address to force bind all sockets on. If null, will use
 *          defaults
 * @param masterPort
 *          The port the master listens on
 * @param regionserverPort
 *          The port the regionserver listens on
 * @return The updated Configuration object.
 * @throws IOException
 */
private static Configuration configureHBaseCluster(Configuration config, int zkClientPort, FileSystem hdfsFs,
        String bindIP, int masterPort, int regionserverPort) throws IOException {
    // Configure the zookeeper port
    config.set(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(zkClientPort));
    // Initialize HDFS path configurations required by HBase
    Path hbaseDir = new Path(hdfsFs.makeQualified(hdfsFs.getHomeDirectory()), "hbase");
    FSUtils.setRootDir(config, hbaseDir);
    hdfsFs.mkdirs(hbaseDir);
    config.set("fs.defaultFS", hdfsFs.getUri().toString());
    config.set("fs.default.name", hdfsFs.getUri().toString());
    FSUtils.setVersion(hdfsFs, hbaseDir);

    // Configure the bind addresses and ports. If running in Openshift, we only
    // have permission to bind to the private IP address, accessible through an
    // environment variable.
    logger.info("HBase force binding to ip: " + bindIP);
    config.set("hbase.master.ipc.address", bindIP);
    config.set(HConstants.MASTER_PORT, Integer.toString(masterPort));
    config.set("hbase.regionserver.ipc.address", bindIP);
    config.set(HConstants.REGIONSERVER_PORT, Integer.toString(regionserverPort));
    config.set(HConstants.ZOOKEEPER_QUORUM, bindIP);

    // By default, the HBase master and regionservers will report to zookeeper
    // that its hostname is what it determines by reverse DNS lookup, and not
    // what we use as the bind address. This means when we set the bind
    // address, daemons won't actually be able to connect to each other if they
    // are different. Here, we do something that's illegal in 48 states - use
    // reflection to override a private static final field in the DNS class
    // that is a cachedHostname. This way, we are forcing the hostname that
    // reverse dns finds. This may not be compatible with newer versions of
    // Hadoop.
    try {
        Field cachedHostname = DNS.class.getDeclaredField("cachedHostname");
        cachedHostname.setAccessible(true);
        Field modifiersField = Field.class.getDeclaredField("modifiers");
        modifiersField.setAccessible(true);
        modifiersField.setInt(cachedHostname, cachedHostname.getModifiers() & ~Modifier.FINAL);
        cachedHostname.set(null, bindIP);
    } catch (Exception e) {
        // Reflection can throw so many checked exceptions. Let's wrap in an
        // IOException.
        throw new IOException(e);
    }

    // By setting the info ports to -1, we won't launch the master or
    // regionserver info web interfaces
    config.set(HConstants.MASTER_INFO_PORT, "-1");
    config.set(HConstants.REGIONSERVER_INFO_PORT, "-1");
    return config;
}

From source file:org.lilyproject.hadooptestfw.fork.HBaseTestingUtility.java

License:Apache License

/**
 * Start a minidfscluster.
 * Can only create one.
 *
 * @param servers How many DNs to start.
 * @param hosts   hostnames DNs to run on.
 * @return The mini dfs cluster created.
 * @see {@link #shutdownMiniDFSCluster()}
 */
public MiniDFSCluster startMiniDFSCluster(int servers, final String hosts[]) throws Exception {

    // Check that there is not already a cluster running
    isRunningCluster();

    // Initialize the local directory used by the MiniDFS
    if (clusterTestDir == null) {
        setupClusterTestDir();
    }

    // We have to set this property as it is used by MiniCluster
    System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestDir.toString());

    // Some tests also do this:
    //  System.getProperty("test.cache.data", "build/test/cache");
    // It's also deprecated
    System.setProperty("test.cache.data", this.clusterTestDir.toString());

    // Ok, now we can start

    // Lily change: first argument changed from 0 to 8020
    // Lily change: let the formatting of NameNode and DataNodes depend on whether the dir is empty
    boolean format = this.clusterTestDir.list().length == 0;

    this.dfsCluster = new MiniDFSCluster(8020 /* Lily change */, this.conf, servers, format /* Lily change */,
            true, true, null, null, hosts, null);

    // Set this just-started cluster as our filesystem.
    FileSystem fs = this.dfsCluster.getFileSystem();
    this.conf.set("fs.defaultFS", fs.getUri().toString());
    // Do old style too just to be safe.
    // Lily change: commented this out to avoid warnings
    // this.conf.set("fs.default.name", fs.getUri().toString());

    // Wait for the cluster to be totally up
    this.dfsCluster.waitClusterUp();

    return this.dfsCluster;
}

From source file:org.lilyproject.testfw.HadoopLauncher.java

License:Apache License

public MiniHBaseCluster startMiniCluster(final int servers) throws Exception {
    // Make a new random dir to home everything in.  Set it as system property.
    // minidfs reads home from system property.
    this.clusterTestBuildDir = setupClusterTestBuildDir();
    System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestBuildDir.getPath());
    // Bring up mini dfs cluster. This spews a bunch of warnings about missing
    // scheme. Complaints are 'Scheme is undefined for build/test/data/dfs/name1'.
    startMiniDFSCluster(servers, this.clusterTestBuildDir);

    // Mangle conf so fs parameter points to minidfs we just started up
    FileSystem fs = this.dfsCluster.getFileSystem();
    this.conf.set("fs.defaultFS", fs.getUri().toString());
    // Do old style too just to be safe.
    this.conf.set("fs.default.name", fs.getUri().toString());
    this.dfsCluster.waitClusterUp();

    // Start up a zk cluster.
    if (this.zkCluster == null) {
        startMiniZKCluster(this.clusterTestBuildDir);
    }

    // Now do the mini hbase cluster.  Set the hbase.rootdir in config.
    Path hbaseRootdir = fs.makeQualified(fs.getHomeDirectory());
    this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    Configuration c = new Configuration(this.conf);
    this.hbaseCluster = new MiniHBaseCluster(c, servers);
    // Don't leave here till we've done a successful scan of the .META.
    HTable t = new HTable(c, HConstants.META_TABLE_NAME);
    ResultScanner s = t.getScanner(new Scan());
    while (s.next() != null)
        continue;

    return this.hbaseCluster;
}

From source file:org.openflamingo.uploader.util.FileSystemUtils.java

License:Open Source License

/**
 * Checks whether two FileSystems point to the same filesystem by comparing the
 * scheme, host, and port of their URIs.
 *
 * @param fs1 the first FileSystem
 * @param fs2 the second FileSystem
 * @return <tt>true</tt> if both refer to the same filesystem
 */
private static boolean compareFs(FileSystem fs1, FileSystem fs2) {
    URI uri1 = fs1.getUri();
    URI uri2 = fs2.getUri();
    if (uri1.getScheme() == null) {
        return false;
    }
    if (!uri1.getScheme().equals(uri2.getScheme())) {
        return false;
    }
    String srcHost = uri1.getHost();
    String dstHost = uri2.getHost();
    if ((srcHost != null) && (dstHost != null)) {
        try {
            srcHost = InetAddress.getByName(srcHost).getCanonicalHostName();
            dstHost = InetAddress.getByName(dstHost).getCanonicalHostName();
        } catch (UnknownHostException ue) {
            return false;
        }
        if (!srcHost.equals(dstHost)) {
            return false;
        }
    } else if (srcHost == null && dstHost != null) {
        return false;
    } else if (srcHost != null) {
        return false;
    }
    // Finally, compare the ports.
    return uri1.getPort() == uri2.getPort();
}
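
A brief usage sketch for compareFs above; the paths, hostnames, and the copy-or-rename decision are illustrative assumptions, and the helper is private in the original class, so code like this only works from within that class (or after relaxing its visibility). FileUtil here is org.apache.hadoop.fs.FileUtil.

    // Illustrative only: decide between an in-place rename and a cross-filesystem copy
    // based on whether the source and destination live on the same filesystem.
    Configuration conf = new Configuration();
    Path src = new Path("hdfs://namenode.example.com:8020/data/in/part-00000");
    Path dst = new Path("hdfs://namenode.example.com:8020/data/out/part-00000");

    FileSystem srcFs = src.getFileSystem(conf);
    FileSystem dstFs = dst.getFileSystem(conf);

    if (compareFs(srcFs, dstFs)) {
        srcFs.rename(src, dst);                             // same filesystem: rename is enough
    } else {
        FileUtil.copy(srcFs, src, dstFs, dst, false, conf); // different filesystems: copy instead
    }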

From source file:org.openflamingo.util.FileSystemUtils.java

License:Apache License

/**
 * Tests whether a directory can be created at the given path.
 * If the path already exists but is not a directory, an exception is thrown.
 *
 * @param path the directory path to create or verify
 * @throws org.openflamingo.core.exception.FileSystemException if the path exists but is not a
 *         directory, or if the directory cannot be created
 */
public static void testCreateDir(Path path) {
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        //            FileSystem fs = path.getFileSystem(conf);

        System.out.println("--------New------------------");
        System.out.println("--------fs--------" + fs);
        System.out.println("--------fs getUri--------" + fs.getUri());
        System.out.println("--------fs getWorkingDirectory--------" + fs.getWorkingDirectory());
        System.out.println("--------fs getHomeDirectory--------" + fs.getHomeDirectory());

        System.out.println("--------path--------" + path);
        System.out.println("--------fs.exists(path)--------" + fs.exists(path));
        System.out.println("--------fs.mkdirs(path--------" + fs.mkdirs(path));

        if (fs.exists(path) && !fs.getFileStatus(path).isDir()) {
            throw new FileSystemException(ExceptionUtils.getMessage("'{}' is not directory.", path));
        }

        if (!fs.exists(path)) {
            if (!fs.mkdirs(path)) {
                throw new FileSystemException(ExceptionUtils.getMessage("Cannot create '{}'", path));
            }
        }
    } catch (Exception ex) {
        String message = MessageFormatter.format("Cannot create '{}'", path.toString()).getMessage();
        throw new FileSystemException(message, ex);
    }
}