Example usage for org.apache.hadoop.fs FileSystem getUri

List of usage examples for org.apache.hadoop.fs FileSystem getUri

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem getUri.

Prototype

public abstract URI getUri();

Document

Returns a URI which identifies this FileSystem.
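
As a quick orientation before the project examples below, here is a minimal, illustrative sketch of calling getUri(); the configuration, class name, and paths are assumptions, not taken from any of the projects listed on this page.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // The URI identifies this FileSystem, e.g. hdfs://namenode:8020 or file:///
        URI fsUri = fs.getUri();
        System.out.println("FileSystem URI: " + fsUri);

        // A common pattern: use the URI to build a fully qualified path on that filesystem.
        Path qualified = new Path(fsUri.toString(), "/tmp/example");
        System.out.println("Qualified path: " + qualified);
    }
}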

Usage

From source file:org.gbif.ocurrence.index.solr.SolrOutputFormat.java

License:Apache License

public static void setupSolrHomeCache(File solrHome, Configuration jobConf) throws IOException {
    if (solrHome == null || !(solrHome.exists() && solrHome.isDirectory())) {
        throw new IOException("Invalid solr.home: " + solrHome);
    }
    File tmpZip = File.createTempFile("solr", "zip");
    createZip(solrHome, tmpZip);
    // Make a reasonably unique name for the zip file in the distributed cache
    // to avoid collisions if multiple jobs are running.
    String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
    jobConf.set(ZIP_NAME, hdfsZipName);

    Path zipPath = new Path("/tmp", getZipName(jobConf));
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(new Path(tmpZip.toString()), zipPath);
    final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + getZipName(jobConf));

    DistributedCache.addCacheArchive(baseZipUrl, jobConf);
    LOG.info("Set Solr cache: " + Arrays.asList(DistributedCache.getCacheArchives(jobConf)));
    // Actually send the path for the configuration zip file
    jobConf.set(SETUP_OK, zipPath.toString());
}
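
A note on the getUri() call in this example: resolving the zip path against fs.getUri() turns the bare /tmp path into a fully qualified URI on the job's default filesystem (roughly of the form hdfs://namenode:8020/tmp/<uuid>.solr.zip#<uuid>.solr.zip), and the #fragment is the symlink name the distributed cache creates in each task's working directory, so tasks can find the Solr home zip under a stable name.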

From source file:org.hdl.caffe.yarn.app.Client.java

License:Apache License

/**
 * Main run function for the client
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress="
                + node.getHttpAddress() + ", nodeRackName=" + node.getRackName() + ", nodeNumContainers="
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();

    long maxMem = appResponse.getMaximumResourceCapability().getMemorySize();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    Set<String> tags = new HashSet<String>();
    appContext.setApplicationTags(tags);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    CaffeAmContainer CaffeAmContainer = new CaffeAmContainer(this);

    // Copy the application jar to the filesystem
    FileSystem fs = FileSystem.get(conf);
    String dstJarPath = copyLocalFileToDfs(fs, appId.toString(), appMasterJar, CaffeContainer.SERVER_JAR_PATH);
    CaffeAmContainer.addToLocalResources(fs, new Path(dstJarPath), CaffeAmContainer.APPMASTER_JAR_PATH,
            localResources);

    Map<String, String> env = CaffeAmContainer.setJavaEnv(conf);
    env.put("LD_LIBRARY_PATH",
            "/root/CaffeOnSpark/caffe-public/distribute/lib:/root/CaffeOnSpark/caffe-distri/distribute/lib");

    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }

    StringBuilder command = CaffeAmContainer.makeCommands(amMemory, appMasterMainClass, containerMemory,
            containerVirtualCores, processorNum, dstJarPath, containerRetryOptions, train, solver, feature,
            label, model, output, connection);

    LOG.info("AppMaster command: " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = YarnClientUtils.getRmPrincipal(conf);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    appContext.setQueue(amQueue);

    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);
    handleSignal(appId);
    return monitorApplication(appId);

}

From source file:org.kiji.bento.BentoHBaseTestingUtility.java

License:Apache License

/**
 * Start a mini dfs cluster. We override this method in our child class so we can
 * disable formatting the filesystem between runs and so we can pass configuration options for
 * the namenode port and namenode ui address.
 *
 * @param servers How many DNs to start.
 * @param hosts hostnames DNs to run on.
 * @throws Exception If an error occurs when starting up the cluster.
 * @see {@link #shutdownMiniDFSCluster()}
 * @return The mini dfs cluster created.
 */
@Override
public MiniDFSCluster startMiniDFSCluster(int servers, final String[] hosts) throws Exception {
    // Check that there is not already a cluster running
    isRunningCluster();

    // We have to set this property as it is used by MiniCluster
    System.setProperty("test.build.data", mClusterTestDir.toString());

    // Some tests also do this:
    //  System.getProperty("test.cache.data", "build/test/cache");
    // It's also deprecated
    System.setProperty("test.cache.data", mClusterTestDir.toString());

    // Use configuration provided values for the namenode port and namenode ui port, or use
    // accepted defaults.
    Configuration conf = getConfiguration();
    int nameNodePort = FileSystem.get(conf).getUri().getPort();
    int nameNodeUiPort = getPortFromConfiguration("dfs.http.address", 50070);
    MiniDFSCluster dfsCluster = null;
    MiniDFSCluster.Builder options = new MiniDFSCluster.Builder(conf).nameNodePort(nameNodePort)
            .nameNodeHttpPort(nameNodeUiPort).numDataNodes(servers).manageNameDfsDirs(true)
            .manageDataDfsDirs(true).hosts(hosts);

    // Ok, now we can start. First try it without reformatting.
    try {
        LOG.debug("Attempting to use existing cluster storage.");
        dfsCluster = options.format(false).build();
    } catch (InconsistentFSStateException e) {
        LOG.debug("Couldn't use existing storage. Attempting to format and try again.");
        dfsCluster = options.format(true).build();
    }

    // Set this just-started cluster as our filesystem.
    FileSystem fs = dfsCluster.getFileSystem();
    conf.set("fs.defaultFS", fs.getUri().toString());
    // Do old style too just to be safe.
    conf.set("fs.default.name", fs.getUri().toString());

    // Wait for the cluster to be totally up
    dfsCluster.waitClusterUp();

    // Save the dfsCluster in the private field of the parent class.
    setField(HBaseTestingUtility.class, this, "dfsCluster", dfsCluster);

    return dfsCluster;
}
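
The reason the example above copies fs.getUri() into fs.defaultFS is that later FileSystem.get(conf) calls then resolve to the just-started cluster rather than the local filesystem. A small illustrative sketch of that pattern, using the local filesystem as a stand-in (the chosen filesystem and variable names are assumptions):

    // Sketch: make a FileSystem the default for a Configuration by copying its URI
    // into fs.defaultFS, then confirm FileSystem.get() resolves to it.
    Configuration conf = new Configuration();
    FileSystem someFs = FileSystem.getLocal(conf); // stand-in for dfsCluster.getFileSystem()
    conf.set("fs.defaultFS", someFs.getUri().toString());

    FileSystem resolved = FileSystem.get(conf);
    System.out.println("Default FS is now: " + resolved.getUri());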

From source file:org.kiji.mapreduce.IntegrationTestTableMapReducer.java

License:Apache License

@Test
public void testTableMapReducer() throws Exception {
    final Configuration conf = createConfiguration();
    final FileSystem fs = FileSystem.get(conf);

    final KijiURI uri = getKijiURI();
    final Kiji kiji = Kiji.Factory.open(uri, conf);
    try {
        final int nregions = 16;
        final KijiTableLayout tableLayout = KijiTableLayout.newLayout(KijiMRTestLayouts.getTestLayout());
        final String tableName = tableLayout.getName();
        kiji.createTable(tableName, tableLayout, nregions);

        final KijiTable table = kiji.openTable(tableName);
        try {
            {
                final KijiTableWriter writer = table.openTableWriter();
                for (int i = 0; i < 10; ++i) {
                    writer.put(table.getEntityId("row-" + i), "primitives", "int", i % 3);
                }
                writer.close();
            }

            final Path output = new Path(fs.getUri().toString(), "/table-mr-output");

            final MapReduceJob mrjob = KijiGatherJobBuilder.create().withConf(conf)
                    .withGatherer(SimpleTableMapReducer.TableMapper.class)
                    .withReducer(SimpleTableMapReducer.TableReducer.class).withInputTable(table)
                    .withOutput(new HFileMapReduceJobOutput(table, output, 16)).build();
            if (!mrjob.run()) {
                Assert.fail("Map/Reduce job failed");
            }

        } finally {
            table.close();
        }

    } finally {
        kiji.release();
    }
}

From source file:org.kitesdk.data.spi.filesystem.FileSystemDatasetRepository.java

License:Apache License

@Override
public boolean delete(String namespace, String name) {
    Preconditions.checkNotNull(namespace, "Namespace cannot be null");
    Preconditions.checkNotNull(name, "Dataset name cannot be null");

    LOG.debug("Deleting dataset:{}", name);

    DatasetDescriptor descriptor;
    try {
        descriptor = metadataProvider.load(namespace, name);
    } catch (DatasetNotFoundException ex) {
        return false;
    }

    // don't care about the return value here -- if it already doesn't exist
    // we still need to delete the data directory
    boolean changed = metadataProvider.delete(namespace, name);

    Path dataLocation = new Path(descriptor.getLocation());
    FileSystem dataFS = fsForPath(dataLocation, conf);

    if (fs.getUri().equals(dataFS.getUri())) {
        // the data location is on the right FS, so cleanlyDelete will work
        changed |= FileSystemUtil.cleanlyDelete(fs, rootDirectory, dataLocation);
    } else {
        try {
            if (dataFS.exists(dataLocation)) {
                if (dataFS.delete(dataLocation, true)) {
                    changed = true;
                } else {
                    throw new IOException(
                            "Failed to delete dataset name:" + name + " location:" + dataLocation);
                }
            }
        } catch (IOException e) {
            throw new DatasetIOException("Internal failure when removing location:" + dataLocation, e);
        }
    }
    return changed;
}
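
Design note: the fs.getUri().equals(dataFS.getUri()) check above only treats two FileSystem instances as the same filesystem when their URIs match exactly (scheme, authority, port). The compareFs example from org.openflamingo.uploader.util.FileSystemUtils further down this page shows a more tolerant comparison that canonicalizes hostnames before comparing.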

From source file:org.kitesdk.minicluster.HBaseService.java

License:Apache License

/**
 * Configure the HBase cluster before launching it
 *
 * @param config
 *          already created Hadoop configuration we'll further configure for
 *          HDFS
 * @param zkClientPort
 *          The client port zookeeper is listening on
 * @param hdfsFs
 *          The HDFS FileSystem this HBase cluster will run on top of
 * @param bindIP
 *          The IP Address to force bind all sockets on. If null, will use
 *          defaults
 * @param masterPort
 *          The port the master listens on
 * @param regionserverPort
 *          The port the regionserver listens on
 * @return The updated Configuration object.
 * @throws IOException
 */
private static Configuration configureHBaseCluster(Configuration config, int zkClientPort, FileSystem hdfsFs,
        String bindIP, int masterPort, int regionserverPort) throws IOException {
    // Configure the zookeeper port
    config.set(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(zkClientPort));
    // Initialize HDFS path configurations required by HBase
    Path hbaseDir = new Path(hdfsFs.makeQualified(hdfsFs.getHomeDirectory()), "hbase");
    FSUtils.setRootDir(config, hbaseDir);
    hdfsFs.mkdirs(hbaseDir);
    config.set("fs.defaultFS", hdfsFs.getUri().toString());
    config.set("fs.default.name", hdfsFs.getUri().toString());
    FSUtils.setVersion(hdfsFs, hbaseDir);

    // Configure the bind addresses and ports. If running in Openshift, we only
    // have permission to bind to the private IP address, accessible through an
    // environment variable.
    logger.info("HBase force binding to ip: " + bindIP);
    config.set("hbase.master.ipc.address", bindIP);
    config.set(HConstants.MASTER_PORT, Integer.toString(masterPort));
    config.set("hbase.regionserver.ipc.address", bindIP);
    config.set(HConstants.REGIONSERVER_PORT, Integer.toString(regionserverPort));
    config.set(HConstants.ZOOKEEPER_QUORUM, bindIP);

    // By default, the HBase master and regionservers will report to zookeeper
    // that its hostname is what it determines by reverse DNS lookup, and not
    // what we use as the bind address. This means when we set the bind
    // address, daemons won't actually be able to connect to each other if they
    // are different. Here, we do something that's illegal in 48 states - use
    // reflection to override a private static final field in the DNS class
    // that is a cachedHostname. This way, we are forcing the hostname that
    // reverse dns finds. This may not be compatible with newer versions of
    // Hadoop.
    try {
        Field cachedHostname = DNS.class.getDeclaredField("cachedHostname");
        cachedHostname.setAccessible(true);
        Field modifiersField = Field.class.getDeclaredField("modifiers");
        modifiersField.setAccessible(true);
        modifiersField.setInt(cachedHostname, cachedHostname.getModifiers() & ~Modifier.FINAL);
        cachedHostname.set(null, bindIP);
    } catch (Exception e) {
        // Reflection can throw so many checked exceptions. Let's wrap in an
        // IOException.
        throw new IOException(e);
    }

    // By setting the info ports to -1, we won't launch the master or
    // regionserver info web interfaces
    config.set(HConstants.MASTER_INFO_PORT, "-1");
    config.set(HConstants.REGIONSERVER_INFO_PORT, "-1");
    return config;
}

From source file:org.lilyproject.hadooptestfw.fork.HBaseTestingUtility.java

License:Apache License

/**
 * Start a minidfscluster.
 * Can only create one.
 *
 * @param servers How many DNs to start.
 * @param hosts   hostnames DNs to run on.
 * @return The mini dfs cluster created.
 * @see {@link #shutdownMiniDFSCluster()}
 */
public MiniDFSCluster startMiniDFSCluster(int servers, final String hosts[]) throws Exception {

    // Check that there is not already a cluster running
    isRunningCluster();

    // Initialize the local directory used by the MiniDFS
    if (clusterTestDir == null) {
        setupClusterTestDir();
    }

    // We have to set this property as it is used by MiniCluster
    System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestDir.toString());

    // Some tests also do this:
    //  System.getProperty("test.cache.data", "build/test/cache");
    // It's also deprecated
    System.setProperty("test.cache.data", this.clusterTestDir.toString());

    // Ok, now we can start

    // Lily change: first argument changed from 0 to 8020
    // Lily change: let the formatting of NameNode and DataNodes depend on whether the dir is empty
    boolean format = this.clusterTestDir.list().length == 0;

    this.dfsCluster = new MiniDFSCluster(8020 /* Lily change */, this.conf, servers, format /* Lily change */,
            true, true, null, null, hosts, null);

    // Set this just-started cluster as our filesystem.
    FileSystem fs = this.dfsCluster.getFileSystem();
    this.conf.set("fs.defaultFS", fs.getUri().toString());
    // Do old style too just to be safe.
    // Lily change: commented this out to avoid warnings
    // this.conf.set("fs.default.name", fs.getUri().toString());

    // Wait for the cluster to be totally up
    this.dfsCluster.waitClusterUp();

    return this.dfsCluster;
}

From source file:org.lilyproject.testfw.HadoopLauncher.java

License:Apache License

public MiniHBaseCluster startMiniCluster(final int servers) throws Exception {
    // Make a new random dir to home everything in.  Set it as system property.
    // minidfs reads home from system property.
    this.clusterTestBuildDir = setupClusterTestBuildDir();
    System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestBuildDir.getPath());
    // Bring up mini dfs cluster. This spews a bunch of warnings about missing
    // scheme. Complaints are 'Scheme is undefined for build/test/data/dfs/name1'.
    startMiniDFSCluster(servers, this.clusterTestBuildDir);

    // Mangle conf so fs parameter points to minidfs we just started up
    FileSystem fs = this.dfsCluster.getFileSystem();
    this.conf.set("fs.defaultFS", fs.getUri().toString());
    // Do old style too just to be safe.
    this.conf.set("fs.default.name", fs.getUri().toString());
    this.dfsCluster.waitClusterUp();

    // Start up a zk cluster.
    if (this.zkCluster == null) {
        startMiniZKCluster(this.clusterTestBuildDir);
    }

    // Now do the mini hbase cluster.  Set the hbase.rootdir in config.
    Path hbaseRootdir = fs.makeQualified(fs.getHomeDirectory());
    this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    Configuration c = new Configuration(this.conf);
    this.hbaseCluster = new MiniHBaseCluster(c, servers);
    // Don't leave here till we've done a successful scan of the .META.
    HTable t = new HTable(c, HConstants.META_TABLE_NAME);
    ResultScanner s = t.getScanner(new Scan());
    while (s.next() != null)
        continue;

    return this.hbaseCluster;
}

From source file:org.openflamingo.uploader.util.FileSystemUtils.java

License:Open Source License

/**
 * Checks whether two FileSystems point to the same filesystem by comparing the
 * scheme, host, and port of their URIs.
 *
 * @param fs1 the first FileSystem
 * @param fs2 the second FileSystem
 * @return <tt>true</tt> if both refer to the same filesystem
 */
private static boolean compareFs(FileSystem fs1, FileSystem fs2) {
    URI uri1 = fs1.getUri();
    URI uri2 = fs2.getUri();
    if (uri1.getScheme() == null) {
        return false;
    }
    if (!uri1.getScheme().equals(uri2.getScheme())) {
        return false;
    }
    String srcHost = uri1.getHost();
    String dstHost = uri2.getHost();
    if ((srcHost != null) && (dstHost != null)) {
        try {
            srcHost = InetAddress.getByName(srcHost).getCanonicalHostName();
            dstHost = InetAddress.getByName(dstHost).getCanonicalHostName();
        } catch (UnknownHostException ue) {
            return false;
        }
        if (!srcHost.equals(dstHost)) {
            return false;
        }
    } else if (srcHost == null && dstHost != null) {
        return false;
    } else if (srcHost != null) {
        return false;
    }
    // Finally, compare the ports.
    return uri1.getPort() == uri2.getPort();
}
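
A brief usage sketch for compareFs above; the paths, hostnames, and the copy-or-rename decision are illustrative assumptions, and the helper is private in the original class, so code like this only works from within that class (or after relaxing its visibility). FileUtil here is org.apache.hadoop.fs.FileUtil.

    // Illustrative only: decide between an in-place rename and a cross-filesystem copy
    // based on whether the source and destination live on the same filesystem.
    Configuration conf = new Configuration();
    Path src = new Path("hdfs://namenode.example.com:8020/data/in/part-00000");
    Path dst = new Path("hdfs://namenode.example.com:8020/data/out/part-00000");

    FileSystem srcFs = src.getFileSystem(conf);
    FileSystem dstFs = dst.getFileSystem(conf);

    if (compareFs(srcFs, dstFs)) {
        srcFs.rename(src, dst);                             // same filesystem: rename is enough
    } else {
        FileUtil.copy(srcFs, src, dstFs, dst, false, conf); // different filesystems: copy instead
    }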

From source file:org.openflamingo.util.FileSystemUtils.java

License:Apache License

/**
 * Tests whether a directory can be created at the given path.
 * If the path already exists but is not a directory, an exception is thrown.
 *
 * @param path the directory path to create or verify
 * @throws org.openflamingo.core.exception.FileSystemException if the path exists but is not a
 *         directory, or if the directory cannot be created
 */
public static void testCreateDir(Path path) {
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        //            FileSystem fs = path.getFileSystem(conf);

        System.out.println("--------New------------------");
        System.out.println("--------fs--------" + fs);
        System.out.println("--------fs getUri--------" + fs.getUri());
        System.out.println("--------fs getWorkingDirectory--------" + fs.getWorkingDirectory());
        System.out.println("--------fs getHomeDirectory--------" + fs.getHomeDirectory());

        System.out.println("--------path--------" + path);
        System.out.println("--------fs.exists(path)--------" + fs.exists(path));
        System.out.println("--------fs.mkdirs(path--------" + fs.mkdirs(path));

        if (fs.exists(path) && !fs.getFileStatus(path).isDir()) {
            throw new FileSystemException(ExceptionUtils.getMessage("'{}' is not directory.", path));
        }

        if (!fs.exists(path)) {
            if (!fs.mkdirs(path)) {
                throw new FileSystemException(ExceptionUtils.getMessage("Cannot create '{}'", path));
            }
        }
    } catch (Exception ex) {
        String message = MessageFormatter.format("Cannot create '{}'", path.toString()).getMessage();
        throw new FileSystemException(message, ex);
    }
}