Example usage for org.apache.hadoop.fs.FileSystem.get(Configuration)

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.get(Configuration).

Prototype

public static FileSystem get(Configuration conf) throws IOException

Source Link

Document

Returns the configured FileSystem implementation.

Usage

From source file:cmd.download.java

License:Apache License

/**
 * Tool entry point. Expects exactly three positional arguments:
 * {@code <input node table> <input b+tree indexes> <output>}.
 *
 * <p>Builds a TDB dataset at the output location, syncs and closes it, then
 * invokes the merge helpers against the configured {@link FileSystem} for the
 * node table and the B+Tree indexes.
 *
 * @param args positional command-line arguments (after generic options)
 * @return {@code 0} when all steps ran, {@code -1} when the argument count is wrong
 * @throws Exception propagated from any of the dataset, file system or merge calls
 */
@Override
public int run(String[] args) throws Exception {
    final boolean wrongArgCount = args.length != 3;
    if (wrongArgCount) {
        System.err.printf("Usage: %s [generic options] <input node table> <input b+tree indexes> <output>\n",
                getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final String nodeTableInput = args[0];
    final String indexesInput = args[1];
    final String outputDir = args[2];

    Configuration hadoopConf = getConf();

    // Create the on-disk dataset layout at the output location, then release it
    // before files underneath it are replaced.
    Location outputLocation = new Location(outputDir);
    DatasetGraphTDB diskDataset = SetupTDB.buildDataset(outputLocation);
    diskDataset.sync();
    diskDataset.close();

    FileSystem hadoopFs = FileSystem.get(hadoopConf);

    // Node table
    // NOTE(review): the file is removed from args[1] (the b+tree indexes input),
    // while the dataset was built at args[2] -- confirm this is the intended directory.
    File nodesFile = new File(indexesInput, "nodes.dat");
    nodesFile.delete();
    Path nodeTablePath = new Path(nodeTableInput);
    mergeToLocalFile(hadoopFs, nodeTablePath, outputDir, hadoopConf);
    // TODO: this is a sort of a cheat and it could go away (if it turns out to be too slow)!
    fixNodeTable2(outputLocation);

    // B+Tree indexes
    Path indexesPath = new Path(indexesInput);
    mergeToLocalFile2(hadoopFs, indexesPath, outputDir, hadoopConf);

    return 0;
}

From source file:cmd.freebase2hdfs.java

License:Apache License

/**
 * Copies a Freebase data dump from a URL into the configured Hadoop file system.
 *
 * <p>With one argument, the argument is the destination and the source defaults to
 * the latest Freebase quadruples dump. With two arguments, the first is the source
 * URL and the second is the destination. Any other argument count prints usage.
 *
 * @param args positional command-line arguments (after generic options)
 * @return {@code 0} after the copy completed, {@code -1} when the argument count is wrong
 * @throws Exception if the URL cannot be opened, the destination cannot be created,
 *         or the copy fails
 */
@Override
public int run(String[] args) throws Exception {
    String input = null;
    String output = null;
    if (args.length == 1) {
        input = "http://download.freebase.com/datadumps/latest/freebase-datadump-quadruples.tsv.bz2";
        output = args[0];
    } else if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.printf(
                "Usage: %s [generic options] [<http://path/to/freebase/datadump>] <hdfs://path/to/destination>\n",
                getClass().getName());
        System.err.println(
                "[<http://path/to/freebase/datadump>] is optional, it defaults to http://download.freebase.com/datadumps/latest/freebase-datadump-quadruples.tsv.bz2\n");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    FileSystem fs = FileSystem.get(configuration);
    Path outputPath = new Path(output);
    InputStream in = new URL(input).openStream();
    try {
        FSDataOutputStream out = fs.create(outputPath);
        // close == true: copyBytes closes both streams when the copy ends or fails.
        IOUtils.copyBytes(in, out, BUFFER_SIZE, true);
    } finally {
        // Covers the path where fs.create() throws before copyBytes owns the stream;
        // a second close after a successful copy is harmless.
        IOUtils.closeStream(in);
    }

    return 0;
}

From source file:cn.edu.buaa.act.petuumOnYarn.ApplicationMaster.java

License:Apache License

/**
 * Builds the host file text from the allocated IPs and writes it to the file system.
 *
 * <p>Each line has the form {@code <index> <ip> <startPort>}. The target path is
 * resolved under the file system's home directory, optionally prefixed with
 * {@code petuumHDFSPathPrefix}, and stored in {@code hostfileHDFSPath}.
 * Failures are logged and not propagated to the caller.
 *
 * @param allocatedIpList IP addresses, in the order they should be numbered
 */
private void processMachineFile(List<String> allocatedIpList) {
    try {
        // StringBuilder avoids re-copying the whole text on every iteration.
        StringBuilder hostLines = new StringBuilder();
        for (int i = 0; i < allocatedIpList.size(); i++) {
            hostLines.append(i).append(" ").append(allocatedIpList.get(i)).append(" ").append(startPort)
                    .append("\n");
        }
        String hostfileText = hostLines.toString().trim();
        LOG.info("server text:" + hostfileText);

        FileSystem fs = FileSystem.get(conf);
        if (petuumHDFSPathPrefix.equals("")) {
            hostfileHDFSPath = new Path(fs.getHomeDirectory(), hostfileIdentifier).toUri().toString();
        } else {
            hostfileHDFSPath = new Path(fs.getHomeDirectory(), petuumHDFSPathPrefix + hostfileIdentifier)
                    .toUri().toString();
        }
        LOG.info("Hostfile being writen to " + hostfileHDFSPath);
        YarnUtil.writeFileHDFS(fs, hostfileHDFSPath, hostfileText);
    } catch (Exception e) {
        // Still best-effort, but report through the logger with the real operation
        // (a write, not a read) and the full cause instead of stdout/printStackTrace.
        LOG.error("Failed to write hostfile to HDFS", e);
    }
}

From source file:cn.edu.buaa.act.petuumOnYarn.Client.java

License:Apache License

/**
 * Main run function for the client/*  www.  j a v a  2s  .c om*/
 * 
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();
    String[] s;
    s = conf.getStrings(YarnConfiguration.RM_ADDRESS);
    for (String ss : s)
        LOG.info("RM address: " + ss);
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers() + ", nodeIdHost" + node.getNodeId().getHost());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of
    // the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    YarnUtil.copyAndAddToLocalResources(fs, appMasterJar, petuumHDFSPathPrefix, appMasterJarPath,
            localResources, null);
    scriptHDFSPath = YarnUtil.copyToHDFS(fs, scriptPath, petuumHDFSPathPrefix, launchPath, null);
    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        YarnUtil.copyAndAddToLocalResources(fs, log4jPropFile, petuumHDFSPathPrefix, log4jPath, localResources,
                null);
    }

    // Set the env variables to be setup in the env where the application
    // master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_nodes " + String.valueOf(numNodes));
    vargs.add("--start_port " + String.valueOf(startPort));
    vargs.add("--priority " + String.valueOf(workerPriority));
    vargs.add("--script_hdfs_path " + scriptHDFSPath);

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final commmand
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and
        // MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp =
    // applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on
    // success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // Monitor the application
    currentTime = System.currentTimeMillis();
    LOG.info("submit AM in " + (currentTime - startTime) + "ms");
    return monitorApplication(appId);
}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java

/**
 * Runs a map-only style "dbreader" job that reads the sequence files under
 * {@code task2/crawldb/current} and writes them as text to {@code output}.
 * An existing {@code output} path is removed first.
 *
 * @param args unused
 * @throws Exception if file system access or job submission fails, or if the
 *         job completes unsuccessfully
 */
public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");

    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);

    if (fs.exists(output)) {
        // Explicit recursive delete; the single-argument overload is deprecated.
        fs.delete(output, true);
    }

    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);

    // Surface job failure instead of exiting as if the run succeeded.
    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("dbreader job failed for " + crawlPath);
    }
}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.Injector.java

/**
 * Writes the given datums to a temporary sequence file under
 * {@code <crawlPath>/crawldb/temp}, merges that into the crawl database via
 * {@code Merge.merge} and {@code Merge.install}, then removes the temporary directory.
 *
 * @param crawlPath root path of the crawl
 * @param datums    datums to inject; each is written keyed by {@code datum.getKey()}
 * @param conf      Hadoop configuration used for file system access and the merge
 * @throws IOException if file system access or writing the sequence file fails
 * @throws Exception   propagated from the merge step
 */
public static void inject(Path crawlPath, CrawlDatums datums, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException, Exception {
    Path crawldbPath = new Path(crawlPath, "crawldb");
    FileSystem fs = FileSystem.get(conf);
    Path tempdb = new Path(crawldbPath, "temp");
    if (fs.exists(tempdb)) {
        // Explicit recursive delete; the single-argument overload is deprecated.
        fs.delete(tempdb, true);
    }

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(tempdb, "info"), Text.class,
            CrawlDatum.class);
    try {
        for (CrawlDatum datum : datums) {
            String key = datum.getKey();
            writer.append(new Text(key), datum);
            LOG.info("inject:" + key);
        }
    } finally {
        // Close even when an append fails so the underlying stream is not leaked.
        writer.close();
    }

    Path[] mergePaths = new Path[] { tempdb };

    Merge.merge(crawlPath, mergePaths, conf, "inject");
    Merge.install(crawlPath, conf);

    if (fs.exists(tempdb)) {
        fs.delete(tempdb, true);
    }

}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java

/**
 * Runs a MapReduce job that merges {@code <crawlPath>/crawldb/current} (when it
 * exists) with the given paths into {@code <crawlPath>/crawldb/new}. An existing
 * {@code new} directory is removed before the job starts.
 *
 * @param crawlPath  root path of the crawl
 * @param mergePaths additional sequence-file inputs to merge
 * @param conf       Hadoop configuration for the job
 * @param jobName    prefix of the job name
 * @throws Exception if file system access or job submission fails, or if the job
 *         completes unsuccessfully
 */
public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName)
        throws Exception {

    Job job = new Job(conf);
    job.setJobName(jobName + "  " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }

    if (fs.exists(newdb)) {
        // Explicit recursive delete; the single-argument overload is deprecated.
        fs.delete(newdb, true);
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }
    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    // A failed job must not look like success: callers go on to install the output.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Merge job '" + jobName + "' failed for " + crawlPath);
    }

}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java

/**
 * Promotes {@code <crawlPath>/crawldb/new} to {@code current}. When a
 * {@code current} directory already exists it is first moved to {@code old},
 * replacing any previous {@code old}.
 *
 * @param crawlPath root path of the crawl
 * @param conf      Hadoop configuration used to obtain the file system
 * @throws IOException if file system access fails, or if a delete or rename
 *         reports failure
 */
public static void install(Path crawlPath, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");
    Path olddb = new Path(crawldbPath, "old");
    if (fs.exists(currentdb)) {
        // delete/rename signal failure through their boolean result; check it so a
        // half-finished swap is reported instead of silently ignored.
        if (fs.exists(olddb) && !fs.delete(olddb, true)) {
            throw new IOException("Failed to delete " + olddb);
        }
        if (!fs.rename(currentdb, olddb)) {
            throw new IOException("Failed to rename " + currentdb + " to " + olddb);
        }
    }
    if (!fs.rename(newdb, currentdb)) {
        throw new IOException("Failed to rename " + newdb + " to " + currentdb);
    }
}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.SegmentUtil.java

/**
 * Ensures that {@code <crawlPath>/segments} exists, creating it when absent.
 *
 * @param crawlPath root path of the crawl
 * @param conf      Hadoop configuration used to obtain the file system
 * @throws IOException if file system access fails or the directory cannot be created
 */
public static void initSegments(Path crawlPath, Configuration conf) throws IOException {
    Path segmentsPath = new Path(crawlPath, "segments");
    FileSystem fs = FileSystem.get(conf);
    // mkdirs reports failure through its boolean result; do not ignore it.
    if (!fs.exists(segmentsPath) && !fs.mkdirs(segmentsPath)) {
        throw new IOException("Failed to create segments directory " + segmentsPath);
    }
}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.SegmentUtil.java

/**
 * Creates a new segment directory {@code <crawlPath>/segments/<name>}, where the
 * name comes from {@code createSegmentName()}.
 *
 * @param crawlPath root path of the crawl
 * @param conf      Hadoop configuration used to obtain the file system
 * @return the name of the created segment
 * @throws IOException if file system access fails or the directory cannot be created
 */
public static String createSegment(Path crawlPath, Configuration conf) throws IOException {
    String segmentName = createSegmentName();
    FileSystem fs = FileSystem.get(conf);
    Path segmentPath = new Path(crawlPath, "segments/" + segmentName);
    // Do not hand back a segment name whose directory was never created.
    if (!fs.mkdirs(segmentPath)) {
        throw new IOException("Failed to create segment directory " + segmentPath);
    }
    return segmentName;
}