List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(Configuration conf) throws IOException
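Before the project examples below, here is a minimal, self-contained sketch of the call itself. The class name, the printed messages, and the /tmp/example.txt path are illustrative only and do not come from any of the source files listed on this page:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws Exception {
        // A Configuration picks up core-site.xml from the classpath,
        // so fs.defaultFS decides whether this resolves to HDFS or the local FS.
        Configuration conf = new Configuration();

        // FileSystem.get(Configuration) returns the default FileSystem for that config.
        FileSystem fs = FileSystem.get(conf);
        System.out.println("Default filesystem: " + fs.getUri());

        // Hypothetical path, for illustration only.
        Path p = new Path("/tmp/example.txt");
        System.out.println(p + " exists? " + fs.exists(p));
    }
}

The examples that follow show the same call in context: obtaining a FileSystem once and then using it for creates, deletes, renames, and existence checks.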
From source file:cmd.download.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.printf("Usage: %s [generic options] <input node table> <input b+tree indexes> <output>\n",
                getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Configuration configuration = getConf();
    Location location = new Location(args[2]);
    DatasetGraphTDB dsgDisk = SetupTDB.buildDataset(location);
    dsgDisk.sync();
    dsgDisk.close();
    FileSystem fs = FileSystem.get(configuration);

    // Node table
    new File(args[1], "nodes.dat").delete();
    mergeToLocalFile(fs, new Path(args[0]), args[2], configuration);
    // TODO: this is a sort of a cheat and it could go away (if it turns out to be too slow)!
    fixNodeTable2(location);

    // B+Tree indexes
    mergeToLocalFile2(fs, new Path(args[1]), args[2], configuration);

    return 0;
}
From source file:cmd.freebase2hdfs.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    String input = null;
    String output = null;
    if (args.length == 1) {
        input = "http://download.freebase.com/datadumps/latest/freebase-datadump-quadruples.tsv.bz2";
        output = args[0];
    } else if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.printf(
                "Usage: %s [generic options] [<http://path/to/freebase/datadump>] <hdfs://path/to/destination>\n",
                getClass().getName());
        System.err.println(
                "[<http://path/to/freebase/datadump>] is optional, it defaults to http://download.freebase.com/datadumps/latest/freebase-datadump-quadruples.tsv.bz2\n");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Configuration configuration = getConf();
    FileSystem fs = FileSystem.get(configuration);
    Path outputPath = new Path(output);
    InputStream in = new URL(input).openStream();
    FSDataOutputStream out = fs.create(outputPath);
    IOUtils.copyBytes(in, out, BUFFER_SIZE, true);
    return 0;
}
From source file:cn.edu.buaa.act.petuumOnYarn.ApplicationMaster.java
License:Apache License
private void processMachineFile(List<String> allocatedIpList) {
    try {
        String text = "";
        String lineTxt = "";
        for (int i = 0; i < allocatedIpList.size(); i++) {
            lineTxt = i + " " + allocatedIpList.get(i) + " " + startPort;
            text = text + lineTxt + "\n";
        }
        LOG.info("server text:" + text.trim());
        FileSystem fs = FileSystem.get(conf);
        if (petuumHDFSPathPrefix.equals("")) {
            hostfileHDFSPath = new Path(fs.getHomeDirectory(), hostfileIdentifier).toUri().toString();
        } else {
            hostfileHDFSPath = new Path(fs.getHomeDirectory(), petuumHDFSPathPrefix + hostfileIdentifier)
                    .toUri().toString();
        }
        LOG.info("Hostfile being written to " + hostfileHDFSPath);
        YarnUtil.writeFileHDFS(fs, hostfileHDFSPath, text.trim());
    } catch (Exception e) {
        System.out.println("failed to write host file to HDFS");
        e.printStackTrace();
    }
}
From source file:cn.edu.buaa.act.petuumOnYarn.Client.java
License:Apache License
/**
 * Main run function for the client
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Running Client");
    yarnClient.start();

    String[] s;
    s = conf.getStrings(YarnConfiguration.RM_ADDRESS);
    for (String ss : s)
        LOG.info("RM address: " + ss);

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers="
            + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers() + ", nodeIdHost" + node.getNodeId().getHost());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();

    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // Set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    // Set local resources for the application master:
    // local files or archives as needed.
    // In this scenario, the jar file for the application master is part of
    // the local resources.
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem.
    // Create a local resource to point to the destination jar path.
    FileSystem fs = FileSystem.get(conf);
    YarnUtil.copyAndAddToLocalResources(fs, appMasterJar, petuumHDFSPathPrefix, appMasterJarPath,
            localResources, null);
    scriptHDFSPath = YarnUtil.copyToHDFS(fs, scriptPath, petuumHDFSPathPrefix, launchPath, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        YarnUtil.copyAndAddToLocalResources(fs, log4jPropFile, petuumHDFSPathPrefix, log4jPath,
                localResources, null);
    }

    // Set the env variables to be setup in the env where the application
    // master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // Add AppMaster.jar location to classpath.
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar.
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // Add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_nodes " + String.valueOf(numNodes));
    vargs.add("--start_port " + String.valueOf(startPort));
    vargs.add("--priority " + String.valueOf(workerPriority));
    vargs.add("--script_hdfs_path " + scriptHDFSPath);

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands,
            null, null, null);

    // Set up resource type requirements.
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements.
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application.
    // Not needed in this scenario.
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and
        // MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager.
    // SubmitApplicationResponse submitResp =
    // applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on
    // success or an exception is thrown to denote some form of a failure.
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // Monitor the application
    currentTime = System.currentTimeMillis();
    LOG.info("submit AM in " + (currentTime - startTime) + "ms");
    return monitorApplication(appId);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java
public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");

    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);
    if (fs.exists(output)) {
        fs.delete(output);
    }

    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);
    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Injector.java
public static void inject(Path crawlPath, CrawlDatums datums, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException, Exception {
    Path crawldbPath = new Path(crawlPath, "crawldb");
    FileSystem fs = FileSystem.get(conf);
    Path tempdb = new Path(crawldbPath, "temp");
    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(tempdb, "info"), Text.class,
            CrawlDatum.class);
    for (CrawlDatum datum : datums) {
        String key = datum.getKey();
        writer.append(new Text(key), datum);
        LOG.info("inject:" + key);
    }
    writer.close();

    Path[] mergePaths = new Path[] { tempdb };
    Merge.merge(crawlPath, mergePaths, conf, "inject");
    Merge.install(crawlPath, conf);

    if (fs.exists(tempdb)) {
        fs.delete(tempdb);
    }
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java
public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName)
        throws Exception {
    Job job = new Job(conf);
    job.setJobName(jobName + " " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }
    if (fs.exists(newdb)) {
        fs.delete(newdb);
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }
    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java
public static void install(Path crawlPath, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");
    Path olddb = new Path(crawldbPath, "old");
    if (fs.exists(currentdb)) {
        if (fs.exists(olddb)) {
            fs.delete(olddb);
        }
        fs.rename(currentdb, olddb);
    }
    fs.rename(newdb, currentdb);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.SegmentUtil.java
public static void initSegments(Path crawlPath, Configuration conf) throws IOException {
    Path segmentsPath = new Path(crawlPath, "segments");
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(segmentsPath)) {
        fs.mkdirs(segmentsPath);
    }
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.SegmentUtil.java
public static String createSegment(Path crawlPath, Configuration conf) throws IOException {
    String segmentName = createSegmentName();
    FileSystem fs = FileSystem.get(conf);
    Path segmentPath = new Path(crawlPath, "segments/" + segmentName);
    fs.mkdirs(segmentPath);
    return segmentName;
}