List of usage examples for org.apache.hadoop.fs FileSystem getUri
/**
 * Returns the URI associated with this file system.
 * NOTE(review): what the URI contains (scheme, authority) is decided by each
 * concrete implementation; confirm against the implementation in use.
 *
 * @return the URI of this file system
 */
public abstract URI getUri();
From source file:com.google.mr4c.sources.MBTilesDatasetSourceTest.java
License:Open Source License
@Test public void testStaging() throws Exception { // save a file on disk File file1 = new File("output/mbtilessource/test_stage1.db"); saveInputFile(file1);/*w w w .j av a2 s . co m*/ // specify location in HDFS FileSystem fs = HadoopTestUtils.getTestDFS(); Path root = new Path(fs.getUri()); Path file2 = new Path(root, "/test/sources/MBTilesDatasetSourceTest/test_stage2.db"); // standard source test MBTilesDatasetSource src1 = new MBTilesDatasetSource(file1.toURI()); MBTilesDatasetSource src2 = new MBTilesDatasetSource(file2.toUri()); SourceTestUtils.testSource(src1, src2); }
From source file:com.hortonworks.minicluster.MiniHadoopCluster.java
License:Apache License
@Override public void start() { try {/*from ww w. j av a 2 s . co m*/ if (logger.isInfoEnabled()) { logger.info("Starting DFS Cluster"); } this.dfsCluster = new MiniDFSCluster.Builder(this.configuration).numDataNodes(this.nodeManagers.length) .nameNodePort(55555).build(); FileSystem fs = this.dfsCluster.getFileSystem(); if (logger.isInfoEnabled()) { logger.info("Created default FileSystem at: " + fs.getUri().toString()); } this.configuration.set("fs.defaultFS", fs.getUri().toString()); // use HDFS this.configuration.setInt("yarn.nodemanager.delete.debug-delay-sec", 60000); } catch (Exception e) { throw new IllegalStateException("Failed to start DFS cluster", e); } this.init(this.configuration); super.start(); }
From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java
License:Apache License
/** * * @return true if upload succeeded// ww w . j a v a 2 s .c o m */ private static boolean uploadShard(String localShardDir, String shardName, String indexName, Path finalIndexPath, FileSystem finalFS, boolean qaMode) { final Path finalIndexDirPath = new Path(finalIndexPath, indexName); final Path finalShardPath = new Path(finalIndexDirPath, shardName + ".sqar"); try { if (!finalFS.exists(finalIndexDirPath)) { finalFS.mkdirs(finalIndexDirPath); if (qaMode) { makeWorldWritable(finalFS, finalIndexDirPath); } } if (finalFS.exists(finalShardPath)) { log.info("File already exists. HDFS upload aborted."); return true; } final String scheme = finalFS.getUri().getScheme(); if (scheme.equals("hdfs")) { /* * upload to temp file then rename, * to avoid having other systems see a partial file */ final String tmpUploadShardName = indexName + "-" + shardName; final Path tempUploadPath = new Path(new Path("/tmp/"), tmpUploadShardName + ".sqar"); final File shardDir = new File(localShardDir, shardName); final SquallArchiveWriter writer = new SquallArchiveWriter(finalFS, tempUploadPath, true, SquallArchiveCompressor.GZIP); writer.batchAppendDirectory(shardDir); writer.commit(); finalFS.rename(tempUploadPath, finalShardPath); } else if (scheme.equals("s3n")) { /* * s3 files are only visible after the upload is complete, * so no need to use a temp file */ final File shardDir = new File(localShardDir, shardName); final SquallArchiveWriter writer = new SquallArchiveWriter(finalFS, finalShardPath, true, SquallArchiveCompressor.GZIP); writer.batchAppendDirectory(shardDir); writer.commit(); } } catch (IOException e) { log.error(e); return false; } if (qaMode) { try { // try to set permissions on the uploaded file makeWorldWritable(finalFS, finalShardPath); } catch (Exception e) { log.warn("Failed to set permissions on the uploaded file " + finalShardPath); } } return true; }
From source file:com.inforefiner.hdata.SubmitClient.java
License:Apache License
/** * Main run function for the client//www . java 2 s . com * * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } if (domainId != null && domainId.length() > 0 && toCreateDomain) { prepareTimelineDomain(); } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and 
less than max. // Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); if (attemptFailuresValidityInterval >= 0) { appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval); } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script has to 
be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } if (!shellCommand.isEmpty()) { addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand); } if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct local resource for the // eventual containers that will be launched to execute the shell scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); if (domainId != null && domainId.length() > 0) { 
env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId); } // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); classPathEnv.append(c.trim()); } classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); if (null != nodeLabelExpression) { appContext.setNodeLabelExpression(nodeLabelExpression); } vargs.add("--priority " + String.valueOf(shellCmdPriority)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) 
{ vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); // Set up the container launch context for the application master ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Resource.newInstance(amMemory, amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. 
final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master // TODO - what is the range for priority? how to decide? Priority pri = Priority.newInstance(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? Thread t = new Thread(new LogReceiver()); t.start(); // Monitor the application return monitorApplication(appId); }
From source file:com.inmobi.conduit.distcp.MergedStreamService.java
License:Apache License
private List<FileStatus> recursiveListingOfDir(FileSystem currentFs, Path path) { try {//from w ww . j a v a 2s . co m FileStatus streamDir = currentFs.getFileStatus(path); List<FileStatus> filestatus = new ArrayList<FileStatus>(); createListing(currentFs, streamDir, filestatus); return filestatus; } catch (IOException ie) { LOG.error("IOException while doing recursive listing to create checkpoint on " + "cluster filesystem" + currentFs.getUri(), ie); } return null; }
From source file:com.inmobi.conduit.distcp.tools.TestGlobbedCopyListing.java
License:Apache License
/**
 * Records the expected entry for {@code path}: the key is the path prefixed with
 * the cluster file system URI, the value is that path relative to
 * {@code /tmp/source} as computed by {@code DistCpUtils.getRelativePath}.
 *
 * @param path path string appended to the cluster file system URI
 */
private static void recordInExpectedValues(String path) throws Exception {
    String fsUri = cluster.getFileSystem().getUri().toString();
    Path sourcePath = new Path(fsUri + path);
    Path sourceRoot = new Path("/tmp/source");
    String relativePath = DistCpUtils.getRelativePath(sourceRoot, sourcePath);
    expectedValues.put(sourcePath.toString(), relativePath);
}
From source file:com.inmobi.conduit.distcp.tools.util.DistCpUtils.java
License:Apache License
public static boolean compareFs(FileSystem srcFs, FileSystem destFs) { URI srcUri = srcFs.getUri(); URI dstUri = destFs.getUri(); if (srcUri.getScheme() == null) { return false; }//from w w w . ja v a 2 s . c o m if (!srcUri.getScheme().equals(dstUri.getScheme())) { return false; } String srcHost = srcUri.getHost(); String dstHost = dstUri.getHost(); if ((srcHost != null) && (dstHost != null)) { try { srcHost = InetAddress.getByName(srcHost).getCanonicalHostName(); dstHost = InetAddress.getByName(dstHost).getCanonicalHostName(); } catch (UnknownHostException ue) { return false; } if (!srcHost.equals(dstHost)) { return false; } } else if (srcHost == null && dstHost != null) { return false; } else if (srcHost != null) { return false; } //check for ports if (srcUri.getPort() != dstUri.getPort()) { return false; } return true; }
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
/**
 * Stubs {@code fs} with canned directory listings for the conduit data layout:
 * the data directory, two streams, and two collectors per stream.
 *
 * NOTE(review): several stubs pass a concrete Path together with an any(...)
 * matcher; Mockito normally expects every argument to be a matcher once one is
 * used. Confirm these stubs register as intended.
 *
 * @param fs      file system (expected to be a Mockito mock) to stub
 * @param cluster cluster whose data directory is stubbed
 */
private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception {
    // Canned listings, created in the same order as before.
    FileStatus[] dataDirListing = createTestData(2, "/conduit/data/stream", true);
    FileStatus[] stream1Collectors = createTestData(2, "/conduit/data/stream1/collector", true);
    FileStatus[] stream1Collector1Files =
            createTestData(NUMBER_OF_FILES, "/conduit/data/stream1/collector1/file", true);
    FileStatus[] stream1Collector2Files =
            createTestData(NUMBER_OF_FILES, "/conduit/data/stream1/collector2/file", true);
    FileStatus[] stream2Collectors = createTestData(2, "/conduit/data/stream2/collector", true);
    FileStatus[] stream2Collector1Files =
            createTestData(NUMBER_OF_FILES, "/conduit/data/stream2/collector1/file", true);
    FileStatus[] stream2Collector2Files =
            createTestData(NUMBER_OF_FILES, "/conduit/data/stream2/collector2/file", true);

    Class<LocalStreamService.CollectorPathFilter> filterType = LocalStreamService.CollectorPathFilter.class;

    // Basic file system properties.
    when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/"));
    when(fs.getUri()).thenReturn(new URI("localhost"));

    // Directory listings.
    when(fs.listStatus(cluster.getDataDir())).thenReturn(dataDirListing);
    when(fs.listStatus(new Path("/conduit/data/stream1"))).thenReturn(stream1Collectors);
    when(fs.listStatus(new Path("/conduit/data/stream1/collector1"), any(filterType)))
            .thenReturn(stream1Collector1Files);
    when(fs.listStatus(new Path("/conduit/data/stream2"))).thenReturn(stream2Collectors);
    when(fs.listStatus(new Path("/conduit/data/stream1/collector2"), any(filterType)))
            .thenReturn(stream1Collector2Files);
    when(fs.listStatus(new Path("/conduit/data/stream2/collector1"), any(filterType)))
            .thenReturn(stream2Collector1Files);
    when(fs.listStatus(new Path("/conduit/data/stream2/collector2"), any(filterType)))
            .thenReturn(stream2Collector2Files);

    // This mock is local to the method; it is neither returned nor stored.
    Path qualifiedPathMock = mock(Path.class);
    when(qualifiedPathMock.makeQualified(any(FileSystem.class)))
            .thenReturn(new Path("/conduit/data/stream1/collector1/"));
}
From source file:com.inmobi.databus.local.LocalStreamServiceTest.java
License:Apache License
/**
 * Stubs {@code fs} with canned directory listings for the databus data layout:
 * the data directory, two streams, and two collectors per stream.
 *
 * NOTE(review): several stubs pass a concrete Path together with an any(...)
 * matcher; Mockito normally expects every argument to be a matcher once one is
 * used. Confirm these stubs register as intended.
 *
 * @param fs      file system (expected to be a Mockito mock) to stub
 * @param cluster cluster whose data directory is stubbed
 */
private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception {
    // Canned listings, created in the same order as before.
    FileStatus[] dataDirListing = createTestData(2, "/databus/data/stream", true);
    FileStatus[] stream1Collectors = createTestData(2, "/databus/data/stream1/collector", true);
    FileStatus[] stream1Collector1Files =
            createTestData(number_files, "/databus/data/stream1/collector1/file", true);
    FileStatus[] stream1Collector2Files =
            createTestData(number_files, "/databus/data/stream1/collector2/file", true);
    FileStatus[] stream2Collectors = createTestData(2, "/databus/data/stream2/collector", true);
    FileStatus[] stream2Collector1Files =
            createTestData(number_files, "/databus/data/stream2/collector1/file", true);
    FileStatus[] stream2Collector2Files =
            createTestData(number_files, "/databus/data/stream2/collector2/file", true);

    Class<CollectorPathFilter> filterType = CollectorPathFilter.class;

    // Basic file system properties.
    when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/"));
    when(fs.getUri()).thenReturn(new URI("localhost"));

    // Directory listings.
    when(fs.listStatus(cluster.getDataDir())).thenReturn(dataDirListing);
    when(fs.listStatus(new Path("/databus/data/stream1"))).thenReturn(stream1Collectors);
    when(fs.listStatus(new Path("/databus/data/stream1/collector1"), any(filterType)))
            .thenReturn(stream1Collector1Files);
    when(fs.listStatus(new Path("/databus/data/stream2"))).thenReturn(stream2Collectors);
    when(fs.listStatus(new Path("/databus/data/stream1/collector2"), any(filterType)))
            .thenReturn(stream1Collector2Files);
    when(fs.listStatus(new Path("/databus/data/stream2/collector1"), any(filterType)))
            .thenReturn(stream2Collector1Files);
    when(fs.listStatus(new Path("/databus/data/stream2/collector2"), any(filterType)))
            .thenReturn(stream2Collector2Files);

    // This mock is local to the method; it is neither returned nor stored.
    Path qualifiedPathMock = mock(Path.class);
    when(qualifiedPathMock.makeQualified(any(FileSystem.class)))
            .thenReturn(new Path("/databus/data/stream1/collector1/"));
}
From source file:com.inmobi.databus.readers.TestCollectorStreamReader.java
License:Apache License
@BeforeTest public void setup() throws Exception { consumerNumber = 1;/*from www . ja va2 s .c o m*/ // setup cluster cluster = TestUtil.setupLocalCluster(this.getClass().getSimpleName(), testStream, partitionId, files, null, 0, TestUtil.getConfiguredRootDir()); collectorDir = new Path(new Path(cluster.getDataDir(), testStream), collectorName); conf = cluster.getHadoopConf(); FileSystem fs = FileSystem.get(conf); fsUri = fs.getUri().toString(); TestUtil.createEmptyFile(fs, collectorDir, testStream + "_current"); TestUtil.createEmptyFile(fs, collectorDir, "scribe_stats"); }