List of usage examples for org.apache.hadoop.fs.FileStatus.getModificationTime()
public long getModificationTime()
From source file:com.ikanow.aleph2.core.shared.utils.JarCacheUtils.java
License:Apache License
/** Moves a shared JAR into a local spot (if required) * @param library_bean/*ww w.j a v a2 s . co m*/ * @param fs * @return either a basic message bean containing an error, or the fully qualified path of the cached JAR */ public static <M> CompletableFuture<Validation<BasicMessageBean, String>> getCachedJar( final String local_cached_jar_dir, final SharedLibraryBean library_bean, final IStorageService fs, final String handler_for_errors, final M msg_for_errors) { try { final FileContext dfs = fs.getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get(); final FileContext lfs = fs.getUnderlyingPlatformDriver(FileContext.class, IStorageService.LOCAL_FS) .get(); final Path cached_jar_file = lfs .makeQualified(new Path(local_cached_jar_dir + "/" + buildCachedJarName(library_bean))); final Path original_jar_file = dfs.makeQualified(new Path(library_bean.path_name())); final FileStatus file_status = dfs.getFileStatus(original_jar_file); // (this will exception out if it doesn't exist, as it should) try { final FileStatus local_file_status = lfs.getFileStatus(cached_jar_file); // (this will exception in to case 2 if it doesn't exist) // if the local version exists then overwrite it if (file_status.getModificationTime() > local_file_status.getModificationTime()) { // (it gets kinda complicated here so just invalidate the entire classloader cache..) 
// TODO (ALEPH-12): add a coverage test for this ClassloaderUtils.clearCache(); lfs.util().copy(original_jar_file, cached_jar_file, false, true); } } catch (FileNotFoundException f) { // 2) if the local version doesn't exist then just copy the distributed file across // (note: don't need to do anything with the classloader cache here since the file doesn't exist so can't have a cache key) lfs.util().copy(original_jar_file, cached_jar_file); } return CompletableFuture.completedFuture(Validation.success(cached_jar_file.toString())); } catch (Throwable e) { return CompletableFuture.completedFuture( Validation.fail(SharedErrorUtils.buildErrorMessage(handler_for_errors, msg_for_errors, SharedErrorUtils.getLongForm(SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, e, library_bean.path_name())))); } }
From source file:com.ikanow.infinit.e.processing.custom.utils.InfiniteHadoopUtils.java
License:Open Source License
public static Path cacheLocalFile(String localPath, String localName, Configuration config) throws IOException { FileSystem fs = FileSystem.get(config); Path toDir = new Path("cache"); Path destFile = new Path("cache/" + localName); File fromFile = new File(localPath + "/" + localName); if (!fromFile.exists()) { throw new IOException("Source file does not exist: " + fromFile.toString()); }//from ww w .ja va 2s. co m boolean needToCopyFile = true; if (!fs.exists(toDir)) { // (ie relative to WD) fs.mkdirs(toDir); } else { // Now check if the file already exists if (fs.exists(destFile)) { FileStatus fsStat = fs.getFileStatus(destFile); if ((fsStat.getLen() == fromFile.length()) && (fromFile.lastModified() <= fsStat.getModificationTime())) { needToCopyFile = false; } } } if (needToCopyFile) { fs.copyFromLocalFile(false, true, new Path(localPath + "/" + localName), destFile); } return new Path(fs.getFileStatus(destFile).getPath().toUri().getPath()); // (apparently the path has to be in absolute format without even the hdfs:// at the front?!) }
From source file:com.inforefiner.hdata.SubmitClient.java
License:Apache License
/** * Main run function for the client/* w w w .ja v a2 s . c om*/ * * @return true if application completed successfully * @throws IOException * @throws YarnException */ public boolean run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } if (domainId != null && domainId.length() > 0 && toCreateDomain) { prepareTimelineDomain(); } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min 
and less than max. // Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); if (attemptFailuresValidityInterval >= 0) { appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval); } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script 
has to be made available on the final container(s) // where it will be executed. // To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; if (!shellScriptPath.isEmpty()) { Path shellSrc = new Path(shellScriptPath); String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); } if (!shellCommand.isEmpty()) { addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand); } if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct local resource for the // eventual containers that will be launched to execute the shell scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); if (domainId != null && domainId.length() > 0) { 
env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId); } // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); classPathEnv.append(c.trim()); } classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); if (null != nodeLabelExpression) { appContext.setNodeLabelExpression(nodeLabelExpression); } vargs.add("--priority " + String.valueOf(shellCmdPriority)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) 
{ vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); // Set up the container launch context for the application master ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Resource.newInstance(amMemory, amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. 
final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master // TODO - what is the range for priority? how to decide? Priority pri = Priority.newInstance(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? Thread t = new Thread(new LogReceiver()); t.start(); // Monitor the application return monitorApplication(appId); }
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
protected String getCurrentFile(FileSystem fs, FileStatus[] files, TreeSet<FileStatus> sortedFiles) { // Proposed Algo :-> Sort files based on timestamp // if there are no files) // then null (implying process this file as non-current file) // else// w w w .j av a2s . c o m // return last file as the current file if (files == null || files.length == 0) return null; for (FileStatus file : files) { sortedFiles.add(file); } // get last file from set FileStatus lastFile = sortedFiles.last(); long diff = (System.currentTimeMillis() - lastFile.getModificationTime()) / MILLISECONDS_IN_MINUTE; if (diff > timeoutToProcessLastCollectorFile) { processLastFile = true; } else { processLastFile = false; } return lastFile.getPath().getName(); }
From source file:com.inmobi.databus.files.FileMap.java
License:Apache License
public void addPath(FileStatus path) { T fileKey = getStreamFile(path);//from w w w.jav a2 s . co m files.put(fileKey, path); LOG.info("Added path: " + path.getPath() + "timestamp [" + path.getModificationTime() + "]"); }
From source file:com.inmobi.databus.files.HadoopStreamFile.java
License:Apache License
/**
 * Builds a {@code HadoopStreamFile} from a file status, using the parent directory and
 * file name of its path together with its modification time.
 *
 * @param status file status to convert
 * @return the corresponding stream file
 */
public static HadoopStreamFile create(FileStatus status) {
    final Path filePath = status.getPath();
    return new HadoopStreamFile(
            filePath.getParent(),
            filePath.getName(),
            status.getModificationTime());
}
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
protected String getCurrentFile(FileSystem fs, FileStatus[] files, long lastFileTimeout) { //Proposed Algo :-> Sort files based on timestamp //if ((currentTimeStamp - last file's timestamp) > 5min || // if there are no files) // then null (implying process this file as non-current file) // else/*from w ww.j ava2 s .c om*/ // return last file as the current file class FileTimeStampComparator implements Comparator { public int compare(Object o, Object o1) { FileStatus file1 = (FileStatus) o; FileStatus file2 = (FileStatus) o1; long file1Time = file1.getModificationTime(); long file2Time = file2.getModificationTime(); if ((file1Time < file2Time)) return -1; else return 1; } } if (files == null || files.length == 0) return null; TreeSet<FileStatus> sortedFiles = new TreeSet<FileStatus>(new FileTimeStampComparator()); for (FileStatus file : files) { sortedFiles.add(file); } //get last file from set FileStatus lastFile = sortedFiles.last(); long currentTime = System.currentTimeMillis(); long lastFileTime = lastFile.getModificationTime(); if (currentTime - lastFileTime >= lastFileTimeout) { return null; } else return lastFile.getPath().getName(); }
From source file:com.jaeksoft.searchlib.crawler.cache.HadoopCrawlCache.java
License:Open Source License
private final long purge(FileStatus[] files, long expiration) throws IOException { long count = 0; for (FileStatus file : files) { if (file.isDirectory()) { Path p = file.getPath(); count += purge(fileSystem.listStatus(p), expiration); FileStatus[] fs = fileSystem.listStatus(p); if (fs.length == 0) if (fileSystem.delete(p, false)) count++;//w w w . ja v a2s .c o m } else { if (file.getModificationTime() < expiration) if (fileSystem.delete(file.getPath(), false)) count++; } } return count; }
From source file:com.jeffy.hdfs.FileMetaData.java
License:Apache License
public static void showFileStatusForFile(String path) { Configuration config = new Configuration(); try {// ww w.j a v a 2s . c om FileSystem fs = FileSystem.get(URI.create(path), config); FileStatus stat = fs.getFileStatus(new Path(path)); System.out.println("File URI: " + stat.getPath().toUri().getPath()); System.out.println("Is directory: " + stat.isDirectory()); System.out.println("File length: " + stat.getLen()); System.out.println("Modification Time: " + new Date(stat.getModificationTime())); System.out.println("File replications: " + stat.getReplication()); System.out.println("File Block Size: " + (stat.getBlockSize() >>> 10 >>> 10) + " MB"); System.out.println("File Owner: " + stat.getOwner()); System.out.println("File Group: " + stat.getGroup()); System.out.println("File Permission: " + stat.getPermission().toString()); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java
License:Apache License
/** * Searches for files matching name pattern. Name pattern also may contain path of directory, where file search * should be performed, e.g., C:/Tomcat/logs/localhost_access_log.*.txt. If no path is defined (just file name * pattern) then files are searched in {@code System.getProperty("user.dir")}. Files array is ordered by file create * timestamp in descending order./* w w w .j ava2s. c om*/ * * @param path * path of file * @param fs * file system * * @return array of found files paths. * @throws IOException * if files can't be listed by file system. * * @see FileSystem#listStatus(Path, PathFilter) * @see FilenameUtils#wildcardMatch(String, String, IOCase) */ public static Path[] searchFiles(Path path, FileSystem fs) throws IOException { FileStatus[] dir = fs.listStatus(path.getParent(), new PathFilter() { @Override public boolean accept(Path path) { String name = path.getName(); return FilenameUtils.wildcardMatch(name, "*", IOCase.INSENSITIVE); // NON-NLS } }); Path[] activityFiles = new Path[dir == null ? 0 : dir.length]; if (dir != null) { Arrays.sort(dir, new Comparator<FileStatus>() { @Override public int compare(FileStatus o1, FileStatus o2) { return Long.valueOf(o1.getModificationTime()).compareTo(o2.getModificationTime()) * (-1); } }); for (int i = 0; i < dir.length; i++) { activityFiles[i] = dir[i].getPath(); } } return activityFiles; }