List of usage examples for org.apache.hadoop.fs.FileStatus.getModificationTime()
public long getModificationTime()
From source file:com.turn.camino.Camino.java
License:Open Source License
/** * Materialize path/* w w w.j ava 2 s. c o m*/ * * Converts a path or path pattern into zero or more actual paths * * @param value rendered value of path * @param fileSystem file system * @return path status * @throws IOException */ protected List<PathDetail> materializePath(String value, FileSystem fileSystem) throws IOException { // using value to find path FileStatus[] fss = fileSystem.globStatus(new org.apache.hadoop.fs.Path(value)); // path doesn't exist if (fss == null || fss.length == 0) { return Collections.emptyList(); } // found match(es) List<PathDetail> pathDetails = Lists.newArrayListWithExpectedSize(fss.length); for (FileStatus fs : fss) { PathDetail pathDetail = new PathDetail(fs.getPath().toString(), fs.isDirectory(), fs.getLen(), fs.getModificationTime()); pathDetails.add(pathDetail); } // return path details return pathDetails; }
From source file:com.turn.sorcerer.status.impl.HDFSStatusStorage.java
License:Open Source License
/**
 * Returns the newest modification time found among the entries listed
 * under the status path of the given identifier and id.
 *
 * The file system handle is created on first use from a default
 * {@code Configuration} when the {@code fs} field is still null.
 *
 * @param identifier identifier used to build the status path; must not be null
 * @param id id used to build the status path
 * @return a {@code DateTime} built from the largest modification time seen,
 *         or {@code new DateTime(0)} when the status path does not exist or
 *         has no entries
 * @throws IOException if the file system cannot be obtained or listed
 */
@Override
public DateTime getLastUpdateTime(String identifier, int id) throws IOException {
    // Only the identifier can be null; id is a primitive int, so a null
    // check on it would merely box the value and could never fail.
    Preconditions.checkNotNull(identifier);

    if (fs == null) {
        try {
            fs = FileSystem.get(new Configuration());
        } catch (IOException e) {
            logger.error("Filesystem unreachable!", e);
            throw e;
        }
    }

    Path directoryPath = new Path(getStatusPath(identifier, id));
    FileStatus[] fileStatuses;
    try {
        fileStatuses = fs.listStatus(directoryPath);
    } catch (FileNotFoundException fnfe) {
        // No status directory yet: report the epoch.
        return new DateTime(0);
    }

    // Some FileSystem implementations (notably older Hadoop releases) signal a
    // missing path by returning null instead of throwing; treat it the same way
    // rather than failing with a NullPointerException in the loop below.
    if (fileStatuses == null) {
        return new DateTime(0);
    }

    long maxTS = 0;
    for (FileStatus fileStatus : fileStatuses) {
        maxTS = Math.max(fileStatus.getModificationTime(), maxTS);
    }
    return new DateTime(maxTS);
}
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
/**
 * Looks up the last modification time of a file.
 *
 * NOTE(review): Hadoop documents {@code FileSystem.getFileStatus} as throwing
 * {@code FileNotFoundException} for a missing path, so the -1 result is
 * presumably only reachable with implementations that return null - confirm.
 *
 * @param fs The file system; must not be null.
 * @param fileUri URI of the file; must not be null.
 * @return The last modification time of the file, in msecs since epoch, or -1
 *         if the file system returned no status for it.
 * @throws IOException declared by the file status lookup
 */
public static long getModificationTime(FileSystem fs, String fileUri) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(fileUri);

    Path filePath = new Path(fileUri);
    FileStatus status = fs.getFileStatus(filePath);
    if (status == null) {
        return -1;
    }
    return status.getModificationTime();
}
From source file:com.twitter.hraven.etl.FileStatusModificationComparator.java
License:Apache License
public int compare(FileStatus fileStatus1, FileStatus fileStatus2) { // Do the obligatory null checks. if ((fileStatus1 == null) && (fileStatus2 == null)) { return 0; }/*from w w w . ja v a 2 s .c o m*/ if (fileStatus1 == null) { return -1; } if (fileStatus2 == null) { return 1; } long modificationTime1 = fileStatus1.getModificationTime(); long modificationTime2 = fileStatus2.getModificationTime(); return (modificationTime1 < modificationTime2 ? -1 : (modificationTime1 == modificationTime2 ? 0 : 1)); }
From source file:com.twitter.hraven.etl.JobFileModifiedRangePathFilter.java
License:Apache License
@Override public boolean accept(Path path) { if (!super.accept(path)) { return false; }/*from w w w.ja va2 s .c o m*/ JobFile jobFile = new JobFile(path.getName()); if (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()) { try { FileSystem fs = path.getFileSystem(myConf); FileStatus fileStatus = fs.getFileStatus(path); long fileModificationTimeMillis = fileStatus.getModificationTime(); return accept(fileModificationTimeMillis); } catch (IOException e) { throw new ImportException("Cannot determine file modification time of " + path.getName(), e); } } else { // Reject anything that does not match a job conf filename. LOG.info(" Not a valid job conf / job history file " + path.getName()); return false; } }
From source file:com.twitter.hraven.etl.JobFileModifiedRangeSubstringPathFilter.java
License:Apache License
@Override public boolean accept(Path path) { if (!super.accept(path)) { return false; }//from www . ja v a2 s . c o m JobFile jobFile = new JobFile(path.getName()); if (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()) { if (jobFile.isJobHistoryFile()) { if (!includesPathSubstrings(path) || !excludesPathSubstrings(path)) { return false; } } try { FileSystem fs = path.getFileSystem(myConf); FileStatus fileStatus = fs.getFileStatus(path); long fileModificationTimeMillis = fileStatus.getModificationTime(); return accept(fileModificationTimeMillis); } catch (IOException e) { throw new ImportException("Cannot determine file modification time of " + path.getName(), e); } } else { // Reject anything that does not match a job conf filename. LOG.info(" Not a valid job conf / job history file " + path.getName()); return false; } }
From source file:com.twitter.hraven.etl.JobFilePartitioner.java
License:Apache License
/** * @param hdfs//www . j a v a2s .c o m * FileSystem handle * @param f * file to process * @param outputPath * @param conf * configuration to use for copying. * @param skipExisting * skip if the file already exist in the target. File will be * overwritten if already there and this argument is false. * @retain whether this file should be retained * * @throws IOException */ private void processHDFSSource(FileSystem hdfs, FileStatus f, Path outputPath, Configuration conf, boolean skipExisting, boolean retain) throws IOException { long fileModTime = f.getModificationTime(); Path targetDir = getTargetDirectory(hdfs, outputPath, fileModTime); boolean targetExists = false; Path target = new Path(targetDir, f.getPath().getName()); targetExists = hdfs.exists(target); if (moveFiles || !retain) { if (targetExists) { hdfs.delete(f.getPath(), false); } else { hdfs.rename(f.getPath(), targetDir); } } else { if (targetExists && skipExisting) { // Do nothing, target is already there and we're instructed to skip // existing records. } else { copy(hdfs, f, conf, targetDir); } } }
From source file:com.twitter.hraven.etl.MinMaxJobFileTracker.java
License:Apache License
/** * Converts a jobFileStatus to a JobFile and tracks the min and max * modification times and JobIds.//from ww w . j av a2 s. c o m * * @param jobFileStatus * of a jobfile, must be a proper JobFile. Cannot be null. * @return a JobFile for the given jobFileStatus. */ public JobFile track(FileStatus jobFileStatus) { String jobfileName = jobFileStatus.getPath().getName(); JobFile jobFile = new JobFile(jobfileName); // Extra check, caller should already have taken care of this. if (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()) { track(jobFile.getJobid()); long modificationTimeMillis = jobFileStatus.getModificationTime(); if (modificationTimeMillis < minModificationTimeMillis) { minModificationTimeMillis = modificationTimeMillis; } if (modificationTimeMillis > maxModificationTimeMillis) { maxModificationTimeMillis = modificationTimeMillis; } } return jobFile; }
From source file:com.twitter.hraven.mapreduce.JobFileRawLoaderMapper.java
License:Apache License
/**
 * Reads the job file via {@link #readJobFile(FileStatus)} and appends a
 * single Put carrying the raw bytes and the last modified millis to
 * {@code puts}.
 *
 * @param puts
 *          list the new Put is appended to.
 * @param rowKey
 *          to identify the row in the raw table.
 * @param rawColumn
 *          column (in the raw family) that receives the raw data
 * @param lastModificationColumn
 *          column (in the info family) that receives the modification time
 * @param fileStatus
 *          Referring to the jobFile to load.
 * @throws IOException declared by reading the job file
 */
private void addRawPut(List<Put> puts, byte[] rowKey, byte[] rawColumn, byte[] lastModificationColumn,
        FileStatus fileStatus) throws IOException {
    byte[] jobFileContents = readJobFile(fileStatus);

    Put put = new Put(rowKey);
    long modifiedMillis = fileStatus.getModificationTime();
    byte[] modifiedMillisBytes = Bytes.toBytes(modifiedMillis);

    put.add(Constants.RAW_FAM_BYTES, rawColumn, jobFileContents);
    put.add(Constants.INFO_FAM_BYTES, lastModificationColumn, modifiedMillisBytes);

    puts.add(put);
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java
License:Apache License
public DAG createDAG(TezOperPlan tezPlan, FileSystem remoteFs, TezConfiguration conf, ApplicationId appId, Path remoteStagingDir) throws IOException, YarnException { DAG dag = new DAG("MRRSleepJob"); /*/*from ww w .j a va2 s . c o m*/ String jarPath = ClassUtil.findContainingJar(getClass()); Path remoteJarPath = remoteFs.makeQualified( new Path(remoteStagingDir, "dag_job.jar")); remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath); FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath); */ Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>(); if (!pigContext.inIllustrator && pigContext.getExecType() != ExecType.TEZ_LOCAL) { // Setup the DistributedCache for this job for (URL extraJar : pigContext.extraJars) { //log.debug("Adding jar to DistributedCache: " + extraJar.toString()); TezJobControlCompiler.putJarOnClassPathThroughDistributedCache(pigContext, conf, extraJar); } //Create the jar of all functions and classes required File submitJarFile = File.createTempFile("Job", ".jar"); //log.info("creating jar file "+submitJarFile.getName()); // ensure the job jar is deleted on exit submitJarFile.deleteOnExit(); FileOutputStream fos = new FileOutputStream(submitJarFile); try { JarManager.createJar(fos, new HashSet<String>(), pigContext); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar")); remoteFs.copyFromLocalFile(new Path(submitJarFile.getAbsolutePath()), remoteJarPath); FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath); LocalResource dagJarLocalRsrc = LocalResource.newInstance( ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(), jarFileStatus.getModificationTime()); commonLocalResources.put("dag_job.jar", dagJarLocalRsrc); Path remoteTezJarPath = remoteFs.makeQualified(new 
Path(remoteStagingDir, "pig-tez.jar")); remoteFs.copyFromLocalFile(new Path("pig-tez.jar"), remoteTezJarPath); FileStatus tezJarFileStatus = remoteFs.getFileStatus(remoteTezJarPath); LocalResource tezJarLocalRsrc = LocalResource.newInstance( ConverterUtils.getYarnUrlFromPath(remoteTezJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, tezJarFileStatus.getLen(), tezJarFileStatus.getModificationTime()); commonLocalResources.put("pig-tez.jar", tezJarLocalRsrc); //log.info("jar file "+submitJarFile.getName()+" created"); //Start setting the JobConf properties conf.set("mapred.jar", submitJarFile.getPath()); } /* LocalResource dagJarLocalRsrc = LocalResource.newInstance( ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(), jarFileStatus.getModificationTime()); commonLocalResources.put("dag_job.jar", dagJarLocalRsrc); */ Hashtable<TezOperator, Pair<Vertex, Configuration>> vertexMap = new Hashtable<TezOperator, Pair<Vertex, Configuration>>(); List<TezOperator> operators = tezPlan.getRoots(); // add settings for pig statistics String setScriptProp = conf.get(ScriptState.INSERT_ENABLED, "true"); ScriptState ss = null; if (setScriptProp.equalsIgnoreCase("true")) { ss = ScriptState.get(); } while (operators != null && operators.size() != 0) { List<TezOperator> successors = new ArrayList<TezOperator>(); for (TezOperator oper : operators) { Configuration operConf = oper.configure(pigContext, conf); /* if (ss != null){ ss.addSettingsToConf(oper, conf); } */ List<TezOperator> predecessors = plan.getPredecessors(oper); if (predecessors != null && predecessors.size() != 0) { MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf, vertexMap.get(predecessors.get(0)).second); } else { MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf, null); } List<TezOperator> operSuccessors = tezPlan.getSuccessors(oper); if (operSuccessors != null) { 
successors.addAll(operSuccessors); } MRHelpers.doJobClientMagic(operConf); //mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper); Vertex operVertex = new Vertex(oper.name(), new ProcessorDescriptor(oper.getProcessor(), MRHelpers.createUserPayloadFromConf(operConf)), oper.getParallelism(), MRHelpers.getMapResource(operConf)); oper.configureVertex(operVertex, operConf, commonLocalResources, remoteStagingDir); dag.addVertex(operVertex); if (predecessors != null) { for (TezOperator predecessor : predecessors) { dag.addEdge(new Edge(vertexMap.get(predecessor).first, operVertex, tezPlan.getEdgeProperty(predecessor, oper))); } } vertexMap.put(oper, new Pair<Vertex, Configuration>(operVertex, operConf)); } operators = successors; } return dag; }