Example usage for org.apache.hadoop.fs FileStatus getModificationTime

List of usage examples for org.apache.hadoop.fs FileStatus getModificationTime

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getModificationTime().

Prototype

public long getModificationTime() 

Source Link

Document

Get the modification time of the file.

Usage

From source file:com.turn.camino.Camino.java

License:Open Source License

/**
 * Materialize path/*  w w  w.j  ava 2  s.  c  o  m*/
 *
 * Converts a path or path pattern into zero or more actual paths
 *
 * @param value rendered value of path
 * @param fileSystem file system
 * @return path status
 * @throws IOException
 */
protected List<PathDetail> materializePath(String value, FileSystem fileSystem) throws IOException {

    // using value to find path
    FileStatus[] fss = fileSystem.globStatus(new org.apache.hadoop.fs.Path(value));

    // path doesn't exist
    if (fss == null || fss.length == 0) {
        return Collections.emptyList();
    }

    // found match(es)
    List<PathDetail> pathDetails = Lists.newArrayListWithExpectedSize(fss.length);
    for (FileStatus fs : fss) {
        PathDetail pathDetail = new PathDetail(fs.getPath().toString(), fs.isDirectory(), fs.getLen(),
                fs.getModificationTime());
        pathDetails.add(pathDetail);
    }

    // return path details
    return pathDetails;
}

From source file:com.turn.sorcerer.status.impl.HDFSStatusStorage.java

License:Open Source License

/**
 * Returns the most recent modification time among the entries listed under the
 * status path built from the given identifier and id.
 *
 * The {@code fs} handle is created from a default {@code Configuration} when it
 * is still null at call time.
 *
 * @param identifier identifier used to build the status path; must not be null
 * @param id numeric id used to build the status path
 * @return the latest modification time found, or a {@code DateTime} of 0 when the
 *         directory is missing or has no entries
 * @throws IOException if the file system cannot be obtained or the listing fails
 */
@Override
public DateTime getLastUpdateTime(String identifier, int id) throws IOException {
    // id is a primitive int and can never be null, so only identifier needs a null check
    Preconditions.checkNotNull(identifier);

    if (fs == null) {
        try {
            fs = FileSystem.get(new Configuration());
        } catch (IOException e) {
            logger.error("Filesystem unreachable!", e);
            throw e;
        }
    }

    Path directoryPath = new Path(getStatusPath(identifier, id));

    FileStatus[] fileStatuses;

    try {
        fileStatuses = fs.listStatus(directoryPath);
    } catch (FileNotFoundException fnfe) {
        // no status directory yet: report epoch
        return new DateTime(0);
    }

    // defensive: treat a null listing the same way as a missing directory
    // instead of failing with a NullPointerException in the loop below
    if (fileStatuses == null) {
        return new DateTime(0);
    }

    long maxTS = 0;
    for (FileStatus fileStatus : fileStatuses) {
        maxTS = Math.max(fileStatus.getModificationTime(), maxTS);
    }

    return new DateTime(maxTS);
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * Looks up the last modification time of a file.
 *
 * NOTE(review): whether {@code getFileStatus} can hand back null rather than
 * throw for a missing file depends on the FileSystem implementation - confirm
 * that the -1 branch is reachable.
 *
 * @param fs The file system; must not be null.
 * @param fileUri URI of the file; must not be null.
 * @return The modification time reported by the file status, in msecs since epoch,
 *         or -1 when the file system returns a null status.
 * @throws IOException propagated from the file status lookup
 */
public static long getModificationTime(FileSystem fs, String fileUri) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(fileUri);

    Path file = new Path(fileUri);
    FileStatus status = fs.getFileStatus(file);
    if (status != null) {
        return status.getModificationTime();
    }
    return -1;
}

From source file:com.twitter.hraven.etl.FileStatusModificationComparator.java

License:Apache License

/**
 * Orders two file statuses by modification time, oldest first.
 * A null status sorts before any non-null status; two nulls are equal.
 *
 * @param fileStatus1 left-hand status, may be null
 * @param fileStatus2 right-hand status, may be null
 * @return -1, 0 or 1 when the first argument sorts before, equal to, or after the second
 */
public int compare(FileStatus fileStatus1, FileStatus fileStatus2) {

    // null handling: null is the smallest value
    if (fileStatus1 == null) {
        return (fileStatus2 == null) ? 0 : -1;
    }
    if (fileStatus2 == null) {
        return 1;
    }

    long left = fileStatus1.getModificationTime();
    long right = fileStatus2.getModificationTime();

    if (left < right) {
        return -1;
    }
    if (left == right) {
        return 0;
    }
    return 1;
}

From source file:com.twitter.hraven.etl.JobFileModifiedRangePathFilter.java

License:Apache License

/**
 * Accepts a path only when the parent filter accepts it, its name is a job conf
 * or job history file, and its modification time passes the time-based
 * {@code accept} overload.
 *
 * @param path candidate path
 * @return true when the path passes all checks, false otherwise
 * @throws ImportException when the file's modification time cannot be read
 */
@Override
public boolean accept(Path path) {
    // the parent filter gets the first veto
    if (!super.accept(path)) {
        return false;
    }

    JobFile candidate = new JobFile(path.getName());
    if (!candidate.isJobConfFile() && !candidate.isJobHistoryFile()) {
        // Reject anything that does not match a job conf filename.
        LOG.info(" Not a valid job conf / job history file " + path.getName());
        return false;
    }

    try {
        FileStatus status = path.getFileSystem(myConf).getFileStatus(path);
        return accept(status.getModificationTime());
    } catch (IOException e) {
        throw new ImportException("Cannot determine file modification time of " + path.getName(), e);
    }
}

From source file:com.twitter.hraven.etl.JobFileModifiedRangeSubstringPathFilter.java

License:Apache License

/**
 * Accepts a path only when the parent filter accepts it, its name is a job conf
 * or job history file, job history files additionally pass both path-substring
 * checks, and the modification time passes the time-based {@code accept} overload.
 *
 * @param path candidate path
 * @return true when the path passes all checks, false otherwise
 * @throws ImportException when the file's modification time cannot be read
 */
@Override
public boolean accept(Path path) {
    // the parent filter gets the first veto
    if (!super.accept(path)) {
        return false;
    }

    JobFile candidate = new JobFile(path.getName());
    if (!candidate.isJobConfFile() && !candidate.isJobHistoryFile()) {
        // Reject anything that does not match a job conf filename.
        LOG.info(" Not a valid job conf / job history file " + path.getName());
        return false;
    }

    // the substring checks apply to history files only
    if (candidate.isJobHistoryFile()) {
        boolean substringsOk = includesPathSubstrings(path) && excludesPathSubstrings(path);
        if (!substringsOk) {
            return false;
        }
    }

    try {
        FileStatus status = path.getFileSystem(myConf).getFileStatus(path);
        return accept(status.getModificationTime());
    } catch (IOException e) {
        throw new ImportException("Cannot determine file modification time of " + path.getName(), e);
    }
}

From source file:com.twitter.hraven.etl.JobFilePartitioner.java

License:Apache License

/**
 * Places one HDFS source file into the target directory derived from its
 * modification time, either by moving or by copying it.
 *
 * When {@code moveFiles} is set or {@code retain} is false, the source is deleted
 * if a file of the same name already exists in the target directory, and renamed
 * into the target directory otherwise. In the remaining case the source is copied,
 * unless the target exists and {@code skipExisting} is true.
 *
 * @param hdfs
 *          FileSystem handle
 * @param f
 *          file to process
 * @param outputPath
 *          passed to {@code getTargetDirectory} to derive the target directory
 * @param conf
 *          configuration to use for copying.
 * @param skipExisting
 *          skip if the file already exist in the target. File will be
 *          overwritten if already there and this argument is false.
 * @param retain
 *          whether this file should be retained
 *
 * @throws IOException propagated from the file system operations
 */
private void processHDFSSource(FileSystem hdfs, FileStatus f, Path outputPath, Configuration conf,
        boolean skipExisting, boolean retain) throws IOException {

    Path targetDir = getTargetDirectory(hdfs, outputPath, f.getModificationTime());
    Path source = f.getPath();
    boolean targetExists = hdfs.exists(new Path(targetDir, source.getName()));

    boolean keepSource = !moveFiles && retain;
    if (keepSource) {
        // copy, except when the target is already there and we are told to skip existing records
        if (!(targetExists && skipExisting)) {
            copy(hdfs, f, conf, targetDir);
        }
        return;
    }

    // the source must not stay behind: drop it if already present in the target, else move it
    if (targetExists) {
        hdfs.delete(source, false);
    } else {
        hdfs.rename(source, targetDir);
    }
}

From source file:com.twitter.hraven.etl.MinMaxJobFileTracker.java

License:Apache License

/**
 * Builds a JobFile from the name of the given status and, when it is a job conf
 * or job history file, records its job id and widens the tracked min/max
 * modification times to include this file.
 *
 * @param jobFileStatus
 *          of a jobfile, must be a proper JobFile. Cannot be null (it is
 *          dereferenced without a check).
 * @return a JobFile for the given jobFileStatus.
 */
public JobFile track(FileStatus jobFileStatus) {

    JobFile jobFile = new JobFile(jobFileStatus.getPath().getName());

    // Extra check, caller should already have taken care of this.
    boolean trackable = jobFile.isJobConfFile() || jobFile.isJobHistoryFile();
    if (!trackable) {
        return jobFile;
    }

    track(jobFile.getJobid());

    long modified = jobFileStatus.getModificationTime();
    if (modified < minModificationTimeMillis) {
        minModificationTimeMillis = modified;
    }
    if (modified > maxModificationTimeMillis) {
        maxModificationTimeMillis = modified;
    }
    return jobFile;
}

From source file:com.twitter.hraven.mapreduce.JobFileRawLoaderMapper.java

License:Apache License

/**
 * Reads the job file via {@link #readJobFile(FileStatus)} and appends one Put to
 * {@code puts} carrying the raw bytes and the file's last-modified millis.
 *
 * @param puts
 *          list the new Put is appended to.
 * @param rowKey
 *          to identify the row in the raw table.
 * @param rawColumn
 *          column under {@code Constants.RAW_FAM_BYTES} that receives the raw data
 * @param lastModificationColumn
 *          column under {@code Constants.INFO_FAM_BYTES} that receives the
 *          modification time
 * @param fileStatus
 *          Referring to the jobFile to load.
 * @throws IOException propagated from reading the job file
 */
private void addRawPut(List<Put> puts, byte[] rowKey, byte[] rawColumn, byte[] lastModificationColumn,
        FileStatus fileStatus) throws IOException {
    // read first so that a failed read leaves puts untouched
    byte[] content = readJobFile(fileStatus);

    Put put = new Put(rowKey);
    put.add(Constants.RAW_FAM_BYTES, rawColumn, content);
    put.add(Constants.INFO_FAM_BYTES, lastModificationColumn,
            Bytes.toBytes(fileStatus.getModificationTime()));

    puts.add(put);
}

From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java

License:Apache License

public DAG createDAG(TezOperPlan tezPlan, FileSystem remoteFs, TezConfiguration conf, ApplicationId appId,
        Path remoteStagingDir) throws IOException, YarnException {

    DAG dag = new DAG("MRRSleepJob");
    /*/*from  ww w  .j a  va2  s  . c  o  m*/
          String jarPath = ClassUtil.findContainingJar(getClass());
          Path remoteJarPath = remoteFs.makeQualified(
    new Path(remoteStagingDir, "dag_job.jar"));
          remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
          FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);
    */
    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();

    if (!pigContext.inIllustrator && pigContext.getExecType() != ExecType.TEZ_LOCAL) {

        // Setup the DistributedCache for this job
        for (URL extraJar : pigContext.extraJars) {
            //log.debug("Adding jar to DistributedCache: " + extraJar.toString());
            TezJobControlCompiler.putJarOnClassPathThroughDistributedCache(pigContext, conf, extraJar);
        }

        //Create the jar of all functions and classes required
        File submitJarFile = File.createTempFile("Job", ".jar");
        //log.info("creating jar file "+submitJarFile.getName());
        // ensure the job jar is deleted on exit
        submitJarFile.deleteOnExit();
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        try {
            JarManager.createJar(fos, new HashSet<String>(), pigContext);
        } catch (ClassNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
        remoteFs.copyFromLocalFile(new Path(submitJarFile.getAbsolutePath()), remoteJarPath);
        FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

        LocalResource dagJarLocalRsrc = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
                jarFileStatus.getModificationTime());
        commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

        Path remoteTezJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "pig-tez.jar"));
        remoteFs.copyFromLocalFile(new Path("pig-tez.jar"), remoteTezJarPath);
        FileStatus tezJarFileStatus = remoteFs.getFileStatus(remoteTezJarPath);

        LocalResource tezJarLocalRsrc = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(remoteTezJarPath), LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION, tezJarFileStatus.getLen(),
                tezJarFileStatus.getModificationTime());
        commonLocalResources.put("pig-tez.jar", tezJarLocalRsrc);

        //log.info("jar file "+submitJarFile.getName()+" created");
        //Start setting the JobConf properties
        conf.set("mapred.jar", submitJarFile.getPath());
    }

    /*
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(
    ConverterUtils.getYarnUrlFromPath(remoteJarPath),
    LocalResourceType.FILE,
    LocalResourceVisibility.APPLICATION,
    jarFileStatus.getLen(),
    jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
    */

    Hashtable<TezOperator, Pair<Vertex, Configuration>> vertexMap = new Hashtable<TezOperator, Pair<Vertex, Configuration>>();

    List<TezOperator> operators = tezPlan.getRoots();

    // add settings for pig statistics
    String setScriptProp = conf.get(ScriptState.INSERT_ENABLED, "true");
    ScriptState ss = null;

    if (setScriptProp.equalsIgnoreCase("true")) {
        ss = ScriptState.get();
    }

    while (operators != null && operators.size() != 0) {

        List<TezOperator> successors = new ArrayList<TezOperator>();

        for (TezOperator oper : operators) {

            Configuration operConf = oper.configure(pigContext, conf);
            /*
            if (ss != null){
               ss.addSettingsToConf(oper, conf);
            }
            */
            List<TezOperator> predecessors = plan.getPredecessors(oper);

            if (predecessors != null && predecessors.size() != 0) {
                MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf,
                        vertexMap.get(predecessors.get(0)).second);
            } else {
                MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf, null);
            }

            List<TezOperator> operSuccessors = tezPlan.getSuccessors(oper);
            if (operSuccessors != null) {
                successors.addAll(operSuccessors);
            }

            MRHelpers.doJobClientMagic(operConf);

            //mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);

            Vertex operVertex = new Vertex(oper.name(),
                    new ProcessorDescriptor(oper.getProcessor(), MRHelpers.createUserPayloadFromConf(operConf)),
                    oper.getParallelism(), MRHelpers.getMapResource(operConf));

            oper.configureVertex(operVertex, operConf, commonLocalResources, remoteStagingDir);

            dag.addVertex(operVertex);
            if (predecessors != null) {

                for (TezOperator predecessor : predecessors) {
                    dag.addEdge(new Edge(vertexMap.get(predecessor).first, operVertex,
                            tezPlan.getEdgeProperty(predecessor, oper)));
                }

            }

            vertexMap.put(oper, new Pair<Vertex, Configuration>(operVertex, operConf));
        }

        operators = successors;
    }
    return dag;
}