Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.inmobi.databus.files.FileMap.java

License:Apache License

/**
 * Stores the given file status in {@code files}, keyed by the stream file
 * that {@code getStreamFile} derives from it, and logs the added path
 * together with its modification time.
 *
 * @param path status of the file to add
 */
public void addPath(FileStatus path) {
    T fileKey = getStreamFile(path);
    files.put(fileKey, path);
    // A space before "timestamp" keeps the path and the label from running together in the log.
    LOG.info("Added path: " + path.getPath() + " timestamp [" + path.getModificationTime() + "]");
}

From source file:com.inmobi.databus.files.FileMap.java

License:Apache License

/**
 * Calls {@code createIterator()} and then advances {@code fileNameIterator}
 * until it has returned the entry equal to the stream file derived from
 * {@code cfile}.
 *
 * @param cfile status of the file to look for; may be {@code null}
 * @return {@code true} if a matching entry was found; {@code false} if
 *         {@code cfile} is {@code null} or no entry matched
 */
public boolean setIterator(FileStatus cfile) {
    if (cfile == null) {
        return false;
    }
    createIterator();
    T file = getStreamFile(cfile);
    while (fileNameIterator.hasNext()) {
        StreamFile nextfile = fileNameIterator.next();
        if (nextfile.equals(file)) {
            return true;
        }
    }
    // Trailing space in the literal separates the message from the path.
    LOG.info("Did not find file " + cfile.getPath());
    return false;
}

From source file:com.inmobi.databus.files.HadoopStreamFile.java

License:Apache License

/**
 * Builds a {@code HadoopStreamFile} from a file status, using the parent of
 * the status path, the final path component and the modification time.
 *
 * @param status file status to convert
 * @return the stream file describing {@code status}
 */
public static HadoopStreamFile create(FileStatus status) {
    final String fileName = status.getPath().getName();
    final long modificationTime = status.getModificationTime();
    return new HadoopStreamFile(status.getPath().getParent(), fileName, modificationTime);
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Builds the ordered map of source-to-destination moves to perform at commit.
 *
 * <p>First, every file found under each category directory of
 * {@code tmpJobOutputPath} is mapped to the same file name under the
 * directory returned by {@code cluster.getLocalDestDir(categoryName, commitTime)}.
 * Second, for each configured cluster that has at least one primary
 * destination stream which is also a source stream of {@code cluster}, the
 * destination paths belonging to that cluster's destination streams are
 * written, one per line, to a temporary file under {@code tmpPath}; that
 * temporary file is then mapped to a file named after the current time in
 * milliseconds under {@code cluster.getConsumePath(clusterEntry)}.
 *
 * @param commitTime  time passed to {@code getLocalDestDir} and
 *                    {@code publishMissingPaths}
 * @param fileListing not referenced anywhere in this method body
 * @return the file moves followed by the consumer-file moves
 * @throws Exception if any file system operation or helper call fails
 */
private Map<Path, Path> prepareForCommit(long commitTime, Map<FileStatus, String> fileListing)
        throws Exception {
    FileSystem fs = FileSystem.get(cluster.getHadoopConf());

    // find final destination paths
    // LinkedHashMap: the moves keep the order in which the files were listed
    Map<Path, Path> mvPaths = new LinkedHashMap<Path, Path>();
    FileStatus[] categories = fs.listStatus(tmpJobOutputPath);
    for (FileStatus categoryDir : categories) {
        // the directory name under tmpJobOutputPath is used as the category name
        String categoryName = categoryDir.getPath().getName();
        Path destDir = new Path(cluster.getLocalDestDir(categoryName, commitTime));
        FileStatus[] files = fs.listStatus(categoryDir.getPath());
        for (FileStatus file : files) {
            Path destPath = new Path(destDir, file.getPath().getName());
            LOG.debug("Moving [" + file.getPath() + "] to [" + destPath + "]");
            mvPaths.put(file.getPath(), destPath);
        }
        publishMissingPaths(fs, cluster.getLocalFinalDestDirRoot(), commitTime, categoryName);
    }

    // find input files for consumer
    Map<Path, Path> consumerCommitPaths = new HashMap<Path, Path>();
    for (Cluster clusterEntry : getConfig().getClusters().values()) {
        Set<String> destStreams = clusterEntry.getDestinationStreams().keySet();
        // a cluster is a consumer when one of its primary destination streams
        // is also one of this cluster's source streams
        boolean consumeCluster = false;
        for (String destStream : destStreams) {
            if (clusterEntry.getPrimaryDestinationStreams().contains(destStream)
                    && cluster.getSourceStreams().contains(destStream)) {
                consumeCluster = true;
            }
        }

        if (consumeCluster) {
            Path tmpConsumerPath = new Path(tmpPath, clusterEntry.getName());
            // the consumer file is created lazily, only once there is a path to write
            boolean isFileOpened = false;
            FSDataOutputStream out = null;
            try {
                for (Path destPath : mvPaths.values()) {
                    String category = getCategoryFromDestPath(destPath);
                    if (clusterEntry.getDestinationStreams().containsKey(category)) {
                        if (!isFileOpened) {
                            out = fs.create(tmpConsumerPath);
                            isFileOpened = true;
                        }
                        out.writeBytes(destPath.toString());
                        LOG.debug("Adding [" + destPath + "]  for consumer [" + clusterEntry.getName()
                                + "] to commit Paths in [" + tmpConsumerPath + "]");

                        out.writeBytes("\n");
                    }
                }
            } finally {
                // runs on both the normal and the exceptional path; the consumer
                // move is registered only if the file was actually created
                if (isFileOpened) {
                    out.close();
                    Path finalConsumerPath = new Path(cluster.getConsumePath(clusterEntry),
                            Long.toString(System.currentTimeMillis()));
                    LOG.debug("Moving [" + tmpConsumerPath + "] to [ " + finalConsumerPath + "]");
                    consumerCommitPaths.put(tmpConsumerPath, finalConsumerPath);
                }
            }
        }
    }

    // file moves are inserted before the consumer-file moves
    Map<Path, Path> commitPaths = new LinkedHashMap<Path, Path>();
    commitPaths.putAll(mvPaths);
    commitPaths.putAll(consumerCommitPaths);

    return commitPaths;
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Maps every file in the trash set to its target under the directory returned
 * by {@code cluster.getTrashPathWithDateHour()}. The target file name is the
 * name of the source's parent directory, a dash, and the source file name.
 *
 * @param trashSet files to be trashed
 * @return sorted map from source path to trash target path
 */
Map<Path, Path> populateTrashCommitPaths(Set<FileStatus> trashSet) {
    Map<Path, Path> trashPaths = new TreeMap<Path, Path>();
    Path trashDir = cluster.getTrashPathWithDateHour();
    for (FileStatus src : trashSet) {
        String trashName = src.getPath().getParent().getName() + "-" + src.getPath().getName();
        Path target = new Path(trashDir, trashName);
        LOG.debug("Trashing [" + src.getPath() + "] to [" + target + "]");
        trashPaths.put(src.getPath(), target);
    }
    return trashPaths;
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Walks the stream and collector directories below {@code fileStatus} and
 * hands every listed collector file to {@code processFile}. After each
 * collector, {@code populateTrash} and
 * {@code populateCheckpointPathForCollector} are called with the files
 * collected for it.
 *
 * <p>A {@code null} listing at any level (root, stream or collector) is
 * logged and skipped rather than dereferenced.
 *
 * @param fs              file system to list
 * @param fileStatus      directory whose children are treated as streams
 * @param results         passed through to {@code processFile}
 * @param trashSet        passed through to {@code populateTrash}
 * @param checkpointPaths passed through to
 *                        {@code populateCheckpointPathForCollector}
 * @param lastFileTimeout passed through to {@code getCurrentFile}
 * @throws IOException if listing or processing a file fails
 */
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results,
        Set<FileStatus> trashSet, Map<String, FileStatus> checkpointPaths, long lastFileTimeout)
        throws IOException {
    FileStatus[] streams = fs.listStatus(fileStatus.getPath());
    // Same null guard the collector file listing below already has.
    if (streams == null) {
        LOG.warn("No Streams Found in " + fileStatus.getPath() + " Skipping Directory");
        return;
    }
    for (FileStatus stream : streams) {
        String streamName = stream.getPath().getName();
        LOG.debug("createListing working on Stream [" + streamName + "]");
        FileStatus[] collectors = fs.listStatus(stream.getPath());
        if (collectors == null) {
            LOG.warn("No Collectors Found in the Stream " + stream.getPath() + " Skipping Directory");
            continue;
        }
        for (FileStatus collector : collectors) {
            TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>();
            // check point for this collector
            String collectorName = collector.getPath().getName();
            String checkPointKey = streamName + collectorName;
            String checkPointValue = null;
            byte[] value = checkpointProvider.read(checkPointKey);
            if (value != null) {
                checkPointValue = new String(value);
            }
            LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]");

            FileStatus[] files = fs.listStatus(collector.getPath(), new CollectorPathFilter());

            if (files == null) {
                LOG.warn("No Files Found in the Collector " + collector.getPath() + " Skipping Directory");
                continue;
            }

            String currentFile = getCurrentFile(fs, files, lastFileTimeout);

            for (FileStatus file : files) {
                processFile(file, currentFile, checkPointValue, fs, results, collectorPaths);
            }
            populateTrash(collectorPaths, trashSet);
            populateCheckpointPathForCollector(checkpointPaths, collectorPaths, checkPointKey);
        } // all files in a collector
    }
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Handles a single collector file. Files whose name equals
 * {@code currentFile} (ignoring case) are skipped; empty files are deleted;
 * every other file is recorded in {@code collectorPaths}, and also in
 * {@code results} when {@code aboveCheckpoint} accepts its name.
 *
 * @param file            file to process
 * @param currentFile     name of the file to skip; may be {@code null}
 * @param checkPointValue checkpoint value passed to {@code aboveCheckpoint}
 * @param fs              file system used to qualify and delete paths
 * @param results         receives the file mapped to its temporary output directory
 * @param collectorPaths  receives the file keyed by file name
 * @throws IOException if deleting an empty file fails
 */
private void processFile(FileStatus file, String currentFile, String checkPointValue, FileSystem fs,
        Map<FileStatus, String> results, Map<String, FileStatus> collectorPaths) throws IOException {
    String fileName = file.getPath().getName();
    if (fileName == null || fileName.equalsIgnoreCase(currentFile)) {
        return;
    }

    if (isEmptyFile(file, fs)) {
        LOG.info("Empty File [" + file.getPath() + "] found. " + "Deleting it");
        fs.delete(file.getPath(), false);
        return;
    }

    Path src = file.getPath().makeQualified(fs);
    String destDir = getCategoryJobOutTmpPath(getCategoryFromSrcPath(src)).toString();
    if (aboveCheckpoint(checkPointValue, fileName)) {
        results.put(file, destDir);
    }
    collectorPaths.put(fileName, file);
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Tells whether a file is empty by opening it and trying to read one byte.
 *
 * @param fileStatus status of the file to probe
 * @param fs         file system used to open the file
 * @return {@code true} if a single byte could not be read; {@code false} if
 *         one byte was read or if opening/reading raised an
 *         {@code IOException} (which is logged)
 */
private boolean isEmptyFile(FileStatus fileStatus, FileSystem fs) {
    FSDataInputStream in = null;
    try {
        in = fs.open(fileStatus.getPath());
        // try reading 1 byte; anything other than exactly one byte means empty
        byte[] probe = new byte[1];
        return in.read(probe) != 1;
    } catch (IOException e) {
        LOG.error("Unable to find if file is empty or not [" + fileStatus.getPath() + "]", e);
        return false;
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException e1) {
                LOG.error("Error in closing file [" + fileStatus.getPath() + "]", e1);
            }
        }
    }
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Returns the name of the file still considered current, if any.
 *
 * <p>The most recently modified file is the candidate. It is reported as
 * current only when its modification time is less than
 * {@code lastFileTimeout} milliseconds before now.
 *
 * @param fs              not used by this method
 * @param files           files to inspect; may be {@code null} or empty
 * @param lastFileTimeout age in milliseconds at or beyond which the newest
 *                        file is no longer treated as current
 * @return the name of the newest file, or {@code null} when there are no
 *         files or the newest one has reached the timeout
 */
protected String getCurrentFile(FileSystem fs, FileStatus[] files, long lastFileTimeout) {
    if (files == null || files.length == 0) {
        return null;
    }

    // A single pass finds the newest file. Using ">=" lets the later array
    // element win among equal timestamps, matching the previous sorted-set
    // ordering without a comparator that never reported equality.
    FileStatus lastFile = files[0];
    for (FileStatus file : files) {
        if (file.getModificationTime() >= lastFile.getModificationTime()) {
            lastFile = file;
        }
    }

    long currentTime = System.currentTimeMillis();
    long lastFileTime = lastFile.getModificationTime();
    if (currentTime - lastFileTime >= lastFileTimeout) {
        return null;
    }
    return lastFile.getPath().getName();
}

From source file:com.inmobi.databus.local.LocalStreamServiceTest.java

License:Apache License

/**
 * Runs {@code createListing} against a mocked file system and passes the
 * resulting paths, trash paths and checkpoint names to
 * {@code validateExpectedOutput}.
 *
 * <p>Any exception raised along the way fails the test with an
 * {@code AssertionError}, whether or not the JVM has assertions enabled.
 */
private void testCreateListing() {
    try {
        Cluster cluster = ClusterTest.buildLocalCluster();
        FileSystem fs = mock(FileSystem.class);
        createMockForFileSystem(fs, cluster);

        Map<FileStatus, String> results = new TreeMap<FileStatus, java.lang.String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        Map<String, FileStatus> checkpointPaths = new HashMap<String, FileStatus>();
        fs.delete(cluster.getDataDir(), true);
        FileStatus dataDir = new FileStatus(20, false, 3, 23823, 2438232, cluster.getDataDir());
        fs.delete(new Path(cluster.getRootDir() + "/databus-checkpoint"), true);

        TestLocalStreamService service = new TestLocalStreamService(null, cluster,
                new FSCheckpointProvider(cluster.getRootDir() + "/databus-checkpoint"));
        // NOTE(review): five-argument call; presumably an overload declared on
        // TestLocalStreamService - confirm against that class.
        service.createListing(fs, dataDir, results, trashSet, checkpointPaths);

        Set<String> tmpResults = new LinkedHashSet<String>();
        // print the results
        for (FileStatus status : results.keySet()) {
            tmpResults.add(status.getPath().toString());
            LOG.debug("Results [" + status.getPath().toString() + "]");
        }

        // print the trash
        Set<String> tmpTrashPaths = new LinkedHashSet<String>();
        for (FileStatus trashfile : trashSet) {
            tmpTrashPaths.add(trashfile.getPath().toString());
            LOG.debug("trash file [" + trashfile.getPath() + "]");
        }

        Map<String, String> tmpCheckPointPaths = new TreeMap<String, String>();
        // print checkPointPaths
        for (Map.Entry<String, FileStatus> entry : checkpointPaths.entrySet()) {
            String checkPointName = entry.getValue().getPath().getName();
            tmpCheckPointPaths.put(entry.getKey(), checkPointName);
            LOG.debug("CheckPoint key [" + entry.getKey() + "] value [" + checkPointName + "]");
        }
        validateExpectedOutput(tmpResults, tmpTrashPaths, tmpCheckPointPaths);
        fs.delete(new Path(cluster.getRootDir() + "/databus-checkpoint"), true);
        fs.delete(cluster.getDataDir(), true);
        fs.close();
    } catch (Exception e) {
        LOG.debug("Error in running testCreateListing", e);
        // "assert false" is a no-op unless the JVM runs with -ea; throwing
        // explicitly guarantees the failure is reported.
        AssertionError failure = new AssertionError("Error in running testCreateListing: " + e);
        failure.initCause(e);
        throw failure;
    }
}