Example usage for org.apache.hadoop.fs FileStatus getPath

Usage examples for org.apache.hadoop.fs.FileStatus.getPath()

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.inmobi.conduit.distcp.tools.TestIntegration.java

License:Apache License

/**
 * Verifies the audit counters written after a copy of a single audited file.
 *
 * <p>After {@code runTest} completes, every file under {@code counterOutputPath} whose
 * path contains "part" is scanned token by token. Each token is split on
 * {@link ConduitConstants#AUDIT_COUNTER_NAME_DELIMITER} and is expected to have four
 * fields, the last of which is parsed as the message count. Malformed tokens are logged
 * and skipped. The test then expects two counters in total, with values summing to three.
 */
@Test
public void testJobConters() {
    try {
        Path listFile = new Path("target/tmp1/listing").makeQualified(fs);
        addEntries(listFile, "*");
        createFileForAudit("/conduit/streams/test1/2013/10/10/10/10/file1.gz");
        runTest(listFile, target, true);
        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        FileStatus[] statuses = fs.listStatus(counterOutputPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus status : statuses) {
            Scanner scanner = new Scanner(fs.open(status.getPath()));
            try {
                while (scanner.hasNext()) {
                    String counterNameValue = null;
                    try {
                        counterNameValue = scanner.next();
                        String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                        Assert.assertEquals(4, tmp.length);
                        long numOfMsgs = Long.parseLong(tmp[3]);
                        numberOfCountersPerFile++;
                        sumOfCounterValues += numOfMsgs;
                    } catch (Exception e) {
                        LOG.error("Counters file has malformed line with counter name = " + counterNameValue
                                + " ..skipping the line ", e);
                    }
                }
            } finally {
                // Closing the scanner also closes the underlying stream returned by fs.open().
                scanner.close();
            }
        }
        // should have 2 counters per file
        Assert.assertEquals(2, numberOfCountersPerFile);
        // sum of all counter values should equal the total number of messages
        Assert.assertEquals(3, sumOfCounterValues);
        checkResult(target, 1);
    } catch (IOException e) {
        LOG.error("Exception encountered while testing distcp", e);
        Assert.fail("distcp failure");
    } finally {
        TestDistCpUtils.delete(fs, root);
    }
}

From source file:com.inmobi.conduit.distcp.tools.util.TestDistCpUtils.java

License:Apache License

/**
 * Walks the directory tree rooted at {@code targetBase} and asserts that every entry
 * found there also exists under {@code sourceBase} at the same relative path.
 *
 * @param fs         file system used for both the listing and the existence checks
 * @param targetBase root of the tree that is traversed
 * @param sourceBase root under which a matching entry must exist
 * @return always {@code true}; a missing entry fails through {@code Assert.assertTrue}
 * @throws IOException if a file system call fails
 */
public static boolean checkIfFoldersAreInSync(FileSystem fs, String targetBase, String sourceBase)
        throws IOException {
    final Path targetRoot = new Path(targetBase);

    final Stack<Path> pending = new Stack<Path>();
    pending.push(targetRoot);
    while (!pending.isEmpty()) {
        final Path current = pending.pop();
        if (fs.exists(current)) {
            final FileStatus[] children = fs.listStatus(current);
            if (children != null) {
                for (FileStatus child : children) {
                    final Path childPath = child.getPath();
                    // Directories are queued so that their contents get visited as well.
                    if (child.isDir()) {
                        pending.push(childPath);
                    }
                    final String expected = sourceBase + "/" + DistCpUtils.getRelativePath(targetRoot, childPath);
                    Assert.assertTrue(fs.exists(new Path(expected)));
                }
            }
        }
    }
    return true;
}

From source file:com.inmobi.conduit.local.CopyMapper.java

License:Apache License

/**
 * Compresses one source file into a temporary path, moves it to its destination
 * directory and, when auditing is enabled, emits one counter record per entry collected
 * during compression.
 *
 * <p>The collector and category names are taken from the parent and grandparent
 * directory names of the source path. The source file system is resolved from the
 * {@code SRC_FS_DEFAULT_NAME_KEY} setting of the job configuration.
 *
 * @param key     destination directory, as text
 * @param value   status of the source file to process
 * @param context mapper context used for configuration lookup and output
 * @throws IOException          if a file system operation fails, including a rename
 *                              that reports failure
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void map(Text key, FileStatus value, Context context) throws IOException, InterruptedException {
    Path src = value.getPath();
    String dest = key.toString();
    String collector = src.getParent().getName();
    String category = src.getParent().getParent().getName();
    // Audit counts are collected only when auditing is enabled (the default).
    Map<Long, Long> received = null;
    if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) {
        received = new HashMap<Long, Long>();
    }
    Configuration srcConf = new Configuration();
    srcConf.set(FS_DEFAULT_NAME_KEY, context.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));

    FileSystem fs = FileSystem.get(srcConf);
    Path target = getTempPath(context, src, category, collector);
    // A true result from gzip is treated as "source file was empty": nothing to move.
    if (FileUtil.gzip(src, target, srcConf, received)) {
        LOG.info("File " + src + " is empty hence returning without compressing");
        return;
    }
    // move to final destination
    fs.mkdirs(new Path(dest).makeQualified(fs));
    String destnFilename = collector + "-" + src.getName() + ".gz";
    // Hadoop paths always use '/', independent of the local OS separator.
    Path destPath = new Path(dest + Path.SEPARATOR + destnFilename);
    LOG.info("Renaming file " + target + " to " + destPath);
    // rename() signals failure through its return value; do not emit counters for a
    // file that never reached its destination.
    if (!fs.rename(target, destPath)) {
        throw new IOException("Failed to rename " + target + " to " + destPath);
    }
    if (received != null) {
        for (Entry<Long, Long> entry : received.entrySet()) {
            String counterNameValue = getCounterNameValue(category, destnFilename, entry.getKey(),
                    entry.getValue());
            context.write(NullWritable.get(), new Text(counterNameValue));
        }
    }
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Builds the source-to-destination move plan for everything currently under
 * {@code tmpJobOutputPath}.
 *
 * <p>Each first-level entry is treated as a category directory; every file inside it is
 * mapped to a path with the same file name under the local destination directory that
 * the source cluster reports for that category and {@code commitTime}. A directory that
 * is missing is treated as empty. Before returning, {@code publishMissingPaths} is
 * invoked for the streams being processed.
 *
 * @param commitTime commit time passed to the destination lookup and to
 *                   {@code publishMissingPaths}
 * @return insertion-ordered map from source path to destination path
 * @throws Exception if a file system call or {@code publishMissingPaths} fails
 */
Map<Path, Path> prepareForCommit(long commitTime) throws Exception {
    final FileSystem fs = FileSystem.get(srcCluster.getHadoopConf());

    // find final destination paths
    final Map<Path, Path> mvPaths = new LinkedHashMap<Path, Path>();
    FileStatus[] categories;
    try {
        categories = fs.listStatus(tmpJobOutputPath);
    } catch (FileNotFoundException e) {
        categories = new FileStatus[0];
    }

    for (FileStatus category : categories) {
        final Path categoryPath = category.getPath();
        final Path destDir = new Path(srcCluster.getLocalDestDir(categoryPath.getName(), commitTime));

        FileStatus[] files;
        try {
            files = fs.listStatus(categoryPath);
        } catch (FileNotFoundException e) {
            files = new FileStatus[0];
        }

        for (FileStatus file : files) {
            final Path srcPath = file.getPath();
            final Path destPath = new Path(destDir, srcPath.getName());
            LOG.debug("Moving [" + srcPath + "] to [" + destPath + "]");
            mvPaths.put(srcPath, destPath);
        }
    }

    publishMissingPaths(fs, srcCluster.getLocalFinalDestDirRoot(), commitTime, streamsToProcess);
    return mvPaths;
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Maps every file in {@code trashSet} to its location inside the source cluster's
 * current trash directory.
 *
 * <p>The trash file name is the name of the file's parent directory, a hyphen, and the
 * file's own name.
 *
 * @param trashSet files that are to be trashed
 * @return sorted map from original path to trash path
 */
Map<Path, Path> populateTrashCommitPaths(Set<FileStatus> trashSet) {
    // find trash paths
    final Map<Path, Path> trashPaths = new TreeMap<Path, Path>();
    final Path trashDir = srcCluster.getTrashPathWithDateHour();
    for (FileStatus status : trashSet) {
        final Path srcPath = status.getPath();
        final Path target = new Path(trashDir, srcPath.getParent().getName() + "-" + srcPath.getName());
        LOG.debug("Trashing [" + srcPath + "] to [" + target + "]");
        trashPaths.put(srcPath, target);
    }
    return trashPaths;
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Scans every stream in {@code streamsToProcess} and, per collector directory, records
 * which files are to be processed, which go to trash, and the checkpoint paths.
 *
 * <p>For each collector the stored checkpoint value is read (falling back to the older
 * key format), the collector's files are listed and sorted, and up to
 * {@code filesPerCollector} files are passed to {@code processFile}. For each stream the
 * smallest of the collectors' latest file timestamps is stored in
 * {@code lastProcessedFile}.
 *
 * @param fs              file system used to list collector directories and files
 * @param fileStatus      not read by this method
 * @param results         filled by {@code processFile}
 * @param trashSet        passed to {@code populateTrash} for every collector
 * @param checkpointPaths passed to {@code populateCheckpointPathForCollector} for every collector
 * @throws IOException if a file system call fails
 */
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results,
        Set<FileStatus> trashSet, Table<String, String, String> checkpointPaths) throws IOException {
    List<FileStatus> streamsFileStatus = new ArrayList<FileStatus>();
    // Stream directories are resolved against the source cluster's file system, while
    // the listings below go through the fs argument.
    FileSystem srcFs = FileSystem.get(srcCluster.getHadoopConf());
    for (String stream : streamsToProcess) {
        streamsFileStatus.add(srcFs.getFileStatus(new Path(srcCluster.getDataDir(), stream)));
    }
    for (FileStatus stream : streamsFileStatus) {
        String streamName = stream.getPath().getName();
        LOG.debug("createListing working on Stream [" + streamName + "]");
        FileStatus[] collectors;
        try {
            collectors = fs.listStatus(stream.getPath());
        } catch (FileNotFoundException ex) {
            // A missing stream directory is treated as having no collectors.
            collectors = new FileStatus[0];
        }
        // -1 means "no collector of this stream has produced a timestamp yet".
        long minOfLatestCollectorTimeStamp = -1;
        for (FileStatus collector : collectors) {
            TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>();
            // check point for this collector
            String collectorName = collector.getPath().getName();
            String checkPointKey = getCheckPointKey(this.getClass().getSimpleName(), streamName, collectorName);

            String checkPointValue = null;
            byte[] value = checkpointProvider.read(checkPointKey);
            if (value == null) {
                // In case checkpointKey with newer name format is absent,read old
                // checkpoint key
                String oldCheckPointKey = streamName + collectorName;
                value = checkpointProvider.read(oldCheckPointKey);
            }
            // NOTE(review): new String(byte[]) decodes with the platform default charset.
            if (value != null)
                checkPointValue = new String(value);
            LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]");
            FileStatus[] files = null;
            try {
                files = fs.listStatus(collector.getPath(), new CollectorPathFilter());
            } catch (FileNotFoundException e) {
                // files stays null; the null check below logs and skips this collector.
            }

            if (files == null) {
                LOG.warn("No Files Found in the Collector " + collector.getPath() + " Skipping Directory");
                continue;
            }
            // getCurrentFile copies files into sortedFiles and returns the name of the last one.
            TreeSet<FileStatus> sortedFiles = new TreeSet<FileStatus>(new FileTimeStampComparator());
            String currentFile = getCurrentFile(fs, files, sortedFiles);
            LOG.debug("last file " + currentFile + " in the collector directory " + collector.getPath());

            Iterator<FileStatus> it = sortedFiles.iterator();
            // The counter is an instance field; it is reset here for each collector and
            // bounds the loop below.
            numberOfFilesProcessed = 0;
            long latestCollectorFileTimeStamp = -1;
            while (it.hasNext() && numberOfFilesProcessed < filesPerCollector) {
                FileStatus file = it.next();
                LOG.debug("Processing " + file.getPath());
                /*
                 * fileTimeStamp value will be -1 for the files which are already processed
                 */
                long fileTimeStamp = processFile(file, currentFile, checkPointValue, fs, results,
                        collectorPaths, streamName);
                // Keep the largest timestamp seen for this collector.
                if (fileTimeStamp > latestCollectorFileTimeStamp) {
                    latestCollectorFileTimeStamp = fileTimeStamp;
                }
            }
            populateTrash(collectorPaths, trashSet);
            populateCheckpointPathForCollector(checkpointPaths, collectorPaths);

            // Track the minimum, across collectors, of each collector's latest timestamp;
            // collectors that produced no timestamp (-1) are ignored.
            if ((latestCollectorFileTimeStamp < minOfLatestCollectorTimeStamp
                    || minOfLatestCollectorTimeStamp == -1) && latestCollectorFileTimeStamp != -1) {
                minOfLatestCollectorTimeStamp = latestCollectorFileTimeStamp;
            }
        } // all collectors of a stream
        if (minOfLatestCollectorTimeStamp != -1) {
            lastProcessedFile.put(streamName, minOfLatestCollectorTimeStamp);
        } else {
            LOG.warn("No new files in " + streamName + " stream");
        }
    }
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Decides whether a single collector file is to be processed and records it.
 *
 * <p>A file whose name matches {@code currentFile} (ignoring case) is skipped unless
 * {@code processLastFile} is set. Any other file is added to {@code collectorPaths};
 * if it is also above the checkpoint it is added to {@code results} with the stream's
 * temporary job output directory, and non-empty files increment
 * {@code numberOfFilesProcessed}.
 *
 * @param file            file under consideration
 * @param currentFile     name of the collector's current file, possibly {@code null}
 * @param checkPointValue checkpoint value passed to {@code aboveCheckpoint}
 * @param fs              not read by this method
 * @param results         receives the file and its destination directory when selected
 * @param collectorPaths  receives every file that is not skipped, keyed by file name
 * @param stream          stream name used to resolve the destination directory
 * @return the timestamp derived from the file name when the file is above the
 *         checkpoint, otherwise -1
 * @throws IOException declared by the signature; propagated from the helpers called
 */
private long processFile(FileStatus file, String currentFile, String checkPointValue, FileSystem fs,
        Map<FileStatus, String> results, Map<String, FileStatus> collectorPaths, String stream)
        throws IOException {
    final String name = file.getPath().getName();
    if (name == null) {
        return -1;
    }
    // The collector's current file is left alone unless it has been flagged for processing.
    if (name.equalsIgnoreCase(currentFile) && !processLastFile) {
        return -1;
    }

    final String destDir = getCategoryJobOutTmpPath(stream).toString();
    long timeStamp = -1;
    if (aboveCheckpoint(checkPointValue, name)) {
        results.put(file, destDir);
        timeStamp = CalendarHelper.getDateFromCollectorFileName(name);
        // Only files with content count towards the number of data files processed.
        if (file.getLen() != 0) {
            numberOfFilesProcessed++;
        }
    }
    collectorPaths.put(name, file);
    return timeStamp;
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Reports whether a file has no content by trying to read a single byte from it.
 *
 * @param fileStatus status of the file to probe
 * @param fs         file system used to open the file
 * @return {@code true} when one byte could not be read; {@code false} when a byte was
 *         read or when opening/reading failed with an {@code IOException} (which is
 *         logged)
 */
private boolean isEmptyFile(FileStatus fileStatus, FileSystem fs) {
    boolean empty = false;
    FSDataInputStream stream = null;
    try {
        stream = fs.open(fileStatus.getPath());
        // The file is empty when not even one byte can be read from it.
        final byte[] probe = new byte[1];
        empty = stream.read(probe) != 1;
    } catch (IOException e) {
        LOG.error("Unable to find if file is empty or not [" + fileStatus.getPath() + "]", e);
    } finally {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e1) {
                LOG.error("Error in closing file [" + fileStatus.getPath() + "]", e1);
            }
        }
    }
    return empty;
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Adds {@code files} to {@code sortedFiles} and returns the name of the last element of
 * that set, which callers treat as the collector's current file.
 *
 * <p>As a side effect, {@code processLastFile} is set to whether the last file's
 * modification time is more than {@code timeoutToProcessLastCollectorFile} minutes old.
 *
 * @param fs          not read by this method
 * @param files       files to add; may be {@code null} or empty
 * @param sortedFiles set that receives the files and determines their order
 * @return the last file's name, or {@code null} when there are no files (in which case
 *         {@code sortedFiles} and {@code processLastFile} are left untouched)
 */
protected String getCurrentFile(FileSystem fs, FileStatus[] files, TreeSet<FileStatus> sortedFiles) {
    if (files == null || files.length == 0) {
        return null;
    }
    for (int i = 0; i < files.length; i++) {
        sortedFiles.add(files[i]);
    }

    // The last element of the set is taken as the current file.
    final FileStatus last = sortedFiles.last();
    final long ageInMinutes = (System.currentTimeMillis() - last.getModificationTime())
            / MILLISECONDS_IN_MINUTE;
    processLastFile = ageInMinutes > timeoutToProcessLastCollectorFile;
    return last.getPath().getName();
}

From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java

License:Apache License

/**
 * Runs {@code createListing} against a mocked file system for two streams and validates
 * the resulting file list, trash list and checkpoint map through
 * {@code validateExpectedOutput}.
 *
 * <p>Any exception fails the test by throwing an {@link AssertionError} that carries
 * the original exception as its cause, so the failure is reported even when JVM
 * assertions are disabled.
 */
private void testCreateListing() {
    try {
        Cluster cluster = ClusterTest.buildLocalCluster();
        FileSystem fs = mock(FileSystem.class);
        createMockForFileSystem(fs, cluster);

        Map<FileStatus, String> results = new TreeMap<FileStatus, java.lang.String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        fs.delete(cluster.getDataDir(), true);
        FileStatus dataDir = new FileStatus(20, false, 3, 23823, 2438232, cluster.getDataDir());
        fs.delete(new Path(cluster.getRootDir() + "/conduit-checkpoint"), true);

        Set<String> streamsToProcess = new HashSet<String>();
        streamsToProcess.add("stream1");
        streamsToProcess.add("stream2");
        TestLocalStreamService service = new TestLocalStreamService(null, cluster, null,
                new FSCheckpointProvider(cluster.getRootDir() + "/conduit-checkpoint"), streamsToProcess);
        service.createListing(fs, dataDir, results, trashSet, checkpointPaths);

        Set<String> tmpResults = new LinkedHashSet<String>();
        // print the results
        for (FileStatus status : results.keySet()) {
            tmpResults.add(status.getPath().toString());
            LOG.debug("Results [" + status.getPath().toString() + "]");
        }

        // print the trash
        Iterator<FileStatus> it = trashSet.iterator();
        Set<String> tmpTrashPaths = new LinkedHashSet<String>();
        while (it.hasNext()) {
            FileStatus trashfile = it.next();
            tmpTrashPaths.add(trashfile.getPath().toString());
            LOG.debug("trash file [" + trashfile.getPath());
        }

        // flatten the checkpoint table into checkpoint-key -> value
        Map<String, String> tmpCheckPointPaths = new TreeMap<String, String>();
        Set<String> streams = checkpointPaths.rowKeySet();
        for (String streamName : streams) {
            Map<String, String> collectorCheckpointValueMap = checkpointPaths.row(streamName);
            for (String collector : collectorCheckpointValueMap.keySet()) {
                String checkpointKey = AbstractService.getCheckPointKey(service.getName(), streamName,
                        collector);
                LOG.debug("Check Pointing Key [" + checkpointKey + "] with value ["
                        + collectorCheckpointValueMap.get(collector) + "]");
                tmpCheckPointPaths.put(checkpointKey, collectorCheckpointValueMap.get(collector));
            }
        }
        validateExpectedOutput(tmpResults, tmpTrashPaths, tmpCheckPointPaths);
        fs.delete(new Path(cluster.getRootDir() + "/conduit-checkpoint"), true);
        fs.delete(cluster.getDataDir(), true);
        fs.close();
    } catch (Exception e) {
        LOG.error("Error in running testCreateListing", e);
        // "assert false" is a no-op unless the JVM runs with -ea; fail unconditionally.
        AssertionError failure = new AssertionError("Error in running testCreateListing: " + e);
        failure.initCause(e);
        throw failure;
    }
}