Example usage for org.apache.hadoop.fs FileSystem listFiles

Introduction

This page collects usage examples for org.apache.hadoop.fs FileSystem listFiles.

Prototype

public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive)
        throws FileNotFoundException, IOException 

Document

List the statuses and block locations of the files in the given path. If recursive is true, files in all subdirectories of the path are returned as well; otherwise only the files directly under the path are returned.
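
Before the longer project examples below, here is a minimal standalone sketch of the call; the directory path and the plain Configuration are placeholders, not values taken from any of the projects listed here:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        // Placeholder directory; point this at a real HDFS or local path.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // Recursively iterate over every file under 'dir'.
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(dir, true);
        while (files.hasNext()) {
            LocatedFileStatus status = files.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}

Passing true as the second argument walks the whole subtree rooted at the path; passing false lists only the files directly under it.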

Usage

From source file:org.apache.falcon.service.SharedLibraryHostingService.java

License:Apache License

private void pushExtensionArtifactsToCluster(final Cluster cluster, final FileSystem clusterFs)
        throws FalconException {
    if (!Services.get().isRegistered(ExtensionService.SERVICE_NAME)) {
        LOG.info("ExtensionService not registered, return");
        return;
    }

    ExtensionStore store = ExtensionStore.get();
    if (!store.isExtensionStoreInitialized()) {
        LOG.info(
                "Extension store not initialized by Extension service. Make sure Extension service is added in "
                        + "start up properties");
        return;
    }

    final String filterPath = "/apps/falcon/extensions/mirroring/";
    Path extensionStorePath = store.getExtensionStorePath();
    LOG.info("extensionStorePath :{}", extensionStorePath);
    FileSystem falconFileSystem = HadoopClientFactory.get().createFalconFileSystem(extensionStorePath.toUri());
    String nameNode = StringUtils
            .removeEnd(falconFileSystem.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), File.separator);

    String clusterStorageUrl = StringUtils.removeEnd(ClusterHelper.getStorageUrl(cluster), File.separator);

    // If default fs for Falcon server is same as cluster fs abort copy
    if (nameNode.equalsIgnoreCase(clusterStorageUrl)) {
        LOG.info("clusterStorageUrl :{} same return", clusterStorageUrl);
        return;
    }

    try {
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = falconFileSystem
                .listFiles(extensionStorePath, true);

        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus srcfileStatus = fileStatusListIterator.next();
            Path filePath = Path.getPathWithoutSchemeAndAuthority(srcfileStatus.getPath());

            if (filePath != null && filePath.toString().startsWith(filterPath)) {
                /* HiveDR uses filter path as store path in DRStatusStore, so skip it. Copy only the extension
                 artifacts */
                continue;
            }

            if (srcfileStatus.isDirectory()) {
                if (!clusterFs.exists(filePath)) {
                    HadoopClientFactory.mkdirs(clusterFs, filePath, srcfileStatus.getPermission());
                }
            } else {
                if (clusterFs.exists(filePath)) {
                    FileStatus targetfstat = clusterFs.getFileStatus(filePath);
                    if (targetfstat.getLen() == srcfileStatus.getLen()) {
                        continue;
                    }
                }

                Path parentPath = filePath.getParent();
                if (!clusterFs.exists(parentPath)) {
                    FsPermission dirPerm = falconFileSystem.getFileStatus(parentPath).getPermission();
                    HadoopClientFactory.mkdirs(clusterFs, parentPath, dirPerm);
                }

                FileUtil.copy(falconFileSystem, srcfileStatus, clusterFs, filePath, false, true,
                        falconFileSystem.getConf());
                FileUtil.chmod(clusterFs.makeQualified(filePath).toString(),
                        srcfileStatus.getPermission().toString());
            }
        }
    } catch (IOException | InterruptedException e) {
        throw new FalconException("Failed to copy extension artifacts to cluster" + cluster.getName(), e);
    }
}

From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java

License:Apache License

@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    partCounter = 0;

    this.writer = writerTemplate.duplicate();

    if (bucketState == null) {
        bucketState = new BucketState();
    }

    FileSystem fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    refTruncate = reflectTruncate(fs);

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(new Path(basePath)) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files: {}", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}

From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java

License:Apache License

@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clean all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();
    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }
    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // be still in-progress or became a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a ".valid-length"
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);

            if (fs.exists(partPendingPath)) {
                LOG.debug(
                        "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the meantime, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-Progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug(
                        "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                + "it was moved to final location by a previous snapshot restore",
                        bucketState.currentFile);
            }

            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try; we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                        bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }

            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }

            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Cound not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }

    LOG.debug("Clearing pending/in-progress files.");

    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure

    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);

            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug(
                            "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath,
                        finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}

From source file:org.apache.flink.yarn.YarnFileStageTest.java

License:Apache License

/**
 * Verifies that nested directories are properly copied with the given filesystem and paths.
 *
 * @param targetFileSystem
 *       file system of the target path
 * @param targetDir
 *       target path (URI like <tt>hdfs://...</tt>)
 * @param tempFolder
 *       JUnit temporary folder rule to create the source directory with
 * @param addSchemeToLocalPath
 *       whether to add the <tt>file://</tt> scheme to the local path to copy from
 */
static void testCopyFromLocalRecursive(FileSystem targetFileSystem, Path targetDir, TemporaryFolder tempFolder,
        boolean addSchemeToLocalPath) throws Exception {

    // directory must not yet exist
    assertFalse(targetFileSystem.exists(targetDir));

    final File srcDir = tempFolder.newFolder();
    final Path srcPath;
    if (addSchemeToLocalPath) {
        srcPath = new Path("file://" + srcDir.getAbsolutePath());
    } else {
        srcPath = new Path(srcDir.getAbsolutePath());
    }

    HashMap<String /* (relative) path */, /* contents */ String> srcFiles = new HashMap<>(4);

    // create and fill source files
    srcFiles.put("1", "Hello 1");
    srcFiles.put("2", "Hello 2");
    srcFiles.put("nested/3", "Hello nested/3");
    srcFiles.put("nested/4/5", "Hello nested/4/5");
    for (Map.Entry<String, String> src : srcFiles.entrySet()) {
        File file = new File(srcDir, src.getKey());
        //noinspection ResultOfMethodCallIgnored
        file.getParentFile().mkdirs();
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(file))) {
            out.writeUTF(src.getValue());
        }
    }

    // copy the created directory recursively:
    try {
        List<Path> remotePaths = new ArrayList<>();
        HashMap<String, LocalResource> localResources = new HashMap<>();
        AbstractYarnClusterDescriptor.uploadAndRegisterFiles(
                Collections.singletonList(new File(srcPath.toUri().getPath())), targetFileSystem, targetDir,
                ApplicationId.newInstance(0, 0), remotePaths, localResources, new StringBuilder());
        assertEquals(srcFiles.size(), localResources.size());

        Path workDir = ConverterUtils
                .getPathFromYarnURL(localResources.get(srcPath.getName() + "/1").getResource()).getParent();

        RemoteIterator<LocatedFileStatus> targetFilesIterator = targetFileSystem.listFiles(workDir, true);
        HashMap<String /* (relative) path */, /* contents */ String> targetFiles = new HashMap<>(4);

        final int workDirPrefixLength = workDir.toString().length() + 1; // one more for the concluding "/"
        while (targetFilesIterator.hasNext()) {
            LocatedFileStatus targetFile = targetFilesIterator.next();

            int retries = 5;
            do {
                try (FSDataInputStream in = targetFileSystem.open(targetFile.getPath())) {
                    String absolutePathString = targetFile.getPath().toString();
                    String relativePath = absolutePathString.substring(workDirPrefixLength);
                    targetFiles.put(relativePath, in.readUTF());

                    assertEquals("extraneous data in file " + relativePath, -1, in.read());
                    break;
                } catch (FileNotFoundException e) {
                    // For S3, read-after-write may be eventually consistent, i.e. when trying
                    // to access the object before writing it; see
                    // https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel
                    // -> try again a bit later
                    Thread.sleep(50);
                }
            } while ((retries--) > 0);
        }

        assertThat(targetFiles, equalTo(srcFiles));
    } finally {
        // clean up
        targetFileSystem.delete(targetDir, true);
    }
}

From source file:org.apache.impala.catalog.HdfsTable.java

License:Apache License

/**
 * Drops and re-loads the block metadata for all partitions in 'partsByPath' whose
 * location is under the given 'dirPath'. It involves the following steps:
 * - Clear the current block metadata of the partitions.
 * - Call FileSystem.listFiles() on 'dirPath' to fetch the BlockLocations for each
 *   file under it recursively.
 * - For every valid data file, map it to a partition from 'partsByPath' (if one exists)
 *   and enumerate all its blocks and their corresponding hosts and disk IDs.
 * Requires that 'dirPath' and all paths in 'partsByPath' have consistent qualification
 * (either fully qualified or unqualified), for isDescendantPath().
 * TODO: Split this method into more logical methods for cleaner code.
 */
private void loadBlockMetadata(Path dirPath, HashMap<Path, List<HdfsPartition>> partsByPath) {
    try {
        FileSystem fs = dirPath.getFileSystem(CONF);
        // No need to load blocks for empty partitions list.
        if (partsByPath.size() == 0 || !fs.exists(dirPath))
            return;
        if (LOG.isTraceEnabled()) {
            LOG.trace("Loading block md for " + name_ + " directory " + dirPath.toString());
        }

        // Clear the state of partitions under dirPath since they are going to be updated
        // based on the current snapshot of files in the directory.
        List<HdfsPartition> dirPathPartitions = partsByPath.get(dirPath);
        if (dirPathPartitions != null) {
            // The dirPath is a partition directory. This means the path is the root of an
            // unpartitioned table, or the path of at least one partition.
            for (HdfsPartition partition : dirPathPartitions) {
                partition.setFileDescriptors(new ArrayList<FileDescriptor>());
            }
        } else {
            // The dirPath is not a partition directory. We expect it to be an ancestor of
            // partition paths (e.g., the table root). Clear all partitions whose paths are
            // a descendant of dirPath.
            for (Map.Entry<Path, List<HdfsPartition>> entry : partsByPath.entrySet()) {
                Path partDir = entry.getKey();
                if (!FileSystemUtil.isDescendantPath(partDir, dirPath))
                    continue;
                for (HdfsPartition partition : entry.getValue()) {
                    partition.setFileDescriptors(new ArrayList<FileDescriptor>());
                }
            }
        }

        // For file systems that do not support BlockLocation API, we manually synthesize
        // block location metadata based on file formats.
        if (!FileSystemUtil.supportsStorageIds(fs)) {
            synthesizeBlockMetadata(fs, dirPath, partsByPath);
            return;
        }

        int unknownDiskIdCount = 0;
        RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true);
        while (fileStatusIter.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusIter.next();
            if (!FileSystemUtil.isValidDataFile(fileStatus))
                continue;
            // Find the partition that this file belongs to (if any).
            Path partPathDir = fileStatus.getPath().getParent();
            Preconditions.checkNotNull(partPathDir);

            List<HdfsPartition> partitions = partsByPath.get(partPathDir);
            // Skip if this file does not belong to any known partition.
            if (partitions == null) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond "
                            + " to a known partition. Skipping metadata load for this file.");
                }
                continue;
            }
            String fileName = fileStatus.getPath().getName();
            FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(),
                    fileStatus.getModificationTime());
            BlockLocation[] locations = fileStatus.getBlockLocations();
            String partPathDirName = partPathDir.toString();
            for (BlockLocation loc : locations) {
                Set<String> cachedHosts = Sets.newHashSet(loc.getCachedHosts());
                // Enumerate all replicas of the block, adding any unknown hosts
                // to hostIndex_. We pick the network address from getNames() and
                // map it to the corresponding hostname from getHosts().
                List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(loc.getNames().length);
                for (int i = 0; i < loc.getNames().length; ++i) {
                    TNetworkAddress networkAddress = BlockReplica.parseLocation(loc.getNames()[i]);
                    replicas.add(new BlockReplica(hostIndex_.getIndex(networkAddress),
                            cachedHosts.contains(loc.getHosts()[i])));
                }
                FileBlock currentBlock = new FileBlock(loc.getOffset(), loc.getLength(), replicas);
                THdfsFileBlock tHdfsFileBlock = currentBlock.toThrift();
                fd.addThriftFileBlock(tHdfsFileBlock);
                unknownDiskIdCount += loadDiskIds(loc, tHdfsFileBlock);
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace("Adding file md dir: " + partPathDirName + " file: " + fileName);
            }
            // Update the partitions' metadata that this file belongs to.
            for (HdfsPartition partition : partitions) {
                partition.getFileDescriptors().add(fd);
                numHdfsFiles_++;
                totalHdfsBytes_ += fd.getFileLength();
            }
        }
        if (unknownDiskIdCount > 0) {
            if (LOG.isWarnEnabled()) {
                LOG.warn("Unknown disk id count for filesystem " + fs + ":" + unknownDiskIdCount);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(
                "Error loading block metadata for directory " + dirPath.toString() + ": " + e.getMessage(), e);
    }
}

From source file:org.apache.impala.catalog.HdfsTable.java

License:Apache License

/**
 * For filesystems that don't support BlockLocation API, synthesize file blocks
 * by manually splitting the file range into fixed-size blocks.  That way, scan
 * ranges can be derived from file blocks as usual.  All synthesized blocks are given
 * an invalid network address so that the scheduler will treat them as remote.
 */
private void synthesizeBlockMetadata(FileSystem fs, Path dirPath,
        HashMap<Path, List<HdfsPartition>> partsByPath) throws IOException {
    RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true);
    while (fileStatusIter.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusIter.next();
        if (!FileSystemUtil.isValidDataFile(fileStatus))
            continue;
        Path partPathDir = fileStatus.getPath().getParent();
        Preconditions.checkNotNull(partPathDir);
        List<HdfsPartition> partitions = partsByPath.get(partPathDir);
        // Skip if this file does not belong to any known partition.
        if (partitions == null) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond "
                        + " to a known partition. Skipping metadata load for this file.");
            }
            continue;
        }
        String fileName = fileStatus.getPath().getName();
        FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime());
        Preconditions.checkState(partitions.size() > 0);
        // For the purpose of synthesizing block metadata, we assume that all partitions
        // with the same location have the same file format.
        HdfsFileFormat fileFormat = partitions.get(0).getFileFormat();
        synthesizeFdBlockMetadata(fs, fd, fileFormat);
        // Update the partitions' metadata that this file belongs to.
        for (HdfsPartition partition : partitions) {
            partition.getFileDescriptors().add(fd);
            numHdfsFiles_++;
            totalHdfsBytes_ += fd.getFileLength();
        }
    }
}

From source file:org.apache.metron.maas.service.callback.LaunchContainer.java

License:Apache License

public String localizeResources(Map<String, LocalResource> resources, Path scriptLocation,
        Path appJarLocation) {
    try {
        LOG.info("Model payload: " + scriptLocation);
        LOG.info("AppJAR Location: " + appJarLocation);
        FileSystem fs = scriptLocation.getFileSystem(conf);
        String script = null;
        Map.Entry<String, LocalResource> kv = localizeResource(fs.getFileStatus(appJarLocation));
        resources.put(kv.getKey(), kv.getValue());
        for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(scriptLocation, true); it.hasNext();) {
            LocatedFileStatus status = it.next();
            kv = localizeResource(status);
            String name = kv.getKey();
            if (name.endsWith(".sh")) {
                script = name;
            }
            LOG.info("Localized " + name + " -> " + status.toString());
            resources.put(name, kv.getValue());
        }
        return script;
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
        return null;
    }
}

From source file:org.apache.metron.pcap.finalizer.PcapFinalizer.java

License:Apache License

/**
 * Returns a lazily-read Iterable over a set of sequence files.
 */
protected SequenceFileIterable readInterimResults(Path interimResultPath, Configuration config, FileSystem fs)
        throws IOException {
    List<Path> files = new ArrayList<>();
    for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(interimResultPath, false); it.hasNext();) {
        Path p = it.next().getPath();
        if (p.getName().equals("_SUCCESS")) {
            fs.delete(p, false);
            continue;
        }
        files.add(p);
    }
    if (files.size() == 0) {
        LOG.info("No files to process with specified date range.");
    } else {
        LOG.debug("Interim results path={}", interimResultPath);
        Collections.sort(files, (o1, o2) -> o1.getName().compareTo(o2.getName()));
    }
    return new SequenceFileIterable(files, config);
}

From source file:org.apache.metron.pcap.mr.PcapJob.java

License:Apache License

protected Iterable<Path> listFiles(FileSystem fs, Path basePath) throws IOException {
    List<Path> ret = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> filesIt = fs.listFiles(basePath, true);
    while (filesIt.hasNext()) {
        ret.add(filesIt.next().getPath());
    }
    return ret;
}

From source file:org.apache.metron.pcap.mr.PcapJob.java

License:Apache License

/**
 * Returns a lazily-read Iterable over a set of sequence files.
 */
private SequenceFileIterable readResults(Path outputPath, Configuration config, FileSystem fs)
        throws IOException {
    List<Path> files = new ArrayList<>();
    for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(outputPath, false); it.hasNext();) {
        Path p = it.next().getPath();
        if (p.getName().equals("_SUCCESS")) {
            fs.delete(p, false);
            continue;
        }
        files.add(p);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(outputPath);
    }
    Collections.sort(files, (o1, o2) -> o1.getName().compareTo(o2.getName()));
    return new SequenceFileIterable(files, config);
}