Example usage for org.apache.hadoop.fs FileSystem listFiles

Introduction

This page collects usage examples for org.apache.hadoop.fs FileSystem listFiles.

Prototype

public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive)
        throws FileNotFoundException, IOException 

Document

List the statuses and block locations of the files in the given path. If recursive is true, files in all subdirectories of the path are returned as well; otherwise only the files directly under the path are returned.
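
Before the longer project examples below, here is a minimal standalone sketch of the call; the directory path and the plain Configuration are placeholders, not values taken from any of the projects listed here:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        // Placeholder directory; point this at a real HDFS or local path.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // Recursively iterate over every file under 'dir'.
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(dir, true);
        while (files.hasNext()) {
            LocatedFileStatus status = files.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}

Passing true as the second argument walks the whole subtree rooted at the path; passing false lists only the files directly under it.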

Usage

From source file:org.apache.falcon.service.SharedLibraryHostingService.java

License:Apache License

private void pushExtensionArtifactsToCluster(final Cluster cluster, final FileSystem clusterFs)
        throws FalconException {
    if (!Services.get().isRegistered(ExtensionService.SERVICE_NAME)) {
        LOG.info("ExtensionService not registered, return");
        return;
    }

    ExtensionStore store = ExtensionStore.get();
    if (!store.isExtensionStoreInitialized()) {
        LOG.info(
                "Extension store not initialized by Extension service. Make sure Extension service is added in "
                        + "start up properties");
        return;
    }

    final String filterPath = "/apps/falcon/extensions/mirroring/";
    Path extensionStorePath = store.getExtensionStorePath();
    LOG.info("extensionStorePath :{}", extensionStorePath);
    FileSystem falconFileSystem = HadoopClientFactory.get().createFalconFileSystem(extensionStorePath.toUri());
    String nameNode = StringUtils
            .removeEnd(falconFileSystem.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), File.separator);

    String clusterStorageUrl = StringUtils.removeEnd(ClusterHelper.getStorageUrl(cluster), File.separator);

    // If default fs for Falcon server is same as cluster fs abort copy
    if (nameNode.equalsIgnoreCase(clusterStorageUrl)) {
        LOG.info("clusterStorageUrl :{} same return", clusterStorageUrl);
        return;
    }

    try {
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = falconFileSystem
                .listFiles(extensionStorePath, true);

        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus srcfileStatus = fileStatusListIterator.next();
            Path filePath = Path.getPathWithoutSchemeAndAuthority(srcfileStatus.getPath());

            if (filePath != null && filePath.toString().startsWith(filterPath)) {
                /* HiveDR uses filter path as store path in DRStatusStore, so skip it. Copy only the extension
                 artifacts */
                continue;
            }

            if (srcfileStatus.isDirectory()) {
                if (!clusterFs.exists(filePath)) {
                    HadoopClientFactory.mkdirs(clusterFs, filePath, srcfileStatus.getPermission());
                }
            } else {
                if (clusterFs.exists(filePath)) {
                    FileStatus targetfstat = clusterFs.getFileStatus(filePath);
                    if (targetfstat.getLen() == srcfileStatus.getLen()) {
                        continue;
                    }
                }

                Path parentPath = filePath.getParent();
                if (!clusterFs.exists(parentPath)) {
                    FsPermission dirPerm = falconFileSystem.getFileStatus(parentPath).getPermission();
                    HadoopClientFactory.mkdirs(clusterFs, parentPath, dirPerm);
                }

                FileUtil.copy(falconFileSystem, srcfileStatus, clusterFs, filePath, false, true,
                        falconFileSystem.getConf());
                FileUtil.chmod(clusterFs.makeQualified(filePath).toString(),
                        srcfileStatus.getPermission().toString());
            }
        }
    } catch (IOException | InterruptedException e) {
        throw new FalconException("Failed to copy extension artifacts to cluster" + cluster.getName(), e);
    }
}

From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java

License:Apache License

@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    partCounter = 0;

    this.writer = writerTemplate.duplicate();

    if (bucketState == null) {
        bucketState = new BucketState();
    }

    FileSystem fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    refTruncate = reflectTruncate(fs);

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(new Path(basePath)) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files: {}", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}

From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java

License:Apache License

@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clean all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();
    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }
    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // be still in-progress or became a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a ".valid-length"
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);

            if (fs.exists(partPendingPath)) {
                LOG.debug(
                        "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the meantime, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-Progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug(
                        "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                + "it was moved to final location by a previous snapshot restore",
                        bucketState.currentFile);
            }

            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try; we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                        bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }

            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }

            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Cound not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }

    LOG.debug("Clearing pending/in-progress files.");

    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure

    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);

            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug(
                            "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath,
                        finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}

From source file:org.apache.flink.yarn.YarnFileStageTest.java

License:Apache License

/**
 * Verifies that nested directories are properly copied with the given filesystem and paths.
 *
 * @param targetFileSystem
 *       file system of the target path
 * @param targetDir
 *       target path (URI like <tt>hdfs://...</tt>)
 * @param tempFolder
 *       JUnit temporary folder rule to create the source directory with
 * @param addSchemeToLocalPath
 *       whether to add the <tt>file://</tt> scheme to the local path to copy from
 */
static void testCopyFromLocalRecursive(FileSystem targetFileSystem, Path targetDir, TemporaryFolder tempFolder,
        boolean addSchemeToLocalPath) throws Exception {

    // directory must not yet exist
    assertFalse(targetFileSystem.exists(targetDir));

    final File srcDir = tempFolder.newFolder();
    final Path srcPath;
    if (addSchemeToLocalPath) {
        srcPath = new Path("file://" + srcDir.getAbsolutePath());
    } else {
        srcPath = new Path(srcDir.getAbsolutePath());
    }

    HashMap<String /* (relative) path */, /* contents */ String> srcFiles = new HashMap<>(4);

    // create and fill source files
    srcFiles.put("1", "Hello 1");
    srcFiles.put("2", "Hello 2");
    srcFiles.put("nested/3", "Hello nested/3");
    srcFiles.put("nested/4/5", "Hello nested/4/5");
    for (Map.Entry<String, String> src : srcFiles.entrySet()) {
        File file = new File(srcDir, src.getKey());
        //noinspection ResultOfMethodCallIgnored
        file.getParentFile().mkdirs();
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(file))) {
            out.writeUTF(src.getValue());
        }
    }

    // copy the created directory recursively:
    try {
        List<Path> remotePaths = new ArrayList<>();
        HashMap<String, LocalResource> localResources = new HashMap<>();
        AbstractYarnClusterDescriptor.uploadAndRegisterFiles(
                Collections.singletonList(new File(srcPath.toUri().getPath())), targetFileSystem, targetDir,
                ApplicationId.newInstance(0, 0), remotePaths, localResources, new StringBuilder());
        assertEquals(srcFiles.size(), localResources.size());

        Path workDir = ConverterUtils
                .getPathFromYarnURL(localResources.get(srcPath.getName() + "/1").getResource()).getParent();

        RemoteIterator<LocatedFileStatus> targetFilesIterator = targetFileSystem.listFiles(workDir, true);
        HashMap<String /* (relative) path */, /* contents */ String> targetFiles = new HashMap<>(4);

        final int workDirPrefixLength = workDir.toString().length() + 1; // one more for the concluding "/"
        while (targetFilesIterator.hasNext()) {
            LocatedFileStatus targetFile = targetFilesIterator.next();

            int retries = 5;
            do {
                try (FSDataInputStream in = targetFileSystem.open(targetFile.getPath())) {
                    String absolutePathString = targetFile.getPath().toString();
                    String relativePath = absolutePathString.substring(workDirPrefixLength);
                    targetFiles.put(relativePath, in.readUTF());

                    assertEquals("extraneous data in file " + relativePath, -1, in.read());
                    break;
                } catch (FileNotFoundException e) {
                    // For S3, read-after-write may be eventually consistent, i.e. when trying
                    // to access the object before writing it; see
                    // https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel
                    // -> try again a bit later
                    Thread.sleep(50);
                }
            } while ((retries--) > 0);
        }

        assertThat(targetFiles, equalTo(srcFiles));
    } finally {
        // clean up
        targetFileSystem.delete(targetDir, true);
    }
}

From source file:org.apache.impala.catalog.HdfsTable.java

License:Apache License

/**
 * Drops and re-loads the block metadata for all partitions in 'partsByPath' whose
 * location is under the given 'dirPath'. It involves the following steps:
 * - Clear the current block metadata of the partitions.
 * - Call FileSystem.listFiles() on 'dirPath' to fetch the BlockLocations for each
 *   file under it recursively.
 * - For every valid data file, map it to a partition from 'partsByPath' (if one exists)
 *   and enumerate all its blocks and their corresponding hosts and disk IDs.
 * Requires that 'dirPath' and all paths in 'partsByPath' have consistent qualification
 * (either fully qualified or unqualified), for isDescendantPath().
 * TODO: Split this method into more logical methods for cleaner code.
 */
private void loadBlockMetadata(Path dirPath, HashMap<Path, List<HdfsPartition>> partsByPath) {
    try {
        FileSystem fs = dirPath.getFileSystem(CONF);
        // No need to load blocks for empty partitions list.
        if (partsByPath.size() == 0 || !fs.exists(dirPath))
            return;
        if (LOG.isTraceEnabled()) {
            LOG.trace("Loading block md for " + name_ + " directory " + dirPath.toString());
        }

        // Clear the state of partitions under dirPath since they are going to be updated
        // based on the current snapshot of files in the directory.
        List<HdfsPartition> dirPathPartitions = partsByPath.get(dirPath);
        if (dirPathPartitions != null) {
            // The dirPath is a partition directory. This means the path is the root of an
            // unpartitioned table, or the path of at least one partition.
            for (HdfsPartition partition : dirPathPartitions) {
                partition.setFileDescriptors(new ArrayList<FileDescriptor>());
            }
        } else {
            // The dirPath is not a partition directory. We expect it to be an ancestor of
            // partition paths (e.g., the table root). Clear all partitions whose paths are
            // a descendant of dirPath.
            for (Map.Entry<Path, List<HdfsPartition>> entry : partsByPath.entrySet()) {
                Path partDir = entry.getKey();
                if (!FileSystemUtil.isDescendantPath(partDir, dirPath))
                    continue;
                for (HdfsPartition partition : entry.getValue()) {
                    partition.setFileDescriptors(new ArrayList<FileDescriptor>());
                }
            }
        }

        // For file systems that do not support BlockLocation API, we manually synthesize
        // block location metadata based on file formats.
        if (!FileSystemUtil.supportsStorageIds(fs)) {
            synthesizeBlockMetadata(fs, dirPath, partsByPath);
            return;
        }

        int unknownDiskIdCount = 0;
        RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true);
        while (fileStatusIter.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusIter.next();
            if (!FileSystemUtil.isValidDataFile(fileStatus))
                continue;
            // Find the partition that this file belongs to (if any).
            Path partPathDir = fileStatus.getPath().getParent();
            Preconditions.checkNotNull(partPathDir);

            List<HdfsPartition> partitions = partsByPath.get(partPathDir);
            // Skip if this file does not belong to any known partition.
            if (partitions == null) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond "
                            + " to a known partition. Skipping metadata load for this file.");
                }
                continue;
            }
            String fileName = fileStatus.getPath().getName();
            FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(),
                    fileStatus.getModificationTime());
            BlockLocation[] locations = fileStatus.getBlockLocations();
            String partPathDirName = partPathDir.toString();
            for (BlockLocation loc : locations) {
                Set<String> cachedHosts = Sets.newHashSet(loc.getCachedHosts());
                // Enumerate all replicas of the block, adding any unknown hosts
                // to hostIndex_. We pick the network address from getNames() and
                // map it to the corresponding hostname from getHosts().
                List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(loc.getNames().length);
                for (int i = 0; i < loc.getNames().length; ++i) {
                    TNetworkAddress networkAddress = BlockReplica.parseLocation(loc.getNames()[i]);
                    replicas.add(new BlockReplica(hostIndex_.getIndex(networkAddress),
                            cachedHosts.contains(loc.getHosts()[i])));
                }
                FileBlock currentBlock = new FileBlock(loc.getOffset(), loc.getLength(), replicas);
                THdfsFileBlock tHdfsFileBlock = currentBlock.toThrift();
                fd.addThriftFileBlock(tHdfsFileBlock);
                unknownDiskIdCount += loadDiskIds(loc, tHdfsFileBlock);
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace("Adding file md dir: " + partPathDirName + " file: " + fileName);
            }
            // Update the partitions' metadata that this file belongs to.
            for (HdfsPartition partition : partitions) {
                partition.getFileDescriptors().add(fd);
                numHdfsFiles_++;
                totalHdfsBytes_ += fd.getFileLength();
            }
        }
        if (unknownDiskIdCount > 0) {
            if (LOG.isWarnEnabled()) {
                LOG.warn("Unknown disk id count for filesystem " + fs + ":" + unknownDiskIdCount);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(
                "Error loading block metadata for directory " + dirPath.toString() + ": " + e.getMessage(), e);
    }
}

From source file:org.apache.impala.catalog.HdfsTable.java

License:Apache License

/**
 * For filesystems that don't support BlockLocation API, synthesize file blocks
 * by manually splitting the file range into fixed-size blocks.  That way, scan
 * ranges can be derived from file blocks as usual.  All synthesized blocks are given
 * an invalid network address so that the scheduler will treat them as remote.
 */
private void synthesizeBlockMetadata(FileSystem fs, Path dirPath,
        HashMap<Path, List<HdfsPartition>> partsByPath) throws IOException {
    RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true);
    while (fileStatusIter.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusIter.next();
        if (!FileSystemUtil.isValidDataFile(fileStatus))
            continue;
        Path partPathDir = fileStatus.getPath().getParent();
        Preconditions.checkNotNull(partPathDir);
        List<HdfsPartition> partitions = partsByPath.get(partPathDir);
        // Skip if this file does not belong to any known partition.
        if (partitions == null) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond "
                        + " to a known partition. Skipping metadata load for this file.");
            }
            continue;
        }
        String fileName = fileStatus.getPath().getName();
        FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime());
        Preconditions.checkState(partitions.size() > 0);
        // For the purpose of synthesizing block metadata, we assume that all partitions
        // with the same location have the same file format.
        HdfsFileFormat fileFormat = partitions.get(0).getFileFormat();
        synthesizeFdBlockMetadata(fs, fd, fileFormat);
        // Update the partitions' metadata that this file belongs to.
        for (HdfsPartition partition : partitions) {
            partition.getFileDescriptors().add(fd);
            numHdfsFiles_++;
            totalHdfsBytes_ += fd.getFileLength();
        }
    }
}

From source file:org.apache.metron.maas.service.callback.LaunchContainer.java

License:Apache License

public String localizeResources(Map<String, LocalResource> resources, Path scriptLocation,
        Path appJarLocation) {
    try {
        LOG.info("Model payload: " + scriptLocation);
        LOG.info("AppJAR Location: " + appJarLocation);
        FileSystem fs = scriptLocation.getFileSystem(conf);
        String script = null;
        Map.Entry<String, LocalResource> kv = localizeResource(fs.getFileStatus(appJarLocation));
        resources.put(kv.getKey(), kv.getValue());
        for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(scriptLocation, true); it.hasNext();) {
            LocatedFileStatus status = it.next();
            kv = localizeResource(status);
            String name = kv.getKey();
            if (name.endsWith(".sh")) {
                script = name;
            }
            LOG.info("Localized " + name + " -> " + status.toString());
            resources.put(name, kv.getValue());
        }
        return script;
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
        return null;
    }
}

From source file:org.apache.metron.pcap.finalizer.PcapFinalizer.java

License:Apache License

/**
 * Returns a lazily-read Iterable over a set of sequence files.
 */
protected SequenceFileIterable readInterimResults(Path interimResultPath, Configuration config, FileSystem fs)
        throws IOException {
    List<Path> files = new ArrayList<>();
    for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(interimResultPath, false); it.hasNext();) {
        Path p = it.next().getPath();
        if (p.getName().equals("_SUCCESS")) {
            fs.delete(p, false);
            continue;
        }
        files.add(p);
    }
    if (files.size() == 0) {
        LOG.info("No files to process with specified date range.");
    } else {
        LOG.debug("Interim results path={}", interimResultPath);
        Collections.sort(files, (o1, o2) -> o1.getName().compareTo(o2.getName()));
    }
    return new SequenceFileIterable(files, config);
}

From source file:org.apache.metron.pcap.mr.PcapJob.java

License:Apache License

protected Iterable<Path> listFiles(FileSystem fs, Path basePath) throws IOException {
    List<Path> ret = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> filesIt = fs.listFiles(basePath, true);
    while (filesIt.hasNext()) {
        ret.add(filesIt.next().getPath());
    }
    return ret;
}

From source file:org.apache.metron.pcap.mr.PcapJob.java

License:Apache License

/**
 * Returns a lazily-read Iterable over a set of sequence files.
 */
private SequenceFileIterable readResults(Path outputPath, Configuration config, FileSystem fs)
        throws IOException {
    List<Path> files = new ArrayList<>();
    for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(outputPath, false); it.hasNext();) {
        Path p = it.next().getPath();
        if (p.getName().equals("_SUCCESS")) {
            fs.delete(p, false);
            continue;
        }
        files.add(p);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(outputPath);
    }
    Collections.sort(files, (o1, o2) -> o1.getName().compareTo(o2.getName()));
    return new SequenceFileIterable(files, config);
}