Example usage for org.apache.hadoop.fs FileSystem listFiles

List of usage examples for org.apache.hadoop.fs FileSystem listFiles

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem listFiles.

Prototype

public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive)
        throws FileNotFoundException, IOException 

Document

List the statuses and block locations of the files in the given path.
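
Before looking at the project code below, here is a minimal, self-contained sketch of the call. The directory path and the plain Configuration used here are placeholders for illustration only, not taken from any of the projects below. Note that listFiles returns a RemoteIterator rather than an array, so results are consumed with hasNext()/next(), and only files (never directories) are returned.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/example"); // placeholder path, adjust as needed
        FileSystem fs = dir.getFileSystem(conf);

        // Second argument 'true' lists files recursively; 'false' lists only direct children.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}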

Usage

From source file:org.apache.drill.TestDynamicUDFSupport.java

License:Apache License

@Test
public void testSuccessfulRegistration() throws Exception {
    copyDefaultJarsToStagingArea();

    String summary = "The following UDFs in jar %s have been registered:\n"
            + "[custom_lower(VARCHAR-REQUIRED)]";

    testBuilder().sqlQuery("create function using jar '%s'", default_binary_name).unOrdered()
            .baselineColumns("ok", "summary").baselineValues(true, String.format(summary, default_binary_name))
            .go();

    RemoteFunctionRegistry remoteFunctionRegistry = getDrillbitContext().getRemoteFunctionRegistry();
    FileSystem fs = remoteFunctionRegistry.getFs();

    assertFalse("Staging area should be empty",
            fs.listFiles(remoteFunctionRegistry.getStagingArea(), false).hasNext());
    assertFalse("Temporary area should be empty",
            fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());

    assertTrue("Binary should be present in registry area",
            fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_binary_name)));
    assertTrue("Source should be present in registry area",
            fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_source_name)));

    Registry registry = remoteFunctionRegistry.getRegistry(new DataChangeVersion());
    assertEquals("Registry should contain one jar", registry.getJarList().size(), 1);
    assertEquals(registry.getJar(0).getName(), default_binary_name);
}

From source file:org.apache.drill.TestDynamicUDFSupport.java

License:Apache License

@Test
public void testSuccessfulRegistrationAfterSeveralRetryAttempts() throws Exception {
    RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
    copyDefaultJarsToStagingArea();

    doThrow(new VersionMismatchException("Version mismatch detected", 1))
            .doThrow(new VersionMismatchException("Version mismatch detected", 1)).doCallRealMethod()
            .when(remoteFunctionRegistry).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    String summary = "The following UDFs in jar %s have been registered:\n"
            + "[custom_lower(VARCHAR-REQUIRED)]";

    testBuilder().sqlQuery("create function using jar '%s'", default_binary_name).unOrdered()
            .baselineColumns("ok", "summary").baselineValues(true, String.format(summary, default_binary_name))
            .go();

    verify(remoteFunctionRegistry, times(3)).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    FileSystem fs = remoteFunctionRegistry.getFs();

    assertFalse("Staging area should be empty",
            fs.listFiles(remoteFunctionRegistry.getStagingArea(), false).hasNext());
    assertFalse("Temporary area should be empty",
            fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());

    assertTrue("Binary should be present in registry area",
            fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_binary_name)));
    assertTrue("Source should be present in registry area",
            fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_source_name)));

    Registry registry = remoteFunctionRegistry.getRegistry(new DataChangeVersion());
    assertEquals("Registry should contain one jar", registry.getJarList().size(), 1);
    assertEquals(registry.getJar(0).getName(), default_binary_name);
}

From source file:org.apache.drill.TestDynamicUDFSupport.java

License:Apache License

@Test
public void testSuccessfulUnregistrationAfterSeveralRetryAttempts() throws Exception {
    RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
    copyDefaultJarsToStagingArea();
    test("create function using jar '%s'", default_binary_name);

    reset(remoteFunctionRegistry);
    doThrow(new VersionMismatchException("Version mismatch detected", 1))
            .doThrow(new VersionMismatchException("Version mismatch detected", 1)).doCallRealMethod()
            .when(remoteFunctionRegistry).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    String summary = "The following UDFs in jar %s have been unregistered:\n"
            + "[custom_lower(VARCHAR-REQUIRED)]";

    testBuilder().sqlQuery("drop function using jar '%s'", default_binary_name).unOrdered()
            .baselineColumns("ok", "summary").baselineValues(true, String.format(summary, default_binary_name))
            .go();

    verify(remoteFunctionRegistry, times(3)).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    FileSystem fs = remoteFunctionRegistry.getFs();

    assertFalse("Registry area should be empty",
            fs.listFiles(remoteFunctionRegistry.getRegistryArea(), false).hasNext());
    assertEquals("Registry should be empty",
            remoteFunctionRegistry.getRegistry(new DataChangeVersion()).getJarList().size(), 0);
}

From source file:org.apache.drill.TestDynamicUDFSupport.java

License:Apache License

@Test
public void testExceedRetryAttemptsDuringRegistration() throws Exception {
    RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
    copyDefaultJarsToStagingArea();

    doThrow(new VersionMismatchException("Version mismatch detected", 1)).when(remoteFunctionRegistry)
            .updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    String summary = "Failed to update remote function registry. Exceeded retry attempts limit.";

    testBuilder().sqlQuery("create function using jar '%s'", default_binary_name).unOrdered()
            .baselineColumns("ok", "summary").baselineValues(false, summary).go();

    verify(remoteFunctionRegistry, times(remoteFunctionRegistry.getRetryAttempts() + 1))
            .updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    FileSystem fs = remoteFunctionRegistry.getFs();

    assertTrue("Binary should be present in staging area",
            fs.exists(new Path(remoteFunctionRegistry.getStagingArea(), default_binary_name)));
    assertTrue("Source should be present in staging area",
            fs.exists(new Path(remoteFunctionRegistry.getStagingArea(), default_source_name)));

    assertFalse("Registry area should be empty",
            fs.listFiles(remoteFunctionRegistry.getRegistryArea(), false).hasNext());
    assertFalse("Temporary area should be empty",
            fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());

    assertEquals("Registry should be empty",
            remoteFunctionRegistry.getRegistry(new DataChangeVersion()).getJarList().size(), 0);
}

From source file:org.apache.drill.TestDynamicUDFSupport.java

License:Apache License

@Test
public void testRegistrationFailDuringRegistryUpdate() throws Exception {
    final RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
    final FileSystem fs = remoteFunctionRegistry.getFs();
    final String errorMessage = "Failure during remote registry update.";
    doAnswer(new Answer<Void>() {
        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            assertTrue("Binary should be present in registry area",
                    fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_binary_name)));
            assertTrue("Source should be present in registry area",
                    fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_source_name)));
            throw new RuntimeException(errorMessage);
        }
    }).when(remoteFunctionRegistry).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

    copyDefaultJarsToStagingArea();

    testBuilder().sqlQuery("create function using jar '%s'", default_binary_name).unOrdered()
            .baselineColumns("ok", "summary").baselineValues(false, errorMessage).go();

    assertFalse("Registry area should be empty",
            fs.listFiles(remoteFunctionRegistry.getRegistryArea(), false).hasNext());
    assertFalse("Temporary area should be empty",
            fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());

    assertTrue("Binary should be present in staging area",
            fs.exists(new Path(remoteFunctionRegistry.getStagingArea(), default_binary_name)));
    assertTrue("Source should be present in staging area",
            fs.exists(new Path(remoteFunctionRegistry.getStagingArea(), default_source_name)));
}

From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}

From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentFinder.java

License:Apache License

@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor)
        throws SegmentLoadingException {
    final Map<String, Pair<DataSegment, Long>> timestampedSegments = new HashMap<>();
    final Path workingDirPath = new Path(workingDirPathStr);
    FileSystem fs;
    try {
        fs = workingDirPath.getFileSystem(config);

        log.info(fs.getScheme());
        log.info("FileSystem URI:" + fs.getUri().toString());

        if (!fs.exists(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
        }

        if (!fs.isDirectory(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
        }

        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            final Path path = locatedFileStatus.getPath();
            if (path.getName().endsWith("descriptor.json")) {

                // There are 3 supported path formats:
                //    - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum/descriptor.json
                //    - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_descriptor.json
                //    - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_UUID_descriptor.json
                final String descriptorParts[] = path.getName().split("_");

                Path indexZip = new Path(path.getParent(), "index.zip");
                if (descriptorParts.length > 1) {
                    Preconditions
                            .checkState(
                                    descriptorParts.length <= 3
                                            && org.apache.commons.lang.StringUtils.isNumeric(descriptorParts[0])
                                            && "descriptor.json"
                                                    .equals(descriptorParts[descriptorParts.length - 1]),
                                    "Unexpected descriptor filename format [%s]", path);

                    indexZip = new Path(path.getParent(), StringUtils.format("%s_%sindex.zip",
                            descriptorParts[0], descriptorParts.length == 2 ? "" : descriptorParts[1] + "_"));
                }

                if (fs.exists(indexZip)) {
                    final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
                    log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);

                    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                    final String pathWithoutScheme = indexZip.toUri().getPath();

                    if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME)
                            || !loadSpec.get("path").equals(pathWithoutScheme)) {
                        loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
                        loadSpec.put("path", pathWithoutScheme);
                        if (updateDescriptor) {
                            log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", path,
                                    pathWithoutScheme);
                            mapper.writeValue(fs.create(path, true), dataSegment);
                        }
                    }

                    DataSegmentFinder.putInMapRetainingNewest(timestampedSegments, dataSegment,
                            locatedFileStatus.getModificationTime());
                } else {
                    throw new SegmentLoadingException(
                            "index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
                }
            }
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
    }

    return timestampedSegments.values().stream().map(x -> x.lhs).collect(Collectors.toSet());
}

From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentPuller.java

License:Apache License

FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
    try {
        org.apache.commons.io.FileUtils.forceMkdir(outDir);
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "");
    }
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {

            // --------    directory     ---------

            try {
                return RetryUtils.retry(() -> {
                    if (!fs.exists(path)) {
                        throw new SegmentLoadingException("No files found at [%s]", path.toString());
                    }

                    final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                    final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                    while (children.hasNext()) {
                        final LocatedFileStatus child = children.next();
                        final Path childPath = child.getPath();
                        final String fname = childPath.getName();
                        if (fs.isDirectory(childPath)) {
                            log.warn("[%s] is a child directory, skipping", childPath.toString());
                        } else {
                            final File outFile = new File(outDir, fname);
                            try (final FSDataInputStream in = fs.open(childPath)) {
                                NativeIO.chunkedCopy(in, outFile);
                            }
                            result.addFile(outFile);
                        }
                    }
                    log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
                            outDir.getAbsolutePath());
                    return result;
                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {

            // --------    zip     ---------

            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);

            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outDir.getAbsolutePath());

            return result;
        } else if (CompressionUtils.isGz(path.getName())) {

            // --------    gzip     ---------

            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);

            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}

From source file:org.apache.falcon.extensions.store.ExtensionStore.java

License:Apache License

public String getResource(final String extensionResourcePath) throws FalconException {
    StringBuilder definition = new StringBuilder();
    Path resourcePath = new Path(extensionResourcePath);
    FileSystem fileSystem = HadoopClientFactory.get().createFalconFileSystem(resourcePath.toUri());
    try {
        if (fileSystem.isFile(resourcePath)) {
            definition.append(getExtensionResource(extensionResourcePath.toString()));
        } else {
            RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem.listFiles(resourcePath,
                    false);
            while (fileStatusListIterator.hasNext()) {
                LocatedFileStatus fileStatus = fileStatusListIterator.next();
                Path filePath = fileStatus.getPath();
                definition.append("Contents of file ").append(filePath.getName()).append(":\n");
                definition.append(getExtensionResource(filePath.toString())).append("\n \n");
            }
        }
    } catch (IOException e) {
        LOG.error("Exception while getting file(s) with path : " + extensionResourcePath, e);
        throw new StoreAccessException(e);
    }

    return definition.toString();

}

From source file:org.apache.falcon.regression.core.util.HadoopUtil.java

License:Apache License

/**
 * Recursively retrieves all data file names from a given location.
 * @param fs filesystem
 * @param location given location
 * @return list of all files
 * @throws IOException
 */
public static List<Path> getAllFilesRecursivelyHDFS(FileSystem fs, Path location) throws IOException {
    List<Path> returnList = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> remoteIterator;
    try {
        remoteIterator = fs.listFiles(location, true);
    } catch (FileNotFoundException e) {
        LOGGER.info("Path '" + location + "' is not found on " + fs.getUri());
        return returnList;
    }
    while (remoteIterator.hasNext()) {
        Path path = remoteIterator.next().getPath();
        if (!path.toUri().toString().contains("_SUCCESS")) {
            returnList.add(path);
        }
    }
    return returnList;
}