Example usage for org.apache.hadoop.fs FileSystem listFiles

List of usage examples for org.apache.hadoop.fs FileSystem listFiles

Introduction

On this page you can find example usages of the listFiles method of org.apache.hadoop.fs.FileSystem.

Prototype

public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive)
        throws FileNotFoundException, IOException 

Source Link

Document

List the statuses and block locations of the files in the given path.

Usage

From source file:com.streamsets.pipeline.stage.it.HdfsAvroSchemaSerIT.java

License:Apache License

/**
 * Asserts that {@code location} exists on the default file system and directly contains a file
 * whose name starts with {@code AVRO_SCHEMA_SERIALIZATION_PREFIX}, then deletes the location
 * recursively.
 *
 * @param location path appended to the default file system URI
 * @throws IOException if a file system operation fails
 */
private void verifySerializationLocation(String location) throws IOException {
    FileSystem fs = BaseHiveIT.getDefaultFileSystem();
    Path path = new Path(BaseHiveIT.getDefaultFsUri() + location);
    Assert.assertTrue("Location does not exist:" + location, fs.exists(path));
    boolean found = false;
    RemoteIterator<LocatedFileStatus> fsIterator = fs.listFiles(path, false);
    // Stop at the first match or when the listing is exhausted. With "||" the loop called
    // next() on an exhausted iterator when nothing matched, and kept iterating after a match,
    // letting a later non-matching file reset "found" to false.
    while (!found && fsIterator.hasNext()) {
        LocatedFileStatus status = fsIterator.next();
        String fileName = status.getPath().getName();
        LOG.info("Found file: " + fileName);
        found = fileName.startsWith(AVRO_SCHEMA_SERIALIZATION_PREFIX);
    }
    // Clean up before asserting so the location is removed even when the check fails.
    fs.delete(path, true);
    Assert.assertTrue("Avro schema file not found in the location " + location, found);
}

From source file:com.thinkbiganalytics.spark.io.ZipStreamingOutputTest.java

License:Apache License

/**
 * Checks that {@code ZipStreamingOutput} writes one ZIP entry per file listed by the mocked
 * file system, in listing order, and that the source path is deleted recursively afterwards.
 */
@Test
public void test() throws Exception {
    // Mocked file system that lists two files under the source directory
    final FileSystem fs = Mockito.mock(FileSystem.class);
    final Path source = new Path("/tmp/source");

    final LocatedFileStatus successFile = createFile("_SUCCESS", source);
    final LocatedFileStatus partFile = createFile("part-0", source);
    Mockito.when(fs.listFiles(source, false))
            .thenReturn(new ForwardingRemoteIterator<>(Iterators.forArray(successFile, partFile)));

    // Both files are served from the same stream
    final FSDataInputStream sharedStream = new FSDataInputStream(new SeekableNullInputStream());
    Mockito.when(fs.open(successFile.getPath())).thenReturn(sharedStream);
    Mockito.when(fs.open(partFile.getPath())).thenReturn(sharedStream);

    // The latch is released as soon as the source path is deleted
    final CountDownLatch deleted = new CountDownLatch(1);
    Mockito.when(fs.delete(source, true)).then(new Answer<Boolean>() {
        @Override
        public Boolean answer(final InvocationOnMock invocation) {
            deleted.countDown();
            return true;
        }
    });

    // Stream the ZIP into memory
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    new ZipStreamingOutput(source, fs).write(out);

    // Read the archive back: exactly two entries, in listing order
    final ZipInputStream in = new ZipInputStream(out.toInputStream());

    final ZipEntry firstEntry = in.getNextEntry();
    Assert.assertNotNull("Missing _SUCCESS entry", firstEntry);
    Assert.assertEquals("_SUCCESS", firstEntry.getName());

    final ZipEntry secondEntry = in.getNextEntry();
    Assert.assertNotNull("Missing part-0 entry", secondEntry);
    Assert.assertEquals("part-0", secondEntry.getName());

    final ZipEntry trailingEntry = in.getNextEntry();
    Assert.assertNull("Unexpected entry", trailingEntry);

    // Wait up to one second for the delete call, then verify it happened
    deleted.await(1, TimeUnit.SECONDS);
    Mockito.verify(fs).delete(source, true);
}

From source file:com.toddbodnar.simpleHive.IO.hdfsFile.java

/**
 * Closes any open writer and reader, then positions this file at the first line of the first
 * non-empty data file found at {@code location}.
 *
 * <p>If {@code location} is a single file, only that file is considered; otherwise every file
 * below it is listed recursively. When no non-empty file exists, a warning is printed to
 * standard error and {@code next} is set to {@code null}. I/O failures are logged, not thrown.
 */
@Override
public void resetStream() {
    try {
        if (out != null) {
            out.close();
        }
        writing = false;
        if (in != null) {
            in.close();
        }
        FileSystem fs = FileSystem.get(GetConfiguration.get());

        // Collect the candidate files: the location itself, or everything below it.
        LinkedList<FileStatus> candidates = new LinkedList<>();
        if (fs.isFile(location)) {
            candidates.add(fs.getFileStatus(location));
        } else {
            RemoteIterator<LocatedFileStatus> listing = fs.listFiles(location, true);
            while (listing.hasNext()) {
                candidates.add(listing.next());
            }
        }
        theFiles = candidates.iterator();

        // Skip directories and zero-length files. The status already in hand answers the
        // directory question, so the deprecated fs.isDirectory(path) lookup is not needed.
        FileStatus nextFileStatus;
        do {
            if (!theFiles.hasNext()) {
                System.err.println("WARNING: File is Empty");
                super.next = null;
                return;
            }
            nextFileStatus = theFiles.next();
        } while (nextFileStatus.isDirectory() || nextFileStatus.getLen() == 0);

        in = new BufferedReader(new InputStreamReader(fs.open(nextFileStatus.getPath())));
        next = in.readLine();
    } catch (IOException ex) {
        // FileNotFoundException is an IOException and was handled identically, so one handler
        // covers both.
        // NOTE(review): the logger is named after fileFile rather than this class - confirm
        // that this is intentional.
        Logger.getLogger(fileFile.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.uber.hoodie.common.model.HoodieTestUtils.java

License:Apache License

/**
 * Recursively lists the files below {@code basePath} and returns those whose file name
 * contains {@code ".parquet"}.
 *
 * @param fs       file system to list
 * @param basePath root path of the recursive listing
 * @return statuses of the matching files, in listing order
 * @throws IOException if the listing fails
 */
public static FileStatus[] listAllDataFilesInPath(FileSystem fs, String basePath) throws IOException {
    RemoteIterator<LocatedFileStatus> listing = fs.listFiles(new Path(basePath), true);
    List<FileStatus> dataFiles = Lists.newArrayList();
    for (; listing.hasNext();) {
        LocatedFileStatus candidate = listing.next();
        String fileName = candidate.getPath().getName();
        if (!fileName.contains(".parquet")) {
            continue;
        }
        dataFiles.add(candidate);
    }
    FileStatus[] result = new FileStatus[dataFiles.size()];
    return dataFiles.toArray(result);
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Feeds every file found under the base path to {@code consumer}. Files directly under the
 * base path are always processed; each top-level directory accepted by the path filter is
 * listed recursively. When {@code excludeMetaFolder} is set, the meta-folder and all its
 * subdirectories are skipped.
 *
 * @param fs                File System
 * @param basePathStr       Base-Path
 * @param consumer          Callback for processing; must return true on success
 * @param excludeMetaFolder Exclude .hoodie folder
 * @throws IOException     if listing fails
 * @throws HoodieException if the callback reports failure for a file
 */
@VisibleForTesting
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
        boolean excludeMetaFolder) throws IOException {
    final PathFilter filter;
    if (excludeMetaFolder) {
        filter = getExcludeMetaPathFilter();
    } else {
        filter = ALLOW_ALL_FILTER;
    }

    for (FileStatus topLevel : fs.listStatus(new Path(basePathStr))) {
        if (topLevel.isFile()) {
            // Top-level files bypass the filter entirely.
            if (!consumer.apply(topLevel)) {
                throw new HoodieException("Failed to process file-status=" + topLevel);
            }
            continue;
        }
        if (!filter.accept(topLevel.getPath())) {
            continue;
        }
        RemoteIterator<LocatedFileStatus> nested = fs.listFiles(topLevel.getPath(), true);
        while (nested.hasNext()) {
            FileStatus nestedStatus = nested.next();
            if (!consumer.apply(nestedStatus)) {
                throw new HoodieException("Failed to process file-status=" + nestedStatus);
            }
        }
    }
}

From source file:com.wipro.ats.bdre.clustermigration.DestTableLoad.java

License:Apache License

/**
 * Moves data found below {@code source-path} under {@code dest-path}, then deletes the
 * source path recursively.
 *
 * <p>For every file listed recursively under the source path, the last two path components
 * are stripped from both the source file path and the corresponding destination path. If the
 * stripped destination path already exists it is deleted recursively; its parent directory is
 * then created and the stripped source path is renamed into that parent.
 *
 * <p>NOTE(review): the file system is built from {@code dest-fs} but is also used to list and
 * rename the source path - presumably both live on the same file system; confirm.
 *
 * @param params command-line arguments providing "source-path", "dest-path" and "dest-fs"
 * @throws IOException if a file system operation fails
 */
public void execute(String[] params) throws IOException {

    CommandLine commandLine = getCommandLine(params, PARAMS_STRUCTURE);
    String src = commandLine.getOptionValue("source-path");
    String dest = commandLine.getOptionValue("dest-path");
    String destFs = commandLine.getOptionValue("dest-fs");

    // All file system calls below go through a file system whose default FS is dest-fs.
    Configuration config = new Configuration();
    config.set("fs.defaultFS", destFs);
    FileSystem hdfs = FileSystem.get(config);
    Path srcPath = new Path(src);
    RemoteIterator<LocatedFileStatus> srcFiles = hdfs.listFiles(srcPath, true);
    while (srcFiles.hasNext()) {
        // Full URI string of the current source file, without a trailing slash.
        String absolutePath = srcFiles.next().getPath().toUri().toString();
        if (absolutePath.endsWith("/"))
            absolutePath = absolutePath.substring(0, absolutePath.length() - 1);
        LOGGER.info("absolutePath of source business partition= " + absolutePath);
        // Path relative to the source root, obtained by removing every occurrence of the
        // source-path string from the absolute path.
        String relativePath = absolutePath.replace(src, "");
        if (relativePath.endsWith("/"))
            relativePath = relativePath.substring(0, relativePath.length() - 1);
        LOGGER.info("relativePath of source business partition= = " + relativePath);
        // Normalise dest so that it ends with a slash (only changes it on the first pass).
        if (!dest.endsWith("/"))
            dest = dest + "/";
        String destCheckPathString = dest + relativePath;
        Path destCheckPath = new Path(destCheckPathString);
        LOGGER.info("destCheckPath = " + destCheckPath);
        // Find the last "/" and then the one before it, and cut the path at that second-to-last
        // "/" so that the trailing '/instanceexecid=number/part_0000' is trimmed from the whole
        // path. This is done for both the source and the destination path.
        int destIndex = destCheckPathString.lastIndexOf("/");
        int secondLastDestIndex = destCheckPath.toString().lastIndexOf("/", destIndex - 1);
        int srcIndex = absolutePath.lastIndexOf("/");
        int secondLastSrcIndex = absolutePath.substring(0, srcIndex).lastIndexOf("/", srcIndex - 1);
        String truncatedSrcPath = absolutePath.substring(0, secondLastSrcIndex);
        LOGGER.info("truncated Src Path = " + truncatedSrcPath);
        String truncatedDestPath = destCheckPath.toString().substring(0, secondLastDestIndex);
        LOGGER.info("truncated Dest Path = " + truncatedDestPath);
        Path existsPathCheck = new Path(truncatedDestPath);
        Path srcPathToMove = new Path(truncatedSrcPath);
        // Check whether the business partition to be copied already exists inside the
        // destination table. If it does, it is overwritten: deleted at the destination and
        // then moved from source to destination.
        LOGGER.info("Does the business partition exist already inside the table? True/False? = "
                + hdfs.exists(existsPathCheck));
        if (hdfs.exists(existsPathCheck)) {
            LOGGER.info(
                    "bus partitions to be copied already exist at the destination, hence deleting them at destination");
            hdfs.delete(existsPathCheck, true);
        }
        // Parent directory of the truncated destination path; the source directory is renamed
        // into it.
        String destPartitionPath = truncatedDestPath.substring(0, truncatedDestPath.lastIndexOf("/"));
        Path partitionWisePath = new Path(destPartitionPath);
        hdfs.mkdirs(partitionWisePath);
        LOGGER.info("moving the business partitions to the destination table");
        LOGGER.info("moving " + srcPathToMove + " to " + partitionWisePath);
        // NOTE(review): the boolean result of rename is not checked. The rename also moves a
        // directory two levels above the current file while the listing is still being
        // iterated, so later files of the same directory may already be gone - confirm.
        hdfs.rename(srcPathToMove, partitionWisePath);
    }
    // Remove whatever is left of the source tree.
    hdfs.delete(srcPath, true);
}

From source file:edu.iu.daal_als.SGDUtil.java

License:Apache License

/**
 * Recursively lists the files below {@code testFilePath}, loads them through a {@code VStore}
 * and returns the store's VW map.
 *
 * <p>If the listing fails, the error is logged and the store is built from the paths collected
 * up to that point.
 *
 * @param testFilePath  root path of the files to load
 * @param numThreads    thread count passed to the store
 * @param configuration Hadoop configuration used to resolve the file system
 * @return the map returned by {@code VStore.getVWMap()}
 */
public static Int2ObjectOpenHashMap<VRowCol> loadTMapRow(String testFilePath, int numThreads,
        Configuration configuration) {
    final List<String> fileUris = new LinkedList<>();
    final Path root = new Path(testFilePath);
    try {
        final FileSystem fileSystem = root.getFileSystem(configuration);
        for (RemoteIterator<LocatedFileStatus> listing = fileSystem.listFiles(root, true); listing.hasNext();) {
            final LocatedFileStatus entry = listing.next();
            fileUris.add(entry.getPath().toUri().toString());
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    final VStore store = new VStore(fileUris, numThreads, false, configuration);
    store.load(false, true);
    return store.getVWMap();
}

From source file:edu.iu.daal_als.SGDUtil.java

License:Apache License

/**
 * Recursively lists the files below {@code testFilePath}, loads them through a {@code VStore}
 * and returns the store's VH map.
 *
 * <p>If the listing fails, the error is logged and the store is built from the paths collected
 * up to that point.
 *
 * @param testFilePath  root path of the test files
 * @param configuration Hadoop configuration used to resolve the file system
 * @param numThreads    thread count passed to the store
 * @return the map returned by {@code VStore.getVHMap()}
 */
public static Int2ObjectOpenHashMap<VRowCol> loadTestVHMap(String testFilePath, Configuration configuration,
        int numThreads) {
    final Path testRoot = new Path(testFilePath);
    final List<String> collectedUris = new LinkedList<>();
    try {
        final RemoteIterator<LocatedFileStatus> listing =
                testRoot.getFileSystem(configuration).listFiles(testRoot, true);
        while (listing.hasNext()) {
            final Path filePath = listing.next().getPath();
            collectedUris.add(filePath.toUri().toString());
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    final VStore store = new VStore(collectedUris, numThreads, false, configuration);
    store.load(true, false);
    return store.getVHMap();
}

From source file:edu.iu.daal_linreg.LinRegDaalCollectiveMapper.java

License:Apache License

/**
 * Reads comma-separated numeric values from every file found recursively below
 * {@code inputFiles} into one array and wraps it in a {@code HomogenNumericTable}.
 *
 * <p>The first {@code vectorSize} fields of each line are parsed as doubles and appended in
 * listing order. If the listing fails, the error is logged and only the paths collected so
 * far are read.
 *
 * @param daal_Context DAAL context passed to the table constructor
 * @param conf         Hadoop configuration used to resolve the file systems
 * @param inputFiles   root path of the input files
 * @param vectorSize   number of values taken from each line
 * @param numRows      expected total number of lines across all files
 * @return the populated table, or {@code null} if a file cannot be opened or the number of
 *         values read differs from {@code vectorSize * numRows}
 * @throws IOException if reading or closing an input file fails
 */
private NumericTable getNumericTableHDFS(DaalContext daal_Context, Configuration conf, String inputFiles,
        int vectorSize, int numRows) throws IOException {
    Path inputFilePaths = new Path(inputFiles);
    List<String> inputFileList = new LinkedList<>();

    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);

        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }

    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    int dataSize = vectorSize * numRows;
    double[] data = new double[dataSize];
    int index = 0;

    // Loop over all the files in the list
    for (String file_name : inputFileList) {
        LOG.info("read in file name: " + file_name);

        Path file_path = new Path(file_name);
        FSDataInputStream opened;
        try {
            FileSystem fs = file_path.getFileSystem(conf);
            opened = fs.open(file_path);
        } catch (Exception e) {
            LOG.error("Fail to open file " + e.toString());
            return null;
        }

        // try-with-resources closes the stream on every exit path, including the early
        // "return null" below and parse or read failures, which previously leaked it.
        try (FSDataInputStream in = opened) {
            String line;
            while ((line = in.readLine()) != null) {
                String[] lineData = line.split(",");

                for (int t = 0; t < vectorSize; t++) {
                    if (index >= dataSize) {
                        // More values than vectorSize * numRows were supplied.
                        LOG.error("Incorrect size of file: dataSize: " + dataSize + "; index val: " + index);
                        return null;
                    }
                    data[index] = Double.parseDouble(lineData[t]);
                    index++;
                }
            }
        }
    }

    // Fewer values than expected were supplied.
    if (index != dataSize) {
        LOG.error("Incorrect total size of file: dataSize: " + dataSize + "; index val: " + index);
        return null;
    }

    NumericTable predictionData = new HomogenNumericTable(daal_Context, data, vectorSize, numRows);
    return predictionData;
}

From source file:edu.iu.daal_sgd.SGDUtil.java

License:Apache License

/**
 * Recursively lists the files below {@code testFilePath}, loads them through a {@code VStore}
 * and returns the store's VH map.
 *
 * <p>If the listing fails, the error is logged and the store is built from the paths collected
 * up to that point.
 *
 * @param testFilePath  root path of the test files
 * @param configuration Hadoop configuration used to resolve the file system
 * @param numThreads    thread count passed to the store
 * @return the map returned by {@code VStore.getVHMap()}
 */
public static Int2ObjectOpenHashMap<VRowCol> loadTestVHMap(String testFilePath, Configuration configuration,
        int numThreads) {
    final Path testRoot = new Path(testFilePath);
    final List<String> collectedUris = new LinkedList<>();
    try {
        final RemoteIterator<LocatedFileStatus> listing =
                testRoot.getFileSystem(configuration).listFiles(testRoot, true);
        while (listing.hasNext()) {
            final Path filePath = listing.next().getPath();
            collectedUris.add(filePath.toUri().toString());
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    final VStore store = new VStore(collectedUris, numThreads, configuration);
    store.load(true, false);
    return store.getVHMap();
}