List of usage examples for org.apache.hadoop.fs FileSystem listFiles
public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive) throws FileNotFoundException, IOException
From source file:com.streamsets.pipeline.stage.it.HdfsAvroSchemaSerIT.java
License:Apache License
private void verifySerializationLocation(String location) throws IOException { FileSystem fs = BaseHiveIT.getDefaultFileSystem(); Path path = new Path(BaseHiveIT.getDefaultFsUri() + location); Assert.assertTrue("Location does not exist:" + location, fs.exists(path)); boolean found = false; RemoteIterator<LocatedFileStatus> fsIterator = fs.listFiles(path, false); while (!found || fsIterator.hasNext()) { LocatedFileStatus status = fsIterator.next(); LOG.info("Found file: " + status.getPath().getName()); found = status.getPath().getName().startsWith(AVRO_SCHEMA_SERIALIZATION_PREFIX); }//from w w w.j a v a 2s.c om fs.delete(path, true); Assert.assertTrue("Avro schema file not found in the location " + location, found); }
From source file:com.thinkbiganalytics.spark.io.ZipStreamingOutputTest.java
License:Apache License
/** * Verify streaming output.// ww w .ja va 2 s . co m */ @Test public void test() throws Exception { // Mock file system final FileSystem fs = Mockito.mock(FileSystem.class); final Path source = new Path("/tmp/source"); final LocatedFileStatus file1 = createFile("_SUCCESS", source); final LocatedFileStatus file2 = createFile("part-0", source); Mockito.when(fs.listFiles(source, false)) .thenReturn(new ForwardingRemoteIterator<>(Iterators.forArray(file1, file2))); final FSDataInputStream fileStream = new FSDataInputStream(new SeekableNullInputStream()); Mockito.when(fs.open(file1.getPath())).thenReturn(fileStream); Mockito.when(fs.open(file2.getPath())).thenReturn(fileStream); final CountDownLatch deleteLatch = new CountDownLatch(1); Mockito.when(fs.delete(source, true)).then(new Answer<Boolean>() { @Override public Boolean answer(final InvocationOnMock invocation) { deleteLatch.countDown(); return true; } }); // Write ZIP to output stream final ByteArrayOutputStream out = new ByteArrayOutputStream(); final ZipStreamingOutput zip = new ZipStreamingOutput(source, fs); zip.write(out); // Verify output stream final ZipInputStream in = new ZipInputStream(out.toInputStream()); ZipEntry entry = in.getNextEntry(); Assert.assertNotNull("Missing _SUCCESS entry", entry); Assert.assertEquals("_SUCCESS", entry.getName()); entry = in.getNextEntry(); Assert.assertNotNull("Missing part-0 entry", entry); Assert.assertEquals("part-0", entry.getName()); entry = in.getNextEntry(); Assert.assertNull("Unexpected entry", entry); // Verify path deleted deleteLatch.await(1, TimeUnit.SECONDS); Mockito.verify(fs).delete(source, true); }
From source file:com.toddbodnar.simpleHive.IO.hdfsFile.java
@Override public void resetStream() { try {//from www . ja v a 2s . c o m if (out != null) out.close(); writing = false; if (in != null) in.close(); FileSystem fs = FileSystem.get(GetConfiguration.get()); if (fs.isFile(location)) { LinkedList<FileStatus> file = new LinkedList<>(); file.add(fs.getFileStatus(location)); theFiles = file.iterator(); } else { LinkedList<FileStatus> files = new LinkedList<>(); RemoteIterator<LocatedFileStatus> fileremote = fs.listFiles(location, true); while (fileremote.hasNext()) files.add(fileremote.next()); theFiles = files.iterator(); } FileStatus nextFileStatus; do { if (!theFiles.hasNext()) { System.err.println("WARNING: File is Empty"); super.next = null; return; } nextFileStatus = theFiles.next(); } while (fs.isDirectory(nextFileStatus.getPath()) || nextFileStatus.getLen() == 0); in = new BufferedReader(new InputStreamReader(fs.open(nextFileStatus.getPath()))); next = in.readLine(); //out.flush(); } catch (FileNotFoundException ex) { Logger.getLogger(fileFile.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(fileFile.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:com.uber.hoodie.common.model.HoodieTestUtils.java
License:Apache License
public static FileStatus[] listAllDataFilesInPath(FileSystem fs, String basePath) throws IOException { RemoteIterator<LocatedFileStatus> itr = fs.listFiles(new Path(basePath), true); List<FileStatus> returns = Lists.newArrayList(); while (itr.hasNext()) { LocatedFileStatus status = itr.next(); if (status.getPath().getName().contains(".parquet")) { returns.add(status);//from ww w. j a v a 2 s . co m } } return returns.toArray(new FileStatus[returns.size()]); }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Feeds every file beneath the base path to {@code consumer}. Files sitting directly in the
 * base path are always passed on; a top-level directory is walked recursively only when the
 * active path filter accepts it. With {@code excludeMetaFolder} set, the filter comes from
 * {@code getExcludeMetaPathFilter()}; otherwise {@code ALLOW_ALL_FILTER} is used.
 *
 * @param fs File System
 * @param basePathStr Base-Path
 * @param consumer Callback for processing; returning {@code false} aborts with a
 *     {@code HoodieException}
 * @param excludeMetaFolder Exclude .hoodie folder
 * @throws IOException if a listing call fails
 */
@VisibleForTesting
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
        boolean excludeMetaFolder) throws IOException {
    final PathFilter directoryFilter;
    if (excludeMetaFolder) {
        directoryFilter = getExcludeMetaPathFilter();
    } else {
        directoryFilter = ALLOW_ALL_FILTER;
    }

    for (FileStatus topLevel : fs.listStatus(new Path(basePathStr))) {
        if (topLevel.isFile()) {
            if (!consumer.apply(topLevel)) {
                throw new HoodieException("Failed to process file-status=" + topLevel);
            }
            continue;
        }

        if (!directoryFilter.accept(topLevel.getPath())) {
            continue;
        }

        RemoteIterator<LocatedFileStatus> nested = fs.listFiles(topLevel.getPath(), true);
        while (nested.hasNext()) {
            FileStatus nestedFile = nested.next();
            if (!consumer.apply(nestedFile)) {
                throw new HoodieException("Failed to process file-status=" + nestedFile);
            }
        }
    }
}
From source file:com.wipro.ats.bdre.clustermigration.DestTableLoad.java
License:Apache License
public void execute(String[] params) throws IOException { CommandLine commandLine = getCommandLine(params, PARAMS_STRUCTURE); String src = commandLine.getOptionValue("source-path"); String dest = commandLine.getOptionValue("dest-path"); String destFs = commandLine.getOptionValue("dest-fs"); Configuration config = new Configuration(); config.set("fs.defaultFS", destFs); FileSystem hdfs = FileSystem.get(config); Path srcPath = new Path(src); RemoteIterator<LocatedFileStatus> srcFiles = hdfs.listFiles(srcPath, true); while (srcFiles.hasNext()) { String absolutePath = srcFiles.next().getPath().toUri().toString(); if (absolutePath.endsWith("/")) absolutePath = absolutePath.substring(0, absolutePath.length() - 1); LOGGER.info("absolutePath of source business partition= " + absolutePath); String relativePath = absolutePath.replace(src, ""); if (relativePath.endsWith("/")) relativePath = relativePath.substring(0, relativePath.length() - 1); LOGGER.info("relativePath of source business partition= = " + relativePath); if (!dest.endsWith("/")) dest = dest + "/"; String destCheckPathString = dest + relativePath; Path destCheckPath = new Path(destCheckPathString); LOGGER.info("destCheckPath = " + destCheckPath); //find first index that contains a "/" from the end of the string, after first find the second such occurrence, finally trim the '/instanceexecid=number/part_0000' from the whole path, do this for both source and dest paths int destIndex = destCheckPathString.lastIndexOf("/"); int secondLastDestIndex = destCheckPath.toString().lastIndexOf("/", destIndex - 1); int srcIndex = absolutePath.lastIndexOf("/"); int secondLastSrcIndex = absolutePath.substring(0, srcIndex).lastIndexOf("/", srcIndex - 1); String truncatedSrcPath = absolutePath.substring(0, secondLastSrcIndex); LOGGER.info("truncated Src Path = " + truncatedSrcPath); String truncatedDestPath = destCheckPath.toString().substring(0, secondLastDestIndex); LOGGER.info("truncated Dest Path = " + truncatedDestPath); 
Path existsPathCheck = new Path(truncatedDestPath); Path srcPathToMove = new Path(truncatedSrcPath); //check if the business partition to be copied already exists inside the destination table, if it does, it has to be overwritten (in this case delete at dest and move from source to dest LOGGER.info("Does the business partition exist already inside the table? True/False? = " + hdfs.exists(existsPathCheck)); if (hdfs.exists(existsPathCheck)) { LOGGER.info(/*from w w w.jav a2 s . c o m*/ "bus partitions to be copied already exist at the destination, hence deleting them at destination"); hdfs.delete(existsPathCheck, true); } String destPartitionPath = truncatedDestPath.substring(0, truncatedDestPath.lastIndexOf("/")); Path partitionWisePath = new Path(destPartitionPath); hdfs.mkdirs(partitionWisePath); LOGGER.info("moving the business partitions to the destination table"); LOGGER.info("moving " + srcPathToMove + " to " + partitionWisePath); hdfs.rename(srcPathToMove, partitionWisePath); } hdfs.delete(srcPath, true); }
From source file:edu.iu.daal_als.SGDUtil.java
License:Apache License
public static Int2ObjectOpenHashMap<VRowCol> loadTMapRow(String testFilePath, int numThreads, Configuration configuration) { List<String> testFilePaths = new LinkedList<>(); Path path = new Path(testFilePath); try {//from www . j av a2 s . c om FileSystem fs = path.getFileSystem(configuration); RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(path, true); while (iterator.hasNext()) { String name = iterator.next().getPath().toUri().toString(); testFilePaths.add(name); } } catch (IOException e) { LOG.error("Fail to get test files", e); } VStore vStore = new VStore(testFilePaths, numThreads, false, configuration); vStore.load(false, true); return vStore.getVWMap(); }
From source file:edu.iu.daal_als.SGDUtil.java
License:Apache License
/**
 * Gathers the URIs of all files located recursively beneath {@code testFilePath}, loads
 * them with a {@code VStore} and hands back the store's VH map.
 *
 * <p>A failure while listing is logged; loading then continues with the paths gathered
 * so far.
 *
 * @param testFilePath root path to scan for test files
 * @param configuration Hadoop configuration used to resolve the file system
 * @param numThreads thread count handed to the {@code VStore}
 * @return the map returned by {@code VStore.getVHMap()}
 */
public static Int2ObjectOpenHashMap<VRowCol> loadTestVHMap(String testFilePath, Configuration configuration,
        int numThreads) {
    final List<String> discovered = new LinkedList<>();
    final Path root = new Path(testFilePath);

    try {
        final RemoteIterator<LocatedFileStatus> listing =
                root.getFileSystem(configuration).listFiles(root, true);
        while (listing.hasNext()) {
            discovered.add(listing.next().getPath().toUri().toString());
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    final VStore store = new VStore(discovered, numThreads, false, configuration);
    store.load(true, false);
    return store.getVHMap();
}
From source file:edu.iu.daal_linreg.LinRegDaalCollectiveMapper.java
License:Apache License
private NumericTable getNumericTableHDFS(DaalContext daal_Context, Configuration conf, String inputFiles, int vectorSize, int numRows) throws IOException { Path inputFilePaths = new Path(inputFiles); List<String> inputFileList = new LinkedList<>(); try {//from ww w.j av a 2 s . c om FileSystem fs = inputFilePaths.getFileSystem(conf); RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true); while (iterator.hasNext()) { String name = iterator.next().getPath().toUri().toString(); inputFileList.add(name); } } catch (IOException e) { LOG.error("Fail to get test files", e); } int dataSize = vectorSize * numRows; // float[] data = new float[dataSize]; double[] data = new double[dataSize]; long[] dims = { numRows, vectorSize }; int index = 0; FSDataInputStream in = null; //loop over all the files in the list ListIterator<String> file_itr = inputFileList.listIterator(); while (file_itr.hasNext()) { String file_name = file_itr.next(); LOG.info("read in file name: " + file_name); Path file_path = new Path(file_name); try { FileSystem fs = file_path.getFileSystem(conf); in = fs.open(file_path); } catch (Exception e) { LOG.error("Fail to open file " + e.toString()); return null; } //read file content while (true) { String line = in.readLine(); if (line == null) break; String[] lineData = line.split(","); for (int t = 0; t < vectorSize; t++) { if (index < dataSize) { // data[index] = Float.parseFloat(lineData[t]); data[index] = Double.parseDouble(lineData[t]); index++; } else { LOG.error("Incorrect size of file: dataSize: " + dataSize + "; index val: " + index); return null; } } } in.close(); } if (index != dataSize) { LOG.error("Incorrect total size of file: dataSize: " + dataSize + "; index val: " + index); return null; } //debug check the vals of data // for(int p=0;p<60;p++) // LOG.info("data at: " + p + " is: " + data[p]); NumericTable predictionData = new HomogenNumericTable(daal_Context, data, vectorSize, numRows); return predictionData; }
From source file:edu.iu.daal_sgd.SGDUtil.java
License:Apache License
public static Int2ObjectOpenHashMap<VRowCol> loadTestVHMap(String testFilePath, Configuration configuration, int numThreads) { List<String> testFilePaths = new LinkedList<>(); Path path = new Path(testFilePath); try {//from ww w. j a v a2 s.c om FileSystem fs = path.getFileSystem(configuration); RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(path, true); while (iterator.hasNext()) { String name = iterator.next().getPath().toUri().toString(); testFilePaths.add(name); } } catch (IOException e) { LOG.error("Fail to get test files", e); } VStore testVStore = new VStore(testFilePaths, numThreads, configuration); testVStore.load(true, false); return testVStore.getVHMap(); }