List of usage examples for org.apache.hadoop.fs.FileSystem#listFiles
public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive) throws FileNotFoundException, IOException
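Before the project examples below, a minimal self-contained sketch of the call itself (the directory path and default Configuration are placeholder assumptions, not taken from any example): listFiles returns a lazy RemoteIterator rather than a materialized array, and both hasNext() and next() can throw IOException.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder directory; pass a real path as the first argument.
        Path dir = new Path(args.length > 0 ? args[0] : "/tmp");
        FileSystem fs = dir.getFileSystem(conf);
        // Recursively iterate over all files (not directories) under dir.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}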
From source file:org.apache.nutch.tools.CommonCrawlDataDumper.java
License:Apache License
/**
 * Dumps the reverse engineered CBOR content from the provided segment
 * directories if a parent directory contains more than one segment,
 * otherwise a single segment can be passed as an argument. If the boolean
 * argument is provided then the CBOR is also zipped.
 *
 * @param outputDir the directory you wish to dump the raw content to. This
 *                  directory will be created.
 * @param segmentRootDir a directory containing one or more segments.
 * @param linkdb Path to linkdb.
 * @param gzip a boolean flag indicating whether the CBOR content should also
 *             be gzipped.
 * @param epochFilename if {@code true}, output files will be named using the epoch time (in milliseconds).
 * @param extension a file extension to use with output documents.
 * @throws Exception if any exception occurs.
 */
public void dump(File outputDir, File segmentRootDir, File linkdb, boolean gzip, String[] mimeTypes,
        boolean epochFilename, String extension, boolean warc) throws Exception {
    if (gzip) {
        LOG.info("Gzipping CBOR data has been skipped");
    }
    // total file counts
    Map<String, Integer> typeCounts = new HashMap<>();
    // filtered file counters
    Map<String, Integer> filteredCounts = new HashMap<>();

    Configuration nutchConfig = NutchConfiguration.create();
    Path segmentRootPath = new Path(segmentRootDir.toString());
    FileSystem fs = segmentRootPath.getFileSystem(nutchConfig);

    // get all paths
    List<Path> parts = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(segmentRootPath, true);
    String partPattern = ".*" + File.separator + Content.DIR_NAME + File.separator + "part-[0-9]{5}"
            + File.separator + "data";
    while (files.hasNext()) {
        LocatedFileStatus next = files.next();
        if (next.isFile()) {
            Path path = next.getPath();
            if (path.toString().matches(partPattern)) {
                parts.add(path);
            }
        }
    }

    LinkDbReader linkDbReader = null;
    if (linkdb != null) {
        linkDbReader = new LinkDbReader(nutchConfig, new Path(linkdb.toString()));
    }
    if (parts == null || parts.size() == 0) {
        LOG.error("No segment directories found in {} ", segmentRootDir.getAbsolutePath());
        System.exit(1);
    }
    LOG.info("Found {} segment parts", parts.size());
    if (gzip && !warc) {
        fileList = new ArrayList<>();
        constructNewStream(outputDir);
    }

    for (Path segmentPart : parts) {
        LOG.info("Processing segment Part : [ {} ]", segmentPart);
        try {
            SequenceFile.Reader reader = new SequenceFile.Reader(nutchConfig,
                    SequenceFile.Reader.file(segmentPart));

            Writable key = (Writable) reader.getKeyClass().getConstructor().newInstance();

            Content content = null;
            while (reader.next(key)) {
                content = new Content();
                reader.getCurrentValue(content);
                Metadata metadata = content.getMetadata();
                String url = key.toString();
                String baseName = FilenameUtils.getBaseName(url);
                String extensionName = FilenameUtils.getExtension(url);

                if (!extension.isEmpty()) {
                    extensionName = extension;
                } else if ((extensionName == null) || extensionName.isEmpty()) {
                    extensionName = "html";
                }

                String outputFullPath = null;
                String outputRelativePath = null;
                String filename = null;
                String timestamp = null;
                String reverseKey = null;

                if (epochFilename || config.getReverseKey()) {
                    try {
                        long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z")
                                .parse(getDate(metadata.get("Date"))).getTime();
                        timestamp = String.valueOf(epoch);
                    } catch (ParseException pe) {
                        LOG.warn(pe.getMessage());
                    }

                    reverseKey = reverseUrl(url);
                    config.setReverseKeyValue(
                            reverseKey.replace("/", "_") + "_" + DigestUtils.sha1Hex(url) + "_" + timestamp);
                }

                if (!warc) {
                    if (epochFilename) {
                        outputFullPath = DumpFileUtil.createFileNameFromUrl(outputDir.getAbsolutePath(),
                                reverseKey, url, timestamp, extensionName, !gzip);
                        outputRelativePath = outputFullPath.substring(0,
                                outputFullPath.lastIndexOf(File.separator) - 1);
                        filename = content.getMetadata().get(Metadata.DATE) + "." + extensionName;
                    } else {
                        String md5Ofurl = DumpFileUtil.getUrlMD5(url);
                        String fullDir = DumpFileUtil.createTwoLevelsDirectory(outputDir.getAbsolutePath(),
                                md5Ofurl, !gzip);
                        filename = DumpFileUtil.createFileName(md5Ofurl, baseName, extensionName);
                        outputFullPath = String.format("%s/%s", fullDir, filename);

                        String[] fullPathLevels = fullDir.split(Pattern.quote(File.separator));
                        String firstLevelDirName = fullPathLevels[fullPathLevels.length - 2];
                        String secondLevelDirName = fullPathLevels[fullPathLevels.length - 1];
                        outputRelativePath = firstLevelDirName + secondLevelDirName;
                    }
                }

                // Encode all filetypes if no mimetypes have been given
                Boolean filter = (mimeTypes == null);

                String jsonData = "";
                try {
                    String mimeType = new Tika().detect(content.getContent());
                    // Maps file to JSON-based structure
                    Set<String> inUrls = null; // there may be duplicates, so using set
                    if (linkDbReader != null) {
                        Inlinks inlinks = linkDbReader.getInlinks((Text) key);
                        if (inlinks != null) {
                            Iterator<Inlink> iterator = inlinks.iterator();
                            inUrls = new LinkedHashSet<>();
                            while (inUrls.size() <= MAX_INLINKS && iterator.hasNext()) {
                                inUrls.add(iterator.next().getFromUrl());
                            }
                        }
                    }
                    // TODO: Make this Jackson Format implementation reusable
                    try (CommonCrawlFormat format = CommonCrawlFormatFactory
                            .getCommonCrawlFormat(warc ? "WARC" : "JACKSON", nutchConfig, config)) {
                        if (inUrls != null) {
                            format.setInLinks(new ArrayList<>(inUrls));
                        }
                        jsonData = format.getJsonData(url, content, metadata);
                    }

                    collectStats(typeCounts, mimeType);
                    // collects statistics for the given mimetypes
                    if ((mimeType != null) && (mimeTypes != null) && Arrays.asList(mimeTypes).contains(mimeType)) {
                        collectStats(filteredCounts, mimeType);
                        filter = true;
                    }
                } catch (IOException ioe) {
                    LOG.error("Fatal error in creating JSON data: " + ioe.getMessage());
                    return;
                }

                if (!warc) {
                    if (filter) {
                        byte[] byteData = serializeCBORData(jsonData);

                        if (!gzip) {
                            File outputFile = new File(outputFullPath);
                            if (outputFile.exists()) {
                                LOG.info("Skipping writing: [" + outputFullPath + "]: file already exists");
                            } else {
                                LOG.info("Writing: [" + outputFullPath + "]");
                                IOUtils.copy(new ByteArrayInputStream(byteData), new FileOutputStream(outputFile));
                            }
                        } else {
                            if (fileList.contains(outputFullPath)) {
                                LOG.info("Skipping compressing: [" + outputFullPath + "]: file already exists");
                            } else {
                                fileList.add(outputFullPath);
                                LOG.info("Compressing: [" + outputFullPath + "]");
                                //TarArchiveEntry tarEntry = new TarArchiveEntry(firstLevelDirName + File.separator + secondLevelDirName + File.separator + filename);
                                TarArchiveEntry tarEntry = new TarArchiveEntry(
                                        outputRelativePath + File.separator + filename);
                                tarEntry.setSize(byteData.length);
                                tarOutput.putArchiveEntry(tarEntry);
                                tarOutput.write(byteData);
                                tarOutput.closeArchiveEntry();
                            }
                        }
                    }
                }
            }
            reader.close();
        } catch (Exception e) {
            LOG.warn("SKIPPED: {} Because : {}", segmentPart, e.getMessage());
        } finally {
            fs.close();
        }
    }

    if (gzip && !warc) {
        closeStream();
    }

    if (!typeCounts.isEmpty()) {
        LOG.info("CommonsCrawlDataDumper File Stats: "
                + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));
    }
}
From source file:org.apache.phoenix.compile.ListJarsQueryPlan.java
License:Apache License
@Override
public ResultIterator iterator(ParallelScanGrouper scanGrouper) throws SQLException {
    return new ResultIterator() {
        private RemoteIterator<LocatedFileStatus> listFiles = null;

        @Override
        public void close() throws SQLException {
        }

        @Override
        public Tuple next() throws SQLException {
            try {
                if (first) {
                    String dynamicJarsDir = stmt.getConnection().getQueryServices().getProps()
                            .get(QueryServices.DYNAMIC_JARS_DIR_KEY);
                    if (dynamicJarsDir == null) {
                        throw new SQLException(QueryServices.DYNAMIC_JARS_DIR_KEY
                                + " is not configured for the listing the jars.");
                    }
                    dynamicJarsDir = dynamicJarsDir.endsWith("/") ? dynamicJarsDir : dynamicJarsDir + '/';
                    Configuration conf = HBaseFactoryProvider.getConfigurationFactory().getConfiguration();
                    Path dynamicJarsDirPath = new Path(dynamicJarsDir);
                    FileSystem fs = dynamicJarsDirPath.getFileSystem(conf);
                    listFiles = fs.listFiles(dynamicJarsDirPath, true);
                    first = false;
                }
                if (listFiles == null || !listFiles.hasNext())
                    return null;
                ImmutableBytesWritable ptr = new ImmutableBytesWritable();
                ParseNodeFactory factory = new ParseNodeFactory();
                LiteralParseNode literal = factory.literal(listFiles.next().getPath().toString());
                LiteralExpression expression = LiteralExpression.newConstant(literal.getValue(),
                        PVarchar.INSTANCE, Determinism.ALWAYS);
                expression.evaluate(null, ptr);
                byte[] rowKey = ByteUtil.copyKeyBytesIfNecessary(ptr);
                Cell cell = CellUtil.createCell(rowKey, HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY,
                        EnvironmentEdgeManager.currentTimeMillis(), Type.Put.getCode(),
                        HConstants.EMPTY_BYTE_ARRAY);
                List<Cell> cells = new ArrayList<Cell>(1);
                cells.add(cell);
                return new ResultTuple(Result.create(cells));
            } catch (IOException e) {
                throw new SQLException(e);
            }
        }

        @Override
        public void explain(List<String> planSteps) {
        }
    };
}
From source file:org.apache.ranger.services.hdfs.HDFSRangerTest.java
License:Apache License
@org.junit.Test
public void executeTest() throws Exception {
    FileSystem fileSystem = hdfsCluster.getFileSystem();

    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    final Path file = new Path("/tmp/tmpdir3/data-file2");
    FSDataOutputStream out = fileSystem.create(file);
    for (int i = 0; i < 1024; ++i) {
        out.write(("data" + i + "\n").getBytes("UTF-8"));
        out.flush();
    }
    out.close();

    // Change permissions to read-only
    fileSystem.setPermission(file, new FsPermission(FsAction.READ, FsAction.NONE, FsAction.NONE));

    // Change the parent directory permissions to be execute only for the owner
    Path parentDir = new Path("/tmp/tmpdir3");
    fileSystem.setPermission(parentDir, new FsPermission(FsAction.EXECUTE, FsAction.NONE, FsAction.NONE));

    // Try to read the directory as "bob" - this should be allowed (by the policy - user)
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[] {});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);

            FileSystem fs = FileSystem.get(conf);
            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
            Assert.assertTrue(iter.hasNext());

            fs.close();
            return null;
        }
    });

    // Try to read the directory as "alice" - this should be allowed (by the policy - group)
    ugi = UserGroupInformation.createUserForTesting("alice", new String[] { "IT" });
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);

            FileSystem fs = FileSystem.get(conf);
            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
            Assert.assertTrue(iter.hasNext());

            fs.close();
            return null;
        }
    });

    // Now try to read the directory as unknown user "eve" - this should not be allowed
    ugi = UserGroupInformation.createUserForTesting("eve", new String[] {});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        public Void run() throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", defaultFs);

            FileSystem fs = FileSystem.get(conf);

            // Listing the directory should be denied for this user
            try {
                RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
                Assert.assertTrue(iter.hasNext());
                Assert.fail("Failure expected on an incorrect permission");
            } catch (RemoteException ex) {
                // expected
                Assert.assertTrue(RangerAccessControlException.class.getName().equals(ex.getClassName()));
            }

            fs.close();
            return null;
        }
    });
}
From source file:org.apache.sentry.tests.e2e.hdfs.TestHDFSIntegration.java
License:Apache License
private void verifyAccessToPath(String user, String group, String path, boolean hasPermission) throws Exception {
    Path p = new Path(path);
    UserGroupInformation hadoopUser = UserGroupInformation.createUserForTesting(user, new String[] { group });
    FileSystem fs = DFSTestUtil.getFileSystemAs(hadoopUser, hadoopConf);
    try {
        fs.listFiles(p, true);
        if (!hasPermission) {
            // Listing succeeded even though the user should have been denied
            Assert.fail("Expected listing files to fail");
        }
    } catch (Exception e) {
        if (hasPermission) {
            throw e;
        }
    }
}
From source file:org.apache.sentry.tests.e2e.hdfs.TestHDFSIntegrationBase.java
License:Apache License
protected void verifyAccessToPath(String user, String group, String path, boolean hasPermission) throws Exception {
    Path p = new Path(path);
    FileSystem fs = miniDFS.getFileSystem();
    try {
        fs.listFiles(p, true);
        if (!hasPermission) {
            // Listing succeeded even though the user should have been denied
            org.junit.Assert.fail("Expected listing files to fail");
        }
    } catch (Exception e) {
        if (hasPermission) {
            throw e;
        }
    }
}
From source file:org.apache.tajo.engine.planner.TestPlannerUtil.java
License:Apache License
@Test
public void testGetNonZeroLengthDataFiles() throws Exception {
    String queryFiles = ClassLoader.getSystemResource("queries").toString() + "/TestSelectQuery";
    Path path = new Path(queryFiles);

    TableDesc tableDesc = new TableDesc();
    tableDesc.setName("Test");
    tableDesc.setPath(path.toUri());

    FileSystem fs = path.getFileSystem(util.getConfiguration());

    List<Path> expectedFiles = new ArrayList<Path>();
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(path, true);
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        if (file.isFile() && file.getLen() > 0) {
            expectedFiles.add(file.getPath());
        }
    }

    int fileNum = expectedFiles.size() / 5;
    int numResultFiles = 0;
    for (int i = 0; i <= 5; i++) {
        int start = i * fileNum;

        FragmentProto[] fragments = PhysicalPlanUtil.getNonZeroLengthDataFiles(util.getConfiguration(),
                tableDesc, start, fileNum);
        assertNotNull(fragments);
        numResultFiles += fragments.length;

        int expectedSize = fileNum;
        if (i == 5) {
            // last
            expectedSize = expectedFiles.size() - (fileNum * 5);
        }
        comparePath(expectedFiles, fragments, start, expectedSize);
    }
    assertEquals(expectedFiles.size(), numResultFiles);
}
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer.java
License:Apache License
public static File copyDirectoryIfNonExistent(final FileSystem fileSystem, final String directory) {
    try {
        final String hadoopGremlinLibsRemote = "hadoop-gremlin-" + Gremlin.version() + "-libs";
        final Path path = new Path(directory);
        if (Boolean.valueOf(System.getProperty("is.testing", "false"))
                || (fileSystem.exists(path) && fileSystem.isDirectory(path))) {
            final File tempDirectory = new File(
                    System.getProperty("java.io.tmpdir") + File.separator + hadoopGremlinLibsRemote);
            assert tempDirectory.exists() || tempDirectory.mkdirs();
            final String tempPath = tempDirectory.getAbsolutePath() + File.separator + path.getName();
            final RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(path, false);
            while (files.hasNext()) {
                final LocatedFileStatus f = files.next();
                fileSystem.copyToLocalFile(false, f.getPath(),
                        new Path(tempPath + System.getProperty("file.separator") + f.getPath().getName()), true);
            }
            return new File(tempPath);
        } else
            return new File(directory);
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file:org.apache.vxquery.metadata.VXQueryCollectionOperatorDescriptor.java
License:Apache License
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
            recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
    final int fieldOutputCount = recordDescProvider.getOutputRecordDescriptor(getActivityId(), 0).getFieldCount();
    final ByteBuffer frame = ctx.allocateFrame();
    final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize(), fieldOutputCount);
    final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition();
    final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, dataSourceId, totalDataSources);
    final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
    final DynamicContext dCtx = (DynamicContext) ctx.getJobletContext().getGlobalJobData();

    final String collectionName = collectionPartitions[partition % collectionPartitions.length];
    final XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId, frame, appender, childSeq,
            dCtx.getStaticContext());

    return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
        @Override
        public void open() throws HyracksDataException {
            appender.reset(frame, true);
            writer.open();
            hdfs = new HDFSFunctions();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            fta.reset(buffer);
            String collectionModifiedName = collectionName.replace("${nodeId}", nodeId);
            if (!collectionModifiedName.contains("hdfs:/")) {
                File collectionDirectory = new File(collectionModifiedName);
                // check if directory is in the local file system
                if (collectionDirectory.exists()) {
                    // Go through each tuple.
                    if (collectionDirectory.isDirectory()) {
                        for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                            Iterator<File> it = FileUtils.iterateFiles(collectionDirectory,
                                    new VXQueryIOFileFilter(), TrueFileFilter.INSTANCE);
                            while (it.hasNext()) {
                                File xmlDocument = it.next();
                                if (LOGGER.isLoggable(Level.FINE)) {
                                    LOGGER.fine("Starting to read XML document: " + xmlDocument.getAbsolutePath());
                                }
                                parser.parseElements(xmlDocument, writer, fta, tupleIndex);
                            }
                        }
                    } else {
                        throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":"
                                + collectionDirectory.getAbsolutePath() + ") passed to collection.");
                    }
                }
            } else {
                // Else check in HDFS file system
                // Get instance of the HDFS filesystem
                FileSystem fs = hdfs.getFileSystem();
                if (fs != null) {
                    collectionModifiedName = collectionModifiedName.replaceAll("hdfs:/", "");
                    Path directory = new Path(collectionModifiedName);
                    Path xmlDocument;
                    if (tag != null) {
                        hdfs.setJob(directory.getName(), tag);
                        tag = "<" + tag + ">";
                        Job job = hdfs.getJob();
                        InputFormat inputFormat = hdfs.getinputFormat();
                        try {
                            hdfs.scheduleSplits();
                            ArrayList<Integer> schedule = hdfs
                                    .getScheduleForNode(InetAddress.getLocalHost().getHostName());
                            List<InputSplit> splits = hdfs.getSplits();
                            List<FileSplit> fileSplits = new ArrayList<FileSplit>();
                            for (int i : schedule) {
                                fileSplits.add((FileSplit) splits.get(i));
                            }
                            FileSplitsFactory splitsFactory = new FileSplitsFactory(fileSplits);
                            List<FileSplit> inputSplits = splitsFactory.getSplits();
                            ContextFactory ctxFactory = new ContextFactory();
                            int size = inputSplits.size();
                            InputStream stream;
                            String value;
                            RecordReader reader;
                            TaskAttemptContext context;
                            for (int i = 0; i < size; i++) {
                                // read split
                                context = ctxFactory.createContext(job.getConfiguration(), i);
                                try {
                                    reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                                    reader.initialize(inputSplits.get(i), context);
                                    while (reader.nextKeyValue()) {
                                        value = reader.getCurrentValue().toString();
                                        // Split value if it contains more than one item with the tag
                                        if (StringUtils.countMatches(value, tag) > 1) {
                                            String items[] = value.split(tag);
                                            for (String item : items) {
                                                if (item.length() > 0) {
                                                    item = START_TAG + tag + item;
                                                    stream = new ByteArrayInputStream(
                                                            item.getBytes(StandardCharsets.UTF_8));
                                                    parser.parseHDFSElements(stream, writer, fta, i);
                                                }
                                            }
                                        } else {
                                            value = START_TAG + value;
                                            // create an input stream to the file currently reading and send it to parser
                                            stream = new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8));
                                            parser.parseHDFSElements(stream, writer, fta, i);
                                        }
                                    }
                                } catch (InterruptedException e) {
                                    if (LOGGER.isLoggable(Level.SEVERE)) {
                                        LOGGER.severe(e.getMessage());
                                    }
                                }
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (ParserConfigurationException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (SAXException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    } else {
                        try {
                            // check if the path exists and is a directory
                            if (fs.exists(directory) && fs.isDirectory(directory)) {
                                for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                                    // read every file in the directory
                                    RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
                                    while (it.hasNext()) {
                                        xmlDocument = it.next().getPath();
                                        if (fs.isFile(xmlDocument)) {
                                            if (LOGGER.isLoggable(Level.FINE)) {
                                                LOGGER.fine("Starting to read XML document: " + xmlDocument.getName());
                                            }
                                            // create an input stream to the file currently reading and send it to parser
                                            InputStream in = fs.open(xmlDocument).getWrappedStream();
                                            parser.parseHDFSElements(in, writer, fta, tupleIndex);
                                        }
                                    }
                                }
                            } else {
                                throw new HyracksDataException("Invalid HDFS directory parameter (" + nodeId + ":"
                                        + directory + ") passed to collection.");
                            }
                        } catch (FileNotFoundException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    }
                    try {
                        fs.close();
                    } catch (IOException e) {
                        if (LOGGER.isLoggable(Level.SEVERE)) {
                            LOGGER.severe(e.getMessage());
                        }
                    }
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            writer.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            // Check if needed?
            fta.reset(frame);
            if (fta.getTupleCount() > 0) {
                FrameUtils.flushFrame(frame, writer);
            }
            writer.close();
        }
    };
}
From source file:org.datacleaner.spark.ApplicationDriver.java
License:Open Source License
private List<String> buildJarFiles(MutableRef<String> primaryJarRef) throws IOException {
    final List<String> list = new ArrayList<>();

    final Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://" + _hostname + ":" + _port);

    final FileSystem fs = FileSystem.newInstance(conf);
    try {
        final Path directoryPath = new Path(_jarDirectoryPath);
        final RemoteIterator<LocatedFileStatus> files = fs.listFiles(directoryPath, false);
        while (files.hasNext()) {
            final LocatedFileStatus file = files.next();
            final Path path = file.getPath();
            final String filename = path.getName();
            if (filename.startsWith(PRIMARY_JAR_FILENAME_PREFIX)) {
                primaryJarRef.set(path.toString());
            } else {
                list.add(path.toString());
            }
        }
    } finally {
        FileHelper.safeClose(fs);
    }

    if (primaryJarRef.get() == null) {
        throw new IllegalArgumentException("Failed to find primary jar (starting with '"
                + PRIMARY_JAR_FILENAME_PREFIX + "') in JAR file directory: " + _jarDirectoryPath);
    }
    return list;
}
From source file:org.deeplearning4j.datasets.DatasetReaderFromHdfs.java
License:Apache License
protected void doInitialize() {
    FileSystem fs = CommonUtils.openHdfsConnect();
    try {
        if (train) {
            hdfsIter = fs.listFiles(new Path(CommonUtils.TRAIN_HDFS_PATH), true);
        } else {
            hdfsIter = fs.listFiles(new Path(CommonUtils.VALIDATE_HDFS_PATH), true);
        }
        while (hdfsIter.hasNext()) {
            LocatedFileStatus next = hdfsIter.next();
            Path path = next.getPath();
            String currentPath = path.toUri().getPath();
            fileNames.add(path.toString());
            String name = FilenameUtils.getBaseName((new File(currentPath)).getParent());
            if (!labels.contains(name)) {
                labels.add(name);
            }
        }
        Collections.shuffle(fileNames);
        fileIterator = fileNames.iterator();
        numExample = fileNames.size();
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        CommonUtils.closeHdfsConnect(fs);
    }
}