List of usage examples for org.apache.hadoop.fs.LocatedFileStatus.getPath()
public Path getPath()
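getPath() returns the Path of the file or directory that a LocatedFileStatus describes; it is most often called on entries produced by FileSystem.listFiles or FileSystem.listLocatedStatus, as in the examples below. Here is a minimal sketch of that common pattern; the filesystem URI "hdfs://localhost:9000", the directory "/tmp/data", and the class name GetPathExample are illustrative placeholders, not taken from any of the examples.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class GetPathExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder HDFS URI; adjust to your cluster
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        FileSystem fs = FileSystem.get(conf);
        try {
            // List files recursively; each LocatedFileStatus carries its Path
            RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/data"), true);
            while (it.hasNext()) {
                LocatedFileStatus status = it.next();
                Path path = status.getPath();  // full path of this entry
                System.out.println(path.getName() + " -> " + path.toUri().getPath());
            }
        } finally {
            fs.close();
        }
    }
}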
From source file: org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License: Apache License

@Test(timeout = 5000)
public void testNewSplitsGen() throws Exception {
    DataSourceDescriptor dataSource = generateDataSourceDescriptorMapReduce(newSplitsDir);
    Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(
            dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));
    RemoteIterator<LocatedFileStatus> files = remoteFs.listFiles(newSplitsDir, false);
    boolean foundSplitsFile = false;
    boolean foundMetaFile = false;
    int totalFilesFound = 0;
    while (files.hasNext()) {
        LocatedFileStatus status = files.next();
        String fName = status.getPath().getName();
        totalFilesFound++;
        if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
            foundSplitsFile = true;
        } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
            foundMetaFile = true;
        } else {
            Assert.fail("Found invalid file in splits dir, filename=" + fName);
        }
        Assert.assertTrue(status.getLen() > 0);
    }
    Assert.assertEquals(2, totalFilesFound);
    Assert.assertTrue(foundSplitsFile);
    Assert.assertTrue(foundMetaFile);
    verifyLocationHints(newSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
}
From source file: org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License: Apache License

@Test(timeout = 5000)
public void testOldSplitsGen() throws Exception {
    DataSourceDescriptor dataSource = generateDataSourceDescriptorMapRed(oldSplitsDir);
    Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(
            dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));
    RemoteIterator<LocatedFileStatus> files = remoteFs.listFiles(oldSplitsDir, false);
    boolean foundSplitsFile = false;
    boolean foundMetaFile = false;
    int totalFilesFound = 0;
    while (files.hasNext()) {
        LocatedFileStatus status = files.next();
        String fName = status.getPath().getName();
        totalFilesFound++;
        if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
            foundSplitsFile = true;
        } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
            foundMetaFile = true;
        } else {
            Assert.fail("Found invalid file in splits dir, filename=" + fName);
        }
        Assert.assertTrue(status.getLen() > 0);
    }
    Assert.assertEquals(2, totalFilesFound);
    Assert.assertTrue(foundSplitsFile);
    Assert.assertTrue(foundMetaFile);
    verifyLocationHints(oldSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
}
From source file: org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer.java
License: Apache License

public static File copyDirectoryIfNonExistent(final FileSystem fileSystem, final String directory) {
    try {
        final String hadoopGremlinLibsRemote = "hadoop-gremlin-" + Gremlin.version() + "-libs";
        final Path path = new Path(directory);
        if (Boolean.valueOf(System.getProperty("is.testing", "false"))
                || (fileSystem.exists(path) && fileSystem.isDirectory(path))) {
            final File tempDirectory = new File(
                    System.getProperty("java.io.tmpdir") + File.separator + hadoopGremlinLibsRemote);
            assert tempDirectory.exists() || tempDirectory.mkdirs();
            final String tempPath = tempDirectory.getAbsolutePath() + File.separator + path.getName();
            final RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(path, false);
            while (files.hasNext()) {
                final LocatedFileStatus f = files.next();
                fileSystem.copyToLocalFile(false, f.getPath(),
                        new Path(tempPath + System.getProperty("file.separator") + f.getPath().getName()), true);
            }
            return new File(tempPath);
        } else {
            return new File(directory);
        }
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file: org.datacleaner.spark.ApplicationDriver.java
License: Open Source License

private List<String> buildJarFiles(MutableRef<String> primaryJarRef) throws IOException {
    final List<String> list = new ArrayList<>();
    final Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://" + _hostname + ":" + _port);
    final FileSystem fs = FileSystem.newInstance(conf);
    try {
        final Path directoryPath = new Path(_jarDirectoryPath);
        final RemoteIterator<LocatedFileStatus> files = fs.listFiles(directoryPath, false);
        while (files.hasNext()) {
            final LocatedFileStatus file = files.next();
            final Path path = file.getPath();
            final String filename = path.getName();
            if (filename.startsWith(PRIMARY_JAR_FILENAME_PREFIX)) {
                primaryJarRef.set(path.toString());
            } else {
                list.add(path.toString());
            }
        }
    } finally {
        FileHelper.safeClose(fs);
    }
    if (primaryJarRef.get() == null) {
        throw new IllegalArgumentException("Failed to find primary jar (starting with '"
                + PRIMARY_JAR_FILENAME_PREFIX + "') in JAR file directory: " + _jarDirectoryPath);
    }
    return list;
}
From source file: org.deeplearning4j.datasets.DatasetReaderFromHdfs.java
License: Apache License

protected void doInitialize() {
    FileSystem fs = CommonUtils.openHdfsConnect();
    try {
        if (train) {
            hdfsIter = fs.listFiles(new Path(CommonUtils.TRAIN_HDFS_PATH), true);
        } else {
            hdfsIter = fs.listFiles(new Path(CommonUtils.VALIDATE_HDFS_PATH), true);
        }
        while (hdfsIter.hasNext()) {
            LocatedFileStatus next = hdfsIter.next();
            Path path = next.getPath();
            String currentPath = path.toUri().getPath();
            fileNames.add(path.toString());
            String name = FilenameUtils.getBaseName(new File(currentPath).getParent());
            if (!labels.contains(name)) {
                labels.add(name);
            }
        }
        Collections.shuffle(fileNames);
        fileIterator = fileNames.iterator();
        numExample = fileNames.size();
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        CommonUtils.closeHdfsConnect(fs);
    }
}
From source file: org.deeplearning4j.datasets.DatasetReaderFromHdfs.java
License: Apache License

public static void main(String[] args) throws IOException {
    DatasetReaderFromHdfs ds = new DatasetReaderFromHdfs();
    // hdfsIter is an instance field (assigned in doInitialize() above), so it is
    // qualified with ds here; the unqualified access in the original would not
    // compile from this static context. The field must be assigned before this
    // loop runs.
    int j = 0;
    while (ds.hdfsIter.hasNext()) {
        LocatedFileStatus next = ds.hdfsIter.next();
        Path path = next.getPath();
        String currentPath = path.toUri().getPath();
        //String index = getRelativeFilename(currentPath);
        System.out.println("file name : i = " + j++ + " path=" + currentPath);
    }
}
From source file: org.deeplearning4j.examples.MDSIterator.java
License: Apache License

private MultiDataSet nextMultiDataSet(int num) throws IOException {
    String previousPath = stack.isEmpty() ? "" : stack.peek().toUri().getPath();
    for (int i = 0; i < num && hdfsIterator.hasNext(); i++) {
        for (int j = 0; j < numSteps; j++) {
            if (!hdfsIterator.hasNext())
                break;
            LocatedFileStatus next = hdfsIterator.next();
            Path path = next.getPath();
            String currentPath = path.toUri().getPath();
            String index = getRelativeFilename(currentPath);
            if (previousPath.contains(index.split("_")[0])) {
                if (j >= numSteps - 1 || !hdfsIterator.hasNext()) {
                    pushAndClear(path, index);
                } else {
                    stack.push(path);
                }
                previousPath = currentPath;
            } else {
                if (j >= numSteps - 1 || !hdfsIterator.hasNext()) {
                    pushAndClear(path, index);
                }
                ssRecordReader.newRecord(stack);
                stack.push(path);
                if (!previousPath.isEmpty()) {
                    break;
                }
                previousPath = currentPath;
            }
        }
    }
    return ssRecordReader.toMultiDataSet(vectorSize, labelSize);
}
From source file: org.deeplearning4j.hadoop.datasetiterator.BaseHdfsDataSetIterator.java
License: Apache License

/**
 * List all of the files in the hdfsUriRootDir directory.
 * @return the list of paths in the directory
 * @throws Exception if one occurs
 */
public List<Path> filesInDir() throws Exception {
    FileSystem fs = FileSystem.get(conf);
    List<Path> paths = new ArrayList<Path>();
    RemoteIterator<LocatedFileStatus> iter = fs.listFiles(new Path(hdfsUriRootDir), true);
    while (iter.hasNext()) {
        LocatedFileStatus l = iter.next();
        paths.add(l.getPath());
    }
    fs.close();
    return paths;
}
From source file: org.gbif.occurrence.download.oozie.ArchiveBuilder.java
License: Creative Commons License

/**
 * Appends the compressed files found within the directory to the zip stream as the named file.
 */
private void appendPreCompressedFile(ModalZipOutputStream out, Path dir, String filename, String headerRow)
        throws IOException {
    RemoteIterator<LocatedFileStatus> files = hdfs.listFiles(dir, false);
    List<InputStream> parts = Lists.newArrayList();

    // Add the header first, which must also be compressed
    ByteArrayOutputStream header = new ByteArrayOutputStream();
    D2Utils.compress(new ByteArrayInputStream(headerRow.getBytes()), header);
    parts.add(new ByteArrayInputStream(header.toByteArray()));

    // Locate the streams to the compressed content on HDFS
    while (files.hasNext()) {
        LocatedFileStatus fs = files.next();
        Path path = fs.getPath();
        if (path.toString().endsWith(D2Utils.FILE_EXTENSION)) {
            LOG.info("Deflated content to merge: " + path);
            parts.add(hdfs.open(path));
        }
    }

    // Create the Zip entry, and write the compressed bytes
    org.gbif.hadoop.compress.d2.zip.ZipEntry ze = new org.gbif.hadoop.compress.d2.zip.ZipEntry(filename);
    out.putNextEntry(ze, ModalZipOutputStream.MODE.PRE_DEFLATED);
    try (D2CombineInputStream in = new D2CombineInputStream(parts)) {
        ByteStreams.copy(in, out);
        in.close(); // important so counts are accurate
        ze.setSize(in.getUncompressedLength()); // important to set the sizes and CRC
        ze.setCompressedSize(in.getCompressedLength());
        ze.setCrc(in.getCrc32());
    } finally {
        out.closeEntry();
    }
}
From source file: org.opencloudengine.garuda.backend.hdfs.HdfsServiceImpl.java
License: Open Source License

@Override
public HdfsListInfo list(String path, int start, int end, final String filter) throws Exception {
    HdfsListInfo hdfsListInfo = new HdfsListInfo();
    this.indexCheck(start, end);
    this.mustExists(path);
    FileSystem fs = fileSystemFactory.getFileSystem();
    Path fsPath = new Path(path);
    FileStatus fileStatus = fs.getFileStatus(fsPath);
    if (!fileStatus.isDirectory()) {
        this.notDirectoryException(fsPath.toString());
    }
    List<HdfsFileInfo> listStatus = new ArrayList<>();
    int count = 0;
    FileStatus fileStatuses = null;
    LocatedFileStatus next = null;
    RemoteIterator<LocatedFileStatus> remoteIterator = fs.listLocatedStatus(fsPath);
    while (remoteIterator.hasNext()) {
        next = remoteIterator.next();
        if (!StringUtils.isEmpty(filter)) {
            if (next.getPath().getName().contains(filter)) {
                count++;
                if (count >= start && count <= end) {
                    fileStatuses = fs.getFileStatus(next.getPath());
                    listStatus.add(new HdfsFileInfo(fileStatuses, fs.getContentSummary(fileStatuses.getPath())));
                }
            }
        } else {
            count++;
            if (count >= start && count <= end) {
                fileStatuses = fs.getFileStatus(next.getPath());
                listStatus.add(new HdfsFileInfo(fileStatuses, fs.getContentSummary(fileStatuses.getPath())));
            }
        }
    }
    hdfsListInfo.setFileInfoList(listStatus);
    hdfsListInfo.setCount(count);
    return hdfsListInfo;
}