List of usage examples for org.apache.hadoop.fs.LocatedFileStatus.getPath()
public Path getPath()
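getPath() returns the Path of the file or directory that a LocatedFileStatus describes; it is most often called on entries produced by FileSystem.listFiles or FileSystem.listLocatedStatus, as in the examples below. Here is a minimal sketch of that common pattern; the filesystem URI "hdfs://localhost:9000", the directory "/tmp/data", and the class name GetPathExample are illustrative placeholders, not taken from any of the examples.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class GetPathExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder HDFS URI; adjust to your cluster
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        FileSystem fs = FileSystem.get(conf);
        try {
            // List files recursively; each LocatedFileStatus carries its Path
            RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/data"), true);
            while (it.hasNext()) {
                LocatedFileStatus status = it.next();
                Path path = status.getPath();  // full path of this entry
                System.out.println(path.getName() + " -> " + path.toUri().getPath());
            }
        } finally {
            fs.close();
        }
    }
}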
From source file: org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License: Apache License

@Test(timeout = 5000)
public void testNewSplitsGen() throws Exception {
    DataSourceDescriptor dataSource = generateDataSourceDescriptorMapReduce(newSplitsDir);
    Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(
            dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));
    RemoteIterator<LocatedFileStatus> files = remoteFs.listFiles(newSplitsDir, false);
    boolean foundSplitsFile = false;
    boolean foundMetaFile = false;
    int totalFilesFound = 0;
    while (files.hasNext()) {
        LocatedFileStatus status = files.next();
        String fName = status.getPath().getName();
        totalFilesFound++;
        if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
            foundSplitsFile = true;
        } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
            foundMetaFile = true;
        } else {
            Assert.fail("Found invalid file in splits dir, filename=" + fName);
        }
        Assert.assertTrue(status.getLen() > 0);
    }
    Assert.assertEquals(2, totalFilesFound);
    Assert.assertTrue(foundSplitsFile);
    Assert.assertTrue(foundMetaFile);
    verifyLocationHints(newSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
}
From source file: org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License: Apache License

@Test(timeout = 5000)
public void testOldSplitsGen() throws Exception {
    DataSourceDescriptor dataSource = generateDataSourceDescriptorMapRed(oldSplitsDir);
    Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(
            dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));
    RemoteIterator<LocatedFileStatus> files = remoteFs.listFiles(oldSplitsDir, false);
    boolean foundSplitsFile = false;
    boolean foundMetaFile = false;
    int totalFilesFound = 0;
    while (files.hasNext()) {
        LocatedFileStatus status = files.next();
        String fName = status.getPath().getName();
        totalFilesFound++;
        if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
            foundSplitsFile = true;
        } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
            foundMetaFile = true;
        } else {
            Assert.fail("Found invalid file in splits dir, filename=" + fName);
        }
        Assert.assertTrue(status.getLen() > 0);
    }
    Assert.assertEquals(2, totalFilesFound);
    Assert.assertTrue(foundSplitsFile);
    Assert.assertTrue(foundMetaFile);
    verifyLocationHints(oldSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
}
From source file: org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer.java
License: Apache License

public static File copyDirectoryIfNonExistent(final FileSystem fileSystem, final String directory) {
    try {
        final String hadoopGremlinLibsRemote = "hadoop-gremlin-" + Gremlin.version() + "-libs";
        final Path path = new Path(directory);
        if (Boolean.valueOf(System.getProperty("is.testing", "false"))
                || (fileSystem.exists(path) && fileSystem.isDirectory(path))) {
            final File tempDirectory = new File(
                    System.getProperty("java.io.tmpdir") + File.separator + hadoopGremlinLibsRemote);
            assert tempDirectory.exists() || tempDirectory.mkdirs();
            final String tempPath = tempDirectory.getAbsolutePath() + File.separator + path.getName();
            final RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(path, false);
            while (files.hasNext()) {
                final LocatedFileStatus f = files.next();
                fileSystem.copyToLocalFile(false, f.getPath(),
                        new Path(tempPath + System.getProperty("file.separator") + f.getPath().getName()), true);
            }
            return new File(tempPath);
        } else {
            return new File(directory);
        }
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file: org.datacleaner.spark.ApplicationDriver.java
License: Open Source License

private List<String> buildJarFiles(MutableRef<String> primaryJarRef) throws IOException {
    final List<String> list = new ArrayList<>();
    final Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://" + _hostname + ":" + _port);
    final FileSystem fs = FileSystem.newInstance(conf);
    try {
        final Path directoryPath = new Path(_jarDirectoryPath);
        final RemoteIterator<LocatedFileStatus> files = fs.listFiles(directoryPath, false);
        while (files.hasNext()) {
            final LocatedFileStatus file = files.next();
            final Path path = file.getPath();
            final String filename = path.getName();
            if (filename.startsWith(PRIMARY_JAR_FILENAME_PREFIX)) {
                primaryJarRef.set(path.toString());
            } else {
                list.add(path.toString());
            }
        }
    } finally {
        FileHelper.safeClose(fs);
    }
    if (primaryJarRef.get() == null) {
        throw new IllegalArgumentException("Failed to find primary jar (starting with '"
                + PRIMARY_JAR_FILENAME_PREFIX + "') in JAR file directory: " + _jarDirectoryPath);
    }
    return list;
}
From source file: org.deeplearning4j.datasets.DatasetReaderFromHdfs.java
License: Apache License

protected void doInitialize() {
    FileSystem fs = CommonUtils.openHdfsConnect();
    try {
        if (train) {
            hdfsIter = fs.listFiles(new Path(CommonUtils.TRAIN_HDFS_PATH), true);
        } else {
            hdfsIter = fs.listFiles(new Path(CommonUtils.VALIDATE_HDFS_PATH), true);
        }
        while (hdfsIter.hasNext()) {
            LocatedFileStatus next = hdfsIter.next();
            Path path = next.getPath();
            String currentPath = path.toUri().getPath();
            fileNames.add(path.toString());
            String name = FilenameUtils.getBaseName(new File(currentPath).getParent());
            if (!labels.contains(name)) {
                labels.add(name);
            }
        }
        Collections.shuffle(fileNames);
        fileIterator = fileNames.iterator();
        numExample = fileNames.size();
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        CommonUtils.closeHdfsConnect(fs);
    }
}
From source file: org.deeplearning4j.datasets.DatasetReaderFromHdfs.java
License: Apache License

public static void main(String[] args) throws IOException {
    DatasetReaderFromHdfs ds = new DatasetReaderFromHdfs();
    // hdfsIter is an instance field (assigned in doInitialize() above), so it is
    // qualified with ds here; the unqualified access in the original would not
    // compile from this static context. The field must be assigned before this
    // loop runs.
    int j = 0;
    while (ds.hdfsIter.hasNext()) {
        LocatedFileStatus next = ds.hdfsIter.next();
        Path path = next.getPath();
        String currentPath = path.toUri().getPath();
        //String index = getRelativeFilename(currentPath);
        System.out.println("file name : i = " + j++ + " path=" + currentPath);
    }
}
From source file: org.deeplearning4j.examples.MDSIterator.java
License: Apache License

private MultiDataSet nextMultiDataSet(int num) throws IOException {
    String previousPath = stack.isEmpty() ? "" : stack.peek().toUri().getPath();
    for (int i = 0; i < num && hdfsIterator.hasNext(); i++) {
        for (int j = 0; j < numSteps; j++) {
            if (!hdfsIterator.hasNext())
                break;
            LocatedFileStatus next = hdfsIterator.next();
            Path path = next.getPath();
            String currentPath = path.toUri().getPath();
            String index = getRelativeFilename(currentPath);
            if (previousPath.contains(index.split("_")[0])) {
                if (j >= numSteps - 1 || !hdfsIterator.hasNext()) {
                    pushAndClear(path, index);
                } else {
                    stack.push(path);
                }
                previousPath = currentPath;
            } else {
                if (j >= numSteps - 1 || !hdfsIterator.hasNext()) {
                    pushAndClear(path, index);
                }
                ssRecordReader.newRecord(stack);
                stack.push(path);
                if (!previousPath.isEmpty()) {
                    break;
                }
                previousPath = currentPath;
            }
        }
    }
    return ssRecordReader.toMultiDataSet(vectorSize, labelSize);
}
From source file: org.deeplearning4j.hadoop.datasetiterator.BaseHdfsDataSetIterator.java
License: Apache License

/**
 * List all of the files in the hdfsUriRootDir directory.
 * @return the list of paths in the directory
 * @throws Exception if one occurs
 */
public List<Path> filesInDir() throws Exception {
    FileSystem fs = FileSystem.get(conf);
    List<Path> paths = new ArrayList<Path>();
    RemoteIterator<LocatedFileStatus> iter = fs.listFiles(new Path(hdfsUriRootDir), true);
    while (iter.hasNext()) {
        LocatedFileStatus l = iter.next();
        paths.add(l.getPath());
    }
    fs.close();
    return paths;
}
From source file: org.gbif.occurrence.download.oozie.ArchiveBuilder.java
License: Creative Commons License

/**
 * Appends the compressed files found within the directory to the zip stream as the named file.
 */
private void appendPreCompressedFile(ModalZipOutputStream out, Path dir, String filename, String headerRow)
        throws IOException {
    RemoteIterator<LocatedFileStatus> files = hdfs.listFiles(dir, false);
    List<InputStream> parts = Lists.newArrayList();

    // Add the header first, which must also be compressed
    ByteArrayOutputStream header = new ByteArrayOutputStream();
    D2Utils.compress(new ByteArrayInputStream(headerRow.getBytes()), header);
    parts.add(new ByteArrayInputStream(header.toByteArray()));

    // Locate the streams to the compressed content on HDFS
    while (files.hasNext()) {
        LocatedFileStatus fs = files.next();
        Path path = fs.getPath();
        if (path.toString().endsWith(D2Utils.FILE_EXTENSION)) {
            LOG.info("Deflated content to merge: " + path);
            parts.add(hdfs.open(path));
        }
    }

    // Create the Zip entry, and write the compressed bytes
    org.gbif.hadoop.compress.d2.zip.ZipEntry ze = new org.gbif.hadoop.compress.d2.zip.ZipEntry(filename);
    out.putNextEntry(ze, ModalZipOutputStream.MODE.PRE_DEFLATED);
    try (D2CombineInputStream in = new D2CombineInputStream(parts)) {
        ByteStreams.copy(in, out);
        in.close(); // important so counts are accurate
        ze.setSize(in.getUncompressedLength()); // important to set the sizes and CRC
        ze.setCompressedSize(in.getCompressedLength());
        ze.setCrc(in.getCrc32());
    } finally {
        out.closeEntry();
    }
}
From source file: org.opencloudengine.garuda.backend.hdfs.HdfsServiceImpl.java
License: Open Source License

@Override
public HdfsListInfo list(String path, int start, int end, final String filter) throws Exception {
    HdfsListInfo hdfsListInfo = new HdfsListInfo();
    this.indexCheck(start, end);
    this.mustExists(path);
    FileSystem fs = fileSystemFactory.getFileSystem();
    Path fsPath = new Path(path);
    FileStatus fileStatus = fs.getFileStatus(fsPath);
    if (!fileStatus.isDirectory()) {
        this.notDirectoryException(fsPath.toString());
    }
    List<HdfsFileInfo> listStatus = new ArrayList<>();
    int count = 0;
    FileStatus fileStatuses = null;
    LocatedFileStatus next = null;
    RemoteIterator<LocatedFileStatus> remoteIterator = fs.listLocatedStatus(fsPath);
    while (remoteIterator.hasNext()) {
        next = remoteIterator.next();
        if (!StringUtils.isEmpty(filter)) {
            if (next.getPath().getName().contains(filter)) {
                count++;
                if (count >= start && count <= end) {
                    fileStatuses = fs.getFileStatus(next.getPath());
                    listStatus.add(new HdfsFileInfo(fileStatuses, fs.getContentSummary(fileStatuses.getPath())));
                }
            }
        } else {
            count++;
            if (count >= start && count <= end) {
                fileStatuses = fs.getFileStatus(next.getPath());
                listStatus.add(new HdfsFileInfo(fileStatuses, fs.getContentSummary(fileStatuses.getPath())));
            }
        }
    }
    hdfsListInfo.setFileInfoList(listStatus);
    hdfsListInfo.setCount(count);
    return hdfsListInfo;
}