Example usage for org.apache.hadoop.fs LocatedFileStatus getPath

List of usage examples for org.apache.hadoop.fs LocatedFileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs.LocatedFileStatus.getPath().

Prototype

public Path getPath() 
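
Before the project-specific examples below, here is a minimal, self-contained sketch of the typical pattern: obtain a RemoteIterator<LocatedFileStatus> from FileSystem.listFiles() and call getPath() on each entry to get the file's full Path. The class name GetPathExample and the directory /tmp/data are illustrative placeholders, not taken from the projects below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class GetPathExample {
    public static void main(String[] args) throws IOException {
        // Connect to the default file system configured in core-site.xml
        FileSystem fs = FileSystem.get(new Configuration());
        // Recursively list files under the placeholder directory
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/tmp/data"), true);
        while (files.hasNext()) {
            LocatedFileStatus status = files.next();
            Path path = status.getPath(); // full path of the listed file
            System.out.println(path + " -> " + path.getName());
        }
        fs.close();
    }
}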

Usage

From source file:org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java

License:Apache License

@Test(timeout = 5000)
public void testNewSplitsGen() throws Exception {

    DataSourceDescriptor dataSource = generateDataSourceDescriptorMapReduce(newSplitsDir);

    Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(
            dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));

    RemoteIterator<LocatedFileStatus> files = remoteFs.listFiles(newSplitsDir, false);

    boolean foundSplitsFile = false;
    boolean foundMetaFile = false;
    int totalFilesFound = 0;

    while (files.hasNext()) {
        LocatedFileStatus status = files.next();
        String fName = status.getPath().getName();
        totalFilesFound++;
        if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
            foundSplitsFile = true;
        } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
            foundMetaFile = true;
        } else {
            Assert.fail("Found invalid file in splits dir, filename=" + fName);
        }
        Assert.assertTrue(status.getLen() > 0);
    }

    Assert.assertEquals(2, totalFilesFound);
    Assert.assertTrue(foundSplitsFile);
    Assert.assertTrue(foundMetaFile);

    verifyLocationHints(newSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
}

From source file:org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java

License:Apache License

@Test(timeout = 5000)
public void testOldSplitsGen() throws Exception {
    DataSourceDescriptor dataSource = generateDataSourceDescriptorMapRed(oldSplitsDir);
    Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(
            dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));

    RemoteIterator<LocatedFileStatus> files = remoteFs.listFiles(oldSplitsDir, false);

    boolean foundSplitsFile = false;
    boolean foundMetaFile = false;
    int totalFilesFound = 0;

    while (files.hasNext()) {
        LocatedFileStatus status = files.next();
        String fName = status.getPath().getName();
        totalFilesFound++;
        if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
            foundSplitsFile = true;
        } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
            foundMetaFile = true;
        } else {
            Assert.fail("Found invalid file in splits dir, filename=" + fName);
        }
        Assert.assertTrue(status.getLen() > 0);
    }

    Assert.assertEquals(2, totalFilesFound);
    Assert.assertTrue(foundSplitsFile);
    Assert.assertTrue(foundMetaFile);

    verifyLocationHints(oldSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
}

From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer.java

License:Apache License

public static File copyDirectoryIfNonExistent(final FileSystem fileSystem, final String directory) {
    try {
        final String hadoopGremlinLibsRemote = "hadoop-gremlin-" + Gremlin.version() + "-libs";
        final Path path = new Path(directory);
        if (Boolean.valueOf(System.getProperty("is.testing", "false"))
                || (fileSystem.exists(path) && fileSystem.isDirectory(path))) {
            final File tempDirectory = new File(
                    System.getProperty("java.io.tmpdir") + File.separator + hadoopGremlinLibsRemote);
            assert tempDirectory.exists() || tempDirectory.mkdirs();
            final String tempPath = tempDirectory.getAbsolutePath() + File.separator + path.getName();
            final RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(path, false);
            while (files.hasNext()) {
                final LocatedFileStatus f = files.next();
                fileSystem.copyToLocalFile(false, f.getPath(),
                        new Path(tempPath + System.getProperty("file.separator") + f.getPath().getName()),
                        true);
            }
            return new File(tempPath);
        } else
            return new File(directory);
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}

From source file:org.datacleaner.spark.ApplicationDriver.java

License:Open Source License

private List<String> buildJarFiles(MutableRef<String> primaryJarRef) throws IOException {
    final List<String> list = new ArrayList<>();

    final Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://" + _hostname + ":" + _port);

    final FileSystem fs = FileSystem.newInstance(conf);
    try {
        final Path directoryPath = new Path(_jarDirectoryPath);
        final RemoteIterator<LocatedFileStatus> files = fs.listFiles(directoryPath, false);
        while (files.hasNext()) {
            final LocatedFileStatus file = files.next();
            final Path path = file.getPath();
            final String filename = path.getName();
            if (filename.startsWith(PRIMARY_JAR_FILENAME_PREFIX)) {
                primaryJarRef.set(path.toString());
            } else {
                list.add(path.toString());
            }
        }
    } finally {
        FileHelper.safeClose(fs);
    }

    if (primaryJarRef.get() == null) {
        throw new IllegalArgumentException("Failed to find primary jar (starting with '"
                + PRIMARY_JAR_FILENAME_PREFIX + "') in JAR file directory: " + _jarDirectoryPath);
    }

    return list;
}

From source file:org.deeplearning4j.datasets.DatasetReaderFromHdfs.java

License:Apache License

protected void doInitialize() {
    FileSystem fs = CommonUtils.openHdfsConnect();
    try {
        if (train) {
            hdfsIter = fs.listFiles(new Path(CommonUtils.TRAIN_HDFS_PATH), true);
        } else {
            hdfsIter = fs.listFiles(new Path(CommonUtils.VALIDATE_HDFS_PATH), true);
        }
        while (hdfsIter.hasNext()) {
            LocatedFileStatus next = hdfsIter.next();
            Path path = next.getPath();
            String currentPath = path.toUri().getPath();
            fileNames.add(path.toString());
            String name = FilenameUtils.getBaseName((new File(currentPath)).getParent());
            if (!labels.contains(name)) {
                labels.add(name);
            }

        }
        Collections.shuffle(fileNames);
        fileIterator = fileNames.iterator();
        numExample = fileNames.size();
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        CommonUtils.closeHdfsConnect(fs);
    }
}

From source file:org.deeplearning4j.datasets.DatasetReaderFromHdfs.java

License:Apache License

public static void main(String[] args) throws IOException {
    // hdfsIter is opened locally here (mirroring doInitialize() above) so the
    // example is self-contained and compiles as a standalone main().
    FileSystem fs = CommonUtils.openHdfsConnect();
    RemoteIterator<LocatedFileStatus> hdfsIter = fs.listFiles(new Path(CommonUtils.TRAIN_HDFS_PATH), true);
    int j = 0;
    while (hdfsIter.hasNext()) {
        LocatedFileStatus next = hdfsIter.next();
        Path path = next.getPath();
        String currentPath = path.toUri().getPath();
        //String index = getRelativeFilename(currentPath);
        System.out.println("file name : i = " + j++ + " path=" + currentPath);
    }
    CommonUtils.closeHdfsConnect(fs);
}

From source file:org.deeplearning4j.examples.MDSIterator.java

License:Apache License

private MultiDataSet nextMultiDataSet(int num) throws IOException {
    String previousPath = stack.isEmpty() ? "" : stack.peek().toUri().getPath();

    for (int i = 0; i < num && hdfsIterator.hasNext(); i++) {
        for (int j = 0; j < numSteps; j++) {
            if (!hdfsIterator.hasNext())
                break;
            LocatedFileStatus next = hdfsIterator.next();
            Path path = next.getPath();

            String currentPath = path.toUri().getPath();
            String index = getRelativeFilename(currentPath);

            if (previousPath.contains(index.split("_")[0])) {
                if (j >= numSteps - 1 || !hdfsIterator.hasNext()) {
                    pushAndClear(path, index);
                } else {
                    stack.push(path);
                }
                previousPath = currentPath;
            } else {
                if (j >= numSteps - 1 || !hdfsIterator.hasNext()) {
                    pushAndClear(path, index);
                }
                ssRecordReader.newRecord(stack);
                stack.push(path);
                if (!previousPath.isEmpty()) {
                    break;
                }
                previousPath = currentPath;
            }
        }
    }
    return ssRecordReader.toMultiDataSet(vectorSize, labelSize);
}

From source file:org.deeplearning4j.hadoop.datasetiterator.BaseHdfsDataSetIterator.java

License:Apache License

/**
 * List all of the files in the
 * hdfsUriRootDir directory
 * @return the list of paths in the directory
 * @throws Exception if one occurs
 */
public List<Path> filesInDir() throws Exception {
    FileSystem fs = FileSystem.get(conf);
    List<Path> paths = new ArrayList<Path>();
    RemoteIterator<LocatedFileStatus> iter = fs.listFiles(new Path(hdfsUriRootDir), true);
    while (iter.hasNext()) {
        LocatedFileStatus l = iter.next();
        paths.add(l.getPath());
    }

    fs.close();
    return paths;

}

From source file:org.gbif.occurrence.download.oozie.ArchiveBuilder.java

License:Creative Commons License

/**
 * Appends the compressed files found within the directory to the zip stream as the named file
 */
private void appendPreCompressedFile(ModalZipOutputStream out, Path dir, String filename, String headerRow)
        throws IOException {
    RemoteIterator<LocatedFileStatus> files = hdfs.listFiles(dir, false);
    List<InputStream> parts = Lists.newArrayList();

    // Add the header first, which must also be compressed
    ByteArrayOutputStream header = new ByteArrayOutputStream();
    D2Utils.compress(new ByteArrayInputStream(headerRow.getBytes()), header);
    parts.add(new ByteArrayInputStream(header.toByteArray()));

    // Locate the streams to the compressed content on HDFS
    while (files.hasNext()) {
        LocatedFileStatus fs = files.next();
        Path path = fs.getPath();
        if (path.toString().endsWith(D2Utils.FILE_EXTENSION)) {
            LOG.info("Deflated content to merge: " + path);
            parts.add(hdfs.open(path));
        }
    }

    // create the Zip entry, and write the compressed bytes
    org.gbif.hadoop.compress.d2.zip.ZipEntry ze = new org.gbif.hadoop.compress.d2.zip.ZipEntry(filename);
    out.putNextEntry(ze, ModalZipOutputStream.MODE.PRE_DEFLATED);
    try (D2CombineInputStream in = new D2CombineInputStream(parts)) {
        ByteStreams.copy(in, out);
        in.close(); // important so counts are accurate
        ze.setSize(in.getUncompressedLength()); // important to set the sizes and CRC
        ze.setCompressedSize(in.getCompressedLength());
        ze.setCrc(in.getCrc32());
    } finally {
        out.closeEntry();
    }
}

From source file:org.opencloudengine.garuda.backend.hdfs.HdfsServiceImpl.java

License:Open Source License

@Override
public HdfsListInfo list(String path, int start, int end, final String filter) throws Exception {
    HdfsListInfo hdfsListInfo = new HdfsListInfo();

    this.indexCheck(start, end);
    this.mustExists(path);

    FileSystem fs = fileSystemFactory.getFileSystem();
    Path fsPath = new Path(path);

    FileStatus fileStatus = fs.getFileStatus(fsPath);
    if (!fileStatus.isDirectory()) {
        this.notDirectoryException(fsPath.toString());
    }

    List<HdfsFileInfo> listStatus = new ArrayList<>();
    int count = 0;
    FileStatus fileStatuses = null;
    LocatedFileStatus next = null;
    RemoteIterator<LocatedFileStatus> remoteIterator = fs.listLocatedStatus(fsPath);
    while (remoteIterator.hasNext()) {
        next = remoteIterator.next();
        if (!StringUtils.isEmpty(filter)) {
            if (next.getPath().getName().contains(filter)) {
                count++;
                if (count >= start && count <= end) {
                    fileStatuses = fs.getFileStatus(next.getPath());
                    listStatus
                            .add(new HdfsFileInfo(fileStatuses, fs.getContentSummary(fileStatuses.getPath())));
                }
            }
        } else {
            count++;
            if (count >= start && count <= end) {
                fileStatuses = fs.getFileStatus(next.getPath());
                listStatus.add(new HdfsFileInfo(fileStatuses, fs.getContentSummary(fileStatuses.getPath())));
            }
        }
    }

    hdfsListInfo.setFileInfoList(listStatus);
    hdfsListInfo.setCount(count);
    return hdfsListInfo;
}