Example usage for org.apache.hadoop.fs LocatedFileStatus getPath

List of usage examples for org.apache.hadoop.fs LocatedFileStatus getPath

Introduction

This page collects usage examples for org.apache.hadoop.fs.LocatedFileStatus#getPath, gathered from open-source projects. The method is inherited from FileStatus and returns the Path of the file or directory that the status object describes.

Prototype

public Path getPath() 
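
For orientation, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the typical pattern: recursively list a directory and use getPath() to recover each entry's fully qualified Path. The directory "/tmp/example" is an arbitrary placeholder.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class GetPathExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // listFiles(path, true) walks the tree recursively and yields only files.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/example"), true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            // getPath() returns the fully qualified Path of the listed entry.
            System.out.println(status.getPath().toUri());
        }
    }
}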

Usage

From source file: com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License: Apache License

/**
 * Add files in the input path recursively into the results.
 * @param result
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs. 
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter inputFilter) throws IOException {
    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
    while (iter.hasNext()) {
        LocatedFileStatus stat = iter.next();
        if (inputFilter.accept(stat.getPath())) {
            if (stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
            } else {
                result.add(stat);
            }
        }
    }
}

From source file: com.ikanow.aleph2.remote.hdfs_test.SimpleHdfsTest.java

License: Apache License

public void runTest() throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException,
        IllegalArgumentException, IOException {
    final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;

    final IStorageService storage = _service_context.getStorageService();

    final FileContext fc = (FileContext) storage
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();

    final FileContext lfc = (FileContext) storage
            .getUnderlyingPlatformDriver(FileContext.class, IStorageService.LOCAL_FS).get();

    System.out.println("FILES IN BUCKET ROOT");

    final RemoteIterator<LocatedFileStatus> it = fc.util().listFiles(new Path(storage.getBucketRootPath()),
            true);
    boolean first = true;
    while (it.hasNext()) {
        final LocatedFileStatus lfs = it.next();
        if (first) {
            first = false;
            lfc.util().copy(lfs.getPath(), lfc.makeQualified(new Path(temp_dir + "ALEX.txt")));
        }
        System.out.println(lfs);
    }

    System.out.println("FILES/DIRS IN BUCKET ROOT");

    Stream<FileStatus> dirstream = Arrays.stream(fc.util().listStatus(new Path(storage.getBucketRootPath())));

    dirstream.forEach(fs -> System.out.println(fs));
}

From source file: com.linkedin.pinot.filesystem.HadoopPinotFS.java

License: Apache License

@Override
public String[] listFiles(URI fileUri) throws IOException {
    ArrayList<String> filePathStrings = new ArrayList<>();
    Path path = new Path(fileUri);
    if (hadoopFS.exists(path)) {
        RemoteIterator<LocatedFileStatus> fileListItr = hadoopFS.listFiles(path, true);
        while (fileListItr != null && fileListItr.hasNext()) {
            LocatedFileStatus file = fileListItr.next();
            filePathStrings.add(file.getPath().toUri().toString());
        }
    } else {
        throw new IllegalArgumentException("fileUri " + fileUri + " does not exist");
    }
    String[] retArray = new String[filePathStrings.size()];
    filePathStrings.toArray(retArray);
    return retArray;
}

From source file: com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License: Apache License

@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();

    state = new State<T>();

    Path baseDirectory = new Path(basePath);
    hadoopConf = HadoopFileSystem.getHadoopConfiguration();
    FileSystem fs = baseDirectory.getFileSystem(hadoopConf);
    refTruncate = reflectTruncate(fs);

    long currentProcessingTime = ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();

    checkForInactiveBuckets(currentProcessingTime);

    ((StreamingRuntimeContext) getRuntimeContext())
            .registerTimer(currentProcessingTime + inactiveBucketCheckInterval, this);

    this.clock = new Clock() {
        @Override
        public long currentTimeMillis() {
            return ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();
        }
    };

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(baseDirectory) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(baseDirectory, true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files: {}", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}

From source file: com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License: Apache License

@Override
public void restoreState(State<T> state) {
    this.state = state;

    FileSystem fs;
    try {
        fs = new Path(basePath).getFileSystem(HadoopFileSystem.getHadoopConfiguration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }

    for (BucketState<T> bucketState : state.bucketStates.values()) {
        // we can clean all the pending files since they were renamed to final files
        // after this checkpoint was successful
        bucketState.pendingFiles.clear();

        if (bucketState.currentFile != null) {
            // We were writing to a file when the last checkpoint occurred. This file can either
            // still be in-progress or may have become a pending file at some point after the
            // checkpoint. Either way, we have to truncate it back to a valid state (or write a
            // .valid-length file that specifies up to which length it is valid) and rename it
            // to the final name before starting a new bucket file.
            Path partPath = new Path(bucketState.currentFile);
            try {
                Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                        .suffix(pendingSuffix);
                Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                        .suffix(inProgressSuffix);

                if (fs.exists(partPendingPath)) {
                    LOG.debug(
                            "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                            partPath);
                    // has been moved to pending in the meantime, rename to final location
                    fs.rename(partPendingPath, partPath);
                } else if (fs.exists(partInProgressPath)) {
                    LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                    // it was still in progress, rename to final path
                    fs.rename(partInProgressPath, partPath);
                } else if (fs.exists(partPath)) {
                    LOG.debug("In-Progress file {} was already moved to final location {}.",
                            bucketState.currentFile, partPath);
                } else {
                    LOG.debug(
                            "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                    + "it was moved to final location by a previous snapshot restore",
                            bucketState.currentFile);
                }

                refTruncate = reflectTruncate(fs);
                // truncate it or write a ".valid-length" file to specify up to which point it is valid
                if (refTruncate != null) {
                    LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                    // someone else might still hold the lease from a previous try; we are
                    // recovering, after all ...
                    if (fs instanceof DistributedFileSystem) {
                        DistributedFileSystem dfs = (DistributedFileSystem) fs;
                        LOG.debug("Trying to recover file lease {}", partPath);
                        dfs.recoverLease(partPath);
                        boolean isclosed = dfs.isFileClosed(partPath);
                        StopWatch sw = new StopWatch();
                        sw.start();
                        while (!isclosed) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            isclosed = dfs.isFileClosed(partPath);
                        }
                    }
                    Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                            bucketState.currentFileValidLength);
                    if (!truncated) {
                        LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                        // we must wait for the asynchronous truncate operation to complete
                        StopWatch sw = new StopWatch();
                        sw.start();
                        long newLen = fs.getFileStatus(partPath).getLen();
                        while (newLen != bucketState.currentFileValidLength) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            newLen = fs.getFileStatus(partPath).getLen();
                        }
                        if (newLen != bucketState.currentFileValidLength) {
                            throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                    + bucketState.currentFileValidLength + " is " + newLen + ".");
                        }
                    }

                } else {
                    LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                            bucketState.currentFileValidLength);
                    Path validLengthFilePath = new Path(partPath.getParent(),
                            validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                    if (!fs.exists(validLengthFilePath)) {
                        FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                        lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                        lengthFileOut.close();
                    }
                }

                // Now that we've restored the bucket to a valid state, reset the current file info
                bucketState.currentFile = null;
                bucketState.currentFileValidLength = -1;
            } catch (IOException e) {
                LOG.error("Error while restoring BucketingSink state.", e);
                throw new RuntimeException("Error while restoring BucketingSink state.", e);
            } catch (InvocationTargetException | IllegalAccessException e) {
                LOG.error("Cound not invoke truncate.", e);
                throw new RuntimeException("Could not invoke truncate.", e);
            }
        }

        LOG.debug("Clearing pending/in-progress files.");

        // Move files that are confirmed by a checkpoint but did not get moved to final location
        // because the checkpoint notification did not happen before a failure

        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        LOG.debug("Moving pending files to final location on restore.");
        for (Long pastCheckpointId : pastCheckpointIds) {
            // All the pending files belong to buckets that have been completed but are still
            // waiting to be renamed to their final name
            for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                Path finalPath = new Path(filename);
                Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                        .suffix(pendingSuffix);

                try {
                    if (fs.exists(pendingPath)) {
                        LOG.debug(
                                "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                                pendingPath, pastCheckpointId);
                        fs.rename(pendingPath, finalPath);
                    }
                } catch (IOException e) {
                    LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}",
                            pendingPath, finalPath, e);
                    throw new RuntimeException(
                            "Error while renaming pending file " + pendingPath + " to final path " + finalPath,
                            e);
                }
            }
        }

        synchronized (bucketState.pendingFilesPerCheckpoint) {
            bucketState.pendingFilesPerCheckpoint.clear();
        }
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}

From source file: com.nagarro.nteg.utils.HDFSDirectoryFilesDataReader.java

License: Apache License

@Override
protected FileDataBufferedReader getFileDataBufferedReaderForNewFile() throws IOException {

    final Path hdfsPath = new Path(dirPathName);

    Path locatedFilePath = null;
    final RemoteIterator<LocatedFileStatus> locatedFileIterator = hdfs.listFiles(hdfsPath, true);
    while (locatedFileIterator != null && locatedFileIterator.hasNext()) {
        final LocatedFileStatus locatedFileStatus = locatedFileIterator.next();

        final Path tmpPath = locatedFileStatus.getPath();

        final String pathName = tmpPath.getName();

        if (LOG.isInfoEnabled()) {
            LOG.info("Checking file with name[Log]: " + pathName);
        }

        if (!(pathName.endsWith(FileDataBufferedReader.IN_PROGRESS_FILE_SUFFIX)
                || pathName.endsWith(FileDataBufferedReader.PROCESSED_FILE_SUFFIX))) {
            locatedFilePath = tmpPath;
            break;
        }
    }

    FileDataBufferedReader fileDataBufferedReader = null;
    if (locatedFilePath != null) {
        fileDataBufferedReader = new HDFSFileDataBufferedReader(locatedFilePath, batchSize);
    }

    return fileDataBufferedReader;
}

From source file: com.streamsets.pipeline.stage.it.HdfsAvroSchemaSerIT.java

License: Apache License

private void verifySerializationLocation(String location) throws IOException {
    FileSystem fs = BaseHiveIT.getDefaultFileSystem();
    Path path = new Path(BaseHiveIT.getDefaultFsUri() + location);
    Assert.assertTrue("Location does not exist:" + location, fs.exists(path));
    boolean found = false;
    RemoteIterator<LocatedFileStatus> fsIterator = fs.listFiles(path, false);
    // stop as soon as the schema file is found (or the directory is exhausted)
    while (!found && fsIterator.hasNext()) {
        LocatedFileStatus status = fsIterator.next();
        LOG.info("Found file: " + status.getPath().getName());
        found = status.getPath().getName().startsWith(AVRO_SCHEMA_SERIALIZATION_PREFIX);
    }
    fs.delete(path, true);
    Assert.assertTrue("Avro schema file not found in the location " + location, found);
}

From source file: com.thinkbiganalytics.spark.io.ZipStreamingOutputTest.java

License: Apache License

/**
 * Verify streaming output.
 */
@Test
public void test() throws Exception {
    // Mock file system
    final FileSystem fs = Mockito.mock(FileSystem.class);
    final Path source = new Path("/tmp/source");

    final LocatedFileStatus file1 = createFile("_SUCCESS", source);
    final LocatedFileStatus file2 = createFile("part-0", source);
    Mockito.when(fs.listFiles(source, false))
            .thenReturn(new ForwardingRemoteIterator<>(Iterators.forArray(file1, file2)));

    final FSDataInputStream fileStream = new FSDataInputStream(new SeekableNullInputStream());
    Mockito.when(fs.open(file1.getPath())).thenReturn(fileStream);
    Mockito.when(fs.open(file2.getPath())).thenReturn(fileStream);

    final CountDownLatch deleteLatch = new CountDownLatch(1);
    Mockito.when(fs.delete(source, true)).then(new Answer<Boolean>() {
        @Override
        public Boolean answer(final InvocationOnMock invocation) {
            deleteLatch.countDown();
            return true;
        }
    });

    // Write ZIP to output stream
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final ZipStreamingOutput zip = new ZipStreamingOutput(source, fs);
    zip.write(out);

    // Verify output stream
    final ZipInputStream in = new ZipInputStream(out.toInputStream());

    ZipEntry entry = in.getNextEntry();
    Assert.assertNotNull("Missing _SUCCESS entry", entry);
    Assert.assertEquals("_SUCCESS", entry.getName());

    entry = in.getNextEntry();
    Assert.assertNotNull("Missing part-0 entry", entry);
    Assert.assertEquals("part-0", entry.getName());

    entry = in.getNextEntry();
    Assert.assertNull("Unexpected entry", entry);

    // Verify path deleted
    deleteLatch.await(1, TimeUnit.SECONDS);
    Mockito.verify(fs).delete(source, true);
}

From source file: com.toy.TomcatContainerRunnable.java

License: Apache License

@Override
public void run() {
    LOG.info("Setting up Tomcat container launch for container id {} / war {}", container.getId(), war);
    ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
    // Set the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    try {
        final RemoteIterator<LocatedFileStatus> libs = fs.listFiles(path, false);
        while (libs.hasNext()) {
            final LocatedFileStatus next = libs.next();
            LOG.debug("Register {} for container", next.getPath());
            LocalResource lib = Records.newRecord(LocalResource.class);
            lib.setType(LocalResourceType.FILE);
            lib.setVisibility(LocalResourceVisibility.APPLICATION);
            lib.setResource(ConverterUtils.getYarnUrlFromURI(next.getPath().toUri()));
            lib.setTimestamp(next.getModificationTime());
            lib.setSize(next.getLen());
            localResources.put(next.getPath().getName(), lib);
        }
        ctx.setLocalResources(localResources);
    } catch (IOException e) {
        LOG.error("Error while fetching Tomcat libraries : {}", e.getLocalizedMessage(), e);
    }

    // Build classpath
    StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$())
            .append(File.pathSeparatorChar).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }
    Map<String, String> env = new HashMap<String, String>();
    env.put("CLASSPATH", classPathEnv.toString());
    env.put(Constants.WAR, war);
    env.put(Constants.ZOOKEEPER_QUORUM, System.getenv(Constants.ZOOKEEPER_QUORUM));
    ctx.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);
    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(ApplicationConstants.Environment.JAVA_HOME.$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + 32 + "m");
    vargs.add("com.toy.TomcatLauncher");
    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Tomcat.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Tomcat.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    ctx.setCommands(commands);

    nmClientAsync.startContainerAsync(container, ctx);
}

From source file: com.uber.hoodie.common.model.HoodieTestUtils.java

License: Apache License

public static FileStatus[] listAllDataFilesInPath(FileSystem fs, String basePath) throws IOException {
    RemoteIterator<LocatedFileStatus> itr = fs.listFiles(new Path(basePath), true);
    List<FileStatus> returns = Lists.newArrayList();
    while (itr.hasNext()) {
        LocatedFileStatus status = itr.next();
        if (status.getPath().getName().contains(".parquet")) {
            returns.add(status);
        }
    }
    return returns.toArray(new FileStatus[returns.size()]);
}