List of usage examples for org.apache.hadoop.fs.LocatedFileStatus#getPath
public Path getPath()
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
/**
 * Walks the input path and appends every accepted file to the result list,
 * descending into each accepted directory. Entries rejected by the filter are
 * skipped entirely, so a rejected directory is never descended into.
 *
 * @param result
 *            The List to store all files.
 * @param fs
 *            The FileSystem.
 * @param path
 *            The input path.
 * @param inputFilter
 *            The input filter that can be used to filter files/dirs.
 * @throws IOException
 *             if listing a path on the file system fails
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter inputFilter) throws IOException {
    final RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(path);
    while (entries.hasNext()) {
        final LocatedFileStatus entry = entries.next();
        final Path entryPath = entry.getPath();
        if (!inputFilter.accept(entryPath)) {
            continue;
        }
        if (entry.isDirectory()) {
            addInputPathRecursively(result, fs, entryPath, inputFilter);
        } else {
            result.add(entry);
        }
    }
}
From source file:com.ikanow.aleph2.remote.hdfs_test.SimpleHdfsTest.java
License:Apache License
public void runTest() throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException, IllegalArgumentException, IOException { final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator; final IStorageService storage = _service_context.getStorageService(); final FileContext fc = (FileContext) storage .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get(); final FileContext lfc = (FileContext) storage .getUnderlyingPlatformDriver(FileContext.class, IStorageService.LOCAL_FS).get(); System.out.println("FILES IN BUCKET ROOT"); final RemoteIterator<LocatedFileStatus> it = fc.util().listFiles(new Path(storage.getBucketRootPath()), true);//ww w .j a va 2 s .co m boolean first = true; while (it.hasNext()) { final LocatedFileStatus lfs = it.next(); if (first) { first = false; lfc.util().copy(lfs.getPath(), lfc.makeQualified(new Path(temp_dir + "ALEX.txt"))); } System.out.println(lfs); } System.out.println("FILES/DIRS IN BUCKET ROOT"); Stream<FileStatus> dirstream = Arrays.stream(fc.util().listStatus(new Path(storage.getBucketRootPath()))); dirstream.forEach(fs -> System.out.println(fs)); }
From source file:com.linkedin.pinot.filesystem.HadoopPinotFS.java
License:Apache License
@Override public String[] listFiles(URI fileUri) throws IOException { ArrayList<String> filePathStrings = new ArrayList<>(); Path path = new Path(fileUri); if (hadoopFS.exists(path)) { RemoteIterator<LocatedFileStatus> fileListItr = hadoopFS.listFiles(path, true); while (fileListItr != null && fileListItr.hasNext()) { LocatedFileStatus file = fileListItr.next(); filePathStrings.add(file.getPath().toUri().toString()); }//from w w w . j a v a 2s .c o m } else { throw new IllegalArgumentException("segmentUri is not valid"); } String[] retArray = new String[filePathStrings.size()]; filePathStrings.toArray(retArray); return retArray; }
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
@Override public void open(Configuration parameters) throws Exception { super.open(parameters); subtaskIndex = getRuntimeContext().getIndexOfThisSubtask(); state = new State<T>(); Path baseDirectory = new Path(basePath); hadoopConf = HadoopFileSystem.getHadoopConfiguration(); FileSystem fs = baseDirectory.getFileSystem(hadoopConf); refTruncate = reflectTruncate(fs);// w w w. jav a2s . c o m long currentProcessingTime = ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime(); checkForInactiveBuckets(currentProcessingTime); ((StreamingRuntimeContext) getRuntimeContext()) .registerTimer(currentProcessingTime + inactiveBucketCheckInterval, this); this.clock = new Clock() { @Override public long currentTimeMillis() { return ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime(); } }; // delete pending/in-progress files that might be left if we fail while // no checkpoint has yet been done try { if (fs.exists(baseDirectory) && cleanupOnOpen) { RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(baseDirectory, true); while (bucketFiles.hasNext()) { LocatedFileStatus file = bucketFiles.next(); if (file.getPath().toString().endsWith(pendingSuffix)) { // only delete files that contain our subtask index if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) { LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString()); fs.delete(file.getPath(), true); } } if (file.getPath().toString().endsWith(inProgressSuffix)) { // only delete files that contain our subtask index if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) { LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString()); fs.delete(file.getPath(), true); } } } } } catch (IOException e) { LOG.error("Error while deleting leftover pending/in-progress files: {}", e); throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e); } }
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
@Override public void restoreState(State<T> state) { this.state = state; FileSystem fs;//www. ja va 2 s . c o m try { fs = new Path(basePath).getFileSystem(HadoopFileSystem.getHadoopConfiguration()); } catch (IOException e) { LOG.error("Error while creating FileSystem in checkpoint restore.", e); throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e); } for (BucketState<T> bucketState : state.bucketStates.values()) { // we can clean all the pending files since they where renamed to final files // after this checkpoint was successful bucketState.pendingFiles.clear(); if (bucketState.currentFile != null) { // We were writing to a file when the last checkpoint occured. This file can either // be still in-progress or became a pending file at some point after the checkpoint. // Either way, we have to truncate it back to a valid state (or write a .valid-length) // file that specifies up to which length it is valid and rename it to the final name // before starting a new bucket file. 
Path partPath = new Path(bucketState.currentFile); try { Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName()) .suffix(pendingSuffix); Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName()) .suffix(inProgressSuffix); if (fs.exists(partPendingPath)) { LOG.debug( "In-progress file {} has been moved to pending after checkpoint, moving to final location.", partPath); // has been moved to pending in the mean time, rename to final location fs.rename(partPendingPath, partPath); } else if (fs.exists(partInProgressPath)) { LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath); // it was still in progress, rename to final path fs.rename(partInProgressPath, partPath); } else if (fs.exists(partPath)) { LOG.debug("In-Progress file {} was already moved to final location {}.", bucketState.currentFile, partPath); } else { LOG.debug( "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, " + "it was moved to final location by a previous snapshot restore", bucketState.currentFile); } refTruncate = reflectTruncate(fs); // truncate it or write a ".valid-length" file to specify up to which point it is valid if (refTruncate != null) { LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength); // some-one else might still hold the lease from a previous try, we are // recovering, after all ... 
if (fs instanceof DistributedFileSystem) { DistributedFileSystem dfs = (DistributedFileSystem) fs; LOG.debug("Trying to recover file lease {}", partPath); dfs.recoverLease(partPath); boolean isclosed = dfs.isFileClosed(partPath); StopWatch sw = new StopWatch(); sw.start(); while (!isclosed) { if (sw.getTime() > asyncTimeout) { break; } try { Thread.sleep(500); } catch (InterruptedException e1) { // ignore it } isclosed = dfs.isFileClosed(partPath); } } Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath, bucketState.currentFileValidLength); if (!truncated) { LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath); // we must wait for the asynchronous truncate operation to complete StopWatch sw = new StopWatch(); sw.start(); long newLen = fs.getFileStatus(partPath).getLen(); while (newLen != bucketState.currentFileValidLength) { if (sw.getTime() > asyncTimeout) { break; } try { Thread.sleep(500); } catch (InterruptedException e1) { // ignore it } newLen = fs.getFileStatus(partPath).getLen(); } if (newLen != bucketState.currentFileValidLength) { throw new RuntimeException("Truncate did not truncate to right length. 
Should be " + bucketState.currentFileValidLength + " is " + newLen + "."); } } } else { LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath, bucketState.currentFileValidLength); Path validLengthFilePath = new Path(partPath.getParent(), validLengthPrefix + partPath.getName()).suffix(validLengthSuffix); if (!fs.exists(validLengthFilePath)) { FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath); lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength)); lengthFileOut.close(); } } // Now that we've restored the bucket to a valid state, reset the current file info bucketState.currentFile = null; bucketState.currentFileValidLength = -1; } catch (IOException e) { LOG.error("Error while restoring BucketingSink state.", e); throw new RuntimeException("Error while restoring BucketingSink state.", e); } catch (InvocationTargetException | IllegalAccessException e) { LOG.error("Cound not invoke truncate.", e); throw new RuntimeException("Could not invoke truncate.", e); } } LOG.debug("Clearing pending/in-progress files."); // Move files that are confirmed by a checkpoint but did not get moved to final location // because the checkpoint notification did not happen before a failure Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet(); LOG.debug("Moving pending files to final location on restore."); for (Long pastCheckpointId : pastCheckpointIds) { // All the pending files are buckets that have been completed but are waiting to be renamed // to their final name for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) { Path finalPath = new Path(filename); Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName()) .suffix(pendingSuffix); try { if (fs.exists(pendingPath)) { LOG.debug( "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.", pendingPath, pastCheckpointId); fs.rename(pendingPath, finalPath); } } catch 
(IOException e) { LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath, finalPath, e); throw new RuntimeException( "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e); } } } synchronized (bucketState.pendingFilesPerCheckpoint) { bucketState.pendingFilesPerCheckpoint.clear(); } } // we need to get this here since open() has not yet been called int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask(); // delete pending files try { RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true); while (bucketFiles.hasNext()) { LocatedFileStatus file = bucketFiles.next(); if (file.getPath().toString().endsWith(pendingSuffix)) { // only delete files that contain our subtask index if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) { LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString()); fs.delete(file.getPath(), true); } } if (file.getPath().toString().endsWith(inProgressSuffix)) { // only delete files that contain our subtask index if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) { LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString()); fs.delete(file.getPath(), true); } } } } catch (IOException e) { LOG.error("Error while deleting old pending files: {}", e); throw new RuntimeException("Error while deleting old pending files.", e); } }
From source file:com.nagarro.nteg.utils.HDFSDirectoryFilesDataReader.java
License:Apache License
@Override protected FileDataBufferedReader getFileDataBufferedReaderForNewFile() throws IOException { final Path hdfsPath = new Path(dirPathName); Path locatedFilePath = null;//www . j ava 2 s .c o m final RemoteIterator<LocatedFileStatus> locatedFileIterator = hdfs.listFiles(hdfsPath, true); while (locatedFileIterator != null && locatedFileIterator.hasNext()) { final LocatedFileStatus locatedFileStatus = locatedFileIterator.next(); final Path tmpPath = locatedFileStatus.getPath(); final String pathName = tmpPath.getName(); if (LOG.isInfoEnabled()) { LOG.info("Checking file with name[Log]: " + pathName); } if (!(pathName.endsWith(FileDataBufferedReader.IN_PROGRESS_FILE_SUFFIX) || pathName.endsWith(FileDataBufferedReader.PROCESSED_FILE_SUFFIX))) { locatedFilePath = tmpPath; break; } } FileDataBufferedReader fileDataBufferedReader = null; if (locatedFilePath != null) { fileDataBufferedReader = new HDFSFileDataBufferedReader(locatedFilePath, batchSize); } return fileDataBufferedReader; }
From source file:com.streamsets.pipeline.stage.it.HdfsAvroSchemaSerIT.java
License:Apache License
private void verifySerializationLocation(String location) throws IOException { FileSystem fs = BaseHiveIT.getDefaultFileSystem(); Path path = new Path(BaseHiveIT.getDefaultFsUri() + location); Assert.assertTrue("Location does not exist:" + location, fs.exists(path)); boolean found = false; RemoteIterator<LocatedFileStatus> fsIterator = fs.listFiles(path, false); while (!found || fsIterator.hasNext()) { LocatedFileStatus status = fsIterator.next(); LOG.info("Found file: " + status.getPath().getName()); found = status.getPath().getName().startsWith(AVRO_SCHEMA_SERIALIZATION_PREFIX); }//from w ww. j av a 2 s . c o m fs.delete(path, true); Assert.assertTrue("Avro schema file not found in the location " + location, found); }
From source file:com.thinkbiganalytics.spark.io.ZipStreamingOutputTest.java
License:Apache License
/** * Verify streaming output./*from ww w . j av a 2s . c om*/ */ @Test public void test() throws Exception { // Mock file system final FileSystem fs = Mockito.mock(FileSystem.class); final Path source = new Path("/tmp/source"); final LocatedFileStatus file1 = createFile("_SUCCESS", source); final LocatedFileStatus file2 = createFile("part-0", source); Mockito.when(fs.listFiles(source, false)) .thenReturn(new ForwardingRemoteIterator<>(Iterators.forArray(file1, file2))); final FSDataInputStream fileStream = new FSDataInputStream(new SeekableNullInputStream()); Mockito.when(fs.open(file1.getPath())).thenReturn(fileStream); Mockito.when(fs.open(file2.getPath())).thenReturn(fileStream); final CountDownLatch deleteLatch = new CountDownLatch(1); Mockito.when(fs.delete(source, true)).then(new Answer<Boolean>() { @Override public Boolean answer(final InvocationOnMock invocation) { deleteLatch.countDown(); return true; } }); // Write ZIP to output stream final ByteArrayOutputStream out = new ByteArrayOutputStream(); final ZipStreamingOutput zip = new ZipStreamingOutput(source, fs); zip.write(out); // Verify output stream final ZipInputStream in = new ZipInputStream(out.toInputStream()); ZipEntry entry = in.getNextEntry(); Assert.assertNotNull("Missing _SUCCESS entry", entry); Assert.assertEquals("_SUCCESS", entry.getName()); entry = in.getNextEntry(); Assert.assertNotNull("Missing part-0 entry", entry); Assert.assertEquals("part-0", entry.getName()); entry = in.getNextEntry(); Assert.assertNull("Unexpected entry", entry); // Verify path deleted deleteLatch.await(1, TimeUnit.SECONDS); Mockito.verify(fs).delete(source, true); }
From source file:com.toy.TomcatContainerRunnable.java
License:Apache License
@Override public void run() { LOG.info("Setting up Tomcat container launch for container id {} / war {}", container.getId(), war); ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class); // Set the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); try {//from ww w . j a va 2s . c o m final RemoteIterator<LocatedFileStatus> libs = fs.listFiles(path, false); while (libs.hasNext()) { final LocatedFileStatus next = libs.next(); LOG.debug("Register {} for container", next.getPath()); LocalResource lib = Records.newRecord(LocalResource.class); lib.setType(LocalResourceType.FILE); lib.setVisibility(LocalResourceVisibility.APPLICATION); lib.setResource(ConverterUtils.getYarnUrlFromURI(next.getPath().toUri())); lib.setTimestamp(next.getModificationTime()); lib.setSize(next.getLen()); localResources.put(next.getPath().getName(), lib); } ctx.setLocalResources(localResources); } catch (IOException e) { LOG.error("Error while fetching Tomcat libraries : {}", e.getLocalizedMessage(), e); } // Build classpath StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$()) .append(File.pathSeparatorChar).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { classPathEnv.append(File.pathSeparatorChar); classPathEnv.append(c.trim()); } classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } Map<String, String> env = new HashMap<String, String>(); env.put("CLASSPATH", classPathEnv.toString()); env.put(Constants.WAR, war); env.put(Constants.ZOOKEEPER_QUORUM, System.getenv(Constants.ZOOKEEPER_QUORUM)); ctx.setEnvironment(env); // Set the necessary command to execute 
the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(ApplicationConstants.Environment.JAVA_HOME.$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + 32 + "m"); vargs.add("com.toy.TomcatLauncher"); vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Tomcat.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Tomcat.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); ctx.setCommands(commands); nmClientAsync.startContainerAsync(container, ctx); }
From source file:com.uber.hoodie.common.model.HoodieTestUtils.java
License:Apache License
public static FileStatus[] listAllDataFilesInPath(FileSystem fs, String basePath) throws IOException { RemoteIterator<LocatedFileStatus> itr = fs.listFiles(new Path(basePath), true); List<FileStatus> returns = Lists.newArrayList(); while (itr.hasNext()) { LocatedFileStatus status = itr.next(); if (status.getPath().getName().contains(".parquet")) { returns.add(status);//from w w w. ja v a 2 s.c o m } } return returns.toArray(new FileStatus[returns.size()]); }