Usage examples for org.apache.hadoop.fs.LocatedFileStatus#getPath()
public Path getPath()
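LocatedFileStatus.getPath() returns the fully qualified Path of an entry produced by a listing call such as FileSystem.listFiles or FileSystem.listLocatedStatus; the examples below all follow that pattern. As a minimal, self-contained sketch (the directory "/tmp/data" and the class name are placeholders, not taken from any of the examples):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListPathsExample {
    public static void main(String[] args) throws IOException {
        // Placeholder directory; point this at any existing path on the default file system.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // listFiles yields LocatedFileStatus entries; getPath() gives each entry's full Path.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            Path p = status.getPath();
            System.out.println(p.getName() + " -> " + p.toUri());
        }
    }
}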
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentPuller.java
License:Apache License
FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
    try {
        org.apache.commons.io.FileUtils.forceMkdir(outDir);
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "");
    }
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {
            // -------- directory ---------
            try {
                return RetryUtils.retry(() -> {
                    if (!fs.exists(path)) {
                        throw new SegmentLoadingException("No files found at [%s]", path.toString());
                    }
                    final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                    final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                    while (children.hasNext()) {
                        final LocatedFileStatus child = children.next();
                        final Path childPath = child.getPath();
                        final String fname = childPath.getName();
                        if (fs.isDirectory(childPath)) {
                            log.warn("[%s] is a child directory, skipping", childPath.toString());
                        } else {
                            final File outFile = new File(outDir, fname);
                            try (final FSDataInputStream in = fs.open(childPath)) {
                                NativeIO.chunkedCopy(in, outFile);
                            }
                            result.addFile(outFile);
                        }
                    }
                    log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
                            outDir.getAbsolutePath());
                    return result;
                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {
            // -------- zip ---------
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);
            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outDir.getAbsolutePath());
            return result;
        } else if (CompressionUtils.isGz(path.getName())) {
            // -------- gzip ---------
            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);
            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}
From source file:org.apache.druid.storage.hdfs.tasklog.HdfsTaskLogs.java
License:Apache License
@Override
public void killOlderThan(long timestamp) throws IOException {
    Path taskLogDir = new Path(config.getDirectory());
    FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
    if (fs.exists(taskLogDir)) {
        if (!fs.isDirectory(taskLogDir)) {
            throw new IOE("taskLogDir [%s] must be a directory.", taskLogDir);
        }

        RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(taskLogDir);
        while (iter.hasNext()) {
            LocatedFileStatus file = iter.next();
            if (file.getModificationTime() < timestamp) {
                Path p = file.getPath();
                log.info("Deleting hdfs task log [%s].", p.toUri().toString());
                fs.delete(p, true);
            }

            if (Thread.currentThread().isInterrupted()) {
                throw new IOException(
                        new InterruptedException("Thread interrupted. Couldn't delete all tasklogs."));
            }
        }
    }
}
From source file:org.apache.falcon.extensions.store.ExtensionStore.java
License:Apache License
public String getResource(final String extensionResourcePath) throws FalconException {
    StringBuilder definition = new StringBuilder();
    Path resourcePath = new Path(extensionResourcePath);
    FileSystem fileSystem = HadoopClientFactory.get().createFalconFileSystem(resourcePath.toUri());
    try {
        if (fileSystem.isFile(resourcePath)) {
            definition.append(getExtensionResource(extensionResourcePath.toString()));
        } else {
            RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem.listFiles(resourcePath, false);
            while (fileStatusListIterator.hasNext()) {
                LocatedFileStatus fileStatus = fileStatusListIterator.next();
                Path filePath = fileStatus.getPath();
                definition.append("Contents of file ").append(filePath.getName()).append(":\n");
                definition.append(getExtensionResource(filePath.toString())).append("\n \n");
            }
        }
    } catch (IOException e) {
        LOG.error("Exception while getting file(s) with path : " + extensionResourcePath, e);
        throw new StoreAccessException(e);
    }
    return definition.toString();
}
From source file:org.apache.falcon.service.SharedLibraryHostingService.java
License:Apache License
private void pushExtensionArtifactsToCluster(final Cluster cluster, final FileSystem clusterFs)
        throws FalconException {
    if (!Services.get().isRegistered(ExtensionService.SERVICE_NAME)) {
        LOG.info("ExtensionService not registered, return");
        return;
    }

    ExtensionStore store = ExtensionStore.get();
    if (!store.isExtensionStoreInitialized()) {
        LOG.info("Extension store not initialized by Extension service. Make sure Extension service is added in "
                + "start up properties");
        return;
    }

    final String filterPath = "/apps/falcon/extensions/mirroring/";
    Path extensionStorePath = store.getExtensionStorePath();
    LOG.info("extensionStorePath :{}", extensionStorePath);
    FileSystem falconFileSystem = HadoopClientFactory.get().createFalconFileSystem(extensionStorePath.toUri());
    String nameNode = StringUtils
            .removeEnd(falconFileSystem.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), File.separator);

    String clusterStorageUrl = StringUtils.removeEnd(ClusterHelper.getStorageUrl(cluster), File.separator);

    // If default fs for Falcon server is same as cluster fs abort copy
    if (nameNode.equalsIgnoreCase(clusterStorageUrl)) {
        LOG.info("clusterStorageUrl :{} same return", clusterStorageUrl);
        return;
    }

    try {
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = falconFileSystem
                .listFiles(extensionStorePath, true);

        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus srcfileStatus = fileStatusListIterator.next();
            Path filePath = Path.getPathWithoutSchemeAndAuthority(srcfileStatus.getPath());

            if (filePath != null && filePath.toString().startsWith(filterPath)) {
                /* HiveDR uses filter path as store path in DRStatusStore, so skip it.
                   Copy only the extension artifacts */
                continue;
            }

            if (srcfileStatus.isDirectory()) {
                if (!clusterFs.exists(filePath)) {
                    HadoopClientFactory.mkdirs(clusterFs, filePath, srcfileStatus.getPermission());
                }
            } else {
                if (clusterFs.exists(filePath)) {
                    FileStatus targetfstat = clusterFs.getFileStatus(filePath);
                    if (targetfstat.getLen() == srcfileStatus.getLen()) {
                        continue;
                    }
                }

                Path parentPath = filePath.getParent();
                if (!clusterFs.exists(parentPath)) {
                    FsPermission dirPerm = falconFileSystem.getFileStatus(parentPath).getPermission();
                    HadoopClientFactory.mkdirs(clusterFs, parentPath, dirPerm);
                }

                FileUtil.copy(falconFileSystem, srcfileStatus, clusterFs, filePath, false, true,
                        falconFileSystem.getConf());
                FileUtil.chmod(clusterFs.makeQualified(filePath).toString(),
                        srcfileStatus.getPermission().toString());
            }
        }
    } catch (IOException | InterruptedException e) {
        throw new FalconException("Failed to copy extension artifacts to cluster" + cluster.getName(), e);
    }
}
From source file:org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkFaultTolerance2ITCase.java
License:Apache License
@Override
public void postSubmit() throws Exception {
    // We read the files and verify that we have read all the strings. If a valid-length
    // file exists we only read the file to that point. (This test should work with
    // FileSystems that support truncate() and with others as well.)

    Pattern messageRegex = Pattern.compile("message (\\d*)");

    // Keep a set of the message IDs that we read. The size must equal the read count and
    // the NUM_STRINGS. If numRead is bigger than the size of the set we have seen some
    // elements twice.
    Set<Integer> readNumbers = Sets.newHashSet();
    int numRead = 0;

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    while (files.hasNext()) {
        LocatedFileStatus file = files.next();

        if (!file.getPath().toString().endsWith(".valid-length")) {
            int validLength = (int) file.getLen();
            if (dfs.exists(file.getPath().suffix(".valid-length"))) {
                FSDataInputStream inStream = dfs.open(file.getPath().suffix(".valid-length"));
                String validLengthString = inStream.readUTF();
                validLength = Integer.parseInt(validLengthString);
                System.out.println("VALID LENGTH: " + validLength);
            }
            FSDataInputStream inStream = dfs.open(file.getPath());
            byte[] buffer = new byte[validLength];
            inStream.readFully(0, buffer, 0, validLength);
            inStream.close();

            ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
            InputStreamReader inStreamReader = new InputStreamReader(bais);
            BufferedReader br = new BufferedReader(inStreamReader);

            String line = br.readLine();
            while (line != null) {
                Matcher matcher = messageRegex.matcher(line);
                if (matcher.matches()) {
                    numRead++;
                    int messageId = Integer.parseInt(matcher.group(1));
                    readNumbers.add(messageId);
                } else {
                    Assert.fail("Read line does not match expected pattern.");
                }
                line = br.readLine();
            }
            br.close();
            inStreamReader.close();
            bais.close();
        }
    }

    // Verify that we read all strings (at-least-once)
    Assert.assertEquals(NUM_STRINGS, readNumbers.size());

    // Verify that we don't have duplicates (boom!, exactly-once)
    Assert.assertEquals(NUM_STRINGS, numRead);
}
From source file:org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkFaultToleranceITCase.java
License:Apache License
@Override
public void postSubmit() throws Exception {
    // We read the files and verify that we have read all the strings. If a valid-length
    // file exists we only read the file to that point. (This test should work with
    // FileSystems that support truncate() and with others as well.)

    Pattern messageRegex = Pattern.compile("message (\\d*)");

    // Keep a set of the message IDs that we read. The size must equal the read count and
    // the NUM_STRINGS. If numRead is bigger than the size of the set we have seen some
    // elements twice.
    Set<Integer> readNumbers = Sets.newHashSet();

    HashSet<String> uniqMessagesRead = new HashSet<>();
    HashSet<String> messagesInCommittedFiles = new HashSet<>();

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    while (files.hasNext()) {
        LocatedFileStatus file = files.next();

        if (!file.getPath().toString().endsWith(".valid-length")) {
            int validLength = (int) file.getLen();
            if (dfs.exists(file.getPath().suffix(".valid-length"))) {
                FSDataInputStream inStream = dfs.open(file.getPath().suffix(".valid-length"));
                String validLengthString = inStream.readUTF();
                validLength = Integer.parseInt(validLengthString);
                System.out.println("VALID LENGTH: " + validLength);
            }
            FSDataInputStream inStream = dfs.open(file.getPath());
            byte[] buffer = new byte[validLength];
            inStream.readFully(0, buffer, 0, validLength);
            inStream.close();

            ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
            InputStreamReader inStreamReader = new InputStreamReader(bais);
            BufferedReader br = new BufferedReader(inStreamReader);

            String line = br.readLine();
            while (line != null) {
                Matcher matcher = messageRegex.matcher(line);
                if (matcher.matches()) {
                    uniqMessagesRead.add(line);

                    // check that in the committed files there are no duplicates
                    if (!file.getPath().toString().endsWith(IN_PROGRESS_SUFFIX)
                            && !file.getPath().toString().endsWith(PENDING_SUFFIX)) {
                        if (!messagesInCommittedFiles.add(line)) {
                            Assert.fail("Duplicate entry in committed bucket.");
                        }
                    }

                    int messageId = Integer.parseInt(matcher.group(1));
                    readNumbers.add(messageId);
                } else {
                    Assert.fail("Read line does not match expected pattern.");
                }
                line = br.readLine();
            }
            br.close();
            inStreamReader.close();
            bais.close();
        }
    }

    // Verify that we read all strings (at-least-once)
    Assert.assertEquals(NUM_STRINGS, readNumbers.size());

    // Verify that we don't have duplicates (boom!, exactly-once)
    Assert.assertEquals(NUM_STRINGS, uniqMessagesRead.size());
}
From source file:org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkTest.java
License:Apache License
/**
 * This uses {@link DateTimeBucketer} to
 * produce rolling files. We use {@link OneInputStreamOperatorTestHarness} to manually
 * advance processing time.
 */
@Test
public void testDateTimeRollingStringWriter() throws Exception {
    final int numElements = 20;

    final String outPath = hdfsURI + "/rolling-out";

    BucketingSink<String> sink = new BucketingSink<String>(outPath)
            .setBucketer(new DateTimeBucketer<String>("ss"))
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix("");

    OneInputStreamOperatorTestHarness<String, Object> testHarness = createTestSink(sink, 1, 0);

    testHarness.setProcessingTime(0L);

    testHarness.setup();
    testHarness.open();

    for (int i = 0; i < numElements; i++) {
        // Every 5 elements, increase the clock time. We should end up with 5 elements per bucket.
        if (i % 5 == 0) {
            testHarness.setProcessingTime(i * 1000L);
        }
        testHarness.processElement(new StreamRecord<>("message #" + Integer.toString(i)));
    }

    testHarness.close();

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // We should have 4 rolling files across 4 time intervals
    int numFiles = 0;
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        numFiles++;
        if (file.getPath().toString().contains("rolling-out/00")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 0; i < 5; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/05")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 5; i < 10; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/10")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 10; i < 15; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/15")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 15; i < 20; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else {
            Assert.fail("File " + file + " does not match any expected roll pattern.");
        }
    }

    Assert.assertEquals(4, numFiles);
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    partCounter = 0;

    this.writer = writerTemplate.duplicate();

    if (bucketState == null) {
        bucketState = new BucketState();
    }

    FileSystem fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    refTruncate = reflectTruncate(fs);

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(new Path(basePath)) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files: {}", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clean all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();

    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }

    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // be still in-progress or became a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a .valid-length
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);

            if (fs.exists(partPendingPath)) {
                LOG.debug("In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the mean time, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-Progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug("In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                        + "it was moved to final location by a previous snapshot restore", bucketState.currentFile);
            }

            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try, we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath, bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }
            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }

            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Could not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }

    LOG.debug("Clearing pending/in-progress files.");

    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure
    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);
            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug("(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}",
                        pendingPath, finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();

    // delete pending files
    try {
        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSinkITCase.java
License:Apache License
/**
 * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to
 * produce rolling files. The clock of DateTimeBucketer is set to
 * {@link ModifyableClock} to keep the time in lockstep with the processing of elements using
 * latches.
 */
@Test
public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";
    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source = env.addSource(new WaitingTestSourceFunction(NUM_ELEMENTS))
            .broadcast();

    // the parallel flatMap is chained to the sink, so when it has seen 5 elements it can
    // fire the latch
    DataStream<String> mapped = source.flatMap(new RichFlatMapFunction<Tuple2<Integer, String>, String>() {
        private static final long serialVersionUID = 1L;

        int count = 0;

        @Override
        public void flatMap(Tuple2<Integer, String> value, Collector<String> out) throws Exception {
            out.collect(value.f1);
            count++;
            if (count >= 5) {
                if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                    latch1.trigger();
                } else {
                    latch2.trigger();
                }
                count = 0;
            }
        }
    });

    RollingSink<String> sink = new RollingSink<String>(outPath)
            .setBucketer(new DateTimeBucketer("ss"))
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // we should have 8 rolling files, 4 time intervals and parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        numFiles++;
        if (file.getPath().toString().contains("rolling-out/00")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 0; i < 5; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/05")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 5; i < 10; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/10")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 10; i < 15; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/15")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 15; i < 20; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else {
            Assert.fail("File " + file + " does not match any expected roll pattern.");
        }
    }

    Assert.assertEquals(8, numFiles);
}