Example usage for org.apache.hadoop.fs LocatedFileStatus getPath

List of usage examples for org.apache.hadoop.fs LocatedFileStatus getPath

Introduction

On this page you can find example usage for org.apache.hadoop.fs LocatedFileStatus getPath.

Prototype

public Path getPath() 
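
Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects; the class name and the fallback directory path are illustrative) of the typical pattern: list a directory with FileSystem.listFiles() and read the Path of each returned LocatedFileStatus.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListLocatedPaths {
    public static void main(String[] args) throws IOException {
        // The directory is only an example; pass any HDFS or local path.
        Path dir = new Path(args.length > 0 ? args[0] : "/tmp/example-dir");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // The second argument controls whether subdirectories are listed recursively.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, false);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            // getPath() returns the fully qualified Path of the listed entry.
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}

Passing true instead of false to listFiles() makes the listing recursive, as several of the examples below do.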

Usage

From source file: org.apache.druid.storage.hdfs.HdfsDataSegmentPuller.java

License: Apache License
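Pulls a Druid segment from HDFS to a local directory. When the source path is a directory, the method lists its children with fs.listFiles() and calls getPath() on each LocatedFileStatus to skip nested directories and copy every regular file; zip and gzip inputs are handled by separate branches.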

FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
    try {
        org.apache.commons.io.FileUtils.forceMkdir(outDir);
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "");
    }
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {

            // --------    directory     ---------

            try {
                return RetryUtils.retry(() -> {
                    if (!fs.exists(path)) {
                        throw new SegmentLoadingException("No files found at [%s]", path.toString());
                    }

                    final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                    final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                    while (children.hasNext()) {
                        final LocatedFileStatus child = children.next();
                        final Path childPath = child.getPath();
                        final String fname = childPath.getName();
                        if (fs.isDirectory(childPath)) {
                            log.warn("[%s] is a child directory, skipping", childPath.toString());
                        } else {
                            final File outFile = new File(outDir, fname);
                            try (final FSDataInputStream in = fs.open(childPath)) {
                                NativeIO.chunkedCopy(in, outFile);
                            }
                            result.addFile(outFile);
                        }
                    }
                    log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
                            outDir.getAbsolutePath());
                    return result;
                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {

            // --------    zip     ---------

            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);

            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outDir.getAbsolutePath());

            return result;
        } else if (CompressionUtils.isGz(path.getName())) {

            // --------    gzip     ---------

            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);

            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}

From source file: org.apache.druid.storage.hdfs.tasklog.HdfsTaskLogs.java

License: Apache License
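Deletes HDFS task logs older than the given timestamp. For every located file whose modification time is too old, getPath() supplies the path passed to fs.delete().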

@Override
public void killOlderThan(long timestamp) throws IOException {
    Path taskLogDir = new Path(config.getDirectory());
    FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
    if (fs.exists(taskLogDir)) {

        if (!fs.isDirectory(taskLogDir)) {
            throw new IOE("taskLogDir [%s] must be a directory.", taskLogDir);
        }

        RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(taskLogDir);
        while (iter.hasNext()) {
            LocatedFileStatus file = iter.next();
            if (file.getModificationTime() < timestamp) {
                Path p = file.getPath();
                log.info("Deleting hdfs task log [%s].", p.toUri().toString());
                fs.delete(p, true);
            }

            if (Thread.currentThread().isInterrupted()) {
                throw new IOException(
                        new InterruptedException("Thread interrupted. Couldn't delete all tasklogs."));
            }
        }
    }
}

From source file: org.apache.falcon.extensions.store.ExtensionStore.java

License: Apache License
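Builds the textual definition of an extension resource. When the resource path is a directory, the method iterates its files and uses getPath() to obtain each file's name and full path before appending its contents.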

public String getResource(final String extensionResourcePath) throws FalconException {
    StringBuilder definition = new StringBuilder();
    Path resourcePath = new Path(extensionResourcePath);
    FileSystem fileSystem = HadoopClientFactory.get().createFalconFileSystem(resourcePath.toUri());
    try {
        if (fileSystem.isFile(resourcePath)) {
            definition.append(getExtensionResource(extensionResourcePath.toString()));
        } else {
            RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem.listFiles(resourcePath,
                    false);
            while (fileStatusListIterator.hasNext()) {
                LocatedFileStatus fileStatus = fileStatusListIterator.next();
                Path filePath = fileStatus.getPath();
                definition.append("Contents of file ").append(filePath.getName()).append(":\n");
                definition.append(getExtensionResource(filePath.toString())).append("\n \n");
            }
        }
    } catch (IOException e) {
        LOG.error("Exception while getting file(s) with path : " + extensionResourcePath, e);
        throw new StoreAccessException(e);
    }

    return definition.toString();

}

From source file: org.apache.falcon.service.SharedLibraryHostingService.java

License: Apache License
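Copies extension artifacts from the Falcon file system to a cluster file system. getPath() is stripped of scheme and authority to derive the target path, and each source file is copied unless an identically sized copy already exists on the cluster.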

private void pushExtensionArtifactsToCluster(final Cluster cluster, final FileSystem clusterFs)
        throws FalconException {
    if (!Services.get().isRegistered(ExtensionService.SERVICE_NAME)) {
        LOG.info("ExtensionService not registered, return");
        return;
    }

    ExtensionStore store = ExtensionStore.get();
    if (!store.isExtensionStoreInitialized()) {
        LOG.info(
                "Extension store not initialized by Extension service. Make sure Extension service is added in "
                        + "start up properties");
        return;
    }

    final String filterPath = "/apps/falcon/extensions/mirroring/";
    Path extensionStorePath = store.getExtensionStorePath();
    LOG.info("extensionStorePath :{}", extensionStorePath);
    FileSystem falconFileSystem = HadoopClientFactory.get().createFalconFileSystem(extensionStorePath.toUri());
    String nameNode = StringUtils
            .removeEnd(falconFileSystem.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), File.separator);

    String clusterStorageUrl = StringUtils.removeEnd(ClusterHelper.getStorageUrl(cluster), File.separator);

    // If the default fs for the Falcon server is the same as the cluster fs, abort the copy
    if (nameNode.equalsIgnoreCase(clusterStorageUrl)) {
        LOG.info("clusterStorageUrl :{} same return", clusterStorageUrl);
        return;
    }

    try {
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = falconFileSystem
                .listFiles(extensionStorePath, true);

        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus srcfileStatus = fileStatusListIterator.next();
            Path filePath = Path.getPathWithoutSchemeAndAuthority(srcfileStatus.getPath());

            if (filePath != null && filePath.toString().startsWith(filterPath)) {
                /* HiveDR uses filter path as store path in DRStatusStore, so skip it. Copy only the extension
                 artifacts */
                continue;
            }

            if (srcfileStatus.isDirectory()) {
                if (!clusterFs.exists(filePath)) {
                    HadoopClientFactory.mkdirs(clusterFs, filePath, srcfileStatus.getPermission());
                }
            } else {
                if (clusterFs.exists(filePath)) {
                    FileStatus targetfstat = clusterFs.getFileStatus(filePath);
                    if (targetfstat.getLen() == srcfileStatus.getLen()) {
                        continue;
                    }
                }

                Path parentPath = filePath.getParent();
                if (!clusterFs.exists(parentPath)) {
                    FsPermission dirPerm = falconFileSystem.getFileStatus(parentPath).getPermission();
                    HadoopClientFactory.mkdirs(clusterFs, parentPath, dirPerm);
                }

                FileUtil.copy(falconFileSystem, srcfileStatus, clusterFs, filePath, false, true,
                        falconFileSystem.getConf());
                FileUtil.chmod(clusterFs.makeQualified(filePath).toString(),
                        srcfileStatus.getPermission().toString());
            }
        }
    } catch (IOException | InterruptedException e) {
        throw new FalconException("Failed to copy extension artifacts to cluster " + cluster.getName(), e);
    }
}

From source file: org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkFaultTolerance2ITCase.java

License: Apache License
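Test verification that recursively lists the sink output; for every file that is not a .valid-length marker, getPath() is used to open the file (and its optional .valid-length companion) and check at-least-once and exactly-once delivery of the written messages.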

@Override
public void postSubmit() throws Exception {
    // We read the files and verify that we have read all the strings. If a valid-length
    // file exists we only read the file to that point. (This test should work with
    // FileSystems that support truncate() and with others as well.)

    Pattern messageRegex = Pattern.compile("message (\\d*)");

    // Keep a set of the message IDs that we read. The size must equal the read count and
    // the NUM_STRINGS. If numRead is bigger than the size of the set we have seen some
    // elements twice.
    Set<Integer> readNumbers = Sets.newHashSet();
    int numRead = 0;

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    while (files.hasNext()) {
        LocatedFileStatus file = files.next();

        if (!file.getPath().toString().endsWith(".valid-length")) {
            int validLength = (int) file.getLen();
            if (dfs.exists(file.getPath().suffix(".valid-length"))) {
                FSDataInputStream inStream = dfs.open(file.getPath().suffix(".valid-length"));
                String validLengthString = inStream.readUTF();
                validLength = Integer.parseInt(validLengthString);
                System.out.println("VALID LENGTH: " + validLength);
            }
            FSDataInputStream inStream = dfs.open(file.getPath());
            byte[] buffer = new byte[validLength];
            inStream.readFully(0, buffer, 0, validLength);
            inStream.close();

            ByteArrayInputStream bais = new ByteArrayInputStream(buffer);

            InputStreamReader inStreamReader = new InputStreamReader(bais);
            BufferedReader br = new BufferedReader(inStreamReader);

            String line = br.readLine();
            while (line != null) {
                Matcher matcher = messageRegex.matcher(line);
                if (matcher.matches()) {
                    numRead++;
                    int messageId = Integer.parseInt(matcher.group(1));
                    readNumbers.add(messageId);
                } else {
                    Assert.fail("Read line does not match expected pattern.");
                }
                line = br.readLine();
            }
            br.close();
            inStreamReader.close();
            bais.close();
        }
    }

    // Verify that we read all strings (at-least-once)
    Assert.assertEquals(NUM_STRINGS, readNumbers.size());

    // Verify that we don't have duplicates (boom!, exactly-once)
    Assert.assertEquals(NUM_STRINGS, numRead);
}

From source file: org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkFaultToleranceITCase.java

License: Apache License
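Similar verification: getPath() is used to open each part file and its optional .valid-length companion, and to tell committed files apart from in-progress and pending ones when checking for duplicates.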

@Override
public void postSubmit() throws Exception {
    // We read the files and verify that we have read all the strings. If a valid-length
    // file exists we only read the file to that point. (This test should work with
    // FileSystems that support truncate() and with others as well.)

    Pattern messageRegex = Pattern.compile("message (\\d*)");

    // Keep a set of the message IDs that we read. The size must equal the read count and
    // the NUM_STRINGS. If numRead is bigger than the size of the set we have seen some
    // elements twice.
    Set<Integer> readNumbers = Sets.newHashSet();

    HashSet<String> uniqMessagesRead = new HashSet<>();
    HashSet<String> messagesInCommittedFiles = new HashSet<>();

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    while (files.hasNext()) {
        LocatedFileStatus file = files.next();

        if (!file.getPath().toString().endsWith(".valid-length")) {
            int validLength = (int) file.getLen();
            if (dfs.exists(file.getPath().suffix(".valid-length"))) {
                FSDataInputStream inStream = dfs.open(file.getPath().suffix(".valid-length"));
                String validLengthString = inStream.readUTF();
                validLength = Integer.parseInt(validLengthString);
                System.out.println("VALID LENGTH: " + validLength);
            }
            FSDataInputStream inStream = dfs.open(file.getPath());
            byte[] buffer = new byte[validLength];
            inStream.readFully(0, buffer, 0, validLength);
            inStream.close();

            ByteArrayInputStream bais = new ByteArrayInputStream(buffer);

            InputStreamReader inStreamReader = new InputStreamReader(bais);
            BufferedReader br = new BufferedReader(inStreamReader);

            String line = br.readLine();
            while (line != null) {
                Matcher matcher = messageRegex.matcher(line);
                if (matcher.matches()) {
                    uniqMessagesRead.add(line);

                    // check that in the committed files there are no duplicates
                    if (!file.getPath().toString().endsWith(IN_PROGRESS_SUFFIX)
                            && !file.getPath().toString().endsWith(PENDING_SUFFIX)) {
                        if (!messagesInCommittedFiles.add(line)) {
                            Assert.fail("Duplicate entry in committed bucket.");
                        }
                    }

                    int messageId = Integer.parseInt(matcher.group(1));
                    readNumbers.add(messageId);
                } else {
                    Assert.fail("Read line does not match expected pattern.");
                }
                line = br.readLine();
            }
            br.close();
            inStreamReader.close();
            bais.close();
        }
    }

    // Verify that we read all strings (at-least-once)
    Assert.assertEquals(NUM_STRINGS, readNumbers.size());

    // Verify that we don't have duplicates (boom!, exactly-once)
    Assert.assertEquals(NUM_STRINGS, uniqMessagesRead.size());
}

From source file: org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkTest.java

License: Apache License
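Unit test that rolls files by processing time and then matches getPath() against the expected bucket directories (rolling-out/00 through rolling-out/15) before reading the contents back.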

/**
 * This uses {@link DateTimeBucketer} to
 * produce rolling files. We use {@link OneInputStreamOperatorTestHarness} to manually
 * advance processing time.
 */
@Test
public void testDateTimeRollingStringWriter() throws Exception {
    final int numElements = 20;

    final String outPath = hdfsURI + "/rolling-out";

    BucketingSink<String> sink = new BucketingSink<String>(outPath)
            .setBucketer(new DateTimeBucketer<String>("ss")).setPartPrefix(PART_PREFIX).setPendingPrefix("")
            .setPendingSuffix("");

    OneInputStreamOperatorTestHarness<String, Object> testHarness = createTestSink(sink, 1, 0);

    testHarness.setProcessingTime(0L);

    testHarness.setup();
    testHarness.open();

    for (int i = 0; i < numElements; i++) {
        // Every 5 elements, increase the clock time. We should end up with 5 elements per bucket.
        if (i % 5 == 0) {
            testHarness.setProcessingTime(i * 1000L);
        }
        testHarness.processElement(new StreamRecord<>("message #" + Integer.toString(i)));
    }

    testHarness.close();

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // We should have 4 rolling files across 4 time intervals
    int numFiles = 0;
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        numFiles++;
        if (file.getPath().toString().contains("rolling-out/00")) {
            FSDataInputStream inStream = dfs.open(file.getPath());

            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 0; i < 5; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/05")) {
            FSDataInputStream inStream = dfs.open(file.getPath());

            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 5; i < 10; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/10")) {
            FSDataInputStream inStream = dfs.open(file.getPath());

            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 10; i < 15; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/15")) {
            FSDataInputStream inStream = dfs.open(file.getPath());

            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 15; i < 20; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else {
            Assert.fail("File " + file + " does not match any expected roll pattern.");
        }
    }

    Assert.assertEquals(4, numFiles);
}

From source file: org.apache.flink.streaming.connectors.fs.RollingSink.java

License: Apache License
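On open, the sink lists the base path recursively and uses getPath() to find and delete leftover pending and in-progress part files that belong to this subtask.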

@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    partCounter = 0;

    this.writer = writerTemplate.duplicate();

    if (bucketState == null) {
        bucketState = new BucketState();
    }

    FileSystem fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    refTruncate = reflectTruncate(fs);

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(new Path(basePath)) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files: {}", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}

From source file: org.apache.flink.streaming.connectors.fs.RollingSink.java

License: Apache License
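On restore, the sink truncates or finalizes the file that was being written when the checkpoint was taken, renames pending files to their final names, and then uses getPath() on the listed bucket files to delete stale pending and in-progress files for this subtask.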

@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clean all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();
    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }
    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // still be in progress or have become a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a .valid-length
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);

            if (fs.exists(partPendingPath)) {
                LOG.debug(
                        "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the mean time, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-Progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug(
                        "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                + "it was moved to final location by a previous snapshot restore",
                        bucketState.currentFile);
            }

            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try; we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                        bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }

            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }

            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Cound not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }

    LOG.debug("Clearing pending/in-progress files.");

    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure

    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);

            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug(
                            "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath,
                        finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}

From source file: org.apache.flink.streaming.connectors.fs.RollingSinkITCase.java

License: Apache License
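Integration test that writes with a DateTimeBucketer driven by a ModifyableClock and then checks each file's getPath() against the expected bucket directories before verifying the written messages.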

/**
 * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to
 * produce rolling files. The clock of DateTimeBucketer is set to
 * {@link ModifyableClock} to keep the time in lockstep with the processing of elements using
 * latches.
 */
@Test
public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";
    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source = env.addSource(new WaitingTestSourceFunction(NUM_ELEMENTS))
            .broadcast();

    // the parallel flatMap is chained to the sink, so when it has seen 5 elements it can
    // fire the latch
    DataStream<String> mapped = source.flatMap(new RichFlatMapFunction<Tuple2<Integer, String>, String>() {
        private static final long serialVersionUID = 1L;

        int count = 0;

        @Override
        public void flatMap(Tuple2<Integer, String> value, Collector<String> out) throws Exception {
            out.collect(value.f1);
            count++;
            if (count >= 5) {
                if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                    latch1.trigger();
                } else {
                    latch2.trigger();
                }
                count = 0;
            }
        }

    });

    RollingSink<String> sink = new RollingSink<String>(outPath).setBucketer(new DateTimeBucketer("ss"))
            .setPartPrefix("part").setPendingPrefix("").setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // we should have 8 rolling files, 4 time intervals and parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        numFiles++;
        if (file.getPath().toString().contains("rolling-out/00")) {
            FSDataInputStream inStream = dfs.open(file.getPath());

            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 0; i < 5; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/05")) {
            FSDataInputStream inStream = dfs.open(file.getPath());

            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 5; i < 10; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/10")) {
            FSDataInputStream inStream = dfs.open(file.getPath());

            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 10; i < 15; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/15")) {
            FSDataInputStream inStream = dfs.open(file.getPath());

            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));

            for (int i = 15; i < 20; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }

            inStream.close();
        } else {
            Assert.fail("File " + file + " does not match any expected roll pattern.");
        }
    }

    Assert.assertEquals(8, numFiles);
}