Usage examples for org.apache.hadoop.fs.LocatedFileStatus#getPath()
public Path getPath()
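LocatedFileStatus.getPath() returns the fully qualified Path of an entry produced by a listing call such as FileSystem.listFiles or FileSystem.listLocatedStatus; the examples below all follow that pattern. As a minimal, self-contained sketch (the directory "/tmp/data" and the class name are placeholders, not taken from any of the examples):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListPathsExample {
    public static void main(String[] args) throws IOException {
        // Placeholder directory; point this at any existing path on the default file system.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // listFiles yields LocatedFileStatus entries; getPath() gives each entry's full Path.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            Path p = status.getPath();
            System.out.println(p.getName() + " -> " + p.toUri());
        }
    }
}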
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentPuller.java
License:Apache License
FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
    try {
        org.apache.commons.io.FileUtils.forceMkdir(outDir);
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "");
    }
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {
            // -------- directory ---------
            try {
                return RetryUtils.retry(() -> {
                    if (!fs.exists(path)) {
                        throw new SegmentLoadingException("No files found at [%s]", path.toString());
                    }
                    final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                    final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                    while (children.hasNext()) {
                        final LocatedFileStatus child = children.next();
                        final Path childPath = child.getPath();
                        final String fname = childPath.getName();
                        if (fs.isDirectory(childPath)) {
                            log.warn("[%s] is a child directory, skipping", childPath.toString());
                        } else {
                            final File outFile = new File(outDir, fname);
                            try (final FSDataInputStream in = fs.open(childPath)) {
                                NativeIO.chunkedCopy(in, outFile);
                            }
                            result.addFile(outFile);
                        }
                    }
                    log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
                            outDir.getAbsolutePath());
                    return result;
                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {
            // -------- zip ---------
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);
            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outDir.getAbsolutePath());
            return result;
        } else if (CompressionUtils.isGz(path.getName())) {
            // -------- gzip ---------
            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);
            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}
From source file:org.apache.druid.storage.hdfs.tasklog.HdfsTaskLogs.java
License:Apache License
@Override
public void killOlderThan(long timestamp) throws IOException {
    Path taskLogDir = new Path(config.getDirectory());
    FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
    if (fs.exists(taskLogDir)) {
        if (!fs.isDirectory(taskLogDir)) {
            throw new IOE("taskLogDir [%s] must be a directory.", taskLogDir);
        }

        RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(taskLogDir);
        while (iter.hasNext()) {
            LocatedFileStatus file = iter.next();
            if (file.getModificationTime() < timestamp) {
                Path p = file.getPath();
                log.info("Deleting hdfs task log [%s].", p.toUri().toString());
                fs.delete(p, true);
            }

            if (Thread.currentThread().isInterrupted()) {
                throw new IOException(
                        new InterruptedException("Thread interrupted. Couldn't delete all tasklogs."));
            }
        }
    }
}
From source file:org.apache.falcon.extensions.store.ExtensionStore.java
License:Apache License
public String getResource(final String extensionResourcePath) throws FalconException {
    StringBuilder definition = new StringBuilder();
    Path resourcePath = new Path(extensionResourcePath);
    FileSystem fileSystem = HadoopClientFactory.get().createFalconFileSystem(resourcePath.toUri());
    try {
        if (fileSystem.isFile(resourcePath)) {
            definition.append(getExtensionResource(extensionResourcePath.toString()));
        } else {
            RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem.listFiles(resourcePath, false);
            while (fileStatusListIterator.hasNext()) {
                LocatedFileStatus fileStatus = fileStatusListIterator.next();
                Path filePath = fileStatus.getPath();
                definition.append("Contents of file ").append(filePath.getName()).append(":\n");
                definition.append(getExtensionResource(filePath.toString())).append("\n \n");
            }
        }
    } catch (IOException e) {
        LOG.error("Exception while getting file(s) with path : " + extensionResourcePath, e);
        throw new StoreAccessException(e);
    }
    return definition.toString();
}
From source file:org.apache.falcon.service.SharedLibraryHostingService.java
License:Apache License
private void pushExtensionArtifactsToCluster(final Cluster cluster, final FileSystem clusterFs)
        throws FalconException {
    if (!Services.get().isRegistered(ExtensionService.SERVICE_NAME)) {
        LOG.info("ExtensionService not registered, return");
        return;
    }

    ExtensionStore store = ExtensionStore.get();
    if (!store.isExtensionStoreInitialized()) {
        LOG.info("Extension store not initialized by Extension service. Make sure Extension service is added in "
                + "start up properties");
        return;
    }

    final String filterPath = "/apps/falcon/extensions/mirroring/";
    Path extensionStorePath = store.getExtensionStorePath();
    LOG.info("extensionStorePath :{}", extensionStorePath);
    FileSystem falconFileSystem = HadoopClientFactory.get().createFalconFileSystem(extensionStorePath.toUri());
    String nameNode = StringUtils
            .removeEnd(falconFileSystem.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), File.separator);

    String clusterStorageUrl = StringUtils.removeEnd(ClusterHelper.getStorageUrl(cluster), File.separator);

    // If default fs for Falcon server is same as cluster fs abort copy
    if (nameNode.equalsIgnoreCase(clusterStorageUrl)) {
        LOG.info("clusterStorageUrl :{} same return", clusterStorageUrl);
        return;
    }

    try {
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = falconFileSystem
                .listFiles(extensionStorePath, true);

        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus srcfileStatus = fileStatusListIterator.next();
            Path filePath = Path.getPathWithoutSchemeAndAuthority(srcfileStatus.getPath());

            if (filePath != null && filePath.toString().startsWith(filterPath)) {
                /* HiveDR uses filter path as store path in DRStatusStore, so skip it.
                   Copy only the extension artifacts */
                continue;
            }

            if (srcfileStatus.isDirectory()) {
                if (!clusterFs.exists(filePath)) {
                    HadoopClientFactory.mkdirs(clusterFs, filePath, srcfileStatus.getPermission());
                }
            } else {
                if (clusterFs.exists(filePath)) {
                    FileStatus targetfstat = clusterFs.getFileStatus(filePath);
                    if (targetfstat.getLen() == srcfileStatus.getLen()) {
                        continue;
                    }
                }

                Path parentPath = filePath.getParent();
                if (!clusterFs.exists(parentPath)) {
                    FsPermission dirPerm = falconFileSystem.getFileStatus(parentPath).getPermission();
                    HadoopClientFactory.mkdirs(clusterFs, parentPath, dirPerm);
                }

                FileUtil.copy(falconFileSystem, srcfileStatus, clusterFs, filePath, false, true,
                        falconFileSystem.getConf());
                FileUtil.chmod(clusterFs.makeQualified(filePath).toString(),
                        srcfileStatus.getPermission().toString());
            }
        }
    } catch (IOException | InterruptedException e) {
        throw new FalconException("Failed to copy extension artifacts to cluster" + cluster.getName(), e);
    }
}
From source file:org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkFaultTolerance2ITCase.java
License:Apache License
@Override
public void postSubmit() throws Exception {
    // We read the files and verify that we have read all the strings. If a valid-length
    // file exists we only read the file to that point. (This test should work with
    // FileSystems that support truncate() and with others as well.)

    Pattern messageRegex = Pattern.compile("message (\\d*)");

    // Keep a set of the message IDs that we read. The size must equal the read count and
    // the NUM_STRINGS. If numRead is bigger than the size of the set we have seen some
    // elements twice.
    Set<Integer> readNumbers = Sets.newHashSet();
    int numRead = 0;

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    while (files.hasNext()) {
        LocatedFileStatus file = files.next();

        if (!file.getPath().toString().endsWith(".valid-length")) {
            int validLength = (int) file.getLen();
            if (dfs.exists(file.getPath().suffix(".valid-length"))) {
                FSDataInputStream inStream = dfs.open(file.getPath().suffix(".valid-length"));
                String validLengthString = inStream.readUTF();
                validLength = Integer.parseInt(validLengthString);
                System.out.println("VALID LENGTH: " + validLength);
            }
            FSDataInputStream inStream = dfs.open(file.getPath());
            byte[] buffer = new byte[validLength];
            inStream.readFully(0, buffer, 0, validLength);
            inStream.close();

            ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
            InputStreamReader inStreamReader = new InputStreamReader(bais);
            BufferedReader br = new BufferedReader(inStreamReader);

            String line = br.readLine();
            while (line != null) {
                Matcher matcher = messageRegex.matcher(line);
                if (matcher.matches()) {
                    numRead++;
                    int messageId = Integer.parseInt(matcher.group(1));
                    readNumbers.add(messageId);
                } else {
                    Assert.fail("Read line does not match expected pattern.");
                }
                line = br.readLine();
            }
            br.close();
            inStreamReader.close();
            bais.close();
        }
    }

    // Verify that we read all strings (at-least-once)
    Assert.assertEquals(NUM_STRINGS, readNumbers.size());

    // Verify that we don't have duplicates (boom!, exactly-once)
    Assert.assertEquals(NUM_STRINGS, numRead);
}
From source file:org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkFaultToleranceITCase.java
License:Apache License
@Override
public void postSubmit() throws Exception {
    // We read the files and verify that we have read all the strings. If a valid-length
    // file exists we only read the file to that point. (This test should work with
    // FileSystems that support truncate() and with others as well.)

    Pattern messageRegex = Pattern.compile("message (\\d*)");

    // Keep a set of the message IDs that we read. The size must equal the read count and
    // the NUM_STRINGS. If numRead is bigger than the size of the set we have seen some
    // elements twice.
    Set<Integer> readNumbers = Sets.newHashSet();

    HashSet<String> uniqMessagesRead = new HashSet<>();
    HashSet<String> messagesInCommittedFiles = new HashSet<>();

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    while (files.hasNext()) {
        LocatedFileStatus file = files.next();

        if (!file.getPath().toString().endsWith(".valid-length")) {
            int validLength = (int) file.getLen();
            if (dfs.exists(file.getPath().suffix(".valid-length"))) {
                FSDataInputStream inStream = dfs.open(file.getPath().suffix(".valid-length"));
                String validLengthString = inStream.readUTF();
                validLength = Integer.parseInt(validLengthString);
                System.out.println("VALID LENGTH: " + validLength);
            }
            FSDataInputStream inStream = dfs.open(file.getPath());
            byte[] buffer = new byte[validLength];
            inStream.readFully(0, buffer, 0, validLength);
            inStream.close();

            ByteArrayInputStream bais = new ByteArrayInputStream(buffer);
            InputStreamReader inStreamReader = new InputStreamReader(bais);
            BufferedReader br = new BufferedReader(inStreamReader);

            String line = br.readLine();
            while (line != null) {
                Matcher matcher = messageRegex.matcher(line);
                if (matcher.matches()) {
                    uniqMessagesRead.add(line);

                    // check that in the committed files there are no duplicates
                    if (!file.getPath().toString().endsWith(IN_PROGRESS_SUFFIX)
                            && !file.getPath().toString().endsWith(PENDING_SUFFIX)) {
                        if (!messagesInCommittedFiles.add(line)) {
                            Assert.fail("Duplicate entry in committed bucket.");
                        }
                    }

                    int messageId = Integer.parseInt(matcher.group(1));
                    readNumbers.add(messageId);
                } else {
                    Assert.fail("Read line does not match expected pattern.");
                }
                line = br.readLine();
            }
            br.close();
            inStreamReader.close();
            bais.close();
        }
    }

    // Verify that we read all strings (at-least-once)
    Assert.assertEquals(NUM_STRINGS, readNumbers.size());

    // Verify that we don't have duplicates (boom!, exactly-once)
    Assert.assertEquals(NUM_STRINGS, uniqMessagesRead.size());
}
From source file:org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkTest.java
License:Apache License
/**
 * This uses {@link DateTimeBucketer} to
 * produce rolling files. We use {@link OneInputStreamOperatorTestHarness} to manually
 * advance processing time.
 */
@Test
public void testDateTimeRollingStringWriter() throws Exception {
    final int numElements = 20;

    final String outPath = hdfsURI + "/rolling-out";

    BucketingSink<String> sink = new BucketingSink<String>(outPath)
            .setBucketer(new DateTimeBucketer<String>("ss"))
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix("");

    OneInputStreamOperatorTestHarness<String, Object> testHarness = createTestSink(sink, 1, 0);

    testHarness.setProcessingTime(0L);

    testHarness.setup();
    testHarness.open();

    for (int i = 0; i < numElements; i++) {
        // Every 5 elements, increase the clock time. We should end up with 5 elements per bucket.
        if (i % 5 == 0) {
            testHarness.setProcessingTime(i * 1000L);
        }
        testHarness.processElement(new StreamRecord<>("message #" + Integer.toString(i)));
    }

    testHarness.close();

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // We should have 4 rolling files across 4 time intervals
    int numFiles = 0;
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        numFiles++;
        if (file.getPath().toString().contains("rolling-out/00")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 0; i < 5; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/05")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 5; i < 10; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/10")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 10; i < 15; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/15")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 15; i < 20; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else {
            Assert.fail("File " + file + " does not match any expected roll pattern.");
        }
    }

    Assert.assertEquals(4, numFiles);
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    partCounter = 0;

    this.writer = writerTemplate.duplicate();

    if (bucketState == null) {
        bucketState = new BucketState();
    }

    FileSystem fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    refTruncate = reflectTruncate(fs);

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(new Path(basePath)) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files: {}", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clean all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();

    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }

    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // be still in-progress or became a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a .valid-length
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);

            if (fs.exists(partPendingPath)) {
                LOG.debug("In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the mean time, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-Progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug("In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                        + "it was moved to final location by a previous snapshot restore", bucketState.currentFile);
            }

            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try, we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath, bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }
            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }

            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Could not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }

    LOG.debug("Clearing pending/in-progress files.");

    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure
    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);
            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug("(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}",
                        pendingPath, finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();

    // delete pending files
    try {
        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSinkITCase.java
License:Apache License
/**
 * This uses {@link org.apache.flink.streaming.connectors.fs.DateTimeBucketer} to
 * produce rolling files. The clock of DateTimeBucketer is set to
 * {@link ModifyableClock} to keep the time in lockstep with the processing of elements using
 * latches.
 */
@Test
public void testDateTimeRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/rolling-out";
    DateTimeBucketer.setClock(new ModifyableClock());
    ModifyableClock.setCurrentTime(0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);

    DataStream<Tuple2<Integer, String>> source = env.addSource(new WaitingTestSourceFunction(NUM_ELEMENTS))
            .broadcast();

    // the parallel flatMap is chained to the sink, so when it has seen 5 elements it can
    // fire the latch
    DataStream<String> mapped = source.flatMap(new RichFlatMapFunction<Tuple2<Integer, String>, String>() {
        private static final long serialVersionUID = 1L;

        int count = 0;

        @Override
        public void flatMap(Tuple2<Integer, String> value, Collector<String> out) throws Exception {
            out.collect(value.f1);
            count++;
            if (count >= 5) {
                if (getRuntimeContext().getIndexOfThisSubtask() == 0) {
                    latch1.trigger();
                } else {
                    latch2.trigger();
                }
                count = 0;
            }
        }
    });

    RollingSink<String> sink = new RollingSink<String>(outPath)
            .setBucketer(new DateTimeBucketer("ss"))
            .setPartPrefix("part")
            .setPendingPrefix("")
            .setPendingSuffix("");

    mapped.addSink(sink);

    env.execute("RollingSink String Write Test");

    RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true);

    // we should have 8 rolling files, 4 time intervals and parallelism of 2
    int numFiles = 0;
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        numFiles++;
        if (file.getPath().toString().contains("rolling-out/00")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 0; i < 5; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/05")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 5; i < 10; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/10")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 10; i < 15; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else if (file.getPath().toString().contains("rolling-out/15")) {
            FSDataInputStream inStream = dfs.open(file.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
            for (int i = 15; i < 20; i++) {
                String line = br.readLine();
                Assert.assertEquals("message #" + i, line);
            }
            inStream.close();
        } else {
            Assert.fail("File " + file + " does not match any expected roll pattern.");
        }
    }

    Assert.assertEquals(8, numFiles);
}