Example usage for org.apache.hadoop.hdfs DistributedFileSystem isFileClosed

List of usage examples for org.apache.hadoop.hdfs DistributedFileSystem isFileClosed

Introduction

On this page you can find usage examples for org.apache.hadoop.hdfs DistributedFileSystem.isFileClosed.

Prototype

public boolean isFileClosed(final Path src) throws IOException 


Document

Get the close status of a file
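
Before the full examples below, here is a minimal sketch of the common pattern around isFileClosed: after asking the NameNode to recover the lease of a file left open by a failed writer, poll isFileClosed until it reports true or a timeout expires. The class name, helper name waitUntilClosed, and timeout value are illustrative and not taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class IsFileClosedExample {

    // Illustrative helper: returns true once the file is closed, false on timeout.
    static boolean waitUntilClosed(FileSystem fs, Path path, long timeoutMillis)
            throws IOException, InterruptedException {
        if (!(fs instanceof DistributedFileSystem)) {
            return true; // isFileClosed() is only available on HDFS
        }
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        dfs.recoverLease(path); // ask the NameNode to revoke a stale lease from a dead writer
        long start = System.currentTimeMillis();
        while (!dfs.isFileClosed(path)) {
            if (System.currentTimeMillis() - start > timeoutMillis) {
                return false;
            }
            Thread.sleep(500L);
        }
        return true;
    }
}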

Usage

From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License:Apache License

@Override
public void restoreState(State<T> state) {
    this.state = state;

    FileSystem fs;
    try {
        fs = new Path(basePath).getFileSystem(HadoopFileSystem.getHadoopConfiguration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }

    for (BucketState<T> bucketState : state.bucketStates.values()) {
        // we can clean all the pending files since they were renamed to final files
        // after this checkpoint was successful
        bucketState.pendingFiles.clear();

        if (bucketState.currentFile != null) {
            // We were writing to a file when the last checkpoint occurred. This file can either
            // be still in-progress or became a pending file at some point after the checkpoint.
            // Either way, we have to truncate it back to a valid state (or write a .valid-length
            // file that specifies up to which length it is valid) and rename it to the final name
            // before starting a new bucket file.
            Path partPath = new Path(bucketState.currentFile);
            try {
                Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                        .suffix(pendingSuffix);
                Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                        .suffix(inProgressSuffix);

                if (fs.exists(partPendingPath)) {
                    LOG.debug(
                            "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                            partPath);
                    // has been moved to pending in the mean time, rename to final location
                    fs.rename(partPendingPath, partPath);
                } else if (fs.exists(partInProgressPath)) {
                    LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                    // it was still in progress, rename to final path
                    fs.rename(partInProgressPath, partPath);
                } else if (fs.exists(partPath)) {
                    LOG.debug("In-Progress file {} was already moved to final location {}.",
                            bucketState.currentFile, partPath);
                } else {
                    LOG.debug(
                            "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                    + "it was moved to final location by a previous snapshot restore",
                            bucketState.currentFile);
                }

                refTruncate = reflectTruncate(fs);
                // truncate it or write a ".valid-length" file to specify up to which point it is valid
                if (refTruncate != null) {
                    LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                    // someone else might still hold the lease from a previous try, we are
                    // recovering, after all ...
                    if (fs instanceof DistributedFileSystem) {
                        DistributedFileSystem dfs = (DistributedFileSystem) fs;
                        LOG.debug("Trying to recover file lease {}", partPath);
                        dfs.recoverLease(partPath);
                        boolean isclosed = dfs.isFileClosed(partPath);
                        StopWatch sw = new StopWatch();
                        sw.start();
                        while (!isclosed) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            isclosed = dfs.isFileClosed(partPath);
                        }
                    }
                    Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                            bucketState.currentFileValidLength);
                    if (!truncated) {
                        LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                        // we must wait for the asynchronous truncate operation to complete
                        StopWatch sw = new StopWatch();
                        sw.start();
                        long newLen = fs.getFileStatus(partPath).getLen();
                        while (newLen != bucketState.currentFileValidLength) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            newLen = fs.getFileStatus(partPath).getLen();
                        }
                        if (newLen != bucketState.currentFileValidLength) {
                            throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                    + bucketState.currentFileValidLength + " is " + newLen + ".");
                        }
                    }

                } else {
                    LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                            bucketState.currentFileValidLength);
                    Path validLengthFilePath = new Path(partPath.getParent(),
                            validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                    if (!fs.exists(validLengthFilePath)) {
                        FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                        lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                        lengthFileOut.close();
                    }
                }

                // Now that we've restored the bucket to a valid state, reset the current file info
                bucketState.currentFile = null;
                bucketState.currentFileValidLength = -1;
            } catch (IOException e) {
                LOG.error("Error while restoring BucketingSink state.", e);
                throw new RuntimeException("Error while restoring BucketingSink state.", e);
            } catch (InvocationTargetException | IllegalAccessException e) {
                LOG.error("Cound not invoke truncate.", e);
                throw new RuntimeException("Could not invoke truncate.", e);
            }
        }

        LOG.debug("Clearing pending/in-progress files.");

        // Move files that are confirmed by a checkpoint but did not get moved to final location
        // because the checkpoint notification did not happen before a failure

        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        LOG.debug("Moving pending files to final location on restore.");
        for (Long pastCheckpointId : pastCheckpointIds) {
            // All the pending files are buckets that have been completed but are waiting to be renamed
            // to their final name
            for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                Path finalPath = new Path(filename);
                Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                        .suffix(pendingSuffix);

                try {
                    if (fs.exists(pendingPath)) {
                        LOG.debug(
                                "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                                pendingPath, pastCheckpointId);
                        fs.rename(pendingPath, finalPath);
                    }
                } catch (IOException e) {
                    LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}",
                            pendingPath, finalPath, e);
                    throw new RuntimeException(
                            "Error while renaming pending file " + pendingPath + " to final path " + finalPath,
                            e);
                }
            }
        }

        synchronized (bucketState.pendingFilesPerCheckpoint) {
            bucketState.pendingFilesPerCheckpoint.clear();
        }
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}

From source file:org.apache.flink.runtime.fs.hdfs.HadoopRecoverableFsDataOutputStream.java

License:Apache License

/**
 * Called when resuming execution after a failure and waits until the lease
 * of the file we are resuming is free.
 *
 * <p>The lease of the file we are resuming writing/committing to may still
 * belong to the process that failed previously and whose state we are
 * recovering.
 *
 * @param path The path to the file we want to resume writing to.
 */
private boolean waitUntilLeaseIsRevoked(final Path path) throws IOException {
    Preconditions.checkState(fs instanceof DistributedFileSystem);

    final DistributedFileSystem dfs = (DistributedFileSystem) fs;
    dfs.recoverLease(path);

    final Deadline deadline = Deadline.now().plus(Duration.ofMillis(LEASE_TIMEOUT));

    final StopWatch sw = new StopWatch();
    sw.start();

    boolean isClosed = dfs.isFileClosed(path);
    while (!isClosed && deadline.hasTimeLeft()) {
        try {
            Thread.sleep(500L);
        } catch (InterruptedException e1) {
            throw new IOException("Recovering the lease failed: ", e1);
        }
        isClosed = dfs.isFileClosed(path);
    }
    return isClosed;
}

From source file:org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink.java

License:Apache License

private void handlePendingInProgressFile(String file, long validLength) {
    if (file != null) {

        // We were writing to a file when the last checkpoint occurred. This file can either
        // be still in-progress or became a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a .valid-length
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.

        Path partPath = new Path(file);
        try {
            Path partPendingPath = getPendingPathFor(partPath);
            Path partInProgressPath = getInProgressPathFor(partPath);

            if (fs.exists(partPendingPath)) {
                LOG.debug(
                        "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the mean time, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-Progress file {} was already moved to final location {}.", file, partPath);
            } else {
                LOG.debug(
                        "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                + "it was moved to final location by a previous snapshot restore",
                        file);
            }

            // We use reflection to get the .truncate() method, this
            // is only available starting with Hadoop 2.7
            if (this.refTruncate == null) {
                this.refTruncate = reflectTruncate(fs);
            }

            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, validLength);
                // someone else might still hold the lease from a previous try, we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath, validLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != validLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != validLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + validLength + " is " + newLen + ".");
                    }
                }
            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath, validLength);
                Path validLengthFilePath = getValidLengthPathFor(partPath);
                if (!fs.exists(validLengthFilePath) && fs.exists(partPath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(validLength));
                    lengthFileOut.close();
                }
            }

        } catch (IOException e) {
            LOG.error("Error while restoring BucketingSink state.", e);
            throw new RuntimeException("Error while restoring BucketingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Could not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }
}

From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java

License:Apache License

@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clean all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();
    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }
    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // be still in-progress or became a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a .valid-length
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);

            if (fs.exists(partPendingPath)) {
                LOG.debug(
                        "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the mean time, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-Progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug(
                        "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                + "it was moved to final location by a previous snapshot restore",
                        bucketState.currentFile);
            }

            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try, we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                        bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }

            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }

            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Cound not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }

    LOG.debug("Clearing pending/in-progress files.");

    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure

    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);

            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug(
                            "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath,
                        finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}

From source file:org.apache.orc.tools.FileDump.java

License:Apache License

/**
 * This method returns an ORC reader object if the specified file is readable. If the specified
 * file has side file (_flush_length) file, then max footer offset will be read from the side
 * file and orc reader will be created from that offset. Since both data file and side file
 * use hflush() for flushing the data, there could be some inconsistencies and both files could be
 * out-of-sync. Following are the cases under which null will be returned
 *
 * 1) If the file specified by path or its side file is still open for writes
 * 2) If *_flush_length file does not return any footer offset
 * 3) If *_flush_length returns a valid footer offset but the data file is not readable at that
 *    position (incomplete data file)
 * 4) If *_flush_length file length is not a multiple of 8, then reader will be created from
 *    previous valid footer. If there is no such footer (file length > 0 and < 8), then null will
 *    be returned
 *
 * Also, if this method detects any file corruption (mismatch between data file and side file)
 * then it will add the corresponding file to the specified input list for corrupted files.
 *
 * In all other cases, where the file is readable this method will return a reader object.
 *
 * @param path - file to get reader for
 * @param conf - configuration object
 * @param corruptFiles - fills this list with all possible corrupted files
 * @return - reader for the specified file or null
 * @throws IOException
 */
static Reader getReader(final Path path, final Configuration conf, final List<String> corruptFiles)
        throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    long dataFileLen = fs.getFileStatus(path).getLen();
    System.err.println("Processing data file " + path + " [length: " + dataFileLen + "]");
    Path sideFile = OrcAcidUtils.getSideFile(path);
    final boolean sideFileExists = fs.exists(sideFile);
    boolean openDataFile = false;
    boolean openSideFile = false;
    if (fs instanceof DistributedFileSystem) {
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        openDataFile = !dfs.isFileClosed(path);
        openSideFile = sideFileExists && !dfs.isFileClosed(sideFile);
    }

    if (openDataFile || openSideFile) {
        if (openDataFile && openSideFile) {
            System.err.println("Unable to perform file dump as " + path + " and " + sideFile
                    + " are still open for writes.");
        } else if (openSideFile) {
            System.err.println("Unable to perform file dump as " + sideFile + " is still open for writes.");
        } else {
            System.err.println("Unable to perform file dump as " + path + " is still open for writes.");
        }

        return null;
    }

    Reader reader = null;
    if (sideFileExists) {
        final long maxLen = OrcAcidUtils.getLastFlushLength(fs, path);
        final long sideFileLen = fs.getFileStatus(sideFile).getLen();
        System.err.println("Found flush length file " + sideFile + " [length: " + sideFileLen
                + ", maxFooterOffset: " + maxLen + "]");
        // no offsets read from side file
        if (maxLen == -1) {

            // if data file is larger than last flush length, then additional data could be recovered
            if (dataFileLen > maxLen) {
                System.err.println("Data file has more data than max footer offset:" + maxLen
                        + ". Adding data file to recovery list.");
                if (corruptFiles != null) {
                    corruptFiles.add(path.toUri().toString());
                }
            }
            return null;
        }

        try {
            reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).maxLength(maxLen));

            // if data file is larger than last flush length, then additional data could be recovered
            if (dataFileLen > maxLen) {
                System.err.println("Data file has more data than max footer offset:" + maxLen
                        + ". Adding data file to recovery list.");
                if (corruptFiles != null) {
                    corruptFiles.add(path.toUri().toString());
                }
            }
        } catch (Exception e) {
            if (corruptFiles != null) {
                corruptFiles.add(path.toUri().toString());
            }
            System.err.println(
                    "Unable to read data from max footer offset." + " Adding data file to recovery list.");
            return null;
        }
    } else {
        reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    }

    return reader;
}