Example usage for org.apache.hadoop.fs FSDataInputStream readFully

List of usage examples for org.apache.hadoop.fs FSDataInputStream readFully


On this page you can find example usages of org.apache.hadoop.fs.FSDataInputStream.readFully.


public void readFully(long position, byte[] buffer, int offset, int length) throws IOException 

Source Link


Read bytes from the given position in the stream to the given buffer.


From source file:org.apache.orc.tools.FileDump.java

License:Apache License

/**
 * Attempts to recover each of the given corrupt files, one after another.
 *
 * <p>For every path the file is read in chunks of at most {@code DEFAULT_BLOCK_SIZE} bytes using
 * positional reads. Each chunk is searched for occurrences of {@code OrcFile.MAGIC}; for every
 * occurrence a candidate footer offset is computed and checked with {@code isReadable}. The
 * collected offsets are then handed to {@code recoverFile}. If anything inside the {@code try}
 * throws, an existing recovery file is deleted and a failure message is printed to stderr.
 *
 * <p>NOTE(review): this listing appears to be abridged. Several closing braces and statement
 * bodies are missing (for example the body of the {@code isReadable} branch and of the
 * {@code finally} block), so it does not compile as shown. Consult the full source before relying
 * on details that are not visible here.
 *
 * @param corruptFiles paths of the files to recover
 * @param conf         configuration used to resolve the file system and passed to the helpers
 * @param backup       backup location, forwarded unchanged to {@code recoverFile}
 * @throws IOException declared by the signature; {@code fs.open} is called outside the
 *                     {@code try} block, so a failure to open a file is not caught here
 */
private static void recoverFiles(final List<String> corruptFiles, final Configuration conf, final String backup)
        throws IOException {
    for (String corruptFile : corruptFiles) {
        System.err.println("Recovering file " + corruptFile);
        Path corruptPath = new Path(corruptFile);
        FileSystem fs = corruptPath.getFileSystem(conf);
        FSDataInputStream fdis = fs.open(corruptPath);
        try {
            long corruptFileLen = fs.getFileStatus(corruptPath).getLen();
            long remaining = corruptFileLen;
            List<Long> footerOffsets = Lists.newArrayList();

            // start reading the data file from top to bottom and record the valid footers
            while (remaining > 0) {
                // the last chunk may be shorter than DEFAULT_BLOCK_SIZE
                int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
                byte[] data = new byte[toRead];
                // absolute position of this chunk = number of bytes already consumed
                long startPos = corruptFileLen - remaining;
                // positional read: fills data[0..toRead) starting at startPos
                fdis.readFully(startPos, data, 0, toRead);

                // find all MAGIC string and see if the file is readable from there
                // NOTE(review): each chunk is searched on its own, so a MAGIC sequence that
                // straddles two chunks would presumably not be found - confirm against indexOf.
                int index = 0;
                long nextFooterOffset;

                while (index != -1) {
                    // presumably the third argument is the position to start searching from;
                    // if so the first search starts at offset 1 of the chunk - TODO confirm
                    index = indexOf(data, OrcFile.MAGIC.getBytes(), index + 1);
                    if (index != -1) {
                        // absolute offset just past the match plus one extra byte; the +1
                        // presumably covers a trailing length byte - verify against the ORC format
                        nextFooterOffset = startPos + index + OrcFile.MAGIC.length() + 1;
                        // NOTE(review): the statement that records a readable offset in
                        // footerOffsets is not visible in this excerpt
                        if (isReadable(corruptPath, conf, nextFooterOffset)) {

                System.err.println("Scanning for valid footers - startPos: " + startPos + " toRead: " + toRead
                        + " remaining: " + remaining);
                remaining = remaining - toRead;

            System.err.println("Readable footerOffsets: " + footerOffsets);
            recoverFile(corruptPath, fs, conf, footerOffsets, backup);
        } catch (Exception e) {
            // clean up a partially written recovery file before reporting the failure
            Path recoveryFile = getRecoveryFile(corruptPath);
            if (fs.exists(recoveryFile)) {
                fs.delete(recoveryFile, false);
            System.err.println("Unable to recover file " + corruptFile);
        } finally {
        // NOTE(review): no fdis.close() is visible in this excerpt - confirm the stream is closed
        System.err.println(corruptFile + " recovered successfully!");

From source file:org.apache.orc.tools.FileDump.java

License:Apache License

/**
 * Recovers a single corrupt file into a separate recovery file, validates it, backs up the
 * original and finally moves the recovered file into the original's place.
 *
 * <p>Steps visible in this listing:
 * <ol>
 *   <li>Resolve the recovery path with {@code getRecoveryFile} and delete it if it exists.</li>
 *   <li>If {@code footerOffsets} is null or empty, create an ORC writer with an empty struct
 *       schema on the recovery path; otherwise read the corrupt file in chunks of at most
 *       {@code DEFAULT_BLOCK_SIZE} bytes, up to the last offset in {@code footerOffsets}.</li>
 *   <li>If the recovery file passes {@code isReadable}, move the corrupt file and its side file
 *       to the backup location and move the recovery file to the original path.</li>
 * </ol>
 *
 * <p>NOTE(review): this listing appears to be abridged. Closing braces are missing, the copy loop
 * shows no write to the output stream, and the {@code finally} block is empty, so it does not
 * compile as shown. Consult the full source for the missing statements.
 *
 * @param corruptPath   path of the corrupt file
 * @param fs            file system used for all reads, writes, deletes and moves
 * @param conf          configuration; supplies {@code io.file.buffer.size} (default 4096 here)
 * @param footerOffsets candidate footer offsets; the last element is used as the copy length
 * @param backup        backup location; compared with {@code DEFAULT_BACKUP_PATH} via
 *                      {@code backup.equals(...)}, so it must not be null
 * @throws IOException if the copy fails (the original exception is wrapped as the cause), or as
 *                     declared by the called file system operations
 */
private static void recoverFile(final Path corruptPath, final FileSystem fs, final Configuration conf,
        final List<Long> footerOffsets, final String backup) throws IOException {

    // first recover the file to .recovered file and then once successful rename it to actual file
    Path recoveredPath = getRecoveryFile(corruptPath);

    // make sure that file does not exist
    if (fs.exists(recoveredPath)) {
        fs.delete(recoveredPath, false);
    }

    // if there are no valid footers, the file should still be readable so create an empty orc file
    if (footerOffsets == null || footerOffsets.isEmpty()) {
        System.err.println("No readable footers found. Creating empty orc file.");
        TypeDescription schema = TypeDescription.createStruct();
        // NOTE(review): no writer.close() is visible in this excerpt - confirm in the full source
        Writer writer = OrcFile.createWriter(recoveredPath, OrcFile.writerOptions(conf).setSchema(schema));
    } else {
        FSDataInputStream fdis = fs.open(corruptPath);
        FileStatus fileStatus = fs.getFileStatus(corruptPath);
        // read corrupt file and copy it to recovered file until last valid footer
        // the output reuses the replication factor and block size of the corrupt file
        FSDataOutputStream fdos = fs.create(recoveredPath, true, conf.getInt("io.file.buffer.size", 4096),
                fileStatus.getReplication(), fileStatus.getBlockSize());
        try {
            // the last recorded footer offset is the number of bytes to keep
            long fileLen = footerOffsets.get(footerOffsets.size() - 1);
            long remaining = fileLen;

            while (remaining > 0) {
                int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
                byte[] data = new byte[toRead];
                // absolute position of this chunk = number of bytes already consumed
                long startPos = fileLen - remaining;
                fdis.readFully(startPos, data, 0, toRead);
                // NOTE(review): the write of data to fdos is not visible in this excerpt
                System.err.println("Copying data to recovery file - startPos: " + startPos + " toRead: "
                        + toRead + " remaining: " + remaining);
                remaining = remaining - toRead;
        } catch (Exception e) {
            // remove the partial recovery file and surface the failure with its cause preserved
            fs.delete(recoveredPath, false);
            throw new IOException(e);
        } finally {
            // NOTE(review): closing of fdis/fdos is not visible in this excerpt - confirm

    // validate the recovered file once again and start moving corrupt files to backup folder
    if (isReadable(recoveredPath, conf, Long.MAX_VALUE)) {
        Path backupDataPath;
        String scheme = corruptPath.toUri().getScheme();
        String authority = corruptPath.toUri().getAuthority();
        String filePath = corruptPath.toUri().getPath();

        // use the same filesystem as corrupt file if backup-path is not explicitly specified
        if (backup.equals(DEFAULT_BACKUP_PATH)) {
            backupDataPath = new Path(scheme, authority, DEFAULT_BACKUP_PATH + filePath);
        } else {
            backupDataPath = Path.mergePaths(new Path(backup), corruptPath);

        // Move data file to backup path
        moveFiles(fs, corruptPath, backupDataPath);

        // Move side file to backup path
        // the side file keeps its own name and is placed next to the backed-up data file
        Path sideFilePath = OrcAcidUtils.getSideFile(corruptPath);
        Path backupSideFilePath = new Path(backupDataPath.getParent(), sideFilePath.getName());
        moveFiles(fs, sideFilePath, backupSideFilePath);

        // finally move recovered file to actual file
        moveFiles(fs, recoveredPath, corruptPath);

        // we are done recovering, backing up and validating
        System.err.println("Validation of recovered file successful!");

From source file:org.gridgain.grid.ggfs.GridGgfsHadoopDualAbstractSelfTest.java

License:Open Source License

/**
 * Check how prefetch override works.
 *
 * @throws Exception If failed.
 */
public void testOpenPrefetchOverride() throws Exception {
    // NOTE(review): this listing appears to be abridged - loop bodies, closing braces and at
    // least one statement terminator are missing, so it does not compile as shown.
    create(ggfsSecondary, paths(DIR, SUBDIR), paths(FILE));

    // Write enough data to the secondary file system.
    final int blockSize = GGFS_BLOCK_SIZE;

    GridGgfsOutputStream out = ggfsSecondary.append(FILE, false);

    int totalWritten = 0;

    // Keep going until more than two full blocks have been accounted for.
    // NOTE(review): the actual write call on 'out' is not visible in this excerpt.
    while (totalWritten < blockSize * 2 + chunk.length) {

        totalWritten += chunk.length;


    awaitFileClose(ggfsSecondary, FILE);

    // Instantiate file system with overridden "seq reads before prefetch" property.
    Configuration cfg = new Configuration();


    // Use a threshold one higher than SEQ_READS_BEFORE_PREFETCH.
    int seqReads = SEQ_READS_BEFORE_PREFETCH + 1;

    // The property name is a format string filled in with the "ggfs:grid@" authority.
    cfg.setInt(String.format(PARAM_GGFS_SEQ_READS_BEFORE_PREFETCH, "ggfs:grid@"), seqReads);

    FileSystem fs = FileSystem.get(new URI(PRIMARY_URI), cfg);

    // Read the first two blocks.
    Path fsHome = new Path(PRIMARY_URI);
    Path dir = new Path(fsHome, DIR.name());
    Path subdir = new Path(dir, SUBDIR.name());
    Path file = new Path(subdir, FILE.name());

    FSDataInputStream fsIn = fs.open(file);

    final byte[] readBuf = new byte[blockSize * 2];

    // Single positional read of blockSize * 2 bytes starting at offset 0.
    fsIn.readFully(0, readBuf, 0, readBuf.length);

    // Wait for a while for prefetch to finish (if any).
    GridGgfsMetaManager meta = ggfs.context().meta();

    GridGgfsFileInfo info = meta.info(meta.fileId(FILE));

    // presumably the last argument is a zero-based block index, i.e. the third block - TODO confirm
    GridGgfsBlockKey key = new GridGgfsBlockKey(info.id(), info.affinityKey(), info.evictExclude(), 2);

    // NOTE(review): this statement is cut off in the excerpt (no cache name, no terminator).
    GridCache<GridGgfsBlockKey, byte[]> dataCache = ggfs.context().kernalContext().cache()

    // Check the data cache for the key up to 10 times.
    // NOTE(review): the action taken inside the loop is not visible in this excerpt.
    for (int i = 0; i < 10; i++) {
        if (dataCache.containsKey(key))


    // Remove the file from the secondary file system.
    ggfsSecondary.delete(FILE, false);

    // Try reading the third block. Should fail.
    GridTestUtils.assertThrows(log, new Callable<Object>() {
        public Object call() throws Exception {
            GridGgfsInputStream in0 = ggfs.open(FILE);

            // Byte offset blockSize * 2 is the first byte after the two blocks read above.
            in0.seek(blockSize * 2);

            // NOTE(review): the read in the try body and the cleanup in finally are not visible here.
            try {
            } finally {

            return null;
    // The call is expected to throw IOException carrying the message below.
    }, IOException.class, "Failed to read data due to secondary file system exception: /dir/subdir/file");