Example usage for org.apache.hadoop.fs FSDataInputStream readFully

List of usage examples for org.apache.hadoop.fs FSDataInputStream readFully

Introduction

On this page you can find example usage of org.apache.hadoop.fs FSDataInputStream readFully.

Prototype

@Override
public void readFully(long position, byte[] buffer, int offset, int length) throws IOException 

Document

Read bytes from the given position in the stream to the given buffer.
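
Below is a minimal, self-contained sketch (not taken from the source files listed under Usage) showing the positional readFully call on its own. The path /tmp/example.dat, the offset 128 and the 4 KB buffer are placeholder assumptions. The call fills the requested range completely, throws an EOFException if the file ends before length bytes are available, and leaves the stream's current read position unchanged.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFullyExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.dat"); // placeholder input file
        FileSystem fs = path.getFileSystem(conf);

        byte[] buffer = new byte[4096];
        try (FSDataInputStream in = fs.open(path)) {
            // Read exactly buffer.length bytes starting at absolute offset 128.
            // Unlike seek() followed by read(), this positional read does not
            // change the stream's current position.
            in.readFully(128L, buffer, 0, buffer.length);
        }
    }
}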

Usage

From source file: org.apache.orc.tools.FileDump.java

License: Apache License

private static void recoverFiles(final List<String> corruptFiles, final Configuration conf, final String backup)
        throws IOException {
    for (String corruptFile : corruptFiles) {
        System.err.println("Recovering file " + corruptFile);
        Path corruptPath = new Path(corruptFile);
        FileSystem fs = corruptPath.getFileSystem(conf);
        FSDataInputStream fdis = fs.open(corruptPath);
        try {
            long corruptFileLen = fs.getFileStatus(corruptPath).getLen();
            long remaining = corruptFileLen;
            List<Long> footerOffsets = Lists.newArrayList();

            // start reading the data file from top to bottom and record the valid footers
            while (remaining > 0) {
                int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
                byte[] data = new byte[toRead];
                long startPos = corruptFileLen - remaining;
                fdis.readFully(startPos, data, 0, toRead);

                // find all MAGIC string and see if the file is readable from there
                int index = 0;
                long nextFooterOffset;

                while (index != -1) {
                    index = indexOf(data, OrcFile.MAGIC.getBytes(), index + 1);
                    if (index != -1) {
                        nextFooterOffset = startPos + index + OrcFile.MAGIC.length() + 1;
                        if (isReadable(corruptPath, conf, nextFooterOffset)) {
                            footerOffsets.add(nextFooterOffset);
                        }
                    }
                }

                System.err.println("Scanning for valid footers - startPos: " + startPos + " toRead: " + toRead
                        + " remaining: " + remaining);
                remaining = remaining - toRead;
            }

            System.err.println("Readable footerOffsets: " + footerOffsets);
            recoverFile(corruptPath, fs, conf, footerOffsets, backup);
        } catch (Exception e) {
            Path recoveryFile = getRecoveryFile(corruptPath);
            if (fs.exists(recoveryFile)) {
                fs.delete(recoveryFile, false);
            }
            System.err.println("Unable to recover file " + corruptFile);
            e.printStackTrace();
            System.err.println(SEPARATOR);
            continue;
        } finally {
            fdis.close();
        }
        System.err.println(corruptFile + " recovered successfully!");
        System.err.println(SEPARATOR);
    }
}
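
The recovery code above reads the corrupt file in fixed-size chunks with positional readFully calls before scanning each chunk for the ORC magic string. The stripped-down sketch below (not part of FileDump) isolates that chunked-read loop; the 256 KB chunk size and the placeholder for chunk processing are assumptions.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChunkedPositionalRead {
    // Assumed chunk size; FileDump uses its own DEFAULT_BLOCK_SIZE constant.
    private static final int CHUNK_SIZE = 256 * 1024;

    static void scan(Path path, Configuration conf) throws IOException {
        FileSystem fs = path.getFileSystem(conf);
        long fileLen = fs.getFileStatus(path).getLen();
        long remaining = fileLen;

        try (FSDataInputStream in = fs.open(path)) {
            while (remaining > 0) {
                int toRead = (int) Math.min(CHUNK_SIZE, remaining);
                byte[] data = new byte[toRead];
                long startPos = fileLen - remaining;

                // Fill data[0..toRead) from absolute file offset startPos.
                in.readFully(startPos, data, 0, toRead);

                // ... inspect the chunk here, e.g. search for a magic byte pattern ...

                remaining -= toRead;
            }
        }
    }
}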

From source file: org.apache.orc.tools.FileDump.java

License: Apache License

private static void recoverFile(final Path corruptPath, final FileSystem fs, final Configuration conf,
        final List<Long> footerOffsets, final String backup) throws IOException {

    // first recover the file to .recovered file and then once successful rename it to actual file
    Path recoveredPath = getRecoveryFile(corruptPath);

    // make sure that file does not exist
    if (fs.exists(recoveredPath)) {
        fs.delete(recoveredPath, false);
    }

    // if there are no valid footers, the file should still be readable so create an empty orc file
    if (footerOffsets == null || footerOffsets.isEmpty()) {
        System.err.println("No readable footers found. Creating empty orc file.");
        TypeDescription schema = TypeDescription.createStruct();
        Writer writer = OrcFile.createWriter(recoveredPath, OrcFile.writerOptions(conf).setSchema(schema));
        writer.close();
    } else {
        FSDataInputStream fdis = fs.open(corruptPath);
        FileStatus fileStatus = fs.getFileStatus(corruptPath);
        // read corrupt file and copy it to recovered file until last valid footer
        FSDataOutputStream fdos = fs.create(recoveredPath, true, conf.getInt("io.file.buffer.size", 4096),
                fileStatus.getReplication(), fileStatus.getBlockSize());
        try {
            long fileLen = footerOffsets.get(footerOffsets.size() - 1);
            long remaining = fileLen;

            while (remaining > 0) {
                int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
                byte[] data = new byte[toRead];
                long startPos = fileLen - remaining;
                fdis.readFully(startPos, data, 0, toRead);
                fdos.write(data);
                System.err.println("Copying data to recovery file - startPos: " + startPos + " toRead: "
                        + toRead + " remaining: " + remaining);
                remaining = remaining - toRead;
            }
        } catch (Exception e) {
            fs.delete(recoveredPath, false);
            throw new IOException(e);
        } finally {
            fdis.close();
            fdos.close();
        }
    }

    // validate the recovered file once again and start moving corrupt files to backup folder
    if (isReadable(recoveredPath, conf, Long.MAX_VALUE)) {
        Path backupDataPath;
        String scheme = corruptPath.toUri().getScheme();
        String authority = corruptPath.toUri().getAuthority();
        String filePath = corruptPath.toUri().getPath();

        // use the same filesystem as corrupt file if backup-path is not explicitly specified
        if (backup.equals(DEFAULT_BACKUP_PATH)) {
            backupDataPath = new Path(scheme, authority, DEFAULT_BACKUP_PATH + filePath);
        } else {
            backupDataPath = Path.mergePaths(new Path(backup), corruptPath);
        }

        // Move data file to backup path
        moveFiles(fs, corruptPath, backupDataPath);

        // Move side file to backup path
        Path sideFilePath = OrcAcidUtils.getSideFile(corruptPath);
        Path backupSideFilePath = new Path(backupDataPath.getParent(), sideFilePath.getName());
        moveFiles(fs, sideFilePath, backupSideFilePath);

        // finally move recovered file to actual file
        moveFiles(fs, recoveredPath, corruptPath);

        // we are done recovering, backing up and validating
        System.err.println("Validation of recovered file successful!");
    }
}

From source file: org.gridgain.grid.ggfs.GridGgfsHadoopDualAbstractSelfTest.java

License: Open Source License

/**
 * Check how prefetch override works.
 *
 * @throws Exception If failed.
 */
public void testOpenPrefetchOverride() throws Exception {
    create(ggfsSecondary, paths(DIR, SUBDIR), paths(FILE));

    // Write enough data to the secondary file system.
    final int blockSize = GGFS_BLOCK_SIZE;

    GridGgfsOutputStream out = ggfsSecondary.append(FILE, false);

    int totalWritten = 0;

    while (totalWritten < blockSize * 2 + chunk.length) {
        out.write(chunk);

        totalWritten += chunk.length;
    }

    out.close();

    awaitFileClose(ggfsSecondary, FILE);

    // Instantiate file system with overridden "seq reads before prefetch" property.
    Configuration cfg = new Configuration();

    cfg.addResource(U.resolveGridGainUrl(PRIMARY_CFG));

    int seqReads = SEQ_READS_BEFORE_PREFETCH + 1;

    cfg.setInt(String.format(PARAM_GGFS_SEQ_READS_BEFORE_PREFETCH, "ggfs:grid@"), seqReads);

    FileSystem fs = FileSystem.get(new URI(PRIMARY_URI), cfg);

    // Read the first two blocks.
    Path fsHome = new Path(PRIMARY_URI);
    Path dir = new Path(fsHome, DIR.name());
    Path subdir = new Path(dir, SUBDIR.name());
    Path file = new Path(subdir, FILE.name());

    FSDataInputStream fsIn = fs.open(file);

    final byte[] readBuf = new byte[blockSize * 2];

    fsIn.readFully(0, readBuf, 0, readBuf.length);

    // Wait for a while for prefetch to finish (if any).
    GridGgfsMetaManager meta = ggfs.context().meta();

    GridGgfsFileInfo info = meta.info(meta.fileId(FILE));

    GridGgfsBlockKey key = new GridGgfsBlockKey(info.id(), info.affinityKey(), info.evictExclude(), 2);

    GridCache<GridGgfsBlockKey, byte[]> dataCache = ggfs.context().kernalContext().cache()
            .cache(ggfs.configuration().getDataCacheName());

    for (int i = 0; i < 10; i++) {
        if (dataCache.containsKey(key))
            break;
        else
            U.sleep(100);
    }

    fsIn.close();

    // Remove the file from the secondary file system.
    ggfsSecondary.delete(FILE, false);

    // Try reading the third block. Should fail.
    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override
        public Object call() throws Exception {
            GridGgfsInputStream in0 = ggfs.open(FILE);

            in0.seek(blockSize * 2);

            try {
                in0.read(readBuf);
            } finally {
                U.closeQuiet(in0);
            }

            return null;
        }
    }, IOException.class, "Failed to read data due to secondary file system exception: /dir/subdir/file");
}