List of usage examples for org.apache.hadoop.fs FSDataInputStream readFully
@Override public void readFully(long position, byte[] buffer, int offset, int length) throws IOException
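A minimal sketch of this positioned overload before the full examples below (the file path, offset, and buffer size are illustrative; per the PositionedReadable contract, a positioned readFully does not move the stream's current offset and throws EOFException if the requested range extends past end-of-file):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFullyExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.dat"); // hypothetical file
        FileSystem fs = path.getFileSystem(conf);
        try (FSDataInputStream in = fs.open(path)) {
            byte[] buffer = new byte[128];
            // Fill buffer[0..127] from byte offset 64 of the file; unlike a
            // plain read(), this either fills the whole range or throws.
            in.readFully(64L, buffer, 0, buffer.length);
        }
    }
}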
From source file: org.apache.orc.tools.FileDump.java
License: Apache License
private static void recoverFiles(final List<String> corruptFiles, final Configuration conf,
        final String backup) throws IOException {
    for (String corruptFile : corruptFiles) {
        System.err.println("Recovering file " + corruptFile);

        Path corruptPath = new Path(corruptFile);
        FileSystem fs = corruptPath.getFileSystem(conf);
        FSDataInputStream fdis = fs.open(corruptPath);
        try {
            long corruptFileLen = fs.getFileStatus(corruptPath).getLen();
            long remaining = corruptFileLen;
            List<Long> footerOffsets = Lists.newArrayList();

            // start reading the data file from top to bottom and record the valid footers
            while (remaining > 0) {
                int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
                byte[] data = new byte[toRead];
                long startPos = corruptFileLen - remaining;
                fdis.readFully(startPos, data, 0, toRead);

                // find all MAGIC strings and see if the file is readable from there
                int index = 0;
                long nextFooterOffset;
                while (index != -1) {
                    index = indexOf(data, OrcFile.MAGIC.getBytes(), index + 1);
                    if (index != -1) {
                        nextFooterOffset = startPos + index + OrcFile.MAGIC.length() + 1;
                        if (isReadable(corruptPath, conf, nextFooterOffset)) {
                            footerOffsets.add(nextFooterOffset);
                        }
                    }
                }

                System.err.println("Scanning for valid footers - startPos: " + startPos
                        + " toRead: " + toRead + " remaining: " + remaining);
                remaining = remaining - toRead;
            }

            System.err.println("Readable footerOffsets: " + footerOffsets);
            recoverFile(corruptPath, fs, conf, footerOffsets, backup);
        } catch (Exception e) {
            Path recoveryFile = getRecoveryFile(corruptPath);
            if (fs.exists(recoveryFile)) {
                fs.delete(recoveryFile, false);
            }
            System.err.println("Unable to recover file " + corruptFile);
            e.printStackTrace();
            System.err.println(SEPARATOR);
            continue;
        } finally {
            fdis.close();
        }

        System.err.println(corruptFile + " recovered successfully!");
        System.err.println(SEPARATOR);
    }
}
From source file: org.apache.orc.tools.FileDump.java
License: Apache License
private static void recoverFile(final Path corruptPath, final FileSystem fs, final Configuration conf,
        final List<Long> footerOffsets, final String backup) throws IOException {
    // first recover the file to .recovered file and then once successful rename it to actual file
    Path recoveredPath = getRecoveryFile(corruptPath);

    // make sure that file does not exist
    if (fs.exists(recoveredPath)) {
        fs.delete(recoveredPath, false);
    }

    // if there are no valid footers, the file should still be readable so create an empty orc file
    if (footerOffsets == null || footerOffsets.isEmpty()) {
        System.err.println("No readable footers found. Creating empty orc file.");
        TypeDescription schema = TypeDescription.createStruct();
        Writer writer = OrcFile.createWriter(recoveredPath, OrcFile.writerOptions(conf).setSchema(schema));
        writer.close();
    } else {
        FSDataInputStream fdis = fs.open(corruptPath);
        FileStatus fileStatus = fs.getFileStatus(corruptPath);
        // read corrupt file and copy it to recovered file until last valid footer
        FSDataOutputStream fdos = fs.create(recoveredPath, true, conf.getInt("io.file.buffer.size", 4096),
                fileStatus.getReplication(), fileStatus.getBlockSize());
        try {
            long fileLen = footerOffsets.get(footerOffsets.size() - 1);
            long remaining = fileLen;

            while (remaining > 0) {
                int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
                byte[] data = new byte[toRead];
                long startPos = fileLen - remaining;
                fdis.readFully(startPos, data, 0, toRead);
                fdos.write(data);
                System.err.println("Copying data to recovery file - startPos: " + startPos
                        + " toRead: " + toRead + " remaining: " + remaining);
                remaining = remaining - toRead;
            }
        } catch (Exception e) {
            fs.delete(recoveredPath, false);
            throw new IOException(e);
        } finally {
            fdis.close();
            fdos.close();
        }
    }

    // validate the recovered file once again and start moving corrupt files to backup folder
    if (isReadable(recoveredPath, conf, Long.MAX_VALUE)) {
        Path backupDataPath;
        String scheme = corruptPath.toUri().getScheme();
        String authority = corruptPath.toUri().getAuthority();
        String filePath = corruptPath.toUri().getPath();

        // use the same filesystem as corrupt file if backup-path is not explicitly specified
        if (backup.equals(DEFAULT_BACKUP_PATH)) {
            backupDataPath = new Path(scheme, authority, DEFAULT_BACKUP_PATH + filePath);
        } else {
            backupDataPath = Path.mergePaths(new Path(backup), corruptPath);
        }

        // Move data file to backup path
        moveFiles(fs, corruptPath, backupDataPath);

        // Move side file to backup path
        Path sideFilePath = OrcAcidUtils.getSideFile(corruptPath);
        Path backupSideFilePath = new Path(backupDataPath.getParent(), sideFilePath.getName());
        moveFiles(fs, sideFilePath, backupSideFilePath);

        // finally move recovered file to actual file
        moveFiles(fs, recoveredPath, corruptPath);

        // we are done recovering, backing up and validating
        System.err.println("Validation of recovered file successful!");
    }
}
From source file: org.gridgain.grid.ggfs.GridGgfsHadoopDualAbstractSelfTest.java
License: Open Source License
/**
 * Check how prefetch override works.
 *
 * @throws Exception If failed.
 */
public void testOpenPrefetchOverride() throws Exception {
    create(ggfsSecondary, paths(DIR, SUBDIR), paths(FILE));

    // Write enough data to the secondary file system.
    final int blockSize = GGFS_BLOCK_SIZE;

    GridGgfsOutputStream out = ggfsSecondary.append(FILE, false);

    int totalWritten = 0;

    while (totalWritten < blockSize * 2 + chunk.length) {
        out.write(chunk);

        totalWritten += chunk.length;
    }

    out.close();

    awaitFileClose(ggfsSecondary, FILE);

    // Instantiate file system with overridden "seq reads before prefetch" property.
    Configuration cfg = new Configuration();

    cfg.addResource(U.resolveGridGainUrl(PRIMARY_CFG));

    int seqReads = SEQ_READS_BEFORE_PREFETCH + 1;

    cfg.setInt(String.format(PARAM_GGFS_SEQ_READS_BEFORE_PREFETCH, "ggfs:grid@"), seqReads);

    FileSystem fs = FileSystem.get(new URI(PRIMARY_URI), cfg);

    // Read the first two blocks.
    Path fsHome = new Path(PRIMARY_URI);
    Path dir = new Path(fsHome, DIR.name());
    Path subdir = new Path(dir, SUBDIR.name());
    Path file = new Path(subdir, FILE.name());

    FSDataInputStream fsIn = fs.open(file);

    final byte[] readBuf = new byte[blockSize * 2];

    fsIn.readFully(0, readBuf, 0, readBuf.length);

    // Wait for a while for prefetch to finish (if any).
    GridGgfsMetaManager meta = ggfs.context().meta();

    GridGgfsFileInfo info = meta.info(meta.fileId(FILE));

    GridGgfsBlockKey key = new GridGgfsBlockKey(info.id(), info.affinityKey(), info.evictExclude(), 2);

    GridCache<GridGgfsBlockKey, byte[]> dataCache = ggfs.context().kernalContext().cache()
            .cache(ggfs.configuration().getDataCacheName());

    for (int i = 0; i < 10; i++) {
        if (dataCache.containsKey(key))
            break;
        else
            U.sleep(100);
    }

    fsIn.close();

    // Remove the file from the secondary file system.
    ggfsSecondary.delete(FILE, false);

    // Try reading the third block. Should fail.
    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override public Object call() throws Exception {
            GridGgfsInputStream in0 = ggfs.open(FILE);

            in0.seek(blockSize * 2);

            try {
                in0.read(readBuf);
            } finally {
                U.closeQuiet(in0);
            }

            return null;
        }
    }, IOException.class, "Failed to read data due to secondary file system exception: /dir/subdir/file");
}