List of usage examples for org.apache.hadoop.fs.FSDataInputStream#getPos()
@Override public long getPos() throws IOException
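Before the project examples, here is a minimal, self-contained sketch of the seek()/getPos() contract. It is not taken from any of the projects below; the path /tmp/example.txt and the offset 42 are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPosExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path p = new Path("/tmp/example.txt"); // hypothetical file
    FSDataInputStream in = fs.open(p);
    try {
      in.seek(42);            // move the stream to byte offset 42
      long pos = in.getPos(); // returns 42: the offset of the next byte to be read
      System.out.println("pos = " + pos);
    } finally {
      in.close();
    }
  }
}

Note that getPos() reports a position in the underlying byte stream; for compressed or wrapped streams (see the CBZip2InputStream example below) that offset does not correspond to a position in the decoded data.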
From source file:co.cask.tigon.logging.LogFileReader.java
License:Apache License
private long determineTrueFileSize(Path path, FileStatus status) throws IOException {
  FSDataInputStream stream = fileSystem.open(path);
  try {
    stream.seek(status.getLen());
    // we need to read repeatedly until we reach the end of the file
    byte[] buffer = new byte[1024 * 1024];
    while (stream.read(buffer, 0, buffer.length) >= 0) {
      // empty body.
    }
    long trueSize = stream.getPos();
    return trueSize;
  } finally {
    stream.close();
  }
}
From source file:com.alectenharmsel.research.WholeBlockRecordReader.java
License:Apache License
public boolean nextKeyValue() throws IOException, InterruptedException {
  if (!processed) {
    System.err.println("start is " + start);
    Path file = fileSplit.getPath();
    String tmp = file.toString();
    System.err.println("File: " + tmp);
    currKey.set(tmp);
    System.err.println("Reached this point");
    FileSystem fs = file.getFileSystem(conf);
    System.err.println("fs blocksize: " + fs.getDefaultBlockSize(file));
    System.err.println("linecount blocksize: " + blockSize);
    byte[] contents;
    FSDataInputStream in = null;
    try {
      in = fs.open(file);
      System.err.println("getPos(): " + in.getPos());
      if ((start + blockSize) > fileLength) {
        blockSize = (int) (fileLength - start);
        processed = true;
      }
      contents = new byte[blockSize];
      //IOUtils.readFully(in, contents, start, blockSize);
      //IOUtils.readFully(in, contents, 0, blockSize);
      in.readFully(start, contents);
      start += blockSize;
      currValue.set(contents);
    } finally {
      IOUtils.closeStream(in);
    }
    return true;
  }
  return false;
}
From source file:com.aliyun.fs.oss.contract.TestAliyunOSSContractSeek.java
License:Apache License
@Test
public void testSeekBeyondDownloadSize() throws Throwable {
  describe("seek and read beyond download size.");
  Path byteFile = path("byte_file.txt");
  // 'fs.oss.multipart.download.size' = 100 * 1024
  byte[] block = dataset(100 * 1024 + 10, 0, 255);
  FileSystem fs = getFileSystem();
  createFile(fs, byteFile, true, block);
  FSDataInputStream instream = getFileSystem().open(byteFile);
  instream.seek(100 * 1024 - 1);
  assertEquals(100 * 1024 - 1, instream.getPos());
  assertEquals(144, instream.read());
  instream.seek(100 * 1024 + 1);
  assertEquals(100 * 1024 + 1, instream.getPos());
  assertEquals(146, instream.read());
}
From source file:com.aliyun.fs.oss.TestAliyunOSSInputStream.java
License:Apache License
@Test
public void testSeekFile() throws Exception {
  Path smallSeekFile = setPath("/test/smallSeekFile.txt");
  long size = 5 * 1024 * 1024;
  ContractTestUtils.generateTestFile(this.fs, smallSeekFile, size, 256, 255);
  LOG.info("5MB file created: smallSeekFile.txt");
  FSDataInputStream instream = this.fs.open(smallSeekFile);
  int seekTimes = 5;
  LOG.info("multiple fold position seeking test...:");
  for (int i = 0; i < seekTimes; i++) {
    long pos = size / (seekTimes - i) - 1;
    LOG.info("begin seeking for pos: " + pos);
    instream.seek(pos);
    assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(),
        instream.getPos() == pos);
    LOG.info("completed seeking at pos: " + instream.getPos());
  }
  LOG.info("random position seeking test...:");
  Random rand = new Random();
  for (int i = 0; i < seekTimes; i++) {
    long pos = Math.abs(rand.nextLong()) % size;
    LOG.info("begin seeking for pos: " + pos);
    instream.seek(pos);
    assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(),
        instream.getPos() == pos);
    LOG.info("completed seeking at pos: " + instream.getPos());
  }
  IOUtils.closeStream(instream);
}
From source file:com.cloudera.crunch.io.text.CBZip2InputStream.java
License:Apache License
public CBZip2InputStream(FSDataInputStream zStream, int blockSize, long end) throws IOException {
  endOffsetOfSplit = end;
  // initialize retPos to the beginning of the current InputSplit
  // see comments in getPos() to understand how this is used.
  retPos = zStream.getPos();
  ll8 = null;
  tt = null;
  checkComputedCombinedCRC = blockSize == -1;
  bsSetStream(zStream);
  initialize(blockSize);
  initBlock(blockSize != -1);
  setupBlock();
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.READHandler.java
License:Apache License
@Override
protected READResponse doHandle(NFS4Handler server, Session session, READRequest request)
    throws NFS4Exception, IOException {
  if (session.getCurrentFileHandle() == null) {
    throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
  }
  int size = Math.min(request.getCount(), NFS4_MAX_RWSIZE);
  if (size < 0) {
    throw new NFS4Exception(NFS4ERR_INVAL);
  }
  FileHandle fileHandle = session.getCurrentFileHandle();
  Path path = server.getPath(fileHandle);
  FileSystem fs = session.getFileSystem();
  FSDataInputStream inputStream = server.forRead(request.getStateID(), fs, fileHandle);
  synchronized (inputStream) {
    if (inputStream.getPos() != request.getOffset()) {
      try {
        inputStream.seek(request.getOffset());
      } catch (IOException e) {
        throw new IOException(e.getMessage() + ": " + inputStream.getPos() + ", " + request.getOffset(), e);
      }
      server.incrementMetric("NFS_RANDOM_READS", 1);
    }
    READResponse response = createResponse();
    byte[] data = new byte[size];
    int count = inputStream.read(data);
    long fileLength = -1;
    if (count > 0 && count != data.length
        && (request.getOffset() + count) < (fileLength = fs.getFileStatus(path).getLen())) {
      LOGGER.info("Short read " + path + " at pos = " + request.getOffset() + ", wanted " + data.length
          + " and read " + count + ", fileLength = " + fileLength);
      server.incrementMetric("NFS_SHORT_READS", 1);
    }
    boolean eof = count < 0;
    if (eof) {
      data = new byte[0];
      count = 0;
    }
    server.incrementMetric("HDFS_BYTES_READ", count);
    response.setData(data, 0, count);
    response.setEOF(eof);
    response.setStatus(NFS4_OK);
    return response;
  }
}
From source file:com.datatorrent.lib.io.AbstractHDFSInputOperator.java
License:Open Source License
@Override
public long getFilePointer(FSDataInputStream stream) {
  try {
    return stream.getPos();
  } catch (IOException ex) {
    throw new RuntimeException(ex.getCause());
  }
}
From source file:com.datatorrent.lib.io.fs.AbstractFileOutputOperator.java
License:Open Source License
@Override
public void setup(Context.OperatorContext context) {
  LOG.debug("setup initiated");
  rollingFile = maxLength < Long.MAX_VALUE;
  //Getting required file system instance.
  try {
    fs = getFSInstance();
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
  if (replication <= 0) {
    replication = fs.getDefaultReplication(new Path(filePath));
  }
  LOG.debug("FS class {}", fs.getClass());
  //When an entry is removed from the cache, removal listener is notified and it closes the output stream.
  RemovalListener<String, FSDataOutputStream> removalListener = new RemovalListener<String, FSDataOutputStream>() {
    @Override
    public void onRemoval(RemovalNotification<String, FSDataOutputStream> notification) {
      FSDataOutputStream value = notification.getValue();
      if (value != null) {
        try {
          LOG.debug("closing {}", notification.getKey());
          value.close();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }
  };
  //Define cache
  CacheLoader<String, FSDataOutputStream> loader = new CacheLoader<String, FSDataOutputStream>() {
    @Override
    public FSDataOutputStream load(String filename) {
      String partFileName = getPartFileNamePri(filename);
      Path lfilepath = new Path(filePath + Path.SEPARATOR + partFileName);
      FSDataOutputStream fsOutput;
      boolean sawThisFileBefore = endOffsets.containsKey(filename);
      try {
        if (fs.exists(lfilepath)) {
          if (sawThisFileBefore) {
            FileStatus fileStatus = fs.getFileStatus(lfilepath);
            MutableLong endOffset = endOffsets.get(filename);
            if (endOffset != null) {
              endOffset.setValue(fileStatus.getLen());
            } else {
              endOffsets.put(filename, new MutableLong(fileStatus.getLen()));
            }
            fsOutput = fs.append(lfilepath);
            LOG.debug("appending to {}", lfilepath);
          }
          //We never saw this file before and we don't want to append
          else {
            //If the file is rolling we need to delete all its parts.
            if (rollingFile) {
              int part = 0;
              while (true) {
                Path seenPartFilePath = new Path(filePath + Path.SEPARATOR + getPartFileName(filename, part));
                if (!fs.exists(seenPartFilePath)) {
                  break;
                }
                fs.delete(seenPartFilePath, true);
                part = part + 1;
              }
              fsOutput = fs.create(lfilepath, (short) replication);
            }
            //Not rolling is easy, just delete the file and create it again.
            else {
              fs.delete(lfilepath, true);
              fsOutput = fs.create(lfilepath, (short) replication);
            }
          }
        } else {
          fsOutput = fs.create(lfilepath, (short) replication);
        }
        //Get the end offset of the file.
        LOG.info("opened: {}", fs.getFileStatus(lfilepath).getPath());
        return fsOutput;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  };
  streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles).removalListener(removalListener)
      .build(loader);
  try {
    LOG.debug("File system class: {}", fs.getClass());
    LOG.debug("end-offsets {}", endOffsets);
    //Restore the files in case they were corrupted and the operator was restarted.
    Path writerPath = new Path(filePath);
    if (fs.exists(writerPath)) {
      for (String seenFileName : endOffsets.keySet()) {
        String seenFileNamePart = getPartFileNamePri(seenFileName);
        LOG.debug("seenFileNamePart: {}", seenFileNamePart);
        Path seenPartFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart);
        if (fs.exists(seenPartFilePath)) {
          LOG.debug("file exists {}", seenFileNamePart);
          long offset = endOffsets.get(seenFileName).longValue();
          FSDataInputStream inputStream = fs.open(seenPartFilePath);
          FileStatus status = fs.getFileStatus(seenPartFilePath);
          if (status.getLen() != offset) {
            LOG.info("file corrupted {} {} {}", seenFileNamePart, offset, status.getLen());
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            Path tmpFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart + TMP_EXTENSION);
            FSDataOutputStream fsOutput = fs.create(tmpFilePath, (short) replication);
            while (inputStream.getPos() < offset) {
              long remainingBytes = offset - inputStream.getPos();
              int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? (int) remainingBytes : COPY_BUFFER_SIZE;
              inputStream.read(buffer);
              fsOutput.write(buffer, 0, bytesToWrite);
            }
            flush(fsOutput);
            fsOutput.close();
            inputStream.close();
            FileContext fileContext = FileContext.getFileContext(fs.getUri());
            LOG.debug("temp file path {}, rolling file path {}", tmpFilePath.toString(),
                status.getPath().toString());
            fileContext.rename(tmpFilePath, status.getPath(), Options.Rename.OVERWRITE);
          } else {
            inputStream.close();
          }
        }
      }
    }
    //delete the left over future rolling files produced from the previous crashed instance
    //of this operator.
    if (rollingFile) {
      for (String seenFileName : endOffsets.keySet()) {
        try {
          Integer part = openPart.get(seenFileName).getValue() + 1;
          while (true) {
            Path seenPartFilePath = new Path(filePath + Path.SEPARATOR + getPartFileName(seenFileName, part));
            if (!fs.exists(seenPartFilePath)) {
              break;
            }
            fs.delete(seenPartFilePath, true);
            part = part + 1;
          }
          Path seenPartFilePath = new Path(filePath + Path.SEPARATOR
              + getPartFileName(seenFileName, openPart.get(seenFileName).intValue()));
          //Handle the case when restoring to a checkpoint where the current rolling file
          //already has a length greater than max length.
          if (fs.getFileStatus(seenPartFilePath).getLen() > maxLength) {
            LOG.debug("rotating file at setup.");
            rotate(seenFileName);
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        } catch (ExecutionException e) {
          throw new RuntimeException(e);
        }
      }
    }
    LOG.debug("setup completed");
    LOG.debug("end-offsets {}", endOffsets);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  this.context = context;
  lastTimeStamp = System.currentTimeMillis();
  fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
  fileCounters.setCounter(Counters.TOTAL_TIME_ELAPSED, new MutableLong());
}
From source file:com.datatorrent.lib.io.fs.AbstractFSWriter.java
License:Open Source License
@Override
public void setup(Context.OperatorContext context) {
  rollingFile = maxLength < Long.MAX_VALUE;
  //Getting required file system instance.
  try {
    fs = getFSInstance();
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
  LOG.debug("FS class {}", fs.getClass());
  //Setting listener for debugging
  LOG.debug("setup initiated");
  RemovalListener<String, FSDataOutputStream> removalListener = new RemovalListener<String, FSDataOutputStream>() {
    @Override
    public void onRemoval(RemovalNotification<String, FSDataOutputStream> notification) {
      FSDataOutputStream value = notification.getValue();
      if (value != null) {
        try {
          LOG.debug("closing {}", notification.getKey());
          value.close();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }
  };
  //Define cache
  CacheLoader<String, FSDataOutputStream> loader = new CacheLoader<String, FSDataOutputStream>() {
    @Override
    public FSDataOutputStream load(String filename) {
      String partFileName = getPartFileNamePri(filename);
      Path lfilepath = new Path(filePath + File.separator + partFileName);
      FSDataOutputStream fsOutput;
      if (replication <= 0) {
        replication = fs.getDefaultReplication(lfilepath);
      }
      boolean sawThisFileBefore = endOffsets.containsKey(filename);
      try {
        if (fs.exists(lfilepath)) {
          if (sawThisFileBefore || append) {
            FileStatus fileStatus = fs.getFileStatus(lfilepath);
            MutableLong endOffset = endOffsets.get(filename);
            if (endOffset != null) {
              endOffset.setValue(fileStatus.getLen());
            } else {
              endOffsets.put(filename, new MutableLong(fileStatus.getLen()));
            }
            fsOutput = fs.append(lfilepath);
            LOG.debug("appending to {}", lfilepath);
          }
          //We never saw this file before and we don't want to append
          else {
            //If the file is rolling we need to delete all its parts.
            if (rollingFile) {
              int part = 0;
              while (true) {
                Path seenPartFilePath = new Path(filePath + "/" + getPartFileName(filename, part));
                if (!fs.exists(seenPartFilePath)) {
                  break;
                }
                fs.delete(seenPartFilePath, true);
                part = part + 1;
              }
              fsOutput = fs.create(lfilepath, (short) replication);
            }
            //Not rolling is easy, just delete the file and create it again.
            else {
              fs.delete(lfilepath, true);
              fsOutput = fs.create(lfilepath, (short) replication);
            }
          }
        } else {
          fsOutput = fs.create(lfilepath, (short) replication);
        }
        //Get the end offset of the file.
        LOG.debug("full path: {}", fs.getFileStatus(lfilepath).getPath());
        return fsOutput;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  };
  streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles).removalListener(removalListener)
      .build(loader);
  try {
    LOG.debug("File system class: {}", fs.getClass());
    LOG.debug("end-offsets {}", endOffsets);
    //Restore the files in case they were corrupted and the operator was restarted.
    Path writerPath = new Path(filePath);
    if (fs.exists(writerPath)) {
      for (String seenFileName : endOffsets.keySet()) {
        String seenFileNamePart = getPartFileNamePri(seenFileName);
        LOG.debug("seenFileNamePart: {}", seenFileNamePart);
        Path seenPartFilePath = new Path(filePath + "/" + seenFileNamePart);
        if (fs.exists(seenPartFilePath)) {
          LOG.debug("file exists {}", seenFileNamePart);
          long offset = endOffsets.get(seenFileName).longValue();
          FSDataInputStream inputStream = fs.open(seenPartFilePath);
          FileStatus status = fs.getFileStatus(seenPartFilePath);
          if (status.getLen() != offset) {
            LOG.info("file corrupted {} {} {}", seenFileNamePart, offset, status.getLen());
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            String tmpFileName = seenFileNamePart + TMP_EXTENSION;
            FSDataOutputStream fsOutput = streamsCache.get(tmpFileName);
            while (inputStream.getPos() < offset) {
              long remainingBytes = offset - inputStream.getPos();
              int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? (int) remainingBytes : COPY_BUFFER_SIZE;
              inputStream.read(buffer);
              fsOutput.write(buffer, 0, bytesToWrite);
            }
            flush(fsOutput);
            FileContext fileContext = FileContext.getFileContext(fs.getUri());
            String tempTmpFilePath = getPartFileNamePri(filePath + File.separator + tmpFileName);
            Path tmpFilePath = new Path(tempTmpFilePath);
            tmpFilePath = fs.getFileStatus(tmpFilePath).getPath();
            LOG.debug("temp file path {}, rolling file path {}", tmpFilePath.toString(),
                status.getPath().toString());
            fileContext.rename(tmpFilePath, status.getPath(), Options.Rename.OVERWRITE);
          }
        }
      }
    }
    //delete the left over future rolling files produced from the previous crashed instance
    //of this operator.
    if (rollingFile) {
      for (String seenFileName : endOffsets.keySet()) {
        try {
          Integer part = openPart.get(seenFileName).getValue() + 1;
          while (true) {
            Path seenPartFilePath = new Path(filePath + "/" + getPartFileName(seenFileName, part));
            if (!fs.exists(seenPartFilePath)) {
              break;
            }
            fs.delete(seenPartFilePath, true);
            part = part + 1;
          }
          Path seenPartFilePath = new Path(filePath + "/"
              + getPartFileName(seenFileName, openPart.get(seenFileName).intValue()));
          //Handle the case when restoring to a checkpoint where the current rolling file
          //already has a length greater than max length.
          if (fs.getFileStatus(seenPartFilePath).getLen() > maxLength) {
            LOG.debug("rotating file at setup.");
            rotate(seenFileName);
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        } catch (ExecutionException e) {
          throw new RuntimeException(e);
        }
      }
    }
    LOG.debug("setup completed");
    LOG.debug("end-offsets {}", endOffsets);
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (ExecutionException e) {
    throw new RuntimeException(e);
  }
  this.context = context;
  lastTimeStamp = System.currentTimeMillis();
  fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
  fileCounters.setCounter(Counters.TOTAL_TIME_ELAPSED, new MutableLong());
}
From source file:com.hadoop.compression.lzo.LzoIndex.java
License:Open Source License
/**
 * Index an lzo file to allow the input format to split it into separate map jobs.
 *
 * @param fs File system that contains the file.
 * @param lzoFile the lzo file to index. For filename.lzo, the created index file will be
 *     filename.lzo.index.
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {
  Configuration conf = fs.getConf();
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(lzoFile);
  if (null == codec) {
    throw new IOException("Could not find codec for file " + lzoFile
        + " - you may need to add the LZO codec to your io.compression.codecs "
        + "configuration in core-site.xml");
  }
  ((Configurable) codec).setConf(conf);
  FSDataInputStream is = null;
  FSDataOutputStream os = null;
  Path outputFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
  Path tmpOutputFile = lzoFile.suffix(LZO_TMP_INDEX_SUFFIX);
  // Track whether an exception was thrown or not, so we know to either
  // delete the tmp index file on failure, or rename it to the new index file on success.
  boolean indexingSucceeded = false;
  try {
    is = fs.open(lzoFile);
    os = fs.create(tmpOutputFile);
    LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
    // Solely for reading the header
    codec.createInputStream(is, decompressor);
    int numCompressedChecksums = decompressor.getCompressedChecksumsCount();
    int numDecompressedChecksums = decompressor.getDecompressedChecksumsCount();
    while (true) {
      // read and ignore, we just want to get to the next int
      int uncompressedBlockSize = is.readInt();
      if (uncompressedBlockSize == 0) {
        break;
      } else if (uncompressedBlockSize < 0) {
        throw new EOFException();
      }
      int compressedBlockSize = is.readInt();
      if (compressedBlockSize <= 0) {
        throw new IOException("Could not read compressed block size");
      }
      // See LzopInputStream.getCompressedData
      boolean isUncompressedBlock = (uncompressedBlockSize == compressedBlockSize);
      int numChecksumsToSkip = isUncompressedBlock
          ? numDecompressedChecksums
          : numDecompressedChecksums + numCompressedChecksums;
      long pos = is.getPos();
      // write the pos of the block start
      os.writeLong(pos - 8);
      // seek to the start of the next block, skip any checksums
      is.seek(pos + compressedBlockSize + (4 * numChecksumsToSkip));
    }
    // If we're here, indexing was successful.
    indexingSucceeded = true;
  } finally {
    // Close any open streams.
    if (is != null) {
      is.close();
    }
    if (os != null) {
      os.close();
    }
    if (!indexingSucceeded) {
      // If indexing didn't succeed (i.e. an exception was thrown), clean up after ourselves.
      fs.delete(tmpOutputFile, false);
    } else {
      // Otherwise, rename filename.lzo.index.tmp to filename.lzo.index.
      fs.rename(tmpOutputFile, outputFile);
    }
  }
}
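For reference, a hedged sketch of how this indexer might be invoked. It assumes hadoop-lzo is on the classpath; the HDFS path /data/filename.lzo is hypothetical.

// Hypothetical invocation of LzoIndex.createIndex from the example above:
FileSystem fs = FileSystem.get(new Configuration());
LzoIndex.createIndex(fs, new Path("/data/filename.lzo"));
// On success, the index is written to /data/filename.lzo.index;
// on failure, the temporary filename.lzo.index.tmp file is deleted.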