Example usage for org.apache.hadoop.fs FSDataInputStream getPos

Introduction

This page collects usage examples for org.apache.hadoop.fs.FSDataInputStream#getPos, drawn from a range of open-source projects.

Prototype

@Override
public long getPos() throws IOException;

Document

Get the current position in the input stream.
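
Before the project examples below, here is a minimal, self-contained sketch of the call in isolation. The path is hypothetical (any readable file longer than 42 bytes on the default file system would do); it simply shows that getPos() starts at zero, advances with each stateful read, and jumps to the target of a seek().

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPosDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path path = new Path("/tmp/getpos-demo.txt"); // hypothetical input file
        try (FSDataInputStream in = fs.open(path)) {
            System.out.println(in.getPos()); // 0: a fresh stream starts at the beginning
            in.read();                       // consume one byte
            System.out.println(in.getPos()); // 1
            in.seek(42);                     // assumes the file is longer than 42 bytes
            System.out.println(in.getPos()); // 42
        }
    }
}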

Usage

From source file: co.cask.tigon.logging.LogFileReader.java

License: Apache License

private long determineTrueFileSize(Path path, FileStatus status) throws IOException {
    FSDataInputStream stream = fileSystem.open(path);
    try {
        stream.seek(status.getLen());
        // we need to read repeatedly until we reach the end of the file
        byte[] buffer = new byte[1024 * 1024];
        while (stream.read(buffer, 0, buffer.length) >= 0) {
            // empty body.
        }
        long trueSize = stream.getPos();
        return trueSize;
    } finally {
        stream.close();
    }
}
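
A note on the pattern: for a file that is still open for writing, FileStatus.getLen() may lag behind data already flushed into the last block (for example after an hflush() on HDFS), so the method seeks to the reported length, drains the stream, and lets getPos() reveal how many bytes are actually readable.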

From source file: com.alectenharmsel.research.WholeBlockRecordReader.java

License: Apache License

public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!processed) {
        System.err.println("start is " + start);
        Path file = fileSplit.getPath();
        String tmp = file.toString();
        System.err.println("File: " + tmp);
        currKey.set(tmp);
        System.err.println("Reached this point");
        FileSystem fs = file.getFileSystem(conf);
        System.err.println("fs blocksize: " + fs.getDefaultBlockSize(file));
        System.err.println("linecount blocksize: " + blockSize);
        byte[] contents;
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            System.err.println("getPos(): " + in.getPos());

            if ((start + blockSize) > fileLength) {
                blockSize = (int) (fileLength - start);
                processed = true;
            }

            contents = new byte[blockSize];

            //IOUtils.readFully(in, contents, start, blockSize);
            //IOUtils.readFully(in, contents, 0, blockSize);
            in.readFully(start, contents);

            start += blockSize;

            currValue.set(contents);
        } finally {
            IOUtils.closeStream(in);
        }
        return true;
    }
    return false;
}
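
Note that readFully(long position, byte[] buffer) is the positioned-read variant: it copies bytes from the given offset without moving the stream's own cursor, so the getPos() value printed above remains 0 regardless of start. Only stateful calls such as read() and seek() advance the position that getPos() reports.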

From source file: com.aliyun.fs.oss.contract.TestAliyunOSSContractSeek.java

License: Apache License

@Test
public void testSeekBeyondDownloadSize() throws Throwable {
    describe("seek and read beyond download size.");

    Path byteFile = path("byte_file.txt");
    // 'fs.oss.multipart.download.size' = 100 * 1024
    byte[] block = dataset(100 * 1024 + 10, 0, 255);
    FileSystem fs = getFileSystem();
    createFile(fs, byteFile, true, block);

    FSDataInputStream instream = getFileSystem().open(byteFile);
    instream.seek(100 * 1024 - 1);
    assertEquals(100 * 1024 - 1, instream.getPos());
    assertEquals(144, instream.read());
    instream.seek(100 * 1024 + 1);
    assertEquals(100 * 1024 + 1, instream.getPos());
    assertEquals(146, instream.read());
}
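
The expected byte values follow from how ContractTestUtils.dataset(len, base, modulo) fills its buffer: byte i is (base + i % modulo). With base 0 and modulo 255, offset 100 * 1024 - 1 = 102399 holds 102399 % 255 = 144, and offset 102401 holds 146, which is exactly what the assertions check after each seek()/getPos() pair.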

From source file: com.aliyun.fs.oss.TestAliyunOSSInputStream.java

License: Apache License

@Test
public void testSeekFile() throws Exception {
    Path smallSeekFile = setPath("/test/smallSeekFile.txt");
    long size = 5 * 1024 * 1024;

    ContractTestUtils.generateTestFile(this.fs, smallSeekFile, size, 256, 255);
    LOG.info("5MB file created: smallSeekFile.txt");

    FSDataInputStream instream = this.fs.open(smallSeekFile);
    int seekTimes = 5;
    LOG.info("multiple fold position seeking test...:");
    for (int i = 0; i < seekTimes; i++) {
        long pos = size / (seekTimes - i) - 1;
        LOG.info("begin seeking for pos: " + pos);
        instream.seek(pos);
        assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(), instream.getPos() == pos);
        LOG.info("completed seeking at pos: " + instream.getPos());
    }
    LOG.info("random position seeking test...:");
    Random rand = new Random();
    for (int i = 0; i < seekTimes; i++) {
        long pos = Math.abs(rand.nextLong()) % size;
        LOG.info("begin seeking for pos: " + pos);
        instream.seek(pos);
        assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(), instream.getPos() == pos);
        LOG.info("completed seeking at pos: " + instream.getPos());
    }
    IOUtils.closeStream(instream);
}
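
The contract exercised here is that getPos() must reflect the requested offset immediately after seek() returns, even on object-store streams that seek lazily and only reposition the underlying connection on the next read.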

From source file: com.cloudera.crunch.io.text.CBZip2InputStream.java

License: Apache License

public CBZip2InputStream(FSDataInputStream zStream, int blockSize, long end) throws IOException {
    endOffsetOfSplit = end;
    // initialize retPos to the beginning of the current InputSplit
    // see comments in getPos() to understand how this is used.
    retPos = zStream.getPos();
    ll8 = null;
    tt = null;
    checkComputedCombinedCRC = blockSize == -1;
    bsSetStream(zStream);
    initialize(blockSize);
    initBlock(blockSize != -1);
    setupBlock();
}

From source file: com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.READHandler.java

License: Apache License

@Override
protected READResponse doHandle(NFS4Handler server, Session session, READRequest request)
        throws NFS4Exception, IOException {
    if (session.getCurrentFileHandle() == null) {
        throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
    }
    int size = Math.min(request.getCount(), NFS4_MAX_RWSIZE);
    if (size < 0) {
        throw new NFS4Exception(NFS4ERR_INVAL);
    }
    FileHandle fileHandle = session.getCurrentFileHandle();
    Path path = server.getPath(fileHandle);
    FileSystem fs = session.getFileSystem();
    FSDataInputStream inputStream = server.forRead(request.getStateID(), fs, fileHandle);
    synchronized (inputStream) {
        if (inputStream.getPos() != request.getOffset()) {
            try {
                inputStream.seek(request.getOffset());
            } catch (IOException e) {
                throw new IOException(e.getMessage() + ": " + inputStream.getPos() + ", " + request.getOffset(),
                        e);
            }
            server.incrementMetric("NFS_RANDOM_READS", 1);
        }
        READResponse response = createResponse();
        byte[] data = new byte[size];
        int count = inputStream.read(data);
        long fileLength = -1;
        if (count > 0 && count != data.length
                && (request.getOffset() + count) < (fileLength = fs.getFileStatus(path).getLen())) {
            LOGGER.info("Short read " + path + " at pos = " + request.getOffset() + ", wanted " + data.length
                    + " and read " + count + ", fileLength = " + fileLength);
            server.incrementMetric("NFS_SHORT_READS", 1);
        }
        boolean eof = count < 0;
        if (eof) {
            data = new byte[0];
            count = 0;
        }
        server.incrementMetric("HDFS_BYTES_READ", count);
        response.setData(data, 0, count);
        response.setEOF(eof);
        response.setStatus(NFS4_OK);
        return response;
    }
}
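
Because the stream position is shared state across NFS requests, the handler synchronizes on the stream and compares getPos() with the requested offset before seeking; back-to-back sequential reads therefore skip the seek entirely, and only genuine repositioning is counted in the NFS_RANDOM_READS metric.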

From source file: com.datatorrent.lib.io.AbstractHDFSInputOperator.java

License: Open Source License

@Override
public long getFilePointer(FSDataInputStream stream) {
    try {
        return stream.getPos();
    } catch (IOException ex) {
        throw new RuntimeException(ex.getCause());
    }
}
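
One caveat in this wrapper: IOException.getCause() is frequently null, in which case the RuntimeException carries neither a message nor the original stack trace. Wrapping the exception itself, throw new RuntimeException(ex), would preserve the full cause chain.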

From source file: com.datatorrent.lib.io.fs.AbstractFileOutputOperator.java

License: Open Source License

@Override
public void setup(Context.OperatorContext context) {
    LOG.debug("setup initiated");
    rollingFile = maxLength < Long.MAX_VALUE;

    //Getting required file system instance.
    try {
        fs = getFSInstance();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    if (replication <= 0) {
        replication = fs.getDefaultReplication(new Path(filePath));
    }

    LOG.debug("FS class {}", fs.getClass());

    //When an entry is removed from the cache, removal listener is notified and it closes the output stream.
    RemovalListener<String, FSDataOutputStream> removalListener = new RemovalListener<String, FSDataOutputStream>() {
        @Override
        public void onRemoval(RemovalNotification<String, FSDataOutputStream> notification) {
            FSDataOutputStream value = notification.getValue();
            if (value != null) {
                try {
                    LOG.debug("closing {}", notification.getKey());
                    value.close();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    };

    //Define cache
    CacheLoader<String, FSDataOutputStream> loader = new CacheLoader<String, FSDataOutputStream>() {
        @Override
        public FSDataOutputStream load(String filename) {
            String partFileName = getPartFileNamePri(filename);
            Path lfilepath = new Path(filePath + Path.SEPARATOR + partFileName);

            FSDataOutputStream fsOutput;

            boolean sawThisFileBefore = endOffsets.containsKey(filename);

            try {
                if (fs.exists(lfilepath)) {
                    if (sawThisFileBefore) {
                        FileStatus fileStatus = fs.getFileStatus(lfilepath);
                        MutableLong endOffset = endOffsets.get(filename);

                        if (endOffset != null) {
                            endOffset.setValue(fileStatus.getLen());
                        } else {
                            endOffsets.put(filename, new MutableLong(fileStatus.getLen()));
                        }

                        fsOutput = fs.append(lfilepath);
                        LOG.debug("appending to {}", lfilepath);
                    }
                    //We never saw this file before and we don't want to append
                    else {
                        //If the file is rolling we need to delete all its parts.
                        if (rollingFile) {
                            int part = 0;

                            while (true) {
                                Path seenPartFilePath = new Path(
                                        filePath + Path.SEPARATOR + getPartFileName(filename, part));
                                if (!fs.exists(seenPartFilePath)) {
                                    break;
                                }

                                fs.delete(seenPartFilePath, true);
                                part = part + 1;
                            }

                            fsOutput = fs.create(lfilepath, (short) replication);
                        }
                        //Not rolling is easy, just delete the file and create it again.
                        else {
                            fs.delete(lfilepath, true);
                            fsOutput = fs.create(lfilepath, (short) replication);
                        }
                    }
                } else {
                    fsOutput = fs.create(lfilepath, (short) replication);
                }

                //Get the end offset of the file.

                LOG.info("opened: {}", fs.getFileStatus(lfilepath).getPath());
                return fsOutput;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };

    streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles).removalListener(removalListener)
            .build(loader);

    try {
        LOG.debug("File system class: {}", fs.getClass());
        LOG.debug("end-offsets {}", endOffsets);

        //Restore the files in case they were corrupted and the operator was restarted.
        Path writerPath = new Path(filePath);
        if (fs.exists(writerPath)) {
            for (String seenFileName : endOffsets.keySet()) {
                String seenFileNamePart = getPartFileNamePri(seenFileName);
                LOG.debug("seenFileNamePart: {}", seenFileNamePart);
                Path seenPartFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart);
                if (fs.exists(seenPartFilePath)) {
                    LOG.debug("file exists {}", seenFileNamePart);
                    long offset = endOffsets.get(seenFileName).longValue();
                    FSDataInputStream inputStream = fs.open(seenPartFilePath);
                    FileStatus status = fs.getFileStatus(seenPartFilePath);

                    if (status.getLen() != offset) {
                        LOG.info("file corrupted {} {} {}", seenFileNamePart, offset, status.getLen());
                        byte[] buffer = new byte[COPY_BUFFER_SIZE];

                        Path tmpFilePath = new Path(
                                filePath + Path.SEPARATOR + seenFileNamePart + TMP_EXTENSION);
                        FSDataOutputStream fsOutput = fs.create(tmpFilePath, (short) replication);
                        while (inputStream.getPos() < offset) {
                            long remainingBytes = offset - inputStream.getPos();
                            int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? (int) remainingBytes
                                    : COPY_BUFFER_SIZE;
                            // read() may return fewer bytes than requested, so copy only what was read
                            int bytesRead = inputStream.read(buffer, 0, bytesToWrite);
                            if (bytesRead < 0) {
                                break;
                            }
                            fsOutput.write(buffer, 0, bytesRead);
                        }

                        flush(fsOutput);
                        fsOutput.close();
                        inputStream.close();

                        FileContext fileContext = FileContext.getFileContext(fs.getUri());
                        LOG.debug("temp file path {}, rolling file path {}", tmpFilePath.toString(),
                                status.getPath().toString());
                        fileContext.rename(tmpFilePath, status.getPath(), Options.Rename.OVERWRITE);
                    } else {
                        inputStream.close();
                    }
                }
            }
        }

        //delete the left over future rolling files produced from the previous crashed instance
        //of this operator.
        if (rollingFile) {
            for (String seenFileName : endOffsets.keySet()) {
                try {
                    Integer part = openPart.get(seenFileName).getValue() + 1;

                    while (true) {
                        Path seenPartFilePath = new Path(
                                filePath + Path.SEPARATOR + getPartFileName(seenFileName, part));
                        if (!fs.exists(seenPartFilePath)) {
                            break;
                        }

                        fs.delete(seenPartFilePath, true);
                        part = part + 1;
                    }

                    Path seenPartFilePath = new Path(filePath + Path.SEPARATOR
                            + getPartFileName(seenFileName, openPart.get(seenFileName).intValue()));

                    //Handle the case when restoring to a checkpoint where the current rolling file
                    //already has a length greater than max length.
                    if (fs.getFileStatus(seenPartFilePath).getLen() > maxLength) {
                        LOG.debug("rotating file at setup.");
                        rotate(seenFileName);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } catch (ExecutionException e) {
                    throw new RuntimeException(e);
                }
            }
        }

        LOG.debug("setup completed");
        LOG.debug("end-offsets {}", endOffsets);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    this.context = context;
    lastTimeStamp = System.currentTimeMillis();

    fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
    fileCounters.setCounter(Counters.TOTAL_TIME_ELAPSED, new MutableLong());
}
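
The copy-and-rename dance in the recovery path is effectively a truncate for file systems that lack one (HDFS only gained truncate in 2.7): to roll a part file back to a known end offset, the operator copies the first offset bytes to a temporary file, using getPos() as the loop cursor, and then renames the copy over the original.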

From source file: com.datatorrent.lib.io.fs.AbstractFSWriter.java

License: Open Source License

@Override
public void setup(Context.OperatorContext context) {
    rollingFile = maxLength < Long.MAX_VALUE;

    //Getting required file system instance.
    try {
        fs = getFSInstance();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    LOG.debug("FS class {}", fs.getClass());

    //Setting listener for debugging
    LOG.debug("setup initiated");
    RemovalListener<String, FSDataOutputStream> removalListener = new RemovalListener<String, FSDataOutputStream>() {
        @Override
        public void onRemoval(RemovalNotification<String, FSDataOutputStream> notification) {
            FSDataOutputStream value = notification.getValue();
            if (value != null) {
                try {
                    LOG.debug("closing {}", notification.getKey());
                    value.close();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    };

    //Define cache
    CacheLoader<String, FSDataOutputStream> loader = new CacheLoader<String, FSDataOutputStream>() {
        @Override
        public FSDataOutputStream load(String filename) {
            String partFileName = getPartFileNamePri(filename);
            Path lfilepath = new Path(filePath + File.separator + partFileName);

            FSDataOutputStream fsOutput;
            if (replication <= 0) {
                replication = fs.getDefaultReplication(lfilepath);
            }

            boolean sawThisFileBefore = endOffsets.containsKey(filename);

            try {
                if (fs.exists(lfilepath)) {
                    if (sawThisFileBefore || append) {
                        FileStatus fileStatus = fs.getFileStatus(lfilepath);
                        MutableLong endOffset = endOffsets.get(filename);

                        if (endOffset != null) {
                            endOffset.setValue(fileStatus.getLen());
                        } else {
                            endOffsets.put(filename, new MutableLong(fileStatus.getLen()));
                        }

                        fsOutput = fs.append(lfilepath);
                        LOG.debug("appending to {}", lfilepath);
                    }
                    //We never saw this file before and we don't want to append
                    else {
                        //If the file is rolling we need to delete all its parts.
                        if (rollingFile) {
                            int part = 0;

                            while (true) {
                                Path seenPartFilePath = new Path(
                                        filePath + "/" + getPartFileName(filename, part));
                                if (!fs.exists(seenPartFilePath)) {
                                    break;
                                }

                                fs.delete(seenPartFilePath, true);
                                part = part + 1;
                            }

                            fsOutput = fs.create(lfilepath, (short) replication);
                        }
                        //Not rolling is easy, just delete the file and create it again.
                        else {
                            fs.delete(lfilepath, true);
                            fsOutput = fs.create(lfilepath, (short) replication);
                        }
                    }
                } else {
                    fsOutput = fs.create(lfilepath, (short) replication);
                }

                //Get the end offset of the file.

                LOG.debug("full path: {}", fs.getFileStatus(lfilepath).getPath());
                return fsOutput;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };

    streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles).removalListener(removalListener)
            .build(loader);

    try {
        LOG.debug("File system class: {}", fs.getClass());
        LOG.debug("end-offsets {}", endOffsets);

        //Restore the files in case they were corrupted and the operator was restarted.
        Path writerPath = new Path(filePath);
        if (fs.exists(writerPath)) {
            for (String seenFileName : endOffsets.keySet()) {
                String seenFileNamePart = getPartFileNamePri(seenFileName);
                LOG.debug("seenFileNamePart: {}", seenFileNamePart);
                Path seenPartFilePath = new Path(filePath + "/" + seenFileNamePart);
                if (fs.exists(seenPartFilePath)) {
                    LOG.debug("file exists {}", seenFileNamePart);
                    long offset = endOffsets.get(seenFileName).longValue();
                    FSDataInputStream inputStream = fs.open(seenPartFilePath);
                    FileStatus status = fs.getFileStatus(seenPartFilePath);

                    if (status.getLen() != offset) {
                        LOG.info("file corrupted {} {} {}", seenFileNamePart, offset, status.getLen());
                        byte[] buffer = new byte[COPY_BUFFER_SIZE];

                        String tmpFileName = seenFileNamePart + TMP_EXTENSION;
                        FSDataOutputStream fsOutput = streamsCache.get(tmpFileName);
                        while (inputStream.getPos() < offset) {
                            long remainingBytes = offset - inputStream.getPos();
                            int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? (int) remainingBytes
                                    : COPY_BUFFER_SIZE;
                            // read() may return fewer bytes than requested, so copy only what was read
                            int bytesRead = inputStream.read(buffer, 0, bytesToWrite);
                            if (bytesRead < 0) {
                                break;
                            }
                            fsOutput.write(buffer, 0, bytesRead);
                        }

                        flush(fsOutput);
                        FileContext fileContext = FileContext.getFileContext(fs.getUri());
                        String tempTmpFilePath = getPartFileNamePri(filePath + File.separator + tmpFileName);

                        Path tmpFilePath = new Path(tempTmpFilePath);
                        tmpFilePath = fs.getFileStatus(tmpFilePath).getPath();
                        LOG.debug("temp file path {}, rolling file path {}", tmpFilePath.toString(),
                                status.getPath().toString());
                        fileContext.rename(tmpFilePath, status.getPath(), Options.Rename.OVERWRITE);
                    }
                }
            }
        }

        //delete the left over future rolling files produced from the previous crashed instance
        //of this operator.
        if (rollingFile) {
            for (String seenFileName : endOffsets.keySet()) {
                try {
                    Integer part = openPart.get(seenFileName).getValue() + 1;

                    while (true) {
                        Path seenPartFilePath = new Path(filePath + "/" + getPartFileName(seenFileName, part));
                        if (!fs.exists(seenPartFilePath)) {
                            break;
                        }

                        fs.delete(seenPartFilePath, true);
                        part = part + 1;
                    }

                    Path seenPartFilePath = new Path(filePath + "/"
                            + getPartFileName(seenFileName, openPart.get(seenFileName).intValue()));

                    //Handle the case when restoring to a checkpoint where the current rolling file
                    //already has a length greater than max length.
                    if (fs.getFileStatus(seenPartFilePath).getLen() > maxLength) {
                        LOG.debug("rotating file at setup.");
                        rotate(seenFileName);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } catch (ExecutionException e) {
                    throw new RuntimeException(e);
                }
            }
        }

        LOG.debug("setup completed");
        LOG.debug("end-offsets {}", endOffsets);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        throw new RuntimeException(e);
    }

    this.context = context;
    lastTimeStamp = System.currentTimeMillis();

    fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
    fileCounters.setCounter(Counters.TOTAL_TIME_ELAPSED, new MutableLong());
}

From source file: com.hadoop.compression.lzo.LzoIndex.java

License: Open Source License

/**
 * Index an lzo file to allow the input format to split them into separate map
 * jobs.
 *
 * @param fs File system that contains the file.
 * @param lzoFile the lzo file to index.  For filename.lzo, the created index file will be
 * filename.lzo.index.
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {

    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(lzoFile);
    if (null == codec) {
        throw new IOException("Could not find codec for file " + lzoFile
                + " - you may need to add the LZO codec to your io.compression.codecs "
                + "configuration in core-site.xml");
    }
    ((Configurable) codec).setConf(conf);

    FSDataInputStream is = null;
    FSDataOutputStream os = null;
    Path outputFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
    Path tmpOutputFile = lzoFile.suffix(LZO_TMP_INDEX_SUFFIX);

    // Track whether an exception was thrown or not, so we know to either
    // delete the tmp index file on failure, or rename it to the new index file on success.
    boolean indexingSucceeded = false;
    try {
        is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // Solely for reading the header
        codec.createInputStream(is, decompressor);
        int numCompressedChecksums = decompressor.getCompressedChecksumsCount();
        int numDecompressedChecksums = decompressor.getDecompressedChecksumsCount();

        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            // See LzopInputStream.getCompressedData
            boolean isUncompressedBlock = (uncompressedBlockSize == compressedBlockSize);
            int numChecksumsToSkip = isUncompressedBlock ? numDecompressedChecksums
                    : numDecompressedChecksums + numCompressedChecksums;
            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksumsToSkip));
        }
        // If we're here, indexing was successful.
        indexingSucceeded = true;
    } finally {
        // Close any open streams.
        if (is != null) {
            is.close();
        }

        if (os != null) {
            os.close();
        }

        if (!indexingSucceeded) {
            // If indexing didn't succeed (i.e. an exception was thrown), clean up after ourselves.
            fs.delete(tmpOutputFile, false);
        } else {
            // Otherwise, rename filename.lzo.index.tmp to filename.lzo.index.
            fs.rename(tmpOutputFile, outputFile);
        }
    }
}
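
A note on the offsets: getPos() is sampled after the two readInt() calls, i.e. 8 bytes past the start of the block header, so pos - 8 is the position the index records. Each LZO checksum is a 4-byte value, which is why the final seek skips 4 * numChecksumsToSkip bytes beyond the compressed data.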