Example usage for org.apache.hadoop.fs FSDataInputStream read

List of usage examples for org.apache.hadoop.fs FSDataInputStream read


This page collects usage examples for the positional-read method org.apache.hadoop.fs.FSDataInputStream.read(long, byte[], int, int).


public int read(long position, byte[] buffer, int offset, int length) throws IOException 

Source Link


Read bytes from the given position in the stream to the given buffer.


From source file:com.cloudera.CacheTool.java

License:Apache License

public static void main(String[] args) throws Exception {
    conf = new Configuration();
    conf.addResource(new Path("/home/james/hdfs-conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/james/hdfs-conf/core-site.xml"));
    URI uri = FileSystem.getDefaultUri(conf);
    final FileSystem fs = FileSystem.get(uri, conf);

    for (int i = 0; i < 8000; i += 10) {
        final int i_copy = i;
        pool.submit(new Runnable() {
            public void run() {
                for (int j = 0; j < 10; j++) {
                    try {
                        createFile(fs, new Path("/home/james/large" + (i_copy + j)), 1024 * 1024);
                    } catch (IOException ioe) {
                    }/*from   ww w.  j a v  a  2s. c  o  m*/
    pool.awaitTermination(1, TimeUnit.DAYS);

    long start = Time.monotonicNow();
    Random r = new Random(0);
    for (int i = 0; i < 100; i++) {
        FSDataInputStream fdis = fs.open(new Path("/home/james/large" + r.nextInt(8000)), 512);
        byte[] buffer = new byte[512];

        for (int j = 0; j < 100; j++) {
            int offset = r.nextInt(1024 * 1024 - 511);
            fdis.read(offset, buffer, 0, 512);
    System.out.println("Time taken for 10000 random 512 byte reads: " + (Time.monotonicNow() - start) / 1000.0);


From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemIntegrationHelper.java

License:Open Source License

/**
 * Helper that reads text from the given file at the given offset
 * and returns it. If checkOverflow is true, it will make sure that
 * no more than 'len' bytes were read.
 */
protected String readTextFile(Path hadoopPath, int offset, int len, boolean checkOverflow) throws IOException {
    String text = null;
    FSDataInputStream readStream = null;
    long fileSystemBytesRead = 0;
    FileSystem.Statistics stats = FileSystem.getStatistics(ghfsFileSystemDescriptor.getScheme(),
    if (stats != null) {
        // Let it be null in case no stats have been added for our scheme yet.
        fileSystemBytesRead = stats.getBytesRead();

    try {
        int bufferSize = len;
        bufferSize += checkOverflow ? 1 : 0;
        byte[] readBuffer = new byte[bufferSize];
        readStream = ghfs.open(hadoopPath, GoogleHadoopFileSystemBase.BUFFERSIZE_DEFAULT);
        int numBytesRead;
        if (offset > 0) {
            numBytesRead = readStream.read(offset, readBuffer, 0, bufferSize);
        } else {
            numBytesRead = readStream.read(readBuffer);
        Assert.assertEquals(len, numBytesRead);
        text = new String(readBuffer, 0, numBytesRead, StandardCharsets.UTF_8);
    } finally {
        if (readStream != null) {

    // After the read, the stats better be non-null for our ghfs scheme.
    stats = FileSystem.getStatistics(ghfsFileSystemDescriptor.getScheme(), ghfs.getClass());
    long endFileSystemBytesRead = stats.getBytesRead();
    int bytesReadStats = (int) (endFileSystemBytesRead - fileSystemBytesRead);
    if (statistics == FileSystemStatistics.EXACT) {
        Assert.assertEquals(String.format("FS statistics mismatch fetched from class '%s'", ghfs.getClass()),
                len, bytesReadStats);
    } else if (statistics == FileSystemStatistics.GREATER_OR_EQUAL) {
        Assert.assertTrue(String.format("Expected %d <= %d", len, bytesReadStats), len <= bytesReadStats);
    } else if (statistics == FileSystemStatistics.NONE) {
        Assert.assertEquals("FS statistics expected to be 0", 0, fileSystemBytesRead);
        Assert.assertEquals("FS statistics expected to be 0", 0, endFileSystemBytesRead);
    } else if (statistics == FileSystemStatistics.IGNORE) {
        // NO-OP

    return text;

From source file:com.mellanox.r4h.TestWriteRead.java

License:Apache License

/**
 * Read chunks into the buffer repeatedly until a total of visibleLen bytes are read.
 * Returns the total number of bytes read.
 */
private long readUntilEnd(FSDataInputStream in, byte[] buffer, long size, String fname, long pos,
        long visibleLen, boolean positionReadOption) throws IOException {

    if (pos >= visibleLen || visibleLen <= 0)
        return 0;

    int chunkNumber = 0;
    long totalByteRead = 0;
    long currentPosition = pos;
    int byteRead = 0;
    long byteLeftToRead = visibleLen - pos;
    int byteToReadThisRound = 0;

    if (!positionReadOption) {
        currentPosition = in.getPos();
    if (verboseOption)
        LOG.info("reader begin: position: " + pos + " ; currentOffset = " + currentPosition + " ; bufferSize ="
                + buffer.length + " ; Filename = " + fname);
    try {
        while (byteLeftToRead > 0 && currentPosition < visibleLen) {
            byteToReadThisRound = (int) (byteLeftToRead >= buffer.length ? buffer.length : byteLeftToRead);
            if (positionReadOption) {
                byteRead = in.read(currentPosition, buffer, 0, byteToReadThisRound);
            } else {
                byteRead = in.read(buffer, 0, byteToReadThisRound);
            if (byteRead <= 0)
            totalByteRead += byteRead;
            currentPosition += byteRead;
            byteLeftToRead -= byteRead;

            if (verboseOption) {
                LOG.info("reader: Number of byte read: " + byteRead + " ; totalByteRead = " + totalByteRead
                        + " ; currentPosition=" + currentPosition + " ; chunkNumber =" + chunkNumber
                        + "; File name = " + fname);
    } catch (IOException e) {
        throw new IOException("#### Exception caught in readUntilEnd: reader  currentOffset = "
                + currentPosition + " ; totalByteRead =" + totalByteRead + " ; latest byteRead = " + byteRead
                + "; visibleLen= " + visibleLen + " ; bufferLen = " + buffer.length + " ; Filename = " + fname,

    if (verboseOption)
        LOG.info("reader end:   position: " + pos + " ; currentOffset = " + currentPosition
                + " ; totalByteRead =" + totalByteRead + " ; Filename = " + fname);

    return totalByteRead;

From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormatTest.java

License:Apache License

// Test: has ControlFileInputFormat.writeNewControlFileAndCreateSplits write a new
// control file into a mock file system, then reads each returned split back with a
// positional read and compares its content with lines taken from locationMap.
//
// NOTE(review): this listing appears truncated. The bodies of the anonymous
// ArrayList initializers, most closing braces, and the statements that should
// follow the brace-less `if (++j ...)` lines are not present, so the code below
// does not compile as shown. Confirm against the original source before reuse.
public void testWriteNewControlFileAndCreateSplits() throws IOException {
    MockupFileSystem fs = new MockupFileSystem();
    Path newControlFile = new Path("newControlFile");
    fs.addFile("newControlFile", true, null);
    // Host name -> list of lines; looked up per host below to build the expected text.
    Map<String, ArrayList<String>> locationMap = new HashMap<String, ArrayList<String>>() {
        {
            put("host1", new ArrayList<String>() {
            put("host2", new ArrayList<String>() {
            put("host3", new ArrayList<String>() {
            put("host4", new ArrayList<String>() {
    // NOTE(review): the meaning of the last argument (3) is not visible here —
    // presumably a per-split line limit, given the 3-line groupings checked below; confirm.
    List<FileSplit> splits = ControlFileInputFormat.writeNewControlFileAndCreateSplits(newControlFile, fs,
            locationMap, 3);

    FSDataInputStream bis = fs.open(newControlFile);
    try {
        int i = 0;
        for (FileSplit split : splits) {
            LOG.debug(++i + ".split = " + split.toString());
            byte[] content = new byte[(int) split.getLength()];
            // Positional read of exactly this split's byte range; the returned
            // byte count is not checked.
            bis.read((int) split.getStart(), content, 0, (int) split.getLength());
            // Decodes with the platform default charset.
            String cont = new String(content);
            LOG.debug("  content = " + new String(content));
            // The split is identified by its first line; each branch rebuilds the
            // expected text from the matching host's list.
            if (cont.startsWith("line1-1")) {
                String expected = "";
                for (String line : locationMap.get("host1")) {
                    expected += line + "\n";
                assertEquals(expected, cont);
            } else if (cont.startsWith("line2-1")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host2")) {
                    expected += line + "\n";
                    if (++j == 3)
                assertEquals(expected, cont);
            } else if (cont.startsWith("line2-4")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host2")) {
                    if (++j <= 3)
                    expected += line + "\n";
                assertEquals(expected, cont);
            } else if (cont.startsWith("line3-1")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host3")) {
                    expected += line + "\n";
                    if (++j == 4)
                assertEquals(expected, cont);
            } else if (cont.startsWith("line3-5")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host3")) {
                    if (++j <= 4)
                    expected += line + "\n";
                assertEquals(expected, cont);
            } else if (cont.startsWith("line4-1")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host4")) {
                    expected += line + "\n";
                    if (++j == 3)
                assertEquals(expected, cont);
            } else if (cont.startsWith("line4-4")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host4")) {
                    if (++j <= 3)
                    expected += line + "\n";
                    // NOTE(review): j is pre-incremented twice per iteration in this
                    // branch (here and three lines up) — verify this is intended.
                    if (++j > 7)
                assertEquals(expected, cont);
            } else if (cont.startsWith("line4-7")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host4")) {
                    if (++j <= 6)
                    expected += line + "\n";
                assertEquals(expected, cont);
            } else {
                // Any other leading line means the split boundaries are wrong.
                fail("wrong split");
    // NOTE(review): the body of this finally block is missing from the listing;
    // presumably it closes bis — confirm.
    } finally {

From source file:fi.tkk.ics.hadoop.bam.FastaInputFormat.java

License:Open Source License

public List<InputSplit> getSplits(JobContext job) throws IOException {

    // Note: We generate splits that correspond to different sections in the FASTA
    // input (which here are called "chromosomes", delimited by '>' and
    // followed by a single line description.
    // Some locality is preserved since the locations are formed from the input
    // splits, although no special attention is given to this issues (FASTA files
    // are assumed to be smallish).
    // The splits are generated on the client. In the future the split generation
    // should be only performed once and an index file stored inside HDFS for
    // peformance reasons. Currently this is not attempted (again: FASTA files
    // aren't all that big).

    // we first make sure we are given only a single file

    List<InputSplit> splits = super.getSplits(job);

    // first sort by input path
    Collections.sort(splits, new Comparator<InputSplit>() {
        public int compare(InputSplit a, InputSplit b) {
            FileSplit fa = (FileSplit) a, fb = (FileSplit) b;
            return fa.getPath().compareTo(fb.getPath());
        }// ww w .j a v a2s .c  om

    for (int i = 0; i < splits.size() - 1;) {
        FileSplit fa = (FileSplit) splits.get(i);
        FileSplit fb = (FileSplit) splits.get(i + 1);

        if (fa.getPath().compareTo(fb.getPath()) != 0)
            throw new IOException("FastaInputFormat assumes single FASTA input file!");

    // now we are sure we only have one FASTA input file

    final List<InputSplit> newSplits = new ArrayList<InputSplit>(splits.size());
    FileSplit fileSplit = (FileSplit) splits.get(0);
    Path path = fileSplit.getPath();

    FileSystem fs = path.getFileSystem(ContextUtil.getConfiguration(job));
    FSDataInputStream fis = fs.open(path);
    byte[] buffer = new byte[1024];

    long byte_counter = 0;
    long prev_chromosome_byte_offset = 0;
    boolean first_chromosome = true;

    for (int j = 0; j < splits.size(); j++) {
        FileSplit origsplit = (FileSplit) splits.get(j);

        while (byte_counter < origsplit.getStart() + origsplit.getLength()) {
            long bytes_read = fis.read(byte_counter, buffer, 0,
                    (int) Math.min(buffer.length, origsplit.getStart() + origsplit.getLength() - byte_counter));
            //System.err.println("bytes_read: "+Integer.toString((int)bytes_read)+" of "+Integer.toString(splits.size())+" splits");
            if (bytes_read > 0) {
                for (int i = 0; i < bytes_read; i++) {
                    if (buffer[i] == (byte) '>') {
                        //System.err.println("found chromosome at position "+Integer.toString((int)byte_counter+i));

                        if (!first_chromosome) {
                            FileSplit fsplit = new FileSplit(path, prev_chromosome_byte_offset,
                                    byte_counter + i - 1 - prev_chromosome_byte_offset,
                            //System.err.println("adding split: start: "+Integer.toString((int)fsplit.getStart())+" length: "+Integer.toString((int)fsplit.getLength()));

                        first_chromosome = false;
                        prev_chromosome_byte_offset = byte_counter + i;
                byte_counter += bytes_read;

        if (j == splits.size() - 1) {
            FileSplit fsplit = new FileSplit(path, prev_chromosome_byte_offset,
                    byte_counter - prev_chromosome_byte_offset, origsplit.getLocations());
            newSplits.add(fsplit); //conf));
            //System.err.println("adding split: "+fsplit.toString());

    return newSplits;

From source file:fuse4j.hadoopfs.HdfsClientImpl.java

License:Apache License

/**
 * read()
 */
public boolean read(int uid, Object hdfsFile, ByteBuffer buf, long offset) {
    HdfsFileIoContext file = (HdfsFileIoContext) hdfsFile;

    if (!(file.getIoStream() instanceof FSDataInputStream)) {
        return false;

    FSDataInputStream input = (FSDataInputStream) file.getIoStream();

    byte[] readBuf = new byte[buf.capacity()];

    int bytesRead = 0;
    try {
        bytesRead = input.read(offset, readBuf, 0, readBuf.length);
    } catch (IOException ioe) {
        return false;

    // otherwise return how much we read
    // TODO: does this handle 0 bytes?
    if (bytesRead > 0)
        buf.put(readBuf, 0, bytesRead);
    return true;

From source file:fuse4j.hadoopfs.HdfsClientReal.java

License:Apache License

/**
 * read()
 */
public boolean read(Object hdfsFile, ByteBuffer buf, long offset) {
    HdfsFileIoContext file = (HdfsFileIoContext) hdfsFile;

    if (!(file.ioStream instanceof FSDataInputStream)) {
        return false;

    FSDataInputStream input = (FSDataInputStream) file.ioStream;

    byte[] readBuf = new byte[buf.capacity()];

    int bytesRead = 0;
    try {
        bytesRead = input.read(offset, readBuf, 0, readBuf.length);
    } catch (IOException ioe) {
        return false;

    // otherwise return how much we read
    // TODO: does this handle 0 bytes?
    buf.put(readBuf, 0, bytesRead);
    return true;

From source file:iumfs.hdfs.HdfsFile.java

License:Apache License

public long read(ByteBuffer buf, long size, long offset)
        throws FileNotFoundException, IOException, NotSupportedException {
    int ret;/* w  ww .  j av  a2s . c  om*/

    FSDataInputStream fsdis = fs.open(new Path(getPath()));
    ret = fsdis.read(offset, buf.array(), Request.RESPONSE_HEADER_SIZE, (int) size);
    logger.fine("read offset=" + offset + ",size=" + size);
    return ret;

From source file:org.apache.blur.shell.DiscoverFileBufferSizeUtil.java

License:Apache License

private static long readFile(PrintWriter out, Random random, int bufSize, FSDataInputStream inputStream,
        long length, int readSamples) throws IOException {
    byte[] buf = new byte[bufSize];
    long start = System.nanoTime();
    long time = 0;
    for (int i = 0; i < readSamples; i++) {
        long now = System.nanoTime();
        if (start + 5000000000l < now) {
            double complete = (((double) i / (double) readSamples) * 100.0);
            out.println(complete + "% Complete");
            out.flush();/*from  w w  w .ja v  a2s  . c  o m*/
            start = System.nanoTime();
        long position = getPosition(bufSize, random, length);
        long s = System.nanoTime();
        int offset = 0;
        int len = bufSize;
        while (len > 0) {
            int amount = inputStream.read(position, buf, offset, len);
            len -= amount;
            offset += amount;
            position += amount;
        long e = System.nanoTime();
        time += (e - s);
        length -= len;
    return time;

From source file:org.apache.drill.exec.store.parquet.FooterGatherer.java

License:Apache License

private static final void readFully(FSDataInputStream stream, long start, byte[] output, int offset, int len)
        throws IOException {
    int bytesRead = 0;
    while (bytesRead > -1 && bytesRead < len) {
        bytesRead += stream.read(start + bytesRead, output, offset + bytesRead, len - bytesRead);
    }/*  w  w w  .  j a  v  a 2 s.  c  o  m*/