List of usage examples for org.apache.hadoop.fs FSDataInputStream read
@Override public int read(long position, byte[] buffer, int offset, int length) throws IOException
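This is the positional-read ("pread") overload from Hadoop's PositionedReadable contract: it reads up to length bytes starting at position without moving the stream's current offset, returns the number of bytes actually read (which may be fewer than requested), and returns -1 at end of stream. A minimal sketch of a call site, before the real-world examples below (the path and offset here are invented for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PositionalReadDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        try (FSDataInputStream in = fs.open(new Path("/tmp/example.dat"))) {
            byte[] buffer = new byte[512];
            // pread: reads up to 512 bytes at offset 4096 without changing
            // the stream's current position; returns -1 at end of stream.
            int bytesRead = in.read(4096L, buffer, 0, buffer.length);
            System.out.println("bytes read: " + bytesRead);
        }
    }
}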
From source file:com.cloudera.CacheTool.java
License:Apache License
public static void main(String[] args) throws Exception {
    // 'conf' and 'pool' (a thread pool), plus the createFile() helper,
    // are static members of CacheTool not shown in this excerpt.
    conf = new Configuration();
    conf.addResource(new Path("/home/james/hdfs-conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/james/hdfs-conf/core-site.xml"));
    URI uri = FileSystem.getDefaultUri(conf);
    final FileSystem fs = FileSystem.get(uri, conf);

    // Create 8000 files of 1 MB each, 10 files per submitted task.
    for (int i = 0; i < 8000; i += 10) {
        final int i_copy = i;
        pool.submit(new Runnable() {
            public void run() {
                for (int j = 0; j < 10; j++) {
                    try {
                        createFile(fs, new Path("/home/james/large" + (i_copy + j)), 1024 * 1024);
                    } catch (IOException ioe) {
                        System.out.println(ioe);
                    }
                }
            }
        });
    }
    pool.shutdown();
    pool.awaitTermination(1, TimeUnit.DAYS);

    // Time 100 files x 100 positional reads = 10000 random 512-byte reads.
    long start = Time.monotonicNow();
    Random r = new Random(0);
    for (int i = 0; i < 100; i++) {
        FSDataInputStream fdis = fs.open(new Path("/home/james/large" + r.nextInt(8000)), 512);
        byte[] buffer = new byte[512];
        for (int j = 0; j < 100; j++) {
            int offset = r.nextInt(1024 * 1024 - 511);
            fdis.read(offset, buffer, 0, 512);
        }
    }
    System.out.println("Time taken for 10000 random 512 byte reads: "
            + (Time.monotonicNow() - start) / 1000.0);
}
From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemIntegrationHelper.java
License:Open Source License
/**
 * Helper that reads text from the given file at the given offset and returns it.
 * If checkOverflow is true, it makes sure that no more than 'len' bytes were read.
 */
protected String readTextFile(Path hadoopPath, int offset, int len, boolean checkOverflow) throws IOException {
    String text = null;
    FSDataInputStream readStream = null;
    long fileSystemBytesRead = 0;
    FileSystem.Statistics stats = FileSystem.getStatistics(ghfsFileSystemDescriptor.getScheme(), ghfs.getClass());
    if (stats != null) {
        // Let it be null in case no stats have been added for our scheme yet.
        fileSystemBytesRead = stats.getBytesRead();
    }
    try {
        int bufferSize = len;
        bufferSize += checkOverflow ? 1 : 0; // one extra byte catches any read past 'len'
        byte[] readBuffer = new byte[bufferSize];
        readStream = ghfs.open(hadoopPath, GoogleHadoopFileSystemBase.BUFFERSIZE_DEFAULT);
        int numBytesRead;
        if (offset > 0) {
            numBytesRead = readStream.read(offset, readBuffer, 0, bufferSize);
        } else {
            numBytesRead = readStream.read(readBuffer);
        }
        Assert.assertEquals(len, numBytesRead);
        text = new String(readBuffer, 0, numBytesRead, StandardCharsets.UTF_8);
    } finally {
        if (readStream != null) {
            readStream.close();
        }
    }
    // After the read, the stats better be non-null for our ghfs scheme.
    stats = FileSystem.getStatistics(ghfsFileSystemDescriptor.getScheme(), ghfs.getClass());
    Assert.assertNotNull(stats);
    long endFileSystemBytesRead = stats.getBytesRead();
    int bytesReadStats = (int) (endFileSystemBytesRead - fileSystemBytesRead);
    if (statistics == FileSystemStatistics.EXACT) {
        Assert.assertEquals(String.format("FS statistics mismatch fetched from class '%s'", ghfs.getClass()),
                len, bytesReadStats);
    } else if (statistics == FileSystemStatistics.GREATER_OR_EQUAL) {
        Assert.assertTrue(String.format("Expected %d <= %d", len, bytesReadStats), len <= bytesReadStats);
    } else if (statistics == FileSystemStatistics.NONE) {
        Assert.assertEquals("FS statistics expected to be 0", 0, fileSystemBytesRead);
        Assert.assertEquals("FS statistics expected to be 0", 0, endFileSystemBytesRead);
    } else if (statistics == FileSystemStatistics.IGNORE) {
        // NO-OP
    }
    return text;
}
From source file:com.mellanox.r4h.TestWriteRead.java
License:Apache License
/**
 * Read chunks into the buffer repeatedly until a total of visibleLen bytes have been read.
 * Returns the total number of bytes read.
 */
private long readUntilEnd(FSDataInputStream in, byte[] buffer, long size, String fname, long pos,
        long visibleLen, boolean positionReadOption) throws IOException {
    if (pos >= visibleLen || visibleLen <= 0)
        return 0;

    int chunkNumber = 0;
    long totalByteRead = 0;
    long currentPosition = pos;
    int byteRead = 0;
    long byteLeftToRead = visibleLen - pos;
    int byteToReadThisRound = 0;

    if (!positionReadOption) {
        in.seek(pos);
        currentPosition = in.getPos();
    }
    if (verboseOption)
        LOG.info("reader begin: position: " + pos + " ; currentOffset = " + currentPosition
                + " ; bufferSize = " + buffer.length + " ; Filename = " + fname);
    try {
        while (byteLeftToRead > 0 && currentPosition < visibleLen) {
            byteToReadThisRound = (int) (byteLeftToRead >= buffer.length ? buffer.length : byteLeftToRead);
            if (positionReadOption) {
                byteRead = in.read(currentPosition, buffer, 0, byteToReadThisRound);
            } else {
                byteRead = in.read(buffer, 0, byteToReadThisRound);
            }
            if (byteRead <= 0)
                break;
            chunkNumber++;
            totalByteRead += byteRead;
            currentPosition += byteRead;
            byteLeftToRead -= byteRead;
            if (verboseOption) {
                LOG.info("reader: Number of byte read: " + byteRead + " ; totalByteRead = " + totalByteRead
                        + " ; currentPosition = " + currentPosition + " ; chunkNumber = " + chunkNumber
                        + " ; File name = " + fname);
            }
        }
    } catch (IOException e) {
        throw new IOException("#### Exception caught in readUntilEnd: reader currentOffset = " + currentPosition
                + " ; totalByteRead = " + totalByteRead + " ; latest byteRead = " + byteRead + " ; visibleLen = "
                + visibleLen + " ; bufferLen = " + buffer.length + " ; Filename = " + fname, e);
    }
    if (verboseOption)
        LOG.info("reader end: position: " + pos + " ; currentOffset = " + currentPosition
                + " ; totalByteRead = " + totalByteRead + " ; Filename = " + fname);
    return totalByteRead;
}
From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormatTest.java
License:Apache License
@Test
public void testWriteNewControlFileAndCreateSplits() throws IOException {
    MockupFileSystem fs = new MockupFileSystem();
    Path newControlFile = new Path("newControlFile");
    fs.addFile("newControlFile", true, null);
    Map<String, ArrayList<String>> locationMap = new HashMap<String, ArrayList<String>>() {
        {
            put("host1", new ArrayList<String>() {
                {
                    add("line1-1");
                    add("line1-2");
                    add("line1-3");
                }
            });
            put("host2", new ArrayList<String>() {
                {
                    add("line2-1");
                    add("line2-2");
                    add("line2-3");
                    add("line2-4");
                    add("line2-5");
                    add("line2-6");
                }
            });
            put("host3", new ArrayList<String>() {
                {
                    add("line3-1");
                    add("line3-2");
                    add("line3-3");
                    add("line3-4");
                    add("line3-5");
                    add("line3-6");
                    add("line3-7");
                    add("line3-8");
                }
            });
            put("host4", new ArrayList<String>() {
                {
                    add("line4-1");
                    add("line4-2");
                    add("line4-3");
                    add("line4-4");
                    add("line4-5");
                    add("line4-6");
                    add("line4-7");
                    add("line4-8");
                    add("line4-9");
                    add("line4-10");
                }
            });
        }
    };
    List<FileSplit> splits = ControlFileInputFormat.writeNewControlFileAndCreateSplits(newControlFile, fs,
            locationMap, 3);
    FSDataInputStream bis = fs.open(newControlFile);
    try {
        int i = 0;
        for (FileSplit split : splits) {
            LOG.debug(++i + ".split = " + split.toString());
            byte[] content = new byte[(int) split.getLength()];
            // read() takes a long position, so the start offset needs no narrowing cast
            bis.read(split.getStart(), content, 0, (int) split.getLength());
            String cont = new String(content);
            LOG.debug(" content = " + cont);
            if (cont.startsWith("line1-1")) {
                String expected = "";
                for (String line : locationMap.get("host1")) {
                    expected += line + "\n";
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line2-1")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host2")) {
                    expected += line + "\n";
                    if (++j == 3)
                        break;
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line2-4")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host2")) {
                    if (++j <= 3)
                        continue;
                    expected += line + "\n";
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line3-1")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host3")) {
                    expected += line + "\n";
                    if (++j == 4)
                        break;
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line3-5")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host3")) {
                    if (++j <= 4)
                        continue;
                    expected += line + "\n";
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line4-1")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host4")) {
                    expected += line + "\n";
                    if (++j == 3)
                        break;
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line4-4")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host4")) {
                    if (++j <= 3)
                        continue;
                    expected += line + "\n";
                    if (++j > 7)
                        break;
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line4-7")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host4")) {
                    if (++j <= 6)
                        continue;
                    expected += line + "\n";
                }
                assertEquals(expected, cont);
            } else {
                fail("wrong split");
            }
        }
    } finally {
        bis.close();
    }
}
From source file:fi.tkk.ics.hadoop.bam.FastaInputFormat.java
License:Open Source License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Note: We generate splits that correspond to different sections in the FASTA
    // input (which here are called "chromosomes"), delimited by '>' and followed
    // by a single-line description.
    // Some locality is preserved since the locations are formed from the input
    // splits, although no special attention is given to this issue (FASTA files
    // are assumed to be smallish).
    // The splits are generated on the client. In the future the split generation
    // should be performed only once and an index file stored inside HDFS for
    // performance reasons. Currently this is not attempted (again: FASTA files
    // aren't all that big).

    List<InputSplit> splits = super.getSplits(job);

    // First sort by input path.
    Collections.sort(splits, new Comparator<InputSplit>() {
        public int compare(InputSplit a, InputSplit b) {
            FileSplit fa = (FileSplit) a, fb = (FileSplit) b;
            return fa.getPath().compareTo(fb.getPath());
        }
    });

    // Make sure we are given only a single file: every pair of adjacent
    // (sorted) splits must share the same path.
    for (int i = 0; i < splits.size() - 1; i++) {
        FileSplit fa = (FileSplit) splits.get(i);
        FileSplit fb = (FileSplit) splits.get(i + 1);
        if (fa.getPath().compareTo(fb.getPath()) != 0)
            throw new IOException("FastaInputFormat assumes single FASTA input file!");
    }

    // Now we are sure we only have one FASTA input file.
    final List<InputSplit> newSplits = new ArrayList<InputSplit>(splits.size());
    FileSplit fileSplit = (FileSplit) splits.get(0);
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(ContextUtil.getConfiguration(job));
    FSDataInputStream fis = fs.open(path);
    byte[] buffer = new byte[1024];

    long byte_counter = 0;
    long prev_chromosome_byte_offset = 0;
    boolean first_chromosome = true;
    for (int j = 0; j < splits.size(); j++) {
        FileSplit origsplit = (FileSplit) splits.get(j);
        // Scan the bytes covered by this split for '>' section markers.
        while (byte_counter < origsplit.getStart() + origsplit.getLength()) {
            long bytes_read = fis.read(byte_counter, buffer, 0,
                    (int) Math.min(buffer.length, origsplit.getStart() + origsplit.getLength() - byte_counter));
            if (bytes_read <= 0)
                break; // defensive: avoid spinning forever on an unexpected end of stream
            for (int i = 0; i < bytes_read; i++) {
                if (buffer[i] == (byte) '>') {
                    if (!first_chromosome) {
                        // Close off the previous chromosome's split.
                        FileSplit fsplit = new FileSplit(path, prev_chromosome_byte_offset,
                                byte_counter + i - 1 - prev_chromosome_byte_offset, origsplit.getLocations());
                        newSplits.add(fsplit);
                    }
                    first_chromosome = false;
                    prev_chromosome_byte_offset = byte_counter + i;
                }
            }
            byte_counter += bytes_read;
        }
        if (j == splits.size() - 1) {
            // End of file: the last chromosome runs to the final byte.
            FileSplit fsplit = new FileSplit(path, prev_chromosome_byte_offset,
                    byte_counter - prev_chromosome_byte_offset, origsplit.getLocations());
            newSplits.add(fsplit);
            break;
        }
    }
    return newSplits;
}
From source file:fuse4j.hadoopfs.HdfsClientImpl.java
License:Apache License
/**
 * read()
 */
@Override
public boolean read(int uid, Object hdfsFile, ByteBuffer buf, long offset) {
    HdfsFileIoContext file = (HdfsFileIoContext) hdfsFile;
    if (!(file.getIoStream() instanceof FSDataInputStream)) {
        return false;
    }
    FSDataInputStream input = (FSDataInputStream) file.getIoStream();
    byte[] readBuf = new byte[buf.capacity()];
    int bytesRead = 0;
    try {
        bytesRead = input.read(offset, readBuf, 0, readBuf.length);
    } catch (IOException ioe) {
        return false;
    }
    // Copy into the caller's buffer only if something was read;
    // read() returns -1 at end of stream.
    if (bytesRead > 0)
        buf.put(readBuf, 0, bytesRead);
    return true;
}
From source file:fuse4j.hadoopfs.HdfsClientReal.java
License:Apache License
/**
 * read()
 */
public boolean read(Object hdfsFile, ByteBuffer buf, long offset) {
    HdfsFileIoContext file = (HdfsFileIoContext) hdfsFile;
    if (!(file.ioStream instanceof FSDataInputStream)) {
        return false;
    }
    FSDataInputStream input = (FSDataInputStream) file.ioStream;
    byte[] readBuf = new byte[buf.capacity()];
    int bytesRead = 0;
    try {
        bytesRead = input.read(offset, readBuf, 0, readBuf.length);
    } catch (IOException ioe) {
        return false;
    }
    // Guard against a -1 (end-of-stream) return, which would make put() throw.
    if (bytesRead > 0)
        buf.put(readBuf, 0, bytesRead);
    return true;
}
From source file:iumfs.hdfs.HdfsFile.java
License:Apache License
@Override
public long read(ByteBuffer buf, long size, long offset)
        throws FileNotFoundException, IOException, NotSupportedException {
    int ret;
    FSDataInputStream fsdis = fs.open(new Path(getPath()));
    try {
        // Positional read directly into the response buffer's backing array,
        // past the response header.
        ret = fsdis.read(offset, buf.array(), Request.RESPONSE_HEADER_SIZE, (int) size);
    } finally {
        fsdis.close(); // close the stream even if the read throws
    }
    logger.fine("read offset=" + offset + ",size=" + size);
    return ret;
}
From source file:org.apache.blur.shell.DiscoverFileBufferSizeUtil.java
License:Apache License
private static long readFile(PrintWriter out, Random random, int bufSize, FSDataInputStream inputStream,
        long length, int readSamples) throws IOException {
    byte[] buf = new byte[bufSize];
    long start = System.nanoTime();
    long time = 0;
    for (int i = 0; i < readSamples; i++) {
        long now = System.nanoTime();
        // Print progress roughly every 5 seconds.
        if (start + 5000000000L < now) {
            double complete = ((double) i / (double) readSamples) * 100.0;
            out.println(complete + "% Complete");
            out.flush();
            start = System.nanoTime();
        }
        random.nextBytes(buf);
        long position = getPosition(bufSize, random, length);
        long s = System.nanoTime();
        int offset = 0;
        int len = bufSize;
        // Loop until the whole buffer is filled; a positional read may return
        // fewer bytes than requested, and returns -1 at end of stream.
        while (len > 0) {
            int amount = inputStream.read(position, buf, offset, len);
            if (amount < 0)
                break;
            len -= amount;
            offset += amount;
            position += amount;
        }
        long e = System.nanoTime();
        time += (e - s);
    }
    return time;
}
From source file:org.apache.drill.exec.store.parquet.FooterGatherer.java
License:Apache License
private static final void readFully(FSDataInputStream stream, long start, byte[] output, int offset, int len)
        throws IOException {
    int bytesRead = 0;
    // Accumulate positional reads until 'len' bytes have been read. Note that a
    // -1 (end-of-stream) return decrements bytesRead; repeated EOF returns walk
    // it down to -1, which terminates the loop.
    while (bytesRead > -1 && bytesRead < len) {
        bytesRead += stream.read(start + bytesRead, output, offset + bytesRead, len - bytesRead);
    }
}
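A closing note on the accumulating loop above: because read returns -1 at end of stream, a premature EOF silently corrupts the count rather than failing. Callers that need all-or-nothing semantics can use Hadoop's built-in FSDataInputStream.readFully(position, buffer, offset, length), which throws an EOFException if the file ends before the requested range is read, or an explicit loop like this hypothetical helper (a minimal sketch, not taken from any of the sources above):

import java.io.EOFException;
import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;

public final class ReadUtil {
    // Fills output[offset .. offset+len) from the given file position,
    // failing loudly if the stream ends before len bytes are available.
    public static void readFullyChecked(FSDataInputStream stream, long start, byte[] output,
            int offset, int len) throws IOException {
        int totalRead = 0;
        while (totalRead < len) {
            int n = stream.read(start + totalRead, output, offset + totalRead, len - totalRead);
            if (n < 0) {
                throw new EOFException("EOF after " + totalRead + " of " + len + " bytes");
            }
            totalRead += n;
        }
    }
}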