List of usage examples for org.apache.hadoop.fs FSDataInputStream read
@Override public int read(long position, byte[] buffer, int offset, int length) throws IOException
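This is the positional-read ("pread") overload from Hadoop's PositionedReadable contract: it reads up to length bytes starting at position without moving the stream's current offset, returns the number of bytes actually read (which may be fewer than requested), and returns -1 at end of stream. A minimal sketch of a call site, before the real-world examples below (the path and offset here are invented for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PositionalReadDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        try (FSDataInputStream in = fs.open(new Path("/tmp/example.dat"))) {
            byte[] buffer = new byte[512];
            // pread: reads up to 512 bytes at offset 4096 without changing
            // the stream's current position; returns -1 at end of stream.
            int bytesRead = in.read(4096L, buffer, 0, buffer.length);
            System.out.println("bytes read: " + bytesRead);
        }
    }
}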
From source file:com.cloudera.CacheTool.java
License:Apache License
public static void main(String[] args) throws Exception {
    // 'conf' and 'pool' (a thread pool), plus the createFile() helper,
    // are static members of CacheTool not shown in this excerpt.
    conf = new Configuration();
    conf.addResource(new Path("/home/james/hdfs-conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/james/hdfs-conf/core-site.xml"));
    URI uri = FileSystem.getDefaultUri(conf);
    final FileSystem fs = FileSystem.get(uri, conf);

    // Create 8000 files of 1 MB each, 10 files per submitted task.
    for (int i = 0; i < 8000; i += 10) {
        final int i_copy = i;
        pool.submit(new Runnable() {
            public void run() {
                for (int j = 0; j < 10; j++) {
                    try {
                        createFile(fs, new Path("/home/james/large" + (i_copy + j)), 1024 * 1024);
                    } catch (IOException ioe) {
                        System.out.println(ioe);
                    }
                }
            }
        });
    }
    pool.shutdown();
    pool.awaitTermination(1, TimeUnit.DAYS);

    // Time 100 files x 100 positional reads = 10000 random 512-byte reads.
    long start = Time.monotonicNow();
    Random r = new Random(0);
    for (int i = 0; i < 100; i++) {
        FSDataInputStream fdis = fs.open(new Path("/home/james/large" + r.nextInt(8000)), 512);
        byte[] buffer = new byte[512];
        for (int j = 0; j < 100; j++) {
            int offset = r.nextInt(1024 * 1024 - 511);
            fdis.read(offset, buffer, 0, 512);
        }
    }
    System.out.println("Time taken for 10000 random 512 byte reads: "
            + (Time.monotonicNow() - start) / 1000.0);
}
From source file:com.google.cloud.hadoop.fs.gcs.HadoopFileSystemIntegrationHelper.java
License:Open Source License
/**
 * Helper that reads text from the given file at the given offset and returns it.
 * If checkOverflow is true, it makes sure that no more than 'len' bytes were read.
 */
protected String readTextFile(Path hadoopPath, int offset, int len, boolean checkOverflow) throws IOException {
    String text = null;
    FSDataInputStream readStream = null;
    long fileSystemBytesRead = 0;
    FileSystem.Statistics stats = FileSystem.getStatistics(ghfsFileSystemDescriptor.getScheme(), ghfs.getClass());
    if (stats != null) {
        // Let it be null in case no stats have been added for our scheme yet.
        fileSystemBytesRead = stats.getBytesRead();
    }
    try {
        int bufferSize = len;
        bufferSize += checkOverflow ? 1 : 0; // one extra byte catches any read past 'len'
        byte[] readBuffer = new byte[bufferSize];
        readStream = ghfs.open(hadoopPath, GoogleHadoopFileSystemBase.BUFFERSIZE_DEFAULT);
        int numBytesRead;
        if (offset > 0) {
            numBytesRead = readStream.read(offset, readBuffer, 0, bufferSize);
        } else {
            numBytesRead = readStream.read(readBuffer);
        }
        Assert.assertEquals(len, numBytesRead);
        text = new String(readBuffer, 0, numBytesRead, StandardCharsets.UTF_8);
    } finally {
        if (readStream != null) {
            readStream.close();
        }
    }
    // After the read, the stats better be non-null for our ghfs scheme.
    stats = FileSystem.getStatistics(ghfsFileSystemDescriptor.getScheme(), ghfs.getClass());
    Assert.assertNotNull(stats);
    long endFileSystemBytesRead = stats.getBytesRead();
    int bytesReadStats = (int) (endFileSystemBytesRead - fileSystemBytesRead);
    if (statistics == FileSystemStatistics.EXACT) {
        Assert.assertEquals(String.format("FS statistics mismatch fetched from class '%s'", ghfs.getClass()),
                len, bytesReadStats);
    } else if (statistics == FileSystemStatistics.GREATER_OR_EQUAL) {
        Assert.assertTrue(String.format("Expected %d <= %d", len, bytesReadStats), len <= bytesReadStats);
    } else if (statistics == FileSystemStatistics.NONE) {
        Assert.assertEquals("FS statistics expected to be 0", 0, fileSystemBytesRead);
        Assert.assertEquals("FS statistics expected to be 0", 0, endFileSystemBytesRead);
    } else if (statistics == FileSystemStatistics.IGNORE) {
        // NO-OP
    }
    return text;
}
From source file:com.mellanox.r4h.TestWriteRead.java
License:Apache License
/**
 * Read chunks into the buffer repeatedly until a total of visibleLen bytes have been read.
 * Returns the total number of bytes read.
 */
private long readUntilEnd(FSDataInputStream in, byte[] buffer, long size, String fname, long pos,
        long visibleLen, boolean positionReadOption) throws IOException {
    if (pos >= visibleLen || visibleLen <= 0)
        return 0;

    int chunkNumber = 0;
    long totalByteRead = 0;
    long currentPosition = pos;
    int byteRead = 0;
    long byteLeftToRead = visibleLen - pos;
    int byteToReadThisRound = 0;

    if (!positionReadOption) {
        in.seek(pos);
        currentPosition = in.getPos();
    }
    if (verboseOption)
        LOG.info("reader begin: position: " + pos + " ; currentOffset = " + currentPosition
                + " ; bufferSize = " + buffer.length + " ; Filename = " + fname);
    try {
        while (byteLeftToRead > 0 && currentPosition < visibleLen) {
            byteToReadThisRound = (int) (byteLeftToRead >= buffer.length ? buffer.length : byteLeftToRead);
            if (positionReadOption) {
                byteRead = in.read(currentPosition, buffer, 0, byteToReadThisRound);
            } else {
                byteRead = in.read(buffer, 0, byteToReadThisRound);
            }
            if (byteRead <= 0)
                break;
            chunkNumber++;
            totalByteRead += byteRead;
            currentPosition += byteRead;
            byteLeftToRead -= byteRead;
            if (verboseOption) {
                LOG.info("reader: Number of byte read: " + byteRead + " ; totalByteRead = " + totalByteRead
                        + " ; currentPosition = " + currentPosition + " ; chunkNumber = " + chunkNumber
                        + " ; File name = " + fname);
            }
        }
    } catch (IOException e) {
        throw new IOException("#### Exception caught in readUntilEnd: reader currentOffset = " + currentPosition
                + " ; totalByteRead = " + totalByteRead + " ; latest byteRead = " + byteRead + " ; visibleLen = "
                + visibleLen + " ; bufferLen = " + buffer.length + " ; Filename = " + fname, e);
    }
    if (verboseOption)
        LOG.info("reader end: position: " + pos + " ; currentOffset = " + currentPosition
                + " ; totalByteRead = " + totalByteRead + " ; Filename = " + fname);
    return totalByteRead;
}
From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormatTest.java
License:Apache License
@Test
public void testWriteNewControlFileAndCreateSplits() throws IOException {
    MockupFileSystem fs = new MockupFileSystem();
    Path newControlFile = new Path("newControlFile");
    fs.addFile("newControlFile", true, null);
    Map<String, ArrayList<String>> locationMap = new HashMap<String, ArrayList<String>>() {
        {
            put("host1", new ArrayList<String>() {
                {
                    add("line1-1");
                    add("line1-2");
                    add("line1-3");
                }
            });
            put("host2", new ArrayList<String>() {
                {
                    add("line2-1");
                    add("line2-2");
                    add("line2-3");
                    add("line2-4");
                    add("line2-5");
                    add("line2-6");
                }
            });
            put("host3", new ArrayList<String>() {
                {
                    add("line3-1");
                    add("line3-2");
                    add("line3-3");
                    add("line3-4");
                    add("line3-5");
                    add("line3-6");
                    add("line3-7");
                    add("line3-8");
                }
            });
            put("host4", new ArrayList<String>() {
                {
                    add("line4-1");
                    add("line4-2");
                    add("line4-3");
                    add("line4-4");
                    add("line4-5");
                    add("line4-6");
                    add("line4-7");
                    add("line4-8");
                    add("line4-9");
                    add("line4-10");
                }
            });
        }
    };
    List<FileSplit> splits = ControlFileInputFormat.writeNewControlFileAndCreateSplits(newControlFile, fs,
            locationMap, 3);
    FSDataInputStream bis = fs.open(newControlFile);
    try {
        int i = 0;
        for (FileSplit split : splits) {
            LOG.debug(++i + ".split = " + split.toString());
            byte[] content = new byte[(int) split.getLength()];
            // read() takes a long position, so the start offset needs no narrowing cast
            bis.read(split.getStart(), content, 0, (int) split.getLength());
            String cont = new String(content);
            LOG.debug(" content = " + cont);
            if (cont.startsWith("line1-1")) {
                String expected = "";
                for (String line : locationMap.get("host1")) {
                    expected += line + "\n";
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line2-1")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host2")) {
                    expected += line + "\n";
                    if (++j == 3)
                        break;
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line2-4")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host2")) {
                    if (++j <= 3)
                        continue;
                    expected += line + "\n";
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line3-1")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host3")) {
                    expected += line + "\n";
                    if (++j == 4)
                        break;
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line3-5")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host3")) {
                    if (++j <= 4)
                        continue;
                    expected += line + "\n";
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line4-1")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host4")) {
                    expected += line + "\n";
                    if (++j == 3)
                        break;
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line4-4")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host4")) {
                    if (++j <= 3)
                        continue;
                    expected += line + "\n";
                    if (++j > 7)
                        break;
                }
                assertEquals(expected, cont);
            } else if (cont.startsWith("line4-7")) {
                String expected = "";
                int j = 0;
                for (String line : locationMap.get("host4")) {
                    if (++j <= 6)
                        continue;
                    expected += line + "\n";
                }
                assertEquals(expected, cont);
            } else {
                fail("wrong split");
            }
        }
    } finally {
        bis.close();
    }
}
From source file:fi.tkk.ics.hadoop.bam.FastaInputFormat.java
License:Open Source License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Note: We generate splits that correspond to different sections in the FASTA
    // input (which here are called "chromosomes"), delimited by '>' and followed
    // by a single-line description.
    // Some locality is preserved since the locations are formed from the input
    // splits, although no special attention is given to this issue (FASTA files
    // are assumed to be smallish).
    // The splits are generated on the client. In the future the split generation
    // should be performed only once and an index file stored inside HDFS for
    // performance reasons. Currently this is not attempted (again: FASTA files
    // aren't all that big).

    List<InputSplit> splits = super.getSplits(job);

    // First sort by input path.
    Collections.sort(splits, new Comparator<InputSplit>() {
        public int compare(InputSplit a, InputSplit b) {
            FileSplit fa = (FileSplit) a, fb = (FileSplit) b;
            return fa.getPath().compareTo(fb.getPath());
        }
    });

    // Make sure we are given only a single file: every pair of adjacent
    // (sorted) splits must share the same path.
    for (int i = 0; i < splits.size() - 1; i++) {
        FileSplit fa = (FileSplit) splits.get(i);
        FileSplit fb = (FileSplit) splits.get(i + 1);
        if (fa.getPath().compareTo(fb.getPath()) != 0)
            throw new IOException("FastaInputFormat assumes single FASTA input file!");
    }

    // Now we are sure we only have one FASTA input file.
    final List<InputSplit> newSplits = new ArrayList<InputSplit>(splits.size());
    FileSplit fileSplit = (FileSplit) splits.get(0);
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(ContextUtil.getConfiguration(job));
    FSDataInputStream fis = fs.open(path);
    byte[] buffer = new byte[1024];

    long byte_counter = 0;
    long prev_chromosome_byte_offset = 0;
    boolean first_chromosome = true;
    for (int j = 0; j < splits.size(); j++) {
        FileSplit origsplit = (FileSplit) splits.get(j);
        // Scan the bytes covered by this split for '>' section markers.
        while (byte_counter < origsplit.getStart() + origsplit.getLength()) {
            long bytes_read = fis.read(byte_counter, buffer, 0,
                    (int) Math.min(buffer.length, origsplit.getStart() + origsplit.getLength() - byte_counter));
            if (bytes_read <= 0)
                break; // defensive: avoid spinning forever on an unexpected end of stream
            for (int i = 0; i < bytes_read; i++) {
                if (buffer[i] == (byte) '>') {
                    if (!first_chromosome) {
                        // Close off the previous chromosome's split.
                        FileSplit fsplit = new FileSplit(path, prev_chromosome_byte_offset,
                                byte_counter + i - 1 - prev_chromosome_byte_offset, origsplit.getLocations());
                        newSplits.add(fsplit);
                    }
                    first_chromosome = false;
                    prev_chromosome_byte_offset = byte_counter + i;
                }
            }
            byte_counter += bytes_read;
        }
        if (j == splits.size() - 1) {
            // End of file: the last chromosome runs to the final byte.
            FileSplit fsplit = new FileSplit(path, prev_chromosome_byte_offset,
                    byte_counter - prev_chromosome_byte_offset, origsplit.getLocations());
            newSplits.add(fsplit);
            break;
        }
    }
    return newSplits;
}
From source file:fuse4j.hadoopfs.HdfsClientImpl.java
License:Apache License
/**
 * read()
 */
@Override
public boolean read(int uid, Object hdfsFile, ByteBuffer buf, long offset) {
    HdfsFileIoContext file = (HdfsFileIoContext) hdfsFile;
    if (!(file.getIoStream() instanceof FSDataInputStream)) {
        return false;
    }
    FSDataInputStream input = (FSDataInputStream) file.getIoStream();
    byte[] readBuf = new byte[buf.capacity()];
    int bytesRead = 0;
    try {
        bytesRead = input.read(offset, readBuf, 0, readBuf.length);
    } catch (IOException ioe) {
        return false;
    }
    // Copy into the caller's buffer only if something was read;
    // read() returns -1 at end of stream.
    if (bytesRead > 0)
        buf.put(readBuf, 0, bytesRead);
    return true;
}
From source file:fuse4j.hadoopfs.HdfsClientReal.java
License:Apache License
/**
 * read()
 */
public boolean read(Object hdfsFile, ByteBuffer buf, long offset) {
    HdfsFileIoContext file = (HdfsFileIoContext) hdfsFile;
    if (!(file.ioStream instanceof FSDataInputStream)) {
        return false;
    }
    FSDataInputStream input = (FSDataInputStream) file.ioStream;
    byte[] readBuf = new byte[buf.capacity()];
    int bytesRead = 0;
    try {
        bytesRead = input.read(offset, readBuf, 0, readBuf.length);
    } catch (IOException ioe) {
        return false;
    }
    // Guard against a -1 (end-of-stream) return, which would make put() throw.
    if (bytesRead > 0)
        buf.put(readBuf, 0, bytesRead);
    return true;
}
From source file:iumfs.hdfs.HdfsFile.java
License:Apache License
@Override
public long read(ByteBuffer buf, long size, long offset)
        throws FileNotFoundException, IOException, NotSupportedException {
    int ret;
    FSDataInputStream fsdis = fs.open(new Path(getPath()));
    try {
        // Positional read directly into the response buffer's backing array,
        // past the response header.
        ret = fsdis.read(offset, buf.array(), Request.RESPONSE_HEADER_SIZE, (int) size);
    } finally {
        fsdis.close(); // close the stream even if the read throws
    }
    logger.fine("read offset=" + offset + ",size=" + size);
    return ret;
}
From source file:org.apache.blur.shell.DiscoverFileBufferSizeUtil.java
License:Apache License
private static long readFile(PrintWriter out, Random random, int bufSize, FSDataInputStream inputStream,
        long length, int readSamples) throws IOException {
    byte[] buf = new byte[bufSize];
    long start = System.nanoTime();
    long time = 0;
    for (int i = 0; i < readSamples; i++) {
        long now = System.nanoTime();
        // Print progress roughly every 5 seconds.
        if (start + 5000000000L < now) {
            double complete = ((double) i / (double) readSamples) * 100.0;
            out.println(complete + "% Complete");
            out.flush();
            start = System.nanoTime();
        }
        random.nextBytes(buf);
        long position = getPosition(bufSize, random, length);
        long s = System.nanoTime();
        int offset = 0;
        int len = bufSize;
        // Loop until the whole buffer is filled; a positional read may return
        // fewer bytes than requested, and returns -1 at end of stream.
        while (len > 0) {
            int amount = inputStream.read(position, buf, offset, len);
            if (amount < 0)
                break;
            len -= amount;
            offset += amount;
            position += amount;
        }
        long e = System.nanoTime();
        time += (e - s);
    }
    return time;
}
From source file:org.apache.drill.exec.store.parquet.FooterGatherer.java
License:Apache License
private static final void readFully(FSDataInputStream stream, long start, byte[] output, int offset, int len)
        throws IOException {
    int bytesRead = 0;
    // Accumulate positional reads until 'len' bytes have been read. Note that a
    // -1 (end-of-stream) return decrements bytesRead; repeated EOF returns walk
    // it down to -1, which terminates the loop.
    while (bytesRead > -1 && bytesRead < len) {
        bytesRead += stream.read(start + bytesRead, output, offset + bytesRead, len - bytesRead);
    }
}
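A closing note on the accumulating loop above: because read returns -1 at end of stream, a premature EOF silently corrupts the count rather than failing. Callers that need all-or-nothing semantics can use Hadoop's built-in FSDataInputStream.readFully(position, buffer, offset, length), which throws an EOFException if the file ends before the requested range is read, or an explicit loop like this hypothetical helper (a minimal sketch, not taken from any of the sources above):

import java.io.EOFException;
import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;

public final class ReadUtil {
    // Fills output[offset .. offset+len) from the given file position,
    // failing loudly if the stream ends before len bytes are available.
    public static void readFullyChecked(FSDataInputStream stream, long start, byte[] output,
            int offset, int len) throws IOException {
        int totalRead = 0;
        while (totalRead < len) {
            int n = stream.read(start + totalRead, output, offset + totalRead, len - totalRead);
            if (n < 0) {
                throw new EOFException("EOF after " + totalRead + " of " + len + " bytes");
            }
            totalRead += n;
        }
    }
}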