List of usage examples for org.apache.hadoop.fs.FSDataInputStream.seek
@Override public void seek(long desired) throws IOException
From source file:ras.test.hadoop.fs.InMemoryFileSystemUnitTest.java
License:Apache License
@Test public void testInputStreamSeek() throws IOException { Path path = new Path("message.txt"); writeMessage(path);//from ww w .j a v a2s. co m byte[] bytesOut = message.getBytes(); long seekPosition = 6; FSDataInputStream in = inMemoryFileSystem.open(path); byte[] bytesIn = new byte[bytesOut.length - (int) seekPosition]; in.seek(seekPosition); assertThat("Wrong position after seek", in.getPos(), is(equalTo(seekPosition))); in.read(bytesIn); for (int i = 6; i < bytesOut.length; i++) { assertThat("Wrong byte at index " + i, bytesIn[i - 6], is(equalTo(bytesOut[i]))); } }
From source file:redpoll.examples.sogou.SogouRecordReader.java
License:Apache License
/**
 * Creates a record reader over a single file split.
 *
 * <p>When a compression codec is registered for the file, the reader wraps the
 * codec's input stream and {@code end} is set to {@code Long.MAX_VALUE};
 * otherwise the raw stream is positioned at the split start (when non-zero)
 * and wrapped directly.
 *
 * @param job   configuration used to resolve the file system and the codec
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or positioned
 */
public SogouRecordReader(Configuration job, FileSplit split) throws IOException {
    final Path file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();

    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // Open the underlying file; it is only repositioned when uncompressed.
    FSDataInputStream rawStream = file.getFileSystem(job).open(split.getPath());
    if (codec == null) {
        if (start != 0) {
            rawStream.seek(start);
        }
        in = new SogouCorpusReader(rawStream, job);
    } else {
        in = new SogouCorpusReader(codec.createInputStream(rawStream), job);
        end = Long.MAX_VALUE;
    }
    this.pos = start;
}
From source file:simsql.runtime.InputFileDeserializer.java
License:Apache License
/** * Initializes the RecordDeserializer for use in Hadoop *//*from w w w . j a v a2 s. c o m*/ public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); // open the file and seek to the start of the split; also remember the start, end, and current position Path file = split.getPath(); FileSystem fs = file.getFileSystem(job); FSDataInputStream fileInLocal = fs.open(split.getPath()); // do not do anything with a zero length split if (split.getLength() == 0) { done = true; return; } // not a non-zero length split start = split.getStart(); end = split.getStart() + split.getLength() - 1; posCompressed = fileInLocal; fileInLocal.seek(start); // compression isCompressed = true; fileIn = RecordCompression.getInputStream(fileInLocal); pos = start; // now, find the start marker in our split fName = file.getName(); findStart(); }
From source file:stargate.drivers.sourcefs.hdfs.HDFSChunkReader.java
License:Open Source License
private void initialize(FSDataInputStream is, long offset, int size) throws IOException { this.is = is; this.offset = offset; this.size = size; this.currentOffset = offset; is.seek(offset); long offsetMoved = is.getPos(); if (offsetMoved != this.offset) { throw new IOException("failed to move offset to " + offsetMoved); }//w w w.j av a 2 s . co m }
From source file:StorageEngineClient.MyLineRecordReader.java
License:Open Source License
public MyLineRecordReader(Configuration job, FileSplit split) throws IOException { this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); start = split.getStart();//from w w w .j a v a 2 s .c om end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); this.tempConf = job; this.tempSplit = split; FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new LineReader(fileIn, job); } if (skipFirstLine) { start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:streaming.core.DownloadRunner.java
License:Apache License
public static int getRawFileByPath(HttpServletResponse res, String path, long position) { try {//from w ww .j ava 2s .c o m FileSystem fs = FileSystem.get(new Configuration()); Path p = new Path(path); if (fs.exists(p)) { List<FileStatus> files = new ArrayList<FileStatus>(); // if (fs.isFile(p)) { files.add(fs.getFileStatus(p)); } else if (fs.isDirectory(p)) { FileStatus[] fileStatusArr = fs.listStatus(p); if (fileStatusArr != null && fileStatusArr.length > 0) { for (FileStatus cur : fileStatusArr) { files.add(cur); } } } //?? if (files.size() > 0) { logger.info(path + "" + files.size()); FSDataInputStream inputStream = null; OutputStream outputStream = res.getOutputStream(); int len = files.size(); int i = 1; long allPosition = 0; for (FileStatus cur : files) { logger.info("[" + i++ + "/" + len + "]" + path + ",?" + cur); inputStream = fs.open(cur.getPath()); if (position > 0) { if (allPosition + cur.getLen() > position) { inputStream.seek(position - allPosition); logger.info("seek position " + (position - allPosition)); position = -1; } allPosition += cur.getLen(); } org.apache.commons.io.IOUtils.copyLarge(inputStream, outputStream); inputStream.close(); } outputStream.flush(); outputStream.close(); return 200; } else { logger.info(path + "" + files.size()); } } else { return 400; } } catch (Exception e) { e.printStackTrace(); } return 500; }
From source file:trec.MyLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); start = split.getStart();/*from w w w . j a v a 2 s .com*/ end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { in = new MyLineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new MyLineReader(fileIn, job); } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:trec.PRRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); start = split.getStart();//ww w .j a v a2s . c om end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new LineReader(fileIn, job); } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:trec.TrecRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); start = split.getStart();//from www . j a va2 s . c om end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new LineReader(fileIn, job); } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }