List of usage examples for org.apache.hadoop.fs.FSDataInputStream#seek
@Override public void seek(long desired) throws IOException
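FSDataInputStream implements Hadoop's Seekable interface: seek(long desired) moves the read position to an absolute offset from the start of the file, unlike java.io.InputStream.skip(long), which is relative, and seeking past the end of the file raises an IOException. Below is a minimal, self-contained sketch of the rewind-and-reread pattern most of the examples on this page share; the HDFS URI is a placeholder, not taken from any of the source files listed here.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class SeekRewindSketch {
    public static void main(String[] args) throws Exception {
        String uri = "hdfs://localhost:9000/tmp/example.txt"; // placeholder URI, adjust for your cluster
        FileSystem fs = FileSystem.get(URI.create(uri), new Configuration());
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false); // first pass over the file
            in.seek(0); // absolute positioning: rewind to the first byte
            IOUtils.copyBytes(in, System.out, 4096, false); // second pass over the same bytes
        } finally {
            IOUtils.closeStream(in);
        }
    }
}

Note that seek is a relatively expensive operation, so the record readers below call it once per input split rather than seeking repeatedly.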
From source file: hadoop.examples.hdfs.FileSystemCat.java
License: Open Source License

public static void main(String[] args) throws Exception {
    String uri = "hdfs://exoplatform:9000/user/haint/input-0/test.txt";
    FileSystem fs = FileSystem.get(URI.create(uri), new Configuration());
    InputStream in = null;
    try {
        in = fs.open(new Path(uri));
        IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
        IOUtils.closeStream(in);
    }
    System.out.println("---------------------------------------------------");
    FSDataInputStream fsDataInputStream = null;
    try {
        fsDataInputStream = fs.open(new Path(uri));
        IOUtils.copyBytes(fsDataInputStream, System.out, 256, false);
        System.out.println("---------------------------------------------------");
        fsDataInputStream.seek(0); // rewind to the beginning and print the file again
        IOUtils.copyBytes(fsDataInputStream, System.out, 256, false);
    } finally {
        IOUtils.closeStream(fsDataInputStream);
    }
}
From source file: hadoop.inputsplit.FastaLineRecordReader.java
License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    done = false;
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);
    currentValue = new ValueWritable();
    value = new Text();
    tmpValue = new Text();
    tmp = new Text();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    String homeHdfs = context.getConfiguration().get("HDFS_HOME_DIR");
    //maxK = HadoopUtil.getMaxkFromPatterns(fs, new Path(homeHdfs + Constant.HDFS_PATTERNS_FILE_HDFS));

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job, recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job, recordDelimiterBytes);
        filePosition = fileIn;
    }

    // If this is not the first split, we always throw away the first record
    // because we always (except in the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;

    setKeySeq(fs, job); // set currentKey
    nextMyKeyValue(); // read the first record, if it exists
}
From source file: hadoop.TweetRecordReader.java
License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    this.pos = start;
}
From source file: hdfs.jsr203.HadoopFileSystem.java
License: Apache License

SeekableByteChannel newByteChannel(org.apache.hadoop.fs.Path path, Set<? extends OpenOption> options,
        FileAttribute<?>... attrs) throws IOException {
    checkOptions(options);
    // Fail if CREATE_NEW was requested but the file already exists.
    if (options.contains(CREATE_NEW) && this.fs.exists(path)) {
        throw new FileAlreadyExistsException(path.toString());
    }
    if (options.contains(WRITE) || options.contains(APPEND)) {
        checkWritable();
        beginRead();
        try {
            final WritableByteChannel wbc = Channels.newChannel(newOutputStream(path, options, attrs));
            long leftover = 0;
            if (options.contains(APPEND)) {
                /*Entry e = getEntry0(path);
                  if (e != null && e.size >= 0)
                      leftover = e.size;*/
                throw new IOException("APPEND NOT IMPLEMENTED");
            }
            final long offset = leftover;
            return new SeekableByteChannel() {
                long written = offset;

                public boolean isOpen() {
                    return wbc.isOpen();
                }

                public long position() throws IOException {
                    return written;
                }

                public SeekableByteChannel position(long pos) throws IOException {
                    throw new UnsupportedOperationException();
                }

                public int read(ByteBuffer dst) throws IOException {
                    throw new UnsupportedOperationException();
                }

                public SeekableByteChannel truncate(long size) throws IOException {
                    throw new UnsupportedOperationException();
                }

                public int write(ByteBuffer src) throws IOException {
                    int n = wbc.write(src);
                    written += n;
                    return n;
                }

                public long size() throws IOException {
                    return written;
                }

                public void close() throws IOException {
                    wbc.close();
                }
            };
        } finally {
            endRead();
        }
    } else {
        beginRead();
        try {
            ensureOpen();
            FileStatus e = this.fs.getFileStatus(path);
            if (e == null || e.isDirectory())
                throw new NoSuchFileException(path.toString());
            final FSDataInputStream inputStream = getInputStream(path);
            final ReadableByteChannel rbc = Channels.newChannel(inputStream);
            final long size = e.getLen();
            return new SeekableByteChannel() {
                long read = 0;

                public boolean isOpen() {
                    return rbc.isOpen();
                }

                public long position() throws IOException {
                    return read;
                }

                public SeekableByteChannel position(long pos) throws IOException {
                    // The channel is not buffered, so the seek on the
                    // underlying stream takes effect immediately.
                    inputStream.seek(pos);
                    read = pos;
                    return this;
                }

                public int read(ByteBuffer dst) throws IOException {
                    int n = rbc.read(dst);
                    if (n > 0) {
                        read += n;
                    }
                    return n;
                }

                public SeekableByteChannel truncate(long size) throws IOException {
                    throw new NonWritableChannelException();
                }

                public int write(ByteBuffer src) throws IOException {
                    throw new NonWritableChannelException();
                }

                public long size() throws IOException {
                    return size;
                }

                public void close() throws IOException {
                    rbc.close();
                }
            };
        } finally {
            endRead();
        }
    }
}
From source file: InvertedIndex.NLineRecordReader.java
License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.job = job;
    this.context = context;
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    this.path = file;
    this.length = split.getLength();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        if (0 == split.getLength() && job.getBoolean("mapred.ignore.badcompress", false)) {
            if (null != context && context instanceof TaskInputOutputContext) {
                ((TaskInputOutputContext) context).getCounter("Input Counter", "Gzip File length is zero")
                        .increment(1);
            }
            if (null != this.path) {
                LOG.warn("Skip 0-length Zip file: " + this.path.toString());
            }
            in = new NLineReader(fileIn, job);
        } else {
            try {
                in = new NLineReader(codec.createInputStream(fileIn), job);
                end = Long.MAX_VALUE;
            } catch (IOException e) {
                if (isIgnoreBadCompress(job, e)) {
                    in = new NLineReader(fileIn, job);
                    end = start;
                    LOG.warn("Skip Bad Compress File: " + this.path.toString());
                    LOG.warn("initialize line read error", e);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Skip Bad Zip File")
                            .increment(1);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Total Skip Bad Zip Length")
                            .increment(this.length);
                } else {
                    throw e;
                }
            }
        }
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new NLineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file: io.aos.hdfs.basics.AbstractHdfsFileTest.java
License: Apache License

private void testCreateHdfsFile(Path path) throws IOException {
    getLogger().info("Testing HDFS file creation.");
    byte[] buff = "The HDFS File content".getBytes(Charset.forName("UTF-8"));
    FSDataOutputStream outputStream = getFileSystem().create(path);
    outputStream.write(buff, 0, buff.length);
    outputStream.close();

    FSDataInputStream in = getFileSystem().open(path);
    IOUtils.copyBytes(in, System.out, 4096, false);
    in.seek(0); // rewind and print the content a second time
    IOUtils.copyBytes(in, System.out, 4096, false);
    IOUtils.closeStream(in);
}
From source file: io.aos.hdfs.basics.AbstractHdfsFileTest.java
License: Apache License

private void test1() throws IOException {
    Path srcPath = new Path("./src/test/resources/log4j.properties");
    Path dstPath = new Path("/log4j.properties");
    getFileSystem().copyFromLocalFile(srcPath, dstPath);

    // Read the copied file twice, rewinding with seek(0) in between.
    FSDataInputStream in = getFileSystem().open(dstPath);
    IOUtils.copyBytes(in, System.out, 4096, false);
    in.seek(0);
    IOUtils.copyBytes(in, System.out, 4096, false);

    // Rewind once more and copy the stream into a new file, reporting progress;
    // copyBytes closes both streams when the last argument is true.
    in.seek(0);
    OutputStream out = getFileSystem().create(new Path("./log4j.properties"), new Progressable() {
        @Override
        public void progress() {
            System.out.println(".");
        }
    });
    IOUtils.copyBytes(in, out, 4096, true);
}
From source file: io.aos.hdfs.FileSystemDoubleCat.java
License: Apache License

public static void main(String... args) throws Exception {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    FSDataInputStream in = null;
    try {
        in = fs.open(new Path(uri));
        IOUtils.copyBytes(in, System.out, 4096, false);
        in.seek(0); // go back to the start of the file
        IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
        IOUtils.closeStream(in);
    }
}
From source file: io.druid.storage.hdfs.tasklog.HdfsTaskLogs.java
License: Apache License

@Override
public Optional<ByteSource> streamTaskLog(final String taskId, final long offset) throws IOException {
    final Path path = getTaskLogFileFromId(taskId);
    final FileSystem fs = path.getFileSystem(hadoopConfig);
    if (fs.exists(path)) {
        return Optional.<ByteSource>of(new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
                log.info("Reading task log from: %s", path);
                final long seekPos;
                if (offset < 0) {
                    // A negative offset is interpreted relative to the end of the log.
                    final FileStatus stat = fs.getFileStatus(path);
                    seekPos = Math.max(0, stat.getLen() + offset);
                } else {
                    seekPos = offset;
                }
                final FSDataInputStream inputStream = fs.open(path);
                inputStream.seek(seekPos);
                log.info("Read task log from: %s (seek = %,d)", path, seekPos);
                return inputStream;
            }
        });
    } else {
        return Optional.absent();
    }
}
From source file: io.hops.erasure_coding.FileStripeReader.java
License: Apache License

@Override
public InputStream[] getNextStripeInputs() throws IOException {
    InputStream[] blocks = new InputStream[codec.stripeLength];
    try {
        for (int i = 0; i < codec.stripeLength; i++) {
            long seekOffset = stripeStartOffset + i * blockSize;
            if (seekOffset < srcSize) {
                FSDataInputStream in = fs.open(srcFile, bufferSize);
                in.seek(seekOffset);
                LOG.info("Opening stream at " + srcFile + ":" + seekOffset);
                blocks[i] = in;
            } else {
                // We have no src data at this offset.
                LOG.info("Using zeros at offset " + seekOffset);
                blocks[i] = new RaidUtils.ZeroInputStream(seekOffset + blockSize);
            }
        }
        stripeStartOffset += blockSize * codec.stripeLength;
        return blocks;
    } catch (IOException e) {
        // If there is an error while opening a stream, close the previously
        // opened streams and re-throw.
        RaidUtils.closeStreams(blocks);
        throw e;
    }
}