List of usage examples for org.apache.hadoop.fs.FSDataInputStream.seek
@Override public void seek(long desired) throws IOException
From source file:matrixFormat.MatrixRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { //FileSplit split = (FileSplit) genericSplit; MatrixFileSplit split = (MatrixFileSplit) genericSplit; Configuration job = context.getConfiguration(); method = (job.get("method").compareTo("IPB") == 0) ? 1 : ((job.get("method").compareTo("OPB") == 0) ? 2 : 0); sparse = job.getBoolean("Sparse", false); this.maxLength = job.getInt("mapred.matrixrecordreader.maxlength", Integer.MAX_VALUE); start1 = split.getStart();//from ww w . j ava2 s . c om start2 = split.getStart(1); end1 = start1 + split.getLength(0); end2 = start2 + split.getLength(1); blkID = split.getId(); final Path file = split.getPath(0); final Path file2 = split.getPath(1); compressionCodecs = new CompressionCodecFactory(job); codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FileSystem fs2 = file2.getFileSystem(job); FSDataInputStream fileIn1 = fs.open(split.getPath(0)); FSDataInputStream fileIn2 = fs2.open(split.getPath(1)); //FileInputStream fileIn2 = new FileInputStream(file2.toString()); //Don't care the compression stuff /*if (isCompressedInput()) { decompressor = CodecPool.getDecompressor(codec); if (codec instanceof SplittableCompressionCodec) { final SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).createInputStream( fileIn1, decompressor, start1, end1, SplittableCompressionCodec.READ_MODE.BYBLOCK); final SplitCompressionInputStream cIn2 = ((SplittableCompressionCodec)codec).createInputStream( fileIn2, decompressor, start2, end2, SplittableCompressionCodec.READ_MODE.BYBLOCK); in = new MatrixReader(cIn, cIn2); start1 = cIn.getAdjustedStart(); end1 = cIn.getAdjustedEnd(); filePosition1 = cIn; } else { in = new MatrixReader(codec.createInputStream(fileIn1, decompressor), codec.createInputStream(fileIn2, decompressor), job, split.getStarts(0), split.getStarts(1) ); filePosition1 = 
fileIn1; } } else {*/ fileIn1.seek(start1); fileIn2.seek(start2); if (sparse) { in = new MatrixReader(fileIn1, fileIn2, job, split.getStart(0), split.getStart(1)); } else { in = new MatrixReader(fileIn1, fileIn2, job, split.getStarts(0), split.getStarts(1)); } //in = new MatrixReader(file, file2, job, split.getStarts(0), split.getStarts(1)); filePosition1 = fileIn1; filePosition2 = fileIn2; //} // If this is not the first split, we always throw away first record // because we always (except the last split) read one extra line in // next() method. /*if (start1 != 0) { start1 += in.readOldBlock(maxLength, maxBytesToConsume(pos1)); this.pos1 = start1; } in.readBlocks(maxLength, maxBytesToConsume(pos1)); start1 += in.getBytesComsumed(0); //start2 += in.getBytesComsumed(1); this.pos1 = start1;*/ }
From source file:ml.shifu.guagua.hadoop.io.GuaguaLineRecordReader.java
License:Apache License
@Override public void initialize(GuaguaFileSplit genericSplit) throws IOException { this.maxLineLength = Integer.MAX_VALUE; start = genericSplit.getOffset();// www . jav a 2 s . co m end = start + genericSplit.getLength(); final Path file = new Path(genericSplit.getPath()); compressionCodecs = new CompressionCodecFactory(this.conf); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(this.conf); FSDataInputStream fileIn = fs.open(file); boolean skipFirstLine = false; if (codec != null) { if (null == this.recordDelimiterBytes) { in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE); } else { in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE, this.recordDelimiterBytes); } end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } if (null == this.recordDelimiterBytes) { in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE); } else { in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE, this.recordDelimiterBytes); } } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:ml.shifu.guagua.mapreduce.GuaguaLineRecordReader.java
License:Apache License
@Override public void initialize(GuaguaFileSplit split) throws IOException { this.maxLineLength = Integer.MAX_VALUE; start = split.getOffset();/*from w ww . java 2s . c o m*/ end = start + split.getLength(); final Path file = new Path(split.getPath()); compressionCodecs = new CompressionCodecFactory(this.conf); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(new Configuration()); FSDataInputStream fileIn = fs.open(file); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE); } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:ml.shifu.guagua.yarn.GuaguaLineRecordReader.java
License:Apache License
@Override public void initialize(GuaguaFileSplit genericSplit) throws IOException { this.maxLineLength = Integer.MAX_VALUE; start = genericSplit.getOffset();// w w w. j ava 2 s. co m end = start + genericSplit.getLength(); final Path file = new Path(genericSplit.getPath()); compressionCodecs = new CompressionCodecFactory(new Configuration()); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(new Configuration()); FSDataInputStream fileIn = fs.open(file); boolean skipFirstLine = false; if (codec != null) { if (null == this.recordDelimiterBytes) { in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE); } else { in = new LineReader(codec.createInputStream(fileIn), GuaguaConstants.DEFAULT_IO_BUFFER_SIZE, this.recordDelimiterBytes); } end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } if (null == this.recordDelimiterBytes) { in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE); } else { in = new LineReader(fileIn, GuaguaConstants.DEFAULT_IO_BUFFER_SIZE, this.recordDelimiterBytes); } } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:ml.shifu.guagua.yarn.GuaguaYarnTask.java
License:Apache License
/**
 * Reads one serialized split object from {@code file}, starting at byte
 * {@code offset}.
 *
 * <p>The data at that offset is read as a class name string followed by the
 * serialized object; the class is resolved through the YARN configuration
 * and the object is decoded with the deserializer registered for it.
 *
 * @param file   file holding the serialized split
 * @param offset byte position at which the split record begins
 * @return the deserialized split
 * @throws IOException if reading fails or the named split class is not found
 */
@SuppressWarnings({ "unchecked", "unused" })
private <T> T getSplitDetails(Path file, long offset) throws IOException {
    final FileSystem fs = file.getFileSystem(getYarnConf());
    FSDataInputStream inFile = null;
    try {
        inFile = fs.open(file);
        inFile.seek(offset);

        final String className = Text.readString(inFile);
        final Class<T> cls;
        try {
            cls = (Class<T>) getYarnConf().getClassByName(className);
        } catch (ClassNotFoundException ce) {
            // Surface the missing class as an IOException, keeping the cause.
            throw new IOException(String.format("Split class %s not found", className), ce);
        }

        final Deserializer<T> deserializer =
                (Deserializer<T>) new SerializationFactory(getYarnConf()).getDeserializer(cls);
        deserializer.open(inFile);
        return deserializer.deserialize(null);
    } finally {
        // Always release the stream, whether or not deserialization succeeded.
        IOUtils.closeStream(inFile);
    }
}
From source file:ml.shifu.shifu.core.mr.input.CombineRecordReader.java
License:Apache License
private void initializeOne(TaskAttemptContext context, FileSplit split) throws IOException { Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); start = split.getStart();/*from ww w. j av a 2s . com*/ end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { if (null == this.recordDelimiterBytes) { in = new LineReader(codec.createInputStream(fileIn), job); } else { in = new LineReader(codec.createInputStream(fileIn), job, this.recordDelimiterBytes); } end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } if (null == this.recordDelimiterBytes) { in = new LineReader(fileIn, job); } else { in = new LineReader(fileIn, job, this.recordDelimiterBytes); } } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:net.darkseraphim.webanalytics.hadoop.csv.CSVLineRecordReader.java
License:Apache License
public void configure(InputSplit genericSplit, JobConf conf) throws IOException { FileSplit split = (FileSplit) genericSplit; start = split.getStart();//from w w w .j a v a 2 s .c om end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(conf); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(conf); FSDataInputStream fileIn = fs.open(split.getPath()); if (codec != null) { is = codec.createInputStream(fileIn); end = Long.MAX_VALUE; } else { if (start != 0) { fileIn.seek(start); } is = fileIn; } this.pos = start; init(is, conf); }
From source file:net.shun.mapreduce.lib.input.XmlRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); String[] beginMarks = job.getStrings("mapred.xmlrecordreader.begin", "<page>"); this.beginMark = beginMarks[0]; String[] endMarks = job.getStrings("mapred.xmlrecordreader.begin", "</page>"); this.endMark = endMarks[0]; start = split.getStart();/*from ww w. jav a 2s .com*/ end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); fileIn.seek(start); in = new BufferedInputStream(fileIn); /* boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new LineReader(fileIn, job); } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int)Math.min((long)Integer.MAX_VALUE, end - start)); } */ this.pos = start; readUntilMatch(beginMark, false, null); }
From source file:nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.NReadRecordReader.java
License:Open Source License
/** * Override method for instantiation./*ww w .j av a2 s. c o m*/ * * @param inputSplit The InputSplit to read. * @param context The context for this task. * @throws IOException Returns default exception. * @throws InterruptedException Returns default exception. */ @Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { // Initialize. Configuration conf = context.getConfiguration(); FileSplit split = (FileSplit) inputSplit; Path file = split.getPath(); FileSystem fs = file.getFileSystem(conf); FSDataInputStream infile = fs.open(split.getPath()); // Use number of lines given by user and set parameters. this.NLINESTOPROCESS = NLineInputFormat.getNumLinesPerSplit(context); this.maxLineLength = conf.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE); this.start = split.getStart(); this.end = this.start + split.getLength(); boolean skipFirstLine = false; // Skip first line? if (this.start != 0) { skipFirstLine = true; this.start--; infile.seek(this.start); } this.in = new LineReader(infile, conf); if (skipFirstLine) { this.start += this.in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start)); } this.pos = this.start; }
From source file:nl.cwi.kba2013.thrift.bin.ThriftRecordReader.java
License:Apache License
/** Boilerplate initialization code for file input streams. */ @Override//from w w w . j av a 2s. c o m public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { conf = context.getConfiguration(); fileSplit = (FileSplit) split; start = fileSplit.getStart(); length = fileSplit.getLength(); position = start; Path path = fileSplit.getPath(); FileSystem fs = path.getFileSystem(conf); FSDataInputStream fileIn = fs.open(path); compressionCodecs = new CompressionCodecFactory(conf); codec = compressionCodecs.getCodec(path); if (isCompressedInput()) { decompressor = CodecPool.getDecompressor(codec); in = new DataInputStream(codec.createInputStream(fileIn, decompressor)); filePosition = fileIn; //LOG.info("Successfully initialized input stream for compressed data."); } else { fileIn.seek(start); in = fileIn; filePosition = fileIn; } tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in)); }