List of usage examples for org.apache.hadoop.io IOUtils readFully
public static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException
From source file:kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java
License:Open Source License
@Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); this.start = split.getStart(); this.end = this.start + split.getLength(); final Path file = split.getPath(); this.compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = this.compressionCodecs.getCodec(file); this.filename = file.getName(); this.firstRead = true; // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); // get uncompressed length if (codec instanceof GzipCodec) { this.isCompressed = true; FSDataInputStream fileInCheckSize = fs.open(file); byte[] len = new byte[4]; try {//from ww w. ja va2s.c om LOG.info("compressed input : " + file.getName()); LOG.info("compressed file size : " + this.end); fileInCheckSize.skip(this.end - 4); IOUtils.readFully(fileInCheckSize, len, 0, len.length); this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0]; if (this.uncompressedSize < 0) { this.uncompressedSize = this.end; } LOG.info("uncompressed file size : " + this.uncompressedSize); } finally { fileInCheckSize.close(); } this.end = Long.MAX_VALUE; } else if (codec != null) { this.isCompressed = true; this.end = Long.MAX_VALUE; this.uncompressedSize = Long.MAX_VALUE; } else { this.isCompressed = false; } // get inputstream FSDataInputStream fileIn = fs.open(file); if (codec != null) { this.in = new LineReader(codec.createInputStream(fileIn), job); } else { if (this.start != 0) { fileIn.seek(this.start); } this.in = new LineReader(fileIn, job); } // skip lines until we meet new record start while (this.start < this.end) { Text skipText = new Text(); long newSize = this.in.readLine(skipText, this.maxLineLength, Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), 
this.maxLineLength)); if (newSize == 0) { // EOF this.hasNextRecord = false; this.pos = this.end; break; } if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) { this.prevLine = skipText; this.prevSize = newSize; this.hasNextRecord = true; this.pos = this.start; break; } this.start += newSize; if (this.start >= this.end) { // EOF this.hasNextRecord = false; this.pos = this.end; break; } } this.key = null; this.value = null; }
From source file:libra.common.hadoop.io.reader.fasta.FastaKmerReader.java
License:Apache License
@Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) genericSplit; Configuration conf = context.getConfiguration(); this.kmersize = FastaKmerInputFormat.getKmerSize(conf); this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); this.start = split.getStart(); this.end = this.start + split.getLength(); final Path file = split.getPath(); this.compressionCodecs = new CompressionCodecFactory(conf); final CompressionCodec codec = this.compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(conf); // get uncompressed length if (codec instanceof GzipCodec) { this.isCompressed = true; FSDataInputStream fileInCheckSize = fs.open(file); byte[] len = new byte[4]; try {/*from ww w .ja va2 s . co m*/ LOG.info("compressed input : " + file.getName()); LOG.info("compressed file size : " + this.end); fileInCheckSize.skip(this.end - 4); IOUtils.readFully(fileInCheckSize, len, 0, len.length); this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0]; if (this.uncompressedSize < 0) { this.uncompressedSize = this.end; } LOG.info("uncompressed file size : " + this.uncompressedSize); } finally { fileInCheckSize.close(); } this.end = Long.MAX_VALUE; } else if (codec != null) { this.isCompressed = true; this.end = Long.MAX_VALUE; this.uncompressedSize = Long.MAX_VALUE; } else { this.isCompressed = false; } // get inputstream FSDataInputStream fileIn = fs.open(file); boolean inTheMiddle = false; if (codec != null) { this.in = new LineReader(codec.createInputStream(fileIn), conf); } else { if (this.start != 0) { this.start--; fileIn.seek(this.start); inTheMiddle = true; } this.in = new LineReader(fileIn, conf); } this.buffer = new Text(); if (inTheMiddle) { // find new start line this.start += this.in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, 
this.end - this.start)); // back off FSDataInputStream fileIn2 = fs.open(file); fileIn2.seek(this.start - 1000); LineReader in2 = new LineReader(fileIn2, conf); Text tempLine = new Text(); long curpos = this.start - 1000; while (curpos < this.start) { curpos += in2.readLine(tempLine, 0, (int) (this.start - curpos)); } if (tempLine.charAt(0) == READ_DELIMITER) { // clean start this.buffer.clear(); } else { // leave k-1 seq in the buffer String seq = tempLine.toString().trim(); String left = seq.substring(seq.length() - this.kmersize + 1); this.buffer.set(left); } in2.close(); } this.pos = this.start; this.key = null; this.value = null; }
From source file:org.acacia.partitioner.java.WholeFileRecordReader.java
License:Apache License
@Override public boolean next(NullWritable key, BytesWritable value) throws IOException { if (!processed) { byte[] contents = new byte[(int) fileSplit.getLength()]; Path file = fileSplit.getPath(); FileSystem fs = file.getFileSystem(conf); FSDataInputStream in = null;//from w w w . j a va2 s. co m try { in = fs.open(file); IOUtils.readFully(in, contents, 0, contents.length); value.set(contents, 0, contents.length); } finally { IOUtils.closeStream(in); } processed = true; return true; } return false; }
From source file:org.apache.falcon.resource.AbstractEntityManager.java
License:Apache License
/**
 * Returns the leading bytes of a stream as text, for debugging output.
 *
 * <p>At most {@code XML_DEBUG_LEN} bytes are read. A stream that ends earlier is not
 * an error: only the bytes actually read are returned, instead of failing with a
 * premature-EOF exception or padding the result with NUL characters.
 *
 * @param xmlStream stream to read from; it is not closed here
 * @return the bytes read, decoded with the platform default charset
 * @throws IOException if reading from the stream fails
 */
private String getAsString(InputStream xmlStream) throws IOException {
    byte[] data = new byte[XML_DEBUG_LEN];
    int filled = 0;
    while (filled < data.length) {
        int count = xmlStream.read(data, filled, data.length - filled);
        if (count < 0) {
            // end of stream before the buffer was filled
            break;
        }
        filled += count;
    }
    return new String(data, 0, filled);
}
From source file:org.apache.mahout.text.WholeFileRecordReader.java
License:Apache License
@Override public boolean nextKeyValue() throws IOException { if (!processed) { byte[] contents = new byte[(int) fileSplit.getLength()]; Path file = fileSplit.getPath(); FileSystem fs = file.getFileSystem(this.configuration); if (!fs.isFile(file)) { return false; }//from w ww. j ava2 s. co m FileStatus[] fileStatuses; if (pathFilter != null) { fileStatuses = fs.listStatus(file, pathFilter); } else { fileStatuses = fs.listStatus(file); } FSDataInputStream in = null; if (fileStatuses.length == 1) { try { in = fs.open(fileStatuses[0].getPath()); IOUtils.readFully(in, contents, 0, contents.length); value.setCapacity(contents.length); value.set(contents, 0, contents.length); } finally { Closeables.close(in, false); } processed = true; return true; } } return false; }
From source file:org.apache.tez.engine.common.shuffle.impl.Fetcher.java
License:Apache License
private void shuffleToMemory(MapHost host, MapOutput mapOutput, InputStream input, int decompressedLength, int compressedLength) throws IOException { IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, job); input = checksumIn;/* w w w. j a v a 2s .c om*/ // Are map-outputs compressed? if (codec != null) { decompressor.reset(); input = codec.createInputStream(input, decompressor); } // Copy map-output into an in-memory buffer byte[] shuffleData = mapOutput.getMemory(); try { IOUtils.readFully(input, shuffleData, 0, shuffleData.length); metrics.inputBytes(shuffleData.length); reporter.progress(); LOG.info("Read " + shuffleData.length + " bytes from map-output for " + mapOutput.getMapId()); } catch (IOException ioe) { // Close the streams IOUtils.cleanup(LOG, input); // Re-throw throw ioe; } }
From source file:org.apache.tez.engine.common.sort.impl.IFileInputStream.java
License:Apache License
private int doRead(byte[] b, int off, int len) throws IOException { // If we are trying to read past the end of data, just read // the left over data if (currentOffset + len > dataLength) { len = (int) dataLength - (int) currentOffset; }/*ww w . ja va 2 s . co m*/ int bytesRead = in.read(b, off, len); if (bytesRead < 0) { throw new ChecksumException("Checksum Error", 0); } checksum(b, off, bytesRead); currentOffset += bytesRead; if (disableChecksumValidation) { return bytesRead; } if (currentOffset == dataLength) { // The last four bytes are checksum. Strip them and verify sum.update(buffer, 0, offset); csum = new byte[checksumSize]; IOUtils.readFully(in, csum, 0, checksumSize); if (!sum.compare(csum, 0)) { throw new ChecksumException("Checksum Error", 0); } } return bytesRead; }
From source file:org.apache.tez.engine.common.sort.impl.TezSpillRecord.java
License:Apache License
/**
 * Loads a spill index file from the raw local file system.
 *
 * <p>The file length is rounded down to a whole number of index records of
 * {@code Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH} bytes; that many bytes are read into
 * {@code buf} and exposed through {@code entries} as a long buffer. When {@code crc} is
 * non-null the bytes are read through a {@link CheckedInputStream} and the resulting
 * checksum is compared with the long that follows the records in the file.
 *
 * @param indexFileName      path of the index file
 * @param job                configuration used to obtain the local file system
 * @param crc                checksum to verify the records with, or {@code null} to skip
 * @param expectedIndexOwner not used by this constructor
 * @throws IOException if the file cannot be read or the checksum does not match
 */
public TezSpillRecord(Path indexFileName, Configuration job, Checksum crc,
        String expectedIndexOwner) throws IOException {
    final FileSystem localRaw = FileSystem.getLocal(job).getRaw();
    final FSDataInputStream in = localRaw.open(indexFileName);
    try {
        final long fileLength = localRaw.getFileStatus(indexFileName).getLen();
        final int recordCount = (int) fileLength / Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;
        final int payloadSize = recordCount * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;

        buf = ByteBuffer.allocate(payloadSize);
        if (crc == null) {
            IOUtils.readFully(in, buf.array(), 0, payloadSize);
        } else {
            crc.reset();
            CheckedInputStream checked = new CheckedInputStream(in, crc);
            IOUtils.readFully(checked, buf.array(), 0, payloadSize);
            final long computed = checked.getChecksum().getValue();
            // read the stored value from the raw stream so it is not fed into crc
            final long stored = in.readLong();
            if (computed != stored) {
                throw new ChecksumException("Checksum error reading spill index: " + indexFileName, -1);
            }
        }
        entries = buf.asLongBuffer();
    } finally {
        in.close();
    }
}
From source file:org.apache.tez.runtime.library.broadcast.output.FileBasedKVWriter.java
License:Apache License
public byte[] getData() throws IOException { Preconditions.checkState(closed, "Only available after the Writer has been closed"); FSDataInputStream inStream = null;// w w w .jav a2 s. c o m byte[] buf = null; try { inStream = rfs.open(outputPath); buf = new byte[(int) getCompressedLength()]; IOUtils.readFully(inStream, buf, 0, (int) getCompressedLength()); } finally { if (inStream != null) { inStream.close(); } } return buf; }
From source file:org.apache.tez.runtime.library.common.shuffle.impl.Fetcher.java
License:Apache License
private void shuffleToMemory(MapHost host, MapOutput mapOutput, InputStream input, int decompressedLength, int compressedLength) throws IOException { IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead, ifileReadAheadLength);//from www . j a v a 2 s. co m input = checksumIn; // Are map-outputs compressed? if (codec != null) { decompressor.reset(); input = codec.createInputStream(input, decompressor); } // Copy map-output into an in-memory buffer byte[] shuffleData = mapOutput.getMemory(); try { IOUtils.readFully(input, shuffleData, 0, shuffleData.length); metrics.inputBytes(shuffleData.length); LOG.info("Read " + shuffleData.length + " bytes from map-output for " + mapOutput.getAttemptIdentifier()); } catch (IOException ioe) { // Close the streams IOUtils.cleanup(LOG, input); // Re-throw throw ioe; } }