List of usage examples for org.apache.hadoop.io IOUtils readFully
public static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException
From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java
License:Apache License
/** * <p>If the file has not already been read, this reads it into memory, so that a call * to getCurrentValue() will return the entire contents of this file as Text, * and getCurrentKey() will return the qualified path to this file as Text. Then, returns * true. If it has already been read, then returns false without updating any internal state.</p> * * @return Whether the file was read or not. * @throws IOException if there is an error reading the file. * @throws InterruptedException if there is an error. *//*from w w w . j a va 2 s .c o m*/ @Override public boolean nextKeyValue() throws IOException, InterruptedException { if (!mProcessed) { if (mFileLength > (long) Integer.MAX_VALUE) { throw new IOException("File is longer than Integer.MAX_VALUE."); } byte[] contents = new byte[(int) mFileLength]; FileSystem fs = mFileToRead.getFileSystem(mConf); FSDataInputStream in = null; try { // Set the contents of this file. in = fs.open(mFileToRead); IOUtils.readFully(in, contents, 0, contents.length); mFileText.set(contents, 0, contents.length); // Set the name of this file. String fileName = mFileToRead.makeQualified(fs).toString(); mFileName.set(fileName); } finally { IOUtils.closeStream(in); } mProcessed = true; return true; } return false; }
From source file:com.twitter.hraven.mapreduce.JobFileRawLoaderMapper.java
License:Apache License
/**
 * Reads the complete contents of a job file into memory.
 *
 * @param fileStatus status of the file to read; supplies both the path and
 *     the length used to size the buffer.
 * @return the contents of the job file.
 * @throws IOException if the file is longer than {@code Integer.MAX_VALUE}
 *     bytes, or when bad things happen during reading.
 */
private byte[] readJobFile(FileStatus fileStatus) throws IOException {
  final long fileLength = fileStatus.getLen();
  // A blind (int) cast would wrap for files over 2 GiB, producing either a
  // NegativeArraySizeException or a silently truncated read.
  if (fileLength > Integer.MAX_VALUE) {
    throw new IOException("Job file " + fileStatus.getPath()
        + " is too large to load into memory: " + fileLength + " bytes");
  }

  final byte[] rawBytes = new byte[(int) fileLength];
  FSDataInputStream fsdis = null;
  try {
    fsdis = hdfs.open(fileStatus.getPath());
    IOUtils.readFully(fsdis, rawBytes, 0, rawBytes.length);
  } finally {
    IOUtils.closeStream(fsdis);
  }
  return rawBytes;
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override public boolean nextKeyValue() throws IOException, InterruptedException { if (!processed) { byte[] contents = new byte[(int) fileSplit.getLength()]; Path file = fileSplit.getPath(); FileSystem fs = file.getFileSystem(conf); FSDataInputStream in = null; try { in = fs.open(file);//www. ja v a 2 s . c om IOUtils.readFully(in, contents, 0, contents.length); value.set(contents, 0, contents.length); } finally { IOUtils.closeStream(in); } processed = true; return true; } return false; }
From source file:crunch.MaxTemperature.java
License:Apache License
@Override public boolean next(NullWritable key, BytesWritable value) throws IOException { if (!processed) { byte[] contents = new byte[(int) fileSplit.getLength()]; Path file = fileSplit.getPath(); FileSystem fs = file.getFileSystem(conf); FSDataInputStream in = null; try { in = fs.open(file);//from w w w . j a v a2s .c om IOUtils.readFully(in, contents, 0, contents.length); value.set(contents, 0, contents.length); } finally { IOUtils.closeStream(in); } processed = true; return true; } return false; }
From source file:de.l3s.common.hadoop.WholeFileRecordReader.java
License:Apache License
@Override public boolean nextKeyValue() throws IOException { if (!processed) { byte[] contents = new byte[(int) fileSplit.getLength()]; Path file = fileSplit.getPath(); FileSystem fs = file.getFileSystem(conf); FSDataInputStream in = null;//from w w w . j a v a 2s. co m try { in = fs.open(file); IOUtils.readFully(in, contents, 0, contents.length); value.set(contents, 0, contents.length); } finally { IOUtils.closeStream(in); } processed = true; return true; } return false; }
From source file:edu.usc.pgroup.louvain.hadoop.GraphPartitionRecordReader.java
License:Apache License
@Override public boolean nextKeyValue() throws IOException, InterruptedException { if (isFinished) { return false; }// w w w .ja v a 2s . c om int fileLength = (int) split.getLength(); byte[] result = new byte[fileLength]; FileSystem fs = FileSystem.get(conf); Path path = split.getPath(); currentKey = new Text(path.getName()); FSDataInputStream in = null; try { in = fs.open(split.getPath()); IOUtils.readFully(in, result, 0, fileLength); currentValue.set(result, 0, fileLength); } finally { IOUtils.closeStream(in); } this.isFinished = true; return true; }
From source file:eu.edisonproject.utility.commons.WholeFileRecordReader.java
License:Apache License
@Override public boolean nextKeyValue() throws IOException { if (!processed) { byte[] contents = new byte[(int) fileSplit.getLength()]; Path file = fileSplit.getPath(); FileSystem fs = file.getFileSystem(conf); FSDataInputStream in = null;/*from w ww .ja v a 2 s . c o m*/ try { in = fs.open(file); IOUtils.readFully(in, contents, 0, contents.length); // value.set(contents, 0, contents.length); value.set(new String(contents)); } finally { IOUtils.closeStream(in); } processed = true; return true; } return false; }
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Cat.java
License:Open Source License
@Override protected int run(final CmdLineParser parser) { final List<String> args = parser.getRemainingArgs(); if (args.isEmpty()) { System.err.println("cat :: OUTPATH not given."); return 3; }/*from w ww . j av a2 s . c om*/ if (args.size() == 1) { System.err.println("cat :: no INPATHs given."); return 3; } final Path outPath = new Path(args.get(0)); final List<String> ins = args.subList(1, args.size()); final boolean verbose = parser.getBoolean(verboseOpt); final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue( stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "cat"); if (stringency == null) return 3; final Configuration conf = getConf(); // Expand the glob patterns. final List<Path> inputs = new ArrayList<Path>(ins.size()); for (final String in : ins) { try { final Path p = new Path(in); for (final FileStatus fstat : p.getFileSystem(conf).globStatus(p)) inputs.add(fstat.getPath()); } catch (IOException e) { System.err.printf("cat :: Could not expand glob pattern '%s': %s\n", in, e.getMessage()); } } final Path input0 = inputs.get(0); // Infer the format from the first input path or contents. // the first input path or contents. SAMFormat format = SAMFormat.inferFromFilePath(input0); if (format == null) { try { format = SAMFormat.inferFromData(input0.getFileSystem(conf).open(input0)); } catch (IOException e) { System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage()); return 4; } if (format == null) { System.err.printf("cat :: Unknown SAM format in input '%s'\n", inputs.get(0)); return 4; } } // Choose the header. final SAMFileHeader header; try { final SAMFileReader r = new SAMFileReader(input0.getFileSystem(conf).open(input0)); header = r.getFileHeader(); r.close(); } catch (IOException e) { System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage()); return 5; } // Open the output. 
final OutputStream out; try { out = outPath.getFileSystem(conf).create(outPath); } catch (IOException e) { System.err.printf("cat :: Could not create output file: %s\n", e.getMessage()); return 6; } // Output the header. try { // Don't use the returned stream, because we're concatenating directly // and don't want to apply another layer of compression to BAM. new SAMOutputPreparer().prepareForRecords(out, format, header); } catch (IOException e) { System.err.printf("cat :: Outputting header failed: %s\n", e.getMessage()); return 7; } // Output the records from each file in the order given, converting if // necessary. int inIdx = 1; try { for (final Path inPath : inputs) { if (verbose) { System.out.printf("cat :: Concatenating path %d of %d...\n", inIdx++, inputs.size()); } switch (format) { case SAM: { final InputStream in = inPath.getFileSystem(conf).open(inPath); // Use SAMFileReader to grab the header, but ignore it, thus // ensuring that the header has been skipped. new SAMFileReader(in).getFileHeader(); IOUtils.copyBytes(in, out, conf, false); in.close(); break; } case BAM: { final FSDataInputStream in = inPath.getFileSystem(conf).open(inPath); // Find the block length, thankfully given to us by the BGZF // format. We need it in order to know how much gzipped data to // read after skipping the BAM header, so that we can only read // that much and then simply copy the remaining gzip blocks // directly. final ByteBuffer block = ByteBuffer.wrap(new byte[0xffff]).order(ByteOrder.LITTLE_ENDIAN); // Don't use readFully here, since EOF is fine. for (int read = 0, prev; (prev = in.read(block.array(), read, block.capacity() - read)) < block .capacity();) { // EOF is fine. if (prev == -1) break; read += prev; } // Find the BGZF subfield and extract the length from it. 
int blockLength = 0; for (int xlen = (int) block.getShort(10) & 0xffff, i = 12, end = i + xlen; i < end;) { final int slen = (int) block.getShort(i + 2) & 0xffff; if (block.getShort(i) == 0x4342 && slen == 2) { blockLength = ((int) block.getShort(i + 4) & 0xffff) + 1; break; } i += 4 + slen; } if (blockLength == 0) throw new IOException("BGZF extra field not found in " + inPath); if (verbose) { System.err.printf("cat :: first block length %d\n", blockLength); } // Skip the BAM header. Can't use SAMFileReader because it'll // use its own BlockCompressedInputStream. final ByteArrayInputStream blockIn = new ByteArrayInputStream(block.array(), 0, blockLength); final BlockCompressedInputStream bin = new BlockCompressedInputStream(blockIn); // Theoretically we could write into the ByteBuffer we already // had, since BlockCompressedInputStream needs to read the // header before it can decompress any data and thereafter we // can freely overwrite the first 8 bytes of the header... but // that's a bit too nasty, so let's not. final ByteBuffer buf = ByteBuffer.wrap(new byte[8]).order(ByteOrder.LITTLE_ENDIAN); // Read the BAM magic number and the SAM header length, verify // the magic, and skip the SAM header. IOUtils.readFully(bin, buf.array(), 0, 8); final int magic = buf.getInt(0), headerLen = buf.getInt(4); if (magic != 0x014d4142) throw new IOException("bad BAM magic number in " + inPath); IOUtils.skipFully(bin, headerLen); // Skip the reference sequences. IOUtils.readFully(bin, buf.array(), 0, 4); for (int i = buf.getInt(0); i-- > 0;) { // Read the reference name length and skip it along with the // reference length. IOUtils.readFully(bin, buf.array(), 0, 4); IOUtils.skipFully(bin, buf.getInt(0) + 4); } // Recompress the rest of this gzip block. 
final int remaining = bin.available(); if (verbose) System.err.printf("cat :: %d bytes to bgzip\n", remaining); if (remaining > 0) { // The overload of IOUtils.copyBytes that takes "long length" // was added only in Hadoop 0.20.205.0, which we don't want // to depend on, so copy manually. final byte[] remBuf = new byte[remaining]; IOUtils.readFully(bin, remBuf, 0, remBuf.length); final BlockCompressedOutputStream bout = new BlockCompressedOutputStream(out, null); bout.write(remBuf); bout.flush(); } // Just copy the raw bytes comprising the remaining blocks. in.seek(blockLength); IOUtils.copyBytes(in, out, conf, false); in.close(); break; } } } } catch (IOException e) { System.err.printf("cat :: Outputting records failed: %s\n", e.getMessage()); return 8; } // For BAM, output the BGZF terminator. try { if (format == SAMFormat.BAM) out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK); out.close(); } catch (IOException e) { System.err.printf("cat :: Finishing output failed: %s\n", e.getMessage()); return 9; } return 0; }
From source file:io.hops.erasure_coding.RaidUtils.java
License:Apache License
public static void copyBytes(InputStream in, OutputStream out, byte[] buf, long count) throws IOException { for (long bytesRead = 0; bytesRead < count;) { int toRead = Math.min(buf.length, (int) (count - bytesRead)); IOUtils.readFully(in, buf, 0, toRead); bytesRead += toRead;// ww w. j a va 2 s . co m out.write(buf, 0, toRead); } }
From source file:kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java
License:Open Source License
@Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); this.start = split.getStart(); this.end = this.start + split.getLength(); final Path file = split.getPath(); this.compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = this.compressionCodecs.getCodec(file); this.filename = file.getName(); this.firstRead = true; // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); // get uncompressed length if (codec instanceof GzipCodec) { this.isCompressed = true; FSDataInputStream fileInCheckSize = fs.open(file); byte[] len = new byte[4]; try {//from w ww. j a v a 2 s . c om LOG.info("compressed input : " + file.getName()); LOG.info("compressed file size : " + this.end); fileInCheckSize.skip(this.end - 4); IOUtils.readFully(fileInCheckSize, len, 0, len.length); this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0]; if (this.uncompressedSize < 0) { this.uncompressedSize = this.end; } LOG.info("uncompressed file size : " + this.uncompressedSize); } finally { fileInCheckSize.close(); } this.end = Long.MAX_VALUE; } else if (codec != null) { this.isCompressed = true; this.end = Long.MAX_VALUE; this.uncompressedSize = Long.MAX_VALUE; } else { this.isCompressed = false; } // get inputstream FSDataInputStream fileIn = fs.open(file); if (codec != null) { this.in = new LineReader(codec.createInputStream(fileIn), job); } else { if (this.start != 0) { fileIn.seek(this.start); } this.in = new LineReader(fileIn, job); } // skip lines until we meet new read start while (this.start < this.end) { Text skipText = new Text(); long newSize = this.in.readLine(skipText, this.maxLineLength, Math.max((int) Math.min(Integer.MAX_VALUE, this.end - 
this.start), this.maxLineLength)); if (newSize == 0) { // EOF this.hasNextRead = false; this.pos = this.end; break; } if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) { this.prevLine = skipText; this.prevSize = newSize; this.hasNextRead = true; this.pos = this.start; break; } this.start += newSize; if (this.start >= this.end) { // EOF this.hasNextRead = false; this.pos = this.end; break; } } this.key = null; this.value = null; }