Usage examples for org.apache.hadoop.io.IOUtils.readFully
public static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException
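Before the real-world examples below, here is a minimal sketch of the method in isolation: it reads a whole HDFS file into a byte array, relying on readFully to loop until the requested number of bytes has been read and to throw an IOException if the stream ends early. The class name ReadFullyExample and the path /tmp/example.bin are hypothetical placeholders, not taken from the examples that follow.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadFullyExample {

    public static byte[] readWholeFile(Configuration conf, Path file) throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        int length = (int) fs.getFileStatus(file).getLen();
        byte[] contents = new byte[length];
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            // Unlike a single InputStream.read call, readFully keeps reading
            // until 'len' bytes have arrived, or fails with an IOException.
            IOUtils.readFully(in, contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        return contents;
    }

    public static void main(String[] args) throws IOException {
        byte[] data = readWholeFile(new Configuration(), new Path("/tmp/example.bin"));
        System.out.println("Read " + data.length + " bytes");
    }
}

This is the same pattern most of the record-reader examples below follow: size the buffer from the file length, open the stream, readFully into the buffer, and close the stream in a finally block.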
From source file: org.apache.tez.runtime.library.common.sort.impl.IFileInputStream.java
License: Apache License
private int doRead(byte[] b, int off, int len) throws IOException {
    // If we are trying to read past the end of data, just read
    // the left over data
    int origLen = len;
    if (currentOffset + len > dataLength) {
        len = (int) (dataLength - currentOffset);
    }

    int bytesRead = in.read(b, off, len);

    if (bytesRead < 0) {
        String mesg = " CurrentOffset=" + currentOffset + ", offset=" + offset
                + ", off=" + off + ", dataLength=" + dataLength
                + ", origLen=" + origLen + ", len=" + len
                + ", length=" + length + ", checksumSize=" + checksumSize;
        LOG.info(mesg);
        throw new ChecksumException("Checksum Error: " + mesg, 0);
    }

    checksum(b, off, bytesRead);

    currentOffset += bytesRead;

    if (disableChecksumValidation) {
        return bytesRead;
    }

    if (currentOffset == dataLength) {
        // The last four bytes are checksum. Strip them and verify
        sum.update(buffer, 0, offset);
        csum = new byte[checksumSize];
        IOUtils.readFully(in, csum, 0, checksumSize);
        if (!sum.compare(csum, 0)) {
            String mesg = "CurrentOffset=" + currentOffset + ", off=" + offset
                    + ", dataLength=" + dataLength + ", origLen=" + origLen
                    + ", len=" + len + ", length=" + length
                    + ", checksumSize=" + checksumSize
                    + ", csum=" + Arrays.toString(csum) + ", sum=" + sum;
            LOG.info(mesg);
            throw new ChecksumException("Checksum Error: " + mesg, 0);
        }
    }
    return bytesRead;
}
From source file: org.apache.tez.runtime.library.shuffle.common.ShuffleUtils.java
License: Apache License
@SuppressWarnings("resource") public static void shuffleToMemory(MemoryFetchedInput fetchedInput, InputStream input, int decompressedLength, int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, Log LOG) throws IOException { IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead, ifileReadAheadLength);/* w w w. ja va 2 s .c om*/ input = checksumIn; // Are map-outputs compressed? if (codec != null) { Decompressor decompressor = CodecPool.getDecompressor(codec); decompressor.reset(); input = codec.createInputStream(input, decompressor); } // Copy map-output into an in-memory buffer byte[] shuffleData = fetchedInput.getBytes(); try { IOUtils.readFully(input, shuffleData, 0, shuffleData.length); // metrics.inputBytes(shuffleData.length); LOG.info("Read " + shuffleData.length + " bytes from input for " + fetchedInput.getInputAttemptIdentifier()); } catch (IOException ioe) { // Close the streams IOUtils.cleanup(LOG, input); // Re-throw throw ioe; } }
From source file: org.geotools.WholeFile.WholeFileRecordReader.java
License: Apache License
@Override
public boolean next(Text key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        String fileName = file.getName();
        key.set(fileName);
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
From source file: org.geotools.WholeFile.WholeFileRecordReader_NewAPI.java
License: Apache License
public boolean nextKeyValue() throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
From source file: org.interactiverobotics.source_code_crawler.step6.TextFileRecordReader.java
License: Open Source License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!processed) {
        final byte[] contents = new byte[(int) fileSplit.getLength()];
        final Path file = fileSplit.getPath();
        final FileSystem fileSystem = file.getFileSystem(configuration);
        FSDataInputStream in = null;
        try {
            in = fileSystem.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            key.set(file.toString());
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
From source file: org.kitesdk.cli.commands.TarImportCommand.java
License: Apache License
@Override
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 2,
            "Tar path and target dataset URI are required.");
    Preconditions.checkArgument(SUPPORTED_TAR_COMPRESSION_TYPES.contains(compressionType),
            "Compression type " + compressionType + " is not supported");

    String source = targets.get(0);
    String datasetUri = targets.get(1);

    long blockSize = getConf().getLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);

    int success = 0;

    View<TarFileEntry> targetDataset;
    if (Datasets.exists(datasetUri)) {
        console.debug("Using existing dataset: {}", datasetUri);
        targetDataset = Datasets.load(datasetUri, TarFileEntry.class);
    } else {
        console.info("Creating new dataset: {}", datasetUri);
        DatasetDescriptor.Builder descriptorBuilder = new DatasetDescriptor.Builder();
        descriptorBuilder.format(Formats.AVRO);
        descriptorBuilder.schema(TarFileEntry.class);
        targetDataset = Datasets.create(datasetUri, descriptorBuilder.build(), TarFileEntry.class);
    }

    DatasetWriter<TarFileEntry> writer = targetDataset.newWriter();

    // Create a Tar input stream wrapped in appropriate decompressor
    // TODO: Enhancement would be to use native compression libs
    TarArchiveInputStream tis;
    CompressionType tarCompressionType = CompressionType.NONE;
    if (compressionType.isEmpty()) {
        if (source.endsWith(".tar")) {
            tarCompressionType = CompressionType.NONE;
        } else if (source.endsWith(".tar.gz")) {
            tarCompressionType = CompressionType.GZIP;
        } else if (source.endsWith(".tar.bz2")) {
            tarCompressionType = CompressionType.BZIP2;
        }
    } else if (compressionType.equals("gzip")) {
        tarCompressionType = CompressionType.GZIP;
    } else if (compressionType.equals("bzip2")) {
        tarCompressionType = CompressionType.BZIP2;
    } else {
        tarCompressionType = CompressionType.NONE;
    }

    console.info("Using {} compression", tarCompressionType);

    switch (tarCompressionType) {
    case GZIP:
        tis = new TarArchiveInputStream(new GzipCompressorInputStream(open(source)));
        break;
    case BZIP2:
        tis = new TarArchiveInputStream(new BZip2CompressorInputStream(open(source)));
        break;
    case NONE:
    default:
        tis = new TarArchiveInputStream(open(source));
    }

    TarArchiveEntry entry;

    try {
        int count = 0;
        while ((entry = tis.getNextTarEntry()) != null) {
            if (!entry.isDirectory()) {
                long size = entry.getSize();
                if (size >= blockSize) {
                    console.warn("Entry \"{}\" (size {}) is larger than the "
                            + "HDFS block size of {}. This may result in remote block reads",
                            new Object[] { entry.getName(), size, blockSize });
                }

                byte[] buf = new byte[(int) size];
                try {
                    IOUtils.readFully(tis, buf, 0, (int) size);
                } catch (IOException e) {
                    console.error("Did not read entry {} successfully (entry size {})",
                            entry.getName(), size);
                    success = 1;
                    throw e;
                }
                writer.write(TarFileEntry.newBuilder().setFilename(entry.getName())
                        .setFilecontent(ByteBuffer.wrap(buf)).build());
                count++;
            }
        }
        console.info("Added {} records to \"{}\"", count, targetDataset.getDataset().getName());
    } finally {
        IOUtils.closeStream(writer);
        IOUtils.closeStream(tis);
    }

    return success;
}
From source file: org.seqdoop.hadoop_bam.cli.plugins.Cat.java
License: Open Source License
@Override
protected int run(final CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("cat :: OUTPATH not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("cat :: no INPATHs given.");
        return 3;
    }

    final Path outPath = new Path(args.get(0));
    final List<String> ins = args.subList(1, args.size());

    final boolean verbose = parser.getBoolean(verboseOpt);

    final ValidationStringency stringency = Utils.toStringency(
            parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()), "cat");
    if (stringency == null)
        return 3;

    final Configuration conf = getConf();

    // Expand the glob patterns.
    final List<Path> inputs = new ArrayList<Path>(ins.size());
    for (final String in : ins) {
        try {
            final Path p = new Path(in);
            for (final FileStatus fstat : p.getFileSystem(conf).globStatus(p))
                inputs.add(fstat.getPath());
        } catch (IOException e) {
            System.err.printf("cat :: Could not expand glob pattern '%s': %s\n", in, e.getMessage());
        }
    }

    final Path input0 = inputs.get(0);

    // Infer the format from the first input path or contents.
    SAMFormat format = SAMFormat.inferFromFilePath(input0);
    if (format == null) {
        try {
            format = SAMFormat.inferFromData(input0.getFileSystem(conf).open(input0));
        } catch (IOException e) {
            System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
            return 4;
        }
        if (format == null) {
            System.err.printf("cat :: Unknown SAM format in input '%s'\n", inputs.get(0));
            return 4;
        }
    }

    // Choose the header.
    final SAMFileHeader header;
    try {
        final SAMFileReader r = new SAMFileReader(input0.getFileSystem(conf).open(input0));
        header = r.getFileHeader();
        r.close();
    } catch (IOException e) {
        System.err.printf("cat :: Could not read input '%s': %s\n", input0, e.getMessage());
        return 5;
    }

    // Open the output.
    final OutputStream out;
    try {
        out = outPath.getFileSystem(conf).create(outPath);
    } catch (IOException e) {
        System.err.printf("cat :: Could not create output file: %s\n", e.getMessage());
        return 6;
    }

    // Output the header.
    try {
        // Don't use the returned stream, because we're concatenating directly
        // and don't want to apply another layer of compression to BAM.
        new SAMOutputPreparer().prepareForRecords(out, format, header);
    } catch (IOException e) {
        System.err.printf("cat :: Outputting header failed: %s\n", e.getMessage());
        return 7;
    }

    // Output the records from each file in the order given, converting if
    // necessary.
    int inIdx = 1;
    try {
        for (final Path inPath : inputs) {
            if (verbose) {
                System.out.printf("cat :: Concatenating path %d of %d...\n", inIdx++, inputs.size());
            }
            switch (format) {
            case SAM: {
                final InputStream in = inPath.getFileSystem(conf).open(inPath);

                // Use SAMFileReader to grab the header, but ignore it, thus
                // ensuring that the header has been skipped.
                new SAMFileReader(in).getFileHeader();

                IOUtils.copyBytes(in, out, conf, false);
                in.close();
                break;
            }
            case BAM: {
                final FSDataInputStream in = inPath.getFileSystem(conf).open(inPath);

                // Find the block length, thankfully given to us by the BGZF
                // format. We need it in order to know how much gzipped data to
                // read after skipping the BAM header, so that we can only read
                // that much and then simply copy the remaining gzip blocks
                // directly.
                final ByteBuffer block = ByteBuffer.wrap(new byte[0xffff]).order(ByteOrder.LITTLE_ENDIAN);

                // Don't use readFully here, since EOF is fine.
                for (int read = 0, prev;
                        (prev = in.read(block.array(), read, block.capacity() - read)) < block.capacity();) {
                    // EOF is fine.
                    if (prev == -1)
                        break;
                    read += prev;
                }

                // Find the BGZF subfield and extract the length from it.
                int blockLength = 0;
                for (int xlen = (int) block.getShort(10) & 0xffff, i = 12, end = i + xlen; i < end;) {
                    final int slen = (int) block.getShort(i + 2) & 0xffff;
                    if (block.getShort(i) == 0x4342 && slen == 2) {
                        blockLength = ((int) block.getShort(i + 4) & 0xffff) + 1;
                        break;
                    }
                    i += 4 + slen;
                }

                if (blockLength == 0)
                    throw new IOException("BGZF extra field not found in " + inPath);

                if (verbose) {
                    System.err.printf("cat :: first block length %d\n", blockLength);
                }

                // Skip the BAM header. Can't use SAMFileReader because it'll
                // use its own BlockCompressedInputStream.
                final ByteArrayInputStream blockIn = new ByteArrayInputStream(block.array(), 0, blockLength);
                final BlockCompressedInputStream bin = new BlockCompressedInputStream(blockIn);

                // Theoretically we could write into the ByteBuffer we already
                // had, since BlockCompressedInputStream needs to read the
                // header before it can decompress any data and thereafter we
                // can freely overwrite the first 8 bytes of the header... but
                // that's a bit too nasty, so let's not.
                final ByteBuffer buf = ByteBuffer.wrap(new byte[8]).order(ByteOrder.LITTLE_ENDIAN);

                // Read the BAM magic number and the SAM header length, verify
                // the magic, and skip the SAM header.
                IOUtils.readFully(bin, buf.array(), 0, 8);
                final int magic = buf.getInt(0), headerLen = buf.getInt(4);
                if (magic != 0x014d4142)
                    throw new IOException("bad BAM magic number in " + inPath);
                IOUtils.skipFully(bin, headerLen);

                // Skip the reference sequences.
                IOUtils.readFully(bin, buf.array(), 0, 4);
                for (int i = buf.getInt(0); i-- > 0;) {
                    // Read the reference name length and skip it along with the
                    // reference length.
                    IOUtils.readFully(bin, buf.array(), 0, 4);
                    IOUtils.skipFully(bin, buf.getInt(0) + 4);
                }

                // Recompress the rest of this gzip block.
                final int remaining = bin.available();

                if (verbose)
                    System.err.printf("cat :: %d bytes to bgzip\n", remaining);

                if (remaining > 0) {
                    // The overload of IOUtils.copyBytes that takes "long length"
                    // was added only in Hadoop 0.20.205.0, which we don't want
                    // to depend on, so copy manually.
                    final byte[] remBuf = new byte[remaining];
                    IOUtils.readFully(bin, remBuf, 0, remBuf.length);

                    final BlockCompressedOutputStream bout = new BlockCompressedOutputStream(out, null);
                    bout.write(remBuf);
                    bout.flush();
                }

                // Just copy the raw bytes comprising the remaining blocks.
                in.seek(blockLength);
                IOUtils.copyBytes(in, out, conf, false);

                in.close();
                break;
            }
            }
        }
    } catch (IOException e) {
        System.err.printf("cat :: Outputting records failed: %s\n", e.getMessage());
        return 8;
    }

    // For BAM, output the BGZF terminator.
    try {
        if (format == SAMFormat.BAM)
            out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);
        out.close();
    } catch (IOException e) {
        System.err.printf("cat :: Finishing output failed: %s\n", e.getMessage());
        return 9;
    }

    return 0;
}
From source file: org.shaf.core.io.hadoop.WholeFileRecordReader.java
License: Apache License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (fileProcessed) {
        return false;
    }

    int fileLength = (int) split.getLength();
    byte[] result = new byte[fileLength];

    FileSystem fs = FileSystem.get(config);
    FSDataInputStream in = null;
    try {
        this.key.set(split.getPath().toString());
        in = fs.open(split.getPath());
        IOUtils.readFully(in, result, 0, fileLength);
        value.set(new String(result, 0, fileLength));
    } finally {
        IOUtils.closeStream(in);
    }
    this.fileProcessed = true;
    return true;
}