List of usage examples for org.apache.hadoop.io.IOUtils.closeStream
public static void closeStream(java.io.Closeable stream)
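IOUtils.closeStream closes the given stream and swallows any IOException, and it is null-safe, which is what makes it the standard backstop in a finally block. A minimal sketch of the canonical idiom (the path and configuration are placeholders, not taken from any example below):

import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CloseStreamSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        InputStream in = null;
        try {
            in = fs.open(new Path("/tmp/example.txt")); // hypothetical path
            // ... read from the stream ...
        } finally {
            // Null-safe and exception-swallowing: closing here never masks
            // an exception already propagating out of the try block.
            IOUtils.closeStream(in);
        }
    }
}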
From source file:com.knewton.mapreduce.SSTableRecordReader.java
License:Apache License
/**
 * Copies a remote path to the local filesystem, while updating hadoop that we're making
 * progress. Doesn't support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local)
        throws IOException {
    // don't support transferring from remote directories
    FileStatus remoteStat = remoteFS.getFileStatus(remote);
    Preconditions.checkArgument(!remoteStat.isDirectory(),
            String.format("Path %s is directory!", remote));
    // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
    if (localFS.exists(local)) {
        FileStatus localStat = localFS.getFileStatus(local);
        if (localStat.isDirectory()) {
            local = new Path(local, remote.getName());
        }
    }
    long remoteFileSize = remoteStat.getLen();
    // do actual copy
    InputStream in = null;
    OutputStream out = null;
    try {
        long startTime = System.currentTimeMillis();
        long lastLogTime = 0;
        long bytesCopied = 0;
        in = remoteFS.open(remote);
        out = localFS.create(local, true);
        int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
                CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
        byte[] buf = new byte[buffSize];
        int bytesRead = in.read(buf);
        while (bytesRead >= 0) {
            long now = System.currentTimeMillis();
            // log transfer rate once per min, starting 1 min after transfer began
            if (now - lastLogTime > 60000L && now - startTime > 60000L) {
                double elapsedSec = (now - startTime) / 1000D;
                double bytesPerSec = bytesCopied / elapsedSec;
                LOG.info("Transferred {} of {} bytes at {} bytes per second",
                        bytesCopied, remoteFileSize, bytesPerSec);
                lastLogTime = now;
            }
            this.ctx.progress();
            out.write(buf, 0, bytesRead);
            bytesCopied += bytesRead;
            bytesRead = in.read(buf);
        }
        // try to close these outside of finally so we receive exception on failure
        out.close();
        out = null;
        in.close();
        in = null;
    } finally {
        // make sure everything's closed
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}
From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java
License:Apache License
@SuppressWarnings("deprecation") public byte[][] getSplits(Configuration conf, Path path) { List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try {//from w w w .ja v a 2s. co m reader = new SequenceFile.Reader(path.getFileSystem(conf), path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { byte[] tmp = ((Text) key).copyBytes(); if (rowkeyList.contains(tmp) == false) { rowkeyList.add(tmp); } } } catch (Exception e) { e.printStackTrace(); } finally { IOUtils.closeStream(reader); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue; }
From source file:com.kylinolap.job.hadoop.hbase.CreateHTableJob.java
License:Apache License
@SuppressWarnings("deprecation") public byte[][] getSplits(Configuration conf, Path path) throws Exception { FileSystem fs = path.getFileSystem(conf); if (fs.exists(path) == false) { System.err.println("Path " + path + " not found, no region split, HTable will be one region"); return null; }/*from www . j av a2 s . c o m*/ List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { rowkeyList.add(((Text) key).copyBytes()); } } catch (Exception e) { e.printStackTrace(); throw e; } finally { IOUtils.closeStream(reader); } System.out.println((rowkeyList.size() + 1) + " regions"); System.out.println(rowkeyList.size() + " splits"); for (byte[] split : rowkeyList) { System.out.println(StringUtils.byteToHexString(split)); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue.length == 0 ? null : retValue; }
From source file:com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java
License:Creative Commons License
/**
 * <p>
 * If the file has not already been read, this reads it into memory, so that
 * a call to getCurrentValue() will return the entire contents of this file
 * as Text, and getCurrentKey() will return the qualified path to this file
 * as Text. Then, returns true. If it has already been read, then returns
 * false without updating any internal state.
 * </p>
 *
 * @return Whether the file was read or not.
 * @throws IOException if there is an error reading the file.
 * @throws InterruptedException if there is an error.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!isProcessed) {
        if (mFileLength > (long) Integer.MAX_VALUE) {
            throw new IOException("File is longer than Integer.MAX_VALUE.");
        }
        byte[] contents = new byte[(int) mFileLength];
        FileSystem fs = mFileToRead.getFileSystem(mConf);
        FSDataInputStream in = null;
        try {
            // Set the contents of this file.
            in = fs.open(mFileToRead);
            IOUtils.readFully(in, contents, 0, contents.length);
            fileContent.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        isProcessed = true;
        return true;
    }
    return false;
}
From source file:com.marcolotz.lung.io.inputFormat.WholeFileRecordReader.java
License:Creative Commons License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    /* if the record reader for this input split has not been called yet */
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        // Reads from the conf file what the desired file system is.
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
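Whole-file record readers like the two above only work if each input split covers an entire file. A sketch of the matching InputFormat side (the class name and key/value type parameters are assumptions; the snippets above do not show them):

// Hypothetical companion InputFormat for a whole-file record reader.
public class WholeFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        return false; // one split per file, so nextKeyValue() can read it whole
    }

    @Override
    public RecordReader<NullWritable, BytesWritable> createRecordReader(InputSplit split,
            TaskAttemptContext context) {
        return new WholeFileRecordReader(); // a reader like the ones shown above
    }
}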
From source file:com.marklogic.contentpump.examples.SimpleSequenceFileCreator.java
License:Apache License
public static void main(String args[]) throws IOException {
    System.out.println("Sequence File Creator");
    String uri = args[0]; // output sequence file name
    String filePath = args[1]; // text file to read from; odd lines are keys,
                               // even lines are values
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);
    SequenceFile.Writer writer = null;
    SimpleSequenceFileKey key = new SimpleSequenceFileKey();
    BufferedReader buffer = new BufferedReader(new FileReader(filePath));
    String line = null;
    SimpleSequenceFileValue<Text> value = new SimpleSequenceFileValue<Text>();
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        while ((line = buffer.readLine()) != null) {
            key.setDocumentURI(new DocumentURI(line));
            if ((line = buffer.readLine()) == null) {
                break;
            }
            value.setValue(new Text(line));
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
        buffer.close(); // note: unlike the writer, a failure closing the reader propagates
    }
}
From source file:com.marklogic.contentpump.examples.SimpleSequenceFileReader.java
License:Apache License
public static void main(String args[]) throws Exception {
    System.out.println("Sequence File Reader");
    String uri = args[0]; // input should be a sequence file
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        long position = reader.getPosition();
        while (reader.next(key, value)) {
            String syncSeen = reader.syncSeen() ? "*" : "";
            System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen,
                    ((SimpleSequenceFileKey) key).getDocumentURI().getUri(),
                    ((SimpleSequenceFileValue) value).getValue());
            position = reader.getPosition();
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
    }
}
From source file:com.marklogic.contentpump.Versions.java
License:Apache License
protected Versions(String component) {
    info = new Properties();
    String versionInfoFile = component + "-version-info.properties";
    InputStream is = null;
    try {
        is = Thread.currentThread().getContextClassLoader().getResourceAsStream(versionInfoFile);
        if (is == null) {
            throw new IOException("Resource not found");
        }
        info.load(is);
    } catch (IOException ex) {
        LogFactory.getLog(getClass()).warn(
                "Could not read '" + versionInfoFile + "', " + ex.toString(), ex);
    } finally {
        IOUtils.closeStream(is);
    }
}
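On Java 7 and later, the same pattern can be written with try-with-resources, which makes the explicit closeStream unnecessary. A sketch with the same behavior (close() is skipped automatically when the stream is null):

String versionInfoFile = component + "-version-info.properties";
try (InputStream is = Thread.currentThread().getContextClassLoader()
        .getResourceAsStream(versionInfoFile)) {
    if (is == null) {
        throw new IOException("Resource not found");
    }
    info.load(is);
} catch (IOException ex) {
    // a close() failure is also an IOException, so it lands here too
    LogFactory.getLog(getClass()).warn("Could not read '" + versionInfoFile + "', " + ex.toString(), ex);
}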
From source file:com.mellanox.r4h.DFSClient.java
License:Apache License
/**
 * Get the checksum of the whole file or a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * @param src
 *          The file path
 * @param length
 *          the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
    checkOpen();
    Preconditions.checkArgument(length >= 0);
    // get block locations for the file range
    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
    if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
    }
    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
    final DataOutputBuffer md5out = new DataOutputBuffer();
    int bytesPerCRC = -1;
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    long crcPerBlock = 0;
    boolean refetchBlocks = false;
    int lastRetriedIndex = -1;

    // get block checksum for each block
    long remaining = length;
    if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
        remaining = Math.min(length, blockLocations.getFileLength());
    }
    for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
        if (refetchBlocks) { // refetch to get fresh tokens
            blockLocations = callGetBlockLocations(namenode, src, 0, length);
            if (null == blockLocations) {
                throw new FileNotFoundException("File does not exist: " + src);
            }
            locatedblocks = blockLocations.getLocatedBlocks();
            refetchBlocks = false;
        }
        LocatedBlock lb = locatedblocks.get(i);
        final ExtendedBlock block = lb.getBlock();
        if (remaining < block.getNumBytes()) {
            block.setNumBytes(remaining);
        }
        remaining -= block.getNumBytes();
        final DatanodeInfo[] datanodes = lb.getLocations();

        // try each datanode location of the block
        final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
        boolean done = false;
        for (int j = 0; !done && j < datanodes.length; j++) {
            DataOutputStream out = null;
            DataInputStream in = null;
            try {
                // connect to a datanode
                IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
                out = new DataOutputStream(
                        new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
                in = new DataInputStream(pair.in);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
                }
                // get block MD5
                new Sender(out).blockChecksum(block, lb.getBlockToken());
                final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));
                String logInfo = "for block " + block + " from datanode " + datanodes[j];
                DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);
                OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

                // read byte-per-checksum
                final int bpc = checksumData.getBytesPerCrc();
                if (i == 0) { // first block
                    bytesPerCRC = bpc;
                } else if (bpc != bytesPerCRC) {
                    throw new IOException(
                            "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);
                }

                // read crc-per-block
                final long cpb = checksumData.getCrcPerBlock();
                if (locatedblocks.size() > 1 && i == 0) {
                    crcPerBlock = cpb;
                }

                // read md5
                final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());
                md5.write(md5out);

                // read crc-type
                final DataChecksum.Type ct;
                if (checksumData.hasCrcType()) {
                    ct = PBHelper.convert(checksumData.getCrcType());
                } else {
                    LOG.debug("Retrieving checksum from an earlier-version DataNode: "
                            + "inferring checksum by reading first byte");
                    ct = inferChecksumTypeByReading(lb, datanodes[j]);
                }

                if (i == 0) { // first block
                    crcType = ct;
                } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
                    // if crc types are mixed in a file
                    crcType = DataChecksum.Type.MIXED;
                }

                done = true;

                if (LOG.isDebugEnabled()) {
                    if (i == 0) {
                        LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
                    }
                    LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
                }
            } catch (InvalidBlockTokenException ibte) {
                if (i > lastRetriedIndex) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM "
                                + "for file " + src + " for block " + block + " from datanode "
                                + datanodes[j] + ". Will retry the block once.");
                    }
                    lastRetriedIndex = i;
                    done = true; // actually it's not done; but we'll retry
                    i--; // repeat at i-th block
                    refetchBlocks = true;
                    break;
                }
            } catch (IOException ie) {
                LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
            } finally {
                IOUtils.closeStream(in);
                IOUtils.closeStream(out);
            }
        }

        if (!done) {
            throw new IOException("Fail to get block MD5 for " + block);
        }
    }

    // compute file MD5
    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
    switch (crcType) {
    case CRC32:
        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    case CRC32C:
        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    default:
        // If there is no block allocated for the file,
        // return one with the magic entry that matches what previous
        // hdfs versions return.
        if (locatedblocks.size() == 0) {
            return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
        }
        // we should never get here since the validity was checked
        // when getCrcType() was called above.
        return null;
    }
}
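A brief usage sketch (the client variable and path are hypothetical; only getFileChecksum itself comes from the code above): checksum the first megabyte of a file and print the result's algorithm name.

// dfsClient is an already-open DFSClient instance (hypothetical).
MD5MD5CRC32FileChecksum checksum = dfsClient.getFileChecksum("/data/events.log", 1024L * 1024L);
System.out.println("algorithm: " + checksum.getAlgorithmName());
System.out.println("checksum length in bytes: " + checksum.getLength());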
From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java
License:Apache License
/**
 * <p>If the file has not already been read, this reads it into memory, so that a call
 * to getCurrentValue() will return the entire contents of this file as Text,
 * and getCurrentKey() will return the qualified path to this file as Text. Then, returns
 * true. If it has already been read, then returns false without updating any internal state.</p>
 *
 * @return Whether the file was read or not.
 * @throws IOException if there is an error reading the file.
 * @throws InterruptedException if there is an error.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!mProcessed) {
        if (mFileLength > (long) Integer.MAX_VALUE) {
            throw new IOException("File is longer than Integer.MAX_VALUE.");
        }
        byte[] contents = new byte[(int) mFileLength];
        FileSystem fs = mFileToRead.getFileSystem(mConf);
        FSDataInputStream in = null;
        try {
            // Set the contents of this file.
            in = fs.open(mFileToRead);
            IOUtils.readFully(in, contents, 0, contents.length);
            mFileText.set(contents, 0, contents.length);

            // Set the name of this file.
            String fileName = mFileToRead.makeQualified(fs).toString();
            mFileName.set(fileName);
        } finally {
            IOUtils.closeStream(in);
        }
        mProcessed = true;
        return true;
    }
    return false;
}