List of usage examples for org.apache.hadoop.fs.BlockLocation#getLength()
public long getLength()
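getLength() returns the size of a block in bytes; for the last block of a file it is usually smaller than the configured block size. Before the examples from real projects below, here is a minimal self-contained sketch of the typical pattern (the path is a placeholder, not taken from any of the sources):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetLengthExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.dat"); // placeholder path
        FileSystem fs = path.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(path);
        // Ask the name node for the block locations covering the whole file
        BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
        long total = 0;
        for (BlockLocation block : blocks) {
            // getLength() is the number of bytes stored in this block
            total += block.getLength();
        }
        System.out.println("Blocks: " + blocks.length + ", bytes covered: " + total);
    }
}

For an HDFS file, the block lengths returned this way should sum to FileStatus.getLen().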
From source file:edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory.java
License:Apache License
/**
 * Instead of creating the splits using the input format, we do it manually.
 * This function returns fileSplits (1 per HDFS file block) irrespective of the number of partitions,
 * and the produced splits only cover the intersection between the current files in HDFS and the files
 * stored internally in AsterixDB.
 * 1. NoOp means appended file
 * 2. AddOp means new file
 * 3. UpdateOp means the delta of a file
 *
 * @return the file splits, one per HDFS block
 * @throws IOException
 */
protected InputSplit[] getSplits(JobConf conf) throws IOException {
    // Create the file system object
    FileSystem fs = FileSystem.get(conf);
    ArrayList<FileSplit> fileSplits = new ArrayList<FileSplit>();
    ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<ExternalFile>();
    // Create the file splits
    for (ExternalFile file : files) {
        Path filePath = new Path(file.getFileName());
        FileStatus fileStatus;
        try {
            fileStatus = fs.getFileStatus(filePath);
        } catch (FileNotFoundException e) {
            // The file was deleted at some point; skip to the next file
            continue;
        }
        if (file.getPendingOp() == ExternalFilePendingOp.PENDING_ADD_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            // Get its information from the HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
            // Create a split per block, clipping the last split to the file's recorded size
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() < file.getSize()) {
                    fileSplits.add(new FileSplit(filePath, block.getOffset(),
                            (block.getLength() + block.getOffset()) < file.getSize() ? block.getLength()
                                    : (file.getSize() - block.getOffset()),
                            block.getHosts()));
                    orderedExternalFiles.add(file);
                }
            }
        } else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            long oldSize = 0L;
            long newSize = file.getSize();
            for (int i = 0; i < files.size(); i++) {
                if (files.get(i).getFileName().equals(file.getFileName())
                        && files.get(i).getSize() != file.getSize()) {
                    newSize = files.get(i).getSize();
                    oldSize = file.getSize();
                    break;
                }
            }
            // Get its information from the HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() + block.getLength() > oldSize) {
                    if (block.getOffset() < newSize) {
                        // Block intersects the delta -> create a split covering only the new bytes
                        long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
                        long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L
                                : block.getOffset() + block.getLength() - newSize;
                        long splitLength = block.getLength() - startCut - endCut;
                        fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength,
                                block.getHosts()));
                        orderedExternalFiles.add(file);
                    }
                }
            }
        }
    }
    fs.close();
    files = orderedExternalFiles;
    return fileSplits.toArray(new FileSplit[fileSplits.size()]);
}
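In the AddOp branch above, the split-length ternary clips the final block to the file size recorded in AsterixDB, since HDFS may already contain appended bytes past it. A hedged restatement of that arithmetic as a standalone helper (the method name is illustrative, not part of the source):

// Split length for a block that may extend past the file's recorded size:
// equivalent to the ternary (len + off) < size ? len : size - off
static long clippedSplitLength(long blockOffset, long blockLength, long fileSize) {
    return Math.min(blockLength, fileSize - blockOffset);
}

This assumes blockOffset < fileSize, which the surrounding if-check guarantees.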
From source file:hsyndicate.tools.BlockLocations.java
License:Apache License
public static void main(String[] args) throws Exception {
    Path p = new Path(args[0]);
    Configuration conf = new Configuration();
    FileSystem fs = p.getFileSystem(conf);
    FileStatus f = fs.getFileStatus(p);
    BlockLocation[] bla = fs.getFileBlockLocations(f, 0, f.getLen());
    System.out.println("File : " + f.getPath().toString());
    for (BlockLocation bl : bla) {
        System.out.println("Offset : " + bl.getOffset());
        System.out.println("Len : " + bl.getLength());
        System.out.println("Hosts : " + makeCommaSeparated(bl.getHosts()));
        System.out.println("Names : " + makeCommaSeparated(bl.getNames()));
        System.out.println("TopologyPaths : " + makeCommaSeparated(bl.getTopologyPaths()));
    }
}
From source file:io.hops.erasure_coding.BaseEncodingManager.java
License:Apache License
/**
 * RAID an individual file.
 */
public static boolean doFileRaid(Configuration conf, Path sourceFile, Path parityPath, Codec codec,
        Statistics statistics, Progressable reporter, int targetRepl, int metaRepl) throws IOException {
    FileSystem srcFs = sourceFile.getFileSystem(conf);
    FileStatus sourceStatus = srcFs.getFileStatus(sourceFile);

    // Extract block locations from the file system
    BlockLocation[] locations = srcFs.getFileBlockLocations(sourceFile, 0, sourceStatus.getLen());

    // If the file has two or fewer blocks, there is nothing to do
    if (locations.length <= 2) {
        return false;
    }

    // Add up the raw disk space occupied by this file
    long diskSpace = 0;
    for (BlockLocation l : locations) {
        diskSpace += (l.getLength() * sourceStatus.getReplication());
    }
    statistics.numProcessedBlocks += locations.length;
    statistics.processedSize += diskSpace;

    // Generate the parity file
    generateParityFile(conf, sourceStatus, targetRepl, reporter, srcFs, parityPath, codec, locations.length,
            sourceStatus.getReplication(), metaRepl, sourceStatus.getBlockSize());
    if (!srcFs.setReplication(sourceFile, (short) targetRepl)) {
        LOG.info("Error in reducing replication of " + sourceFile + " to " + targetRepl);
        statistics.remainingSize += diskSpace;
        return false;
    }

    diskSpace = 0;
    for (BlockLocation l : locations) {
        diskSpace += (l.getLength() * targetRepl);
    }
    statistics.remainingSize += diskSpace;

    // The metafile will have this many blocks
    int numMeta = locations.length / codec.stripeLength;
    if (locations.length % codec.stripeLength != 0) {
        numMeta++;
    }

    // We create numMeta blocks for every file. Each metablock has metaRepl replicas.
    // The last block of the metafile might not be completely filled up, but we
    // ignore that for now.
    statistics.numMetaBlocks += (numMeta * metaRepl);
    statistics.metaSize += (numMeta * metaRepl * sourceStatus.getBlockSize());
    return true;
}
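The numMeta computation above is integer ceiling division written as a divide plus a remainder check. A hedged equivalent one-liner (same result for the non-negative values used here):

// ceil(locations.length / (double) codec.stripeLength) without floating point
int numMeta = (locations.length + codec.stripeLength - 1) / codec.stripeLength;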
From source file:ml.shifu.guagua.yarn.util.InputSplitUtils.java
License:Apache License
public static int getBlockIndex(BlockLocation[] blkLocations, long offset) {
    for (int i = 0; i < blkLocations.length; i++) {
        // Is the offset inside this block?
        if ((blkLocations[i].getOffset() <= offset)
                && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
            return i;
        }
    }
    BlockLocation last = blkLocations[blkLocations.length - 1];
    long fileLength = last.getOffset() + last.getLength() - 1;
    throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")");
}
From source file:mzb.Balancer.java
License:Apache License
/**
 * Decide all <source, target> pairs and
 * the number of bytes to move from a source to a target.
 * Maximum bytes to be moved per node is
 * Min(1 Band worth of bytes, MAX_SIZE_TO_MOVE).
 * Return the total number of bytes to move in this iteration.
 */
private long chooseNodes(String metaFile, String dirToBalance) throws IOException {
    long bytesToBalance = 0L;
    loadMeta(metaFile, slice2loc);
    if (oldMetaFile != null) {
        loadMeta(oldMetaFile, origSlice2loc);
    }
    FileStatus[] matches = fs.globStatus(new Path(dirToBalance + "*"));
    if (matches == null) {
        LOG.error("no file found in " + dirToBalance);
        System.exit(1);
    }
    for (FileStatus status : matches) {
        BlockLocation[] blkLocations = fs.getFileBlockLocations(status, 0, status.getLen());
        if (blkLocations == null) {
            LOG.error("blkLocations is null for " + status.getPath());
            System.exit(1);
        }
        // Slice id is encoded as the numeric suffix of the file name
        String fileName = status.getPath().getName();
        Integer sliceId = null;
        if (fileName.lastIndexOf('-') != -1) {
            sliceId = Integer.parseInt(fileName.substring(fileName.lastIndexOf('-') + 1));
        }
        for (BlockLocation blk : blkLocations) {
            // A block needs balancing if none of its replicas lives on the slice's target host
            boolean needBalance = true;
            for (String host : blk.getHosts()) {
                if (host.equals(slice2loc.get(sliceId))) {
                    needBalance = false;
                    break;
                }
            }
            String targetHost = slice2loc.get(sliceId);
            if (needBalance) {
                String sourceHost = chooseSource(sliceId, blk.getHosts());
                BlockInfo blockInfo = new BlockInfo(blk, targetHost, status.getPath().toString(), sliceId);
                Source source = (Source) hostDatanodes.get(sourceHost);
                source.addBlock(blockInfo);
                BalancerDatanode target = targetsNodes.get(targetHost);
                matchSourceWithTargetToMove(source, target);
                bytesToBalance += blk.getLength();
                LOG.info("Need to balance block " + blk + " of file " + status.getPath() + " from "
                        + sourceHost + " to " + targetHost);
            }
        }
    }
    return bytesToBalance;
}
From source file:org.apache.asterix.external.util.HDFSUtils.java
License:Apache License
/**
 * Instead of creating the splits using the input format, we do it manually.
 * This function returns fileSplits (1 per HDFS file block) irrespective of the number of partitions,
 * and the produced splits only cover the intersection between the current files in HDFS and the files
 * stored internally in AsterixDB.
 * 1. NoOp means appended file
 * 2. AddOp means new file
 * 3. UpdateOp means the delta of a file
 *
 * @return the file splits, one per HDFS block
 * @throws IOException
 */
public static InputSplit[] getSplits(JobConf conf, List<ExternalFile> files) throws IOException {
    // Create the file system object
    FileSystem fs = FileSystem.get(conf);
    ArrayList<FileSplit> fileSplits = new ArrayList<FileSplit>();
    ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<ExternalFile>();
    // Create the file splits
    for (ExternalFile file : files) {
        Path filePath = new Path(file.getFileName());
        FileStatus fileStatus;
        try {
            fileStatus = fs.getFileStatus(filePath);
        } catch (FileNotFoundException e) {
            // The file was deleted at some point; skip to the next file
            continue;
        }
        if (file.getPendingOp() == ExternalFilePendingOp.PENDING_ADD_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            // Get its information from the HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
            // Create a split per block, clipping the last split to the file's recorded size
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() < file.getSize()) {
                    fileSplits.add(new FileSplit(filePath, block.getOffset(),
                            (block.getLength() + block.getOffset()) < file.getSize() ? block.getLength()
                                    : (file.getSize() - block.getOffset()),
                            block.getHosts()));
                    orderedExternalFiles.add(file);
                }
            }
        } else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            long oldSize = 0L;
            long newSize = file.getSize();
            for (int i = 0; i < files.size(); i++) {
                if (files.get(i).getFileName().equals(file.getFileName())
                        && files.get(i).getSize() != file.getSize()) {
                    newSize = files.get(i).getSize();
                    oldSize = file.getSize();
                    break;
                }
            }
            // Get its information from the HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() + block.getLength() > oldSize) {
                    if (block.getOffset() < newSize) {
                        // Block intersects the delta -> create a split covering only the new bytes
                        long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
                        long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L
                                : block.getOffset() + block.getLength() - newSize;
                        long splitLength = block.getLength() - startCut - endCut;
                        fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength,
                                block.getHosts()));
                        orderedExternalFiles.add(file);
                    }
                }
            }
        }
    }
    fs.close();
    files.clear();
    files.addAll(orderedExternalFiles);
    return fileSplits.toArray(new FileSplit[fileSplits.size()]);
}
From source file:org.apache.drill.exec.store.AffinityCalculator.java
License:Apache License
/**
 * Builds a mapping of block locations to file byte ranges.
 */
private void buildBlockMap(String fileName) {
    final Timer.Context context = metrics.timer(BLOCK_MAP_BUILDER_TIMER).time();
    BlockLocation[] blocks;
    ImmutableRangeMap<Long, BlockLocation> blockMap;
    try {
        FileStatus file = fs.getFileStatus(new Path(fileName));
        blocks = fs.getFileBlockLocations(file, 0, file.getLen());
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
    ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder =
            new ImmutableRangeMap.Builder<Long, BlockLocation>();
    for (BlockLocation block : blocks) {
        long start = block.getOffset();
        long end = start + block.getLength();
        Range<Long> range = Range.closedOpen(start, end);
        blockMapBuilder = blockMapBuilder.put(range, block);
    }
    blockMap = blockMapBuilder.build();
    blockMapMap.put(fileName, blockMap);
    context.stop();
}
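Because the map is keyed by Range.closedOpen(start, end), a later lookup by byte offset finds the enclosing block in logarithmic time. A hedged sketch of how such a map can be queried (variable names are illustrative, not from the Drill source):

// Locate the block containing byte offset 'pos' of the file
ImmutableRangeMap<Long, BlockLocation> map = blockMapMap.get(fileName);
BlockLocation block = map.get(pos); // null when pos is past the last block
if (block != null) {
    long bytesLeftInBlock = block.getOffset() + block.getLength() - pos;
}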
From source file:org.apache.drill.exec.store.parquet.metadata.Metadata.java
License:Apache License
/**
 * Get the host affinity for a row group.
 *
 * @param fileStatus the parquet file
 * @param fs the file system
 * @param start the start of the row group
 * @param length the length of the row group
 * @return host affinity for the row group
 */
private Map<String, Float> getHostAffinity(FileStatus fileStatus, FileSystem fs, long start, long length)
        throws IOException {
    BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, start, length);
    Map<String, Float> hostAffinityMap = Maps.newHashMap();
    for (BlockLocation blockLocation : blockLocations) {
        for (String host : blockLocation.getHosts()) {
            Float currentAffinity = hostAffinityMap.get(host);
            float blockStart = blockLocation.getOffset();
            float blockEnd = blockStart + blockLocation.getLength();
            float rowGroupEnd = start + length;
            // Affinity is the fraction of the row group's bytes that live in this block
            Float newAffinity = (blockLocation.getLength() - (blockStart < start ? start - blockStart : 0)
                    - (blockEnd > rowGroupEnd ? blockEnd - rowGroupEnd : 0)) / length;
            if (currentAffinity != null) {
                hostAffinityMap.put(host, currentAffinity + newAffinity);
            } else {
                hostAffinityMap.put(host, newAffinity);
            }
        }
    }
    return hostAffinityMap;
}
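Each subtracted term removes the part of the block lying outside the row group, so newAffinity is the fraction of the row group stored in that block. A hedged worked example with illustrative numbers:

// Row group: start = 100, length = 200, so it ends at 300.
// Block: offset = 0, length = 256, so it ends at 256 and overlaps 156 bytes.
float newAffinity = (256f - (100 - 0) - 0) / 200f; // 0.78: hosts of this block hold 78% of the row group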
From source file:org.apache.drill.exec.store.parquet.Metadata.java
License:Apache License
/**
 * Get the host affinity for a row group.
 *
 * @param fileStatus the parquet file
 * @param start the start of the row group
 * @param length the length of the row group
 * @return host affinity for the row group
 * @throws IOException
 */
private Map<String, Float> getHostAffinity(FileStatus fileStatus, long start, long length)
        throws IOException {
    BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, start, length);
    Map<String, Float> hostAffinityMap = Maps.newHashMap();
    for (BlockLocation blockLocation : blockLocations) {
        for (String host : blockLocation.getHosts()) {
            Float currentAffinity = hostAffinityMap.get(host);
            float blockStart = blockLocation.getOffset();
            float blockEnd = blockStart + blockLocation.getLength();
            float rowGroupEnd = start + length;
            Float newAffinity = (blockLocation.getLength() - (blockStart < start ? start - blockStart : 0)
                    - (blockEnd > rowGroupEnd ? blockEnd - rowGroupEnd : 0)) / length;
            if (currentAffinity != null) {
                hostAffinityMap.put(host, currentAffinity + newAffinity);
            } else {
                hostAffinityMap.put(host, newAffinity);
            }
        }
    }
    return hostAffinityMap;
}
From source file:org.apache.drill.exec.store.schedule.BlockMapBuilder.java
License:Apache License
/**
 * Builds a mapping of block locations to file byte ranges.
 */
private ImmutableRangeMap<Long, BlockLocation> buildBlockMap(FileStatus status) throws IOException {
    final Timer.Context context = metrics.timer(BLOCK_MAP_BUILDER_TIMER).time();
    BlockLocation[] blocks;
    ImmutableRangeMap<Long, BlockLocation> blockMap;
    blocks = fs.getFileBlockLocations(status, 0, status.getLen());
    ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder =
            new ImmutableRangeMap.Builder<Long, BlockLocation>();
    for (BlockLocation block : blocks) {
        long start = block.getOffset();
        long end = start + block.getLength();
        Range<Long> range = Range.closedOpen(start, end);
        blockMapBuilder = blockMapBuilder.put(range, block);
    }
    blockMap = blockMapBuilder.build();
    blockMapMap.put(status.getPath(), blockMap);
    context.stop();
    return blockMap;
}