Example usage for org.apache.hadoop.fs BlockLocation getLength

List of usage examples for org.apache.hadoop.fs BlockLocation getLength

Introduction

On this page you can find example usages of org.apache.hadoop.fs.BlockLocation.getLength().

Prototype

public long getLength() 

Document

Get the length of the block.
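
Before the full examples below, here is a minimal hedged sketch of the typical call pattern, assuming the usual org.apache.hadoop.conf and org.apache.hadoop.fs imports; the helper name and the caller-supplied path are illustrative. getFileBlockLocations() returns one BlockLocation per block overlapping the requested byte range, and getLength() reports each block's size in bytes.

public static void printBlockLengths(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(path);
    // One BlockLocation per block that overlaps bytes [0, file length)
    BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
    long total = 0;
    for (BlockLocation block : blocks) {
        total += block.getLength(); // size of this block in bytes
    }
    System.out.println(blocks.length + " blocks covering " + total + " bytes");
}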

Usage

From source file:edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory.java

License:Apache License

/**
 * Instead of creating the splits using the input format, we do it manually.
 * This function returns fileSplits (one per HDFS file block) irrespective of the number of partitions,
 * and the produced splits only cover the intersection between the current files in HDFS and the files
 * stored internally in AsterixDB:
 * 1. NoOp means appended file
 * 2. AddOp means new file
 * 3. UpdateOp means the delta of a file
 *
 * @return the file splits, one per intersecting HDFS block
 * @throws IOException
 */
protected InputSplit[] getSplits(JobConf conf) throws IOException {
    // Create file system object
    FileSystem fs = FileSystem.get(conf);
    ArrayList<FileSplit> fileSplits = new ArrayList<FileSplit>();
    ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<ExternalFile>();
    // Create files splits
    for (ExternalFile file : files) {
        Path filePath = new Path(file.getFileName());
        FileStatus fileStatus;
        try {
            fileStatus = fs.getFileStatus(filePath);
        } catch (FileNotFoundException e) {
            // file was deleted at some point, skip to next file
            continue;
        }
        if (file.getPendingOp() == ExternalFilePendingOp.PENDING_ADD_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() < file.getSize()) {
                    fileSplits.add(new FileSplit(filePath, block.getOffset(),
                            (block.getLength() + block.getOffset()) < file.getSize() ? block.getLength()
                                    : (file.getSize() - block.getOffset()),
                            block.getHosts()));
                    orderedExternalFiles.add(file);
                }
            }
        } else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            long oldSize = 0L;
            long newSize = file.getSize();
            for (int i = 0; i < files.size(); i++) {
                if (files.get(i).getFileName().equals(file.getFileName())
                        && files.get(i).getSize() != file.getSize()) {
                    newSize = files.get(i).getSize();
                    oldSize = file.getSize();
                    break;
                }
            }

            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() + block.getLength() > oldSize) {
                    if (block.getOffset() < newSize) {
                        // Block intersects the delta -> create a split
                        long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
                        long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L
                                : block.getOffset() + block.getLength() - newSize;
                        long splitLength = block.getLength() - startCut - endCut;
                        fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength,
                                block.getHosts()));
                        orderedExternalFiles.add(file);
                    }
                }
            }
        }
    }
    fs.close();
    files = orderedExternalFiles;
    return fileSplits.toArray(new FileSplit[fileSplits.size()]);
}
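
To see what the startCut/endCut trimming in the PENDING_NO_OP branch does, take a hypothetical appended file with oldSize = 100 MB, newSize = 250 MB, and 128 MB blocks: the first block (offset 0, length 128 MB) ends past oldSize, so startCut = 100 MB and the resulting split covers only bytes 100-128 MB of the delta; the second block (offset 128 MB, length 122 MB) lies entirely inside the delta, so both cuts are 0 and the whole block becomes one split.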

From source file:hsyndicate.tools.BlockLocations.java

License:Apache License

public static void main(String[] args) throws Exception {
    Path p = new Path(args[0]);
    Configuration conf = new Configuration();
    FileSystem fs = p.getFileSystem(conf);
    FileStatus f = fs.getFileStatus(p);
    BlockLocation[] bla = fs.getFileBlockLocations(f, 0, f.getLen());

    System.out.println("File : " + f.getPath().toString());
    for (BlockLocation bl : bla) {
        System.out.println("Offset : " + bl.getOffset());
        System.out.println("Len : " + bl.getLength());
        System.out.println("Hosts : " + makeCommaSeparated(bl.getHosts()));
        System.out.println("Names : " + makeCommaSeparated(bl.getNames()));
        System.out.println("TopologyPaths : " + makeCommaSeparated(bl.getTopologyPaths()));
    }
}

From source file:io.hops.erasure_coding.BaseEncodingManager.java

License:Apache License

/**
 * RAID an individual file
 */
public static boolean doFileRaid(Configuration conf, Path sourceFile, Path parityPath, Codec codec,
        Statistics statistics, Progressable reporter, int targetRepl, int metaRepl) throws IOException {
    FileSystem srcFs = sourceFile.getFileSystem(conf);
    FileStatus sourceStatus = srcFs.getFileStatus(sourceFile);

    // extract the block locations from the file system
    BlockLocation[] locations = srcFs.getFileBlockLocations(sourceFile, 0, sourceStatus.getLen());
    // if the file has 2 or fewer blocks, there is nothing to do
    if (locations.length <= 2) {
        return false;
    }

    // add up the raw disk space occupied by this file
    long diskSpace = 0;
    for (BlockLocation l : locations) {
        diskSpace += (l.getLength() * sourceStatus.getReplication());
    }
    statistics.numProcessedBlocks += locations.length;
    statistics.processedSize += diskSpace;

    // generate parity file
    generateParityFile(conf, sourceStatus, targetRepl, reporter, srcFs, parityPath, codec, locations.length,
            sourceStatus.getReplication(), metaRepl, sourceStatus.getBlockSize());
    if (!srcFs.setReplication(sourceFile, (short) targetRepl)) {
        LOG.info("Error in reducing replication of " + sourceFile + " to " + targetRepl);
        statistics.remainingSize += diskSpace;
        return false;
    }

    diskSpace = 0;
    for (BlockLocation l : locations) {
        diskSpace += (l.getLength() * targetRepl);
    }
    statistics.remainingSize += diskSpace;

    // the metafile will have this many number of blocks
    int numMeta = locations.length / codec.stripeLength;
    if (locations.length % codec.stripeLength != 0) {
        numMeta++;
    }

    // we create numMeta meta blocks for every file; each meta block has metaRepl replicas.
    // the last block of the metafile might not be completely filled up, but we
    // ignore that for now.
    statistics.numMetaBlocks += (numMeta * metaRepl);
    statistics.metaSize += (numMeta * metaRepl * sourceStatus.getBlockSize());
    return true;
}
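
As a hypothetical illustration of the accounting above: a 10-block file with 128 MB blocks and replication 3 adds 10 x 128 MB x 3 = 3840 MB to processedSize before RAID-ing, and once replication has been reduced to targetRepl = 1 the second loop adds 10 x 128 MB x 1 = 1280 MB to remainingSize.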

From source file:ml.shifu.guagua.yarn.util.InputSplitUtils.java

License:Apache License

public static int getBlockIndex(BlockLocation[] blkLocations, long offset) {
    for (int i = 0; i < blkLocations.length; i++) {
        // is the offset inside this block?
        if ((blkLocations[i].getOffset() <= offset)
                && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
            return i;
        }
    }
    BlockLocation last = blkLocations[blkLocations.length - 1];
    long fileLength = last.getOffset() + last.getLength() - 1;
    throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")");
}
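
A hedged sketch of how a helper like getBlockIndex is typically used when building splits: find the block that contains a split's first byte and hand that block's hosts to the split as locality hints. The path, splitStart, and splitLength variables below are illustrative, not part of the original class.

// Illustrative only: pick locality hints for a split starting at splitStart.
int blkIndex = getBlockIndex(blkLocations, splitStart);
String[] hosts = blkLocations[blkIndex].getHosts();
FileSplit split = new FileSplit(path, splitStart, splitLength, hosts);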

From source file:mzb.Balancer.java

License:Apache License

/**
 * Decide all <source, target> pairs and the number of bytes to move from
 * a source to a target. The maximum number of bytes to be moved per node is
 * Min(1 bandwidth's worth of bytes, MAX_SIZE_TO_MOVE).
 * Return the total number of bytes to move in this iteration.
 */
private long chooseNodes(String metaFile, String dirToBalance) throws IOException {
    Long bytesToBalance = 0L;
    loadMeta(metaFile, slice2loc);
    if (oldMetaFile != null)
        loadMeta(oldMetaFile, origSlice2loc);
    // FileStatus []matches=fs.globStatus(new Path(dirToBalance+"/*"));
    FileStatus[] matches = fs.globStatus(new Path(dirToBalance + "*"));
    if (matches == null) {
        LOG.error("no file found in " + dirToBalance);
        System.exit(1);
    }
    for (FileStatus status : matches) {
        //System.out.println(status.getPath());
        //System.out.println(status.getLen());
        BlockLocation[] blkLocations = fs.getFileBlockLocations(status, 0, status.getLen());
        if (blkLocations == null) {
            LOG.error("blkLocations is null.Why!!!");
            System.exit(1);
        }
        String fileName = status.getPath().getName();
        Integer sliceId = null;
        if (fileName.lastIndexOf('-') != -1) {
            sliceId = Integer.parseInt(fileName.substring(fileName.lastIndexOf('-') + 1));
        }
        for (BlockLocation blk : blkLocations) {
            boolean needBalance = true;
            for (String host : blk.getHosts()) {
                if (host.equals(slice2loc.get(sliceId))) {
                    needBalance = false;
                    break;
                }
            }
            //LOG.info("Check Block " + blk + " of File " + status.getPath());
            String targetHost = slice2loc.get(sliceId);
            if (needBalance) {
                String sourceHost = chooseSource(sliceId, blk.getHosts());
                BlockInfo blockInfo = new BlockInfo(blk, targetHost, status.getPath().toString(), sliceId);
                Source source = (Source) hostDatanodes.get(sourceHost);
                source.addBlock(blockInfo);
                BalancerDatanode target = targetsNodes.get(targetHost);
                matchSourceWithTargetToMove(source, target);
                bytesToBalance += blk.getLength();
                LOG.info("Need to Balancer Block " + blk + " of File " + status.getPath() + " From "
                        + sourceHost + " to " + slice2loc.get(sliceId));

            }
        }
    }
    return bytesToBalance;

    /*// First, match nodes on the same node group if cluster is node group aware
    if (cluster.isNodeGroupAware()) {
      chooseNodes(SAME_NODE_GROUP);
    }
            
    // Then, match nodes on the same rack
    chooseNodes(SAME_RACK);
    // At last, match all remaining nodes
    chooseNodes(ANY_OTHER);
            
    assert (datanodes.size() >= sources.size()+targets.size())
      : "Mismatched number of datanodes (" +
      datanodes.size() + " total, " +
      sources.size() + " sources, " +
      targets.size() + " targets)";
            
    long bytesToMove = 0L;
    for (Source src : sources) {
      bytesToMove += src.scheduledSize;
    }
    return bytesToMove;*/
}

From source file:org.apache.asterix.external.util.HDFSUtils.java

License:Apache License

/**
 * Instead of creating the splits using the input format, we do it manually.
 * This function returns fileSplits (one per HDFS file block) irrespective of the number of partitions,
 * and the produced splits only cover the intersection between the current files in HDFS and the files
 * stored internally in AsterixDB:
 * 1. NoOp means appended file
 * 2. AddOp means new file
 * 3. UpdateOp means the delta of a file
 * @return the file splits, one per intersecting HDFS block
 * @throws IOException
 */
public static InputSplit[] getSplits(JobConf conf, List<ExternalFile> files) throws IOException {
    // Create file system object
    FileSystem fs = FileSystem.get(conf);
    ArrayList<FileSplit> fileSplits = new ArrayList<FileSplit>();
    ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<ExternalFile>();
    // Create files splits
    for (ExternalFile file : files) {
        Path filePath = new Path(file.getFileName());
        FileStatus fileStatus;
        try {
            fileStatus = fs.getFileStatus(filePath);
        } catch (FileNotFoundException e) {
            // file was deleted at some point, skip to next file
            continue;
        }
        if (file.getPendingOp() == ExternalFilePendingOp.PENDING_ADD_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() < file.getSize()) {
                    fileSplits.add(new FileSplit(filePath, block.getOffset(),
                            (block.getLength() + block.getOffset()) < file.getSize() ? block.getLength()
                                    : (file.getSize() - block.getOffset()),
                            block.getHosts()));
                    orderedExternalFiles.add(file);
                }
            }
        } else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP
                && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            long oldSize = 0L;
            long newSize = file.getSize();
            for (int i = 0; i < files.size(); i++) {
                if (files.get(i).getFileName().equals(file.getFileName())
                        && files.get(i).getSize() != file.getSize()) {
                    newSize = files.get(i).getSize();
                    oldSize = file.getSize();
                    break;
                }
            }

            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() + block.getLength() > oldSize) {
                    if (block.getOffset() < newSize) {
                        // Block intersects the delta -> create a split
                        long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
                        long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L
                                : block.getOffset() + block.getLength() - newSize;
                        long splitLength = block.getLength() - startCut - endCut;
                        fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength,
                                block.getHosts()));
                        orderedExternalFiles.add(file);
                    }
                }
            }
        }
    }
    fs.close();
    files.clear();
    files.addAll(orderedExternalFiles);
    return fileSplits.toArray(new FileSplit[fileSplits.size()]);
}

From source file:org.apache.drill.exec.store.AffinityCalculator.java

License:Apache License

/**
 * Builds a mapping of file byte ranges to block locations
 */
private void buildBlockMap(String fileName) {
    final Timer.Context context = metrics.timer(BLOCK_MAP_BUILDER_TIMER).time();
    BlockLocation[] blocks;
    ImmutableRangeMap<Long, BlockLocation> blockMap;
    try {
        FileStatus file = fs.getFileStatus(new Path(fileName));
        blocks = fs.getFileBlockLocations(file, 0, file.getLen());
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
    ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder = new ImmutableRangeMap.Builder<Long, BlockLocation>();
    for (BlockLocation block : blocks) {
        long start = block.getOffset();
        long end = start + block.getLength();
        Range<Long> range = Range.closedOpen(start, end);
        blockMapBuilder = blockMapBuilder.put(range, block);
    }
    blockMap = blockMapBuilder.build();
    blockMapMap.put(fileName, blockMap);
    context.stop();
}

From source file:org.apache.drill.exec.store.parquet.metadata.Metadata.java

License:Apache License

/**
 * Get the host affinity for a row group.
 *
 * @param fileStatus the parquet file
 * @param start      the start of the row group
 * @param length     the length of the row group
 * @return host affinity for the row group
 */
private Map<String, Float> getHostAffinity(FileStatus fileStatus, FileSystem fs, long start, long length)
        throws IOException {
    BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, start, length);
    Map<String, Float> hostAffinityMap = Maps.newHashMap();
    for (BlockLocation blockLocation : blockLocations) {
        for (String host : blockLocation.getHosts()) {
            Float currentAffinity = hostAffinityMap.get(host);
            float blockStart = blockLocation.getOffset();
            float blockEnd = blockStart + blockLocation.getLength();
            float rowGroupEnd = start + length;
            Float newAffinity = (blockLocation.getLength() - (blockStart < start ? start - blockStart : 0)
                    - (blockEnd > rowGroupEnd ? blockEnd - rowGroupEnd : 0)) / length;
            if (currentAffinity != null) {
                hostAffinityMap.put(host, currentAffinity + newAffinity);
            } else {
                hostAffinityMap.put(host, newAffinity);
            }
        }
    }
    return hostAffinityMap;
}
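
In effect, each block contributes the fraction of the row group's bytes it overlaps. With hypothetical numbers, for a row group at start = 100 with length = 200 and 256-byte blocks: the block at offset 0 overlaps bytes 100-256 and contributes (256 - 100) / 200 = 0.78 to each of its hosts, the block at offset 256 overlaps bytes 256-300 and contributes (300 - 256) / 200 = 0.22, so a host holding both blocks ends up with affinity 1.0 for this row group.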

From source file:org.apache.drill.exec.store.parquet.Metadata.java

License:Apache License

/**
 * Get the host affinity for a row group
 *
 * @param fileStatus the parquet file
 * @param start      the start of the row group
 * @param length     the length of the row group
 * @return host affinity for the row group
 * @throws IOException
 */
private Map<String, Float> getHostAffinity(FileStatus fileStatus, long start, long length) throws IOException {
    BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, start, length);
    Map<String, Float> hostAffinityMap = Maps.newHashMap();
    for (BlockLocation blockLocation : blockLocations) {
        for (String host : blockLocation.getHosts()) {
            Float currentAffinity = hostAffinityMap.get(host);
            float blockStart = blockLocation.getOffset();
            float blockEnd = blockStart + blockLocation.getLength();
            float rowGroupEnd = start + length;
            Float newAffinity = (blockLocation.getLength() - (blockStart < start ? start - blockStart : 0)
                    - (blockEnd > rowGroupEnd ? blockEnd - rowGroupEnd : 0)) / length;
            if (currentAffinity != null) {
                hostAffinityMap.put(host, currentAffinity + newAffinity);
            } else {
                hostAffinityMap.put(host, newAffinity);
            }
        }
    }
    return hostAffinityMap;
}

From source file:org.apache.drill.exec.store.schedule.BlockMapBuilder.java

License:Apache License

/**
 * Builds a mapping of file byte ranges to block locations
 */
private ImmutableRangeMap<Long, BlockLocation> buildBlockMap(FileStatus status) throws IOException {
    final Timer.Context context = metrics.timer(BLOCK_MAP_BUILDER_TIMER).time();
    BlockLocation[] blocks;
    ImmutableRangeMap<Long, BlockLocation> blockMap;
    blocks = fs.getFileBlockLocations(status, 0, status.getLen());
    ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder = new ImmutableRangeMap.Builder<Long, BlockLocation>();
    for (BlockLocation block : blocks) {
        long start = block.getOffset();
        long end = start + block.getLength();
        Range<Long> range = Range.closedOpen(start, end);
        blockMapBuilder = blockMapBuilder.put(range, block);
    }
    blockMap = blockMapBuilder.build();
    blockMapMap.put(status.getPath(), blockMap);
    context.stop();
    return blockMap;
}
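
Because each block is keyed by the half-open range [getOffset(), getOffset() + getLength()), the map built above can answer point queries with Guava's RangeMap.get(). A minimal hedged sketch, assuming it runs inside a method of the same class (so buildBlockMap is callable and IOException may propagate) and that the status and offset values are supplied by the caller:

// Illustrative only: find the block that holds a given byte offset.
ImmutableRangeMap<Long, BlockLocation> blockMap = buildBlockMap(status);
long offset = 42L; // hypothetical byte position within the file
BlockLocation block = blockMap.get(offset);
if (block != null) {
    System.out.println("Offset " + offset + " falls in the block at " + block.getOffset()
            + " with length " + block.getLength());
}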