Example usage for org.apache.hadoop.fs BlockLocation getLength

List of usage examples for org.apache.hadoop.fs BlockLocation getLength

Introduction

On this page you can find example usages of org.apache.hadoop.fs.BlockLocation.getLength().

Prototype

public long getLength() 

Source Link

Document

Get the length of the block

Usage

From source file:RawParascaleFileSystem.java

License:Apache License

/**
 * {@inheritDoc}/*  w w  w  .ja  v  a  2  s . c  o  m*/
 */
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len)
        throws IOException {
    ChunkLocator newChunkLocator = null;
    if (file.getLen() < start + len) {
        throw new IOException("start+len must be less or equal than file length");
    }
    final ArrayList<BlockLocation> locations = new ArrayList<BlockLocation>();
    try {
        newChunkLocator = newChunkLocator();
        final Path makeQualified = file.getPath().makeQualified(this.getUri(), this.getWorkingDirectory());

        // sorted by offset
        final ChunkLocation[] chunkLocations = newChunkLocator.getChunkLocations(pathToFile(makeQualified),
                getVirtualFSFromPath(makeQualified, true));
        long begin = start;
        long length = len;
        for (final ChunkLocation chunkLocation : chunkLocations) {
            final ChunkInfo chunkInfo = chunkLocation.getChunkInfo();
            final StorageNodeInfo[] storageNodeInfo = chunkLocation.getStorageNodeInfo();
            if (length <= 0) {
                // stop when length exceeded
                break;
            }
            if (begin < chunkInfo.getChunkOffset()) {
                // skip if location not reached yet
                continue;
            }
            final List<String> hosts = new ArrayList<String>(0);
            for (int j = 0; j < storageNodeInfo.length; j++) {
                // select all enabled and running nodes
                if (storageNodeInfo[j].isUp() && storageNodeInfo[j].isEnabled()) {
                    hosts.add(storageNodeInfo[j].getNodeName());
                }
            }
            final long lengthInChunk = chunkInfo.getChunkLength() - (begin - chunkInfo.getChunkOffset());
            final BlockLocation blockLocation = new BlockLocation(null, hosts.toArray(new String[0]), begin,
                    lengthInChunk < length ? lengthInChunk : length);
            begin += blockLocation.getLength();
            length -= blockLocation.getLength();
            locations.add(blockLocation);

        }
        if (pLog.isDebugEnabled()) {
            pLog.debug("Fetched " + locations.size() + " chunk locations for " + makeQualified);
        }

        return locations.toArray(new BlockLocation[0]);

    } catch (final ChunkStorageException e) {
        throw new IOException(
                "can not fetch chunk locations " + newChunkLocator == null ? "" : newChunkLocator.toString(),
                e);
    } finally {
        if (newChunkLocator != null) {
            newChunkLocator.close();
        }
    }
}

From source file:ch.cern.db.hdfs.Main.java

License:GNU General Public License

/**
 * Prints the metadata of a single block to standard output: offset, length,
 * one entry per replica (host, disk information, name, topology path) and the
 * hosts holding a cached copy.
 *
 * @param blockLocation block to describe; volume ids are only available when it is a
 *                      {@link BlockStorageLocation}
 * @param dataDirs      data directories indexed by disk id, used to print the replica
 *                      location; may be {@code null}
 * @throws IOException if the block location accessors fail
 */
private void printBlockMetadata(BlockLocation blockLocation, String[] dataDirs) throws IOException {
    System.out.println("   Offset: " + blockLocation.getOffset());
    System.out.println("   Length: " + blockLocation.getLength());

    final String[] cachedHosts = blockLocation.getCachedHosts();
    final boolean hasCachedHosts = cachedHosts.length > 0;
    if (!hasCachedHosts) {
        System.out.println("   No cached hosts");
    }

    System.out.println("   Replicas:");
    VolumeId[] volumeIds = null;
    if (blockLocation instanceof BlockStorageLocation) {
        volumeIds = ((BlockStorageLocation) blockLocation).getVolumeIds();
    }
    final String[] hosts = blockLocation.getHosts();
    final String[] names = blockLocation.getNames();
    final String[] topologyPaths = blockLocation.getTopologyPaths();

    for (int replica = 0; replica < topologyPaths.length; replica++) {
        // -1 marks a replica whose disk id is not known.
        final int diskId = volumeIds == null
                ? -1
                : DistributedFileSystemMetadata.getDiskId(volumeIds[replica]);

        System.out.println("      Replica (" + replica + "):");
        System.out.println("         Host: " + hosts[replica]);

        final String diskLine;
        if (diskId == -1) {
            diskLine = "         DiskId: unknown";
        } else if (dataDirs != null && diskId < dataDirs.length) {
            diskLine = "         Location: " + dataDirs[diskId] + " (DiskId: " + diskId + ")";
        } else {
            diskLine = "         DiskId: " + diskId;
        }
        System.out.println(diskLine);

        System.out.println("         Name: " + names[replica]);
        System.out.println("         TopologyPaths: " + topologyPaths[replica]);
    }

    if (hasCachedHosts) {
        System.out.println("   Cached hosts:");
        for (int c = 0; c < cachedHosts.length; c++) {
            System.out.println("      Host: " + cachedHosts[c]);
        }
    }
}

From source file:co.cask.cdap.data.stream.StreamDataFileSplitter.java

License:Apache License

/**
 * Returns the array index of the given blockLocations that contains the given offset.
 *
 * @param blockLocations Array of {@link BlockLocation} to search for.
 * @param offset File offset./*from  w ww .ja  va2 s.  co m*/
 * @param startIdx Starting index for the search in the array.
 * @return The array index of the {@link BlockLocation} that contains the given offset.
 */
private int getBlockIndex(BlockLocation[] blockLocations, long offset, int startIdx) {
    if (blockLocations == null) {
        return -1;
    }
    for (int i = startIdx; i < blockLocations.length; i++) {
        BlockLocation blockLocation = blockLocations[i];
        long endOffset = blockLocation.getOffset() + blockLocation.getLength();

        if (blockLocation.getOffset() <= offset && offset < endOffset) {
            return i;
        }
    }
    return -1;
}

From source file:com.asakusafw.runtime.directio.hadoop.BlockMap.java

License:Apache License

/**
 * Computes one {@link BlockInfo} per block location of the target file.
 * Each entry carries the block's start offset, its end offset (start plus length)
 * and the hosts reported for the block.
 * @param fs the file system that is asked for the block locations
 * @param status the target file status
 * @return the computed information
 * @throws IOException if failed to compute information
 */
public static List<BlockInfo> computeBlocks(FileSystem fs, FileStatus status) throws IOException {
    final BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
    final List<BlockInfo> infos = new ArrayList<>();
    for (int i = 0; i < blocks.length; i++) {
        final BlockLocation block = blocks[i];
        final long size = block.getLength();
        final long from = block.getOffset();
        final long to = from + size;
        infos.add(new BlockInfo(from, to, block.getHosts()));
    }
    return infos;
}

From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License:Apache License

/**
 * Returns the index of the block whose byte range contains the given file offset.
 *
 * @param blkLocations the block locations to search
 * @param offset the file offset to look up
 * @return the index of the first block with
 *         {@code blockOffset <= offset < blockOffset + blockLength}
 * @throws IllegalArgumentException if no block contains the offset, including the case
 *         where {@code blkLocations} is empty
 */
protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
    for (int i = 0; i < blkLocations.length; i++) {
        // is the offset inside this block?
        final long blockStart = blkLocations[i].getOffset();
        if (blockStart <= offset && offset < blockStart + blkLocations[i].getLength()) {
            return i;
        }
    }
    if (blkLocations.length == 0) {
        // Without this guard, building the message below would index element -1 and
        // fail with an ArrayIndexOutOfBoundsException instead of the intended error.
        throw new IllegalArgumentException("Offset " + offset + " is outside of file (no block locations)");
    }
    BlockLocation last = blkLocations[blkLocations.length - 1];
    long fileLength = last.getOffset() + last.getLength() - 1;
    throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")");
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * Returns the index of the block whose byte range contains the given file offset.
 *
 * @param blkLocations
 *            The locations of the current blocks
 * @param offset
 *            The file offset to look up
 * @return The index of the first block with
 *         {@code blockOffset <= offset < blockOffset + blockLength}
 * @throws IllegalArgumentException
 *             if no block contains the offset, including the case where
 *             {@code blkLocations} is empty
 */
protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
    for (int i = 0; i < blkLocations.length; i++) {
        // is the offset inside this block?
        final long blockStart = blkLocations[i].getOffset();
        if (blockStart <= offset && offset < blockStart + blkLocations[i].getLength()) {
            return i;
        }
    }
    if (blkLocations.length == 0) {
        // Without this guard, building the message below would index element -1 and
        // fail with an ArrayIndexOutOfBoundsException instead of the intended error.
        throw new IllegalArgumentException("Offset " + offset + " is outside of file (no block locations)");
    }
    BlockLocation last = blkLocations[blkLocations.length - 1];
    long fileLength = last.getOffset() + last.getLength() - 1;
    throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")");
}

From source file:com.cloudera.GetBlockLocations.java

License:Apache License

/**
 * Prints every block location of a single file to standard output.
 * The file URL is obtained through {@code getStringOrDie("get.block.locations.path")}.
 * The process exits with status 1 when the file does not exist or its block
 * locations cannot be fetched.
 *
 * @param args command line arguments; not read by this method
 * @throws Exception if the URL is malformed or the file system cannot be accessed
 */
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String url = getStringOrDie("get.block.locations.path");
    final FileSystem fs = FileSystem.get(new URI(url), conf);
    final Path target = new Path(url);

    if (!fs.exists(target)) {
        System.out.println("no file at " + url);
        System.exit(1);
    }

    BlockLocation[] locs = null;
    try {
        // Long.MAX_VALUE as the length asks for the locations up to the end of the file.
        locs = fs.getFileBlockLocations(target, 0, Long.MAX_VALUE);
    } catch (IOException e) {
        System.out.println("Error calling getFileBlockLocations(" + url + ")\n");
        e.printStackTrace(System.err);
        System.exit(1);
    }

    for (int i = 0; i < locs.length; i++) {
        final BlockLocation loc = locs[i];
        // An empty line precedes the first entry, a separating comma every later one.
        System.out.println(i == 0 ? "" : ",");
        System.out.println("{");
        System.out.println("  hosts =         " + Arrays.toString(loc.getHosts()));
        System.out.println("  cachedHosts =   " + Arrays.toString(loc.getCachedHosts()));
        System.out.println("  names    =      " + Arrays.toString(loc.getNames()));
        System.out.println("  topologyPaths = " + Arrays.toString(loc.getTopologyPaths()));
        System.out.println("  offset =        " + loc.getOffset());
        System.out.println("  length =        " + loc.getLength());
        System.out.println("  corrupt =       " + loc.isCorrupt());
        System.out.println("}");
    }
}

From source file:com.cloudera.impala.catalog.HdfsTable.java

License:Apache License

/**
 * Loads the file block metadata for the given collection of FileDescriptors.  The
 * FileDescriptors are passed as a tree, where the first level is indexed by
 * filesystem, the second level is indexed by partition location, and the leaves are
 * the list of files that exist under each directory.
 *
 * <p>For every file, one {@code FileBlock} (offset, length, replica host indexes) is
 * added to its descriptor per block location. When volume ids are supported and the
 * filesystem is a {@code DistributedFileSystem}, the collected block locations are
 * then passed to {@code loadDiskIds}.
 *
 * @param perFsFileDescs file descriptors grouped by filesystem and partition directory;
 *        must not be null
 * @throws RuntimeException if the block locations of a file cannot be determined
 */
private void loadBlockMd(Map<FsKey, Map<String, List<FileDescriptor>>> perFsFileDescs) throws RuntimeException {
    Preconditions.checkNotNull(perFsFileDescs);
    LOG.debug("load block md for " + name_);

    for (Map.Entry<FsKey, Map<String, List<FileDescriptor>>> fsEntry : perFsFileDescs.entrySet()) {
        FsKey fsKey = fsEntry.getKey();
        FileSystem fs = fsKey.filesystem;
        // Store all BlockLocations so they can be reused when loading the disk IDs.
        List<BlockLocation> blockLocations = Lists.newArrayList();
        Map<String, List<FileDescriptor>> partitionToFds = fsEntry.getValue();
        Preconditions.checkNotNull(partitionToFds);
        // loop over all files and record their block metadata, minus volume ids
        for (Map.Entry<String, List<FileDescriptor>> partitionEntry : partitionToFds.entrySet()) {
            Path partDirPath = new Path(partitionEntry.getKey());
            for (FileDescriptor fileDescriptor : partitionEntry.getValue()) {
                Path p = new Path(partDirPath, fileDescriptor.getFileName());
                try {
                    FileStatus fileStatus = fs.getFileStatus(p);
                    // fileDescriptors should not contain directories.
                    Preconditions.checkArgument(!fileStatus.isDirectory());
                    BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
                    Preconditions.checkNotNull(locations);
                    blockLocations.addAll(Arrays.asList(locations));

                    // Loop over all blocks in the file.
                    for (BlockLocation block : locations) {
                        // Fetch the names exactly once, inside the try, so a failure is
                        // reported by the dedicated handler below.
                        String[] blockHostPorts;
                        try {
                            blockHostPorts = block.getNames();
                        } catch (IOException e) {
                            // this shouldn't happen, getNames() doesn't throw anything
                            String errorMsg = "BlockLocation.getNames() failed:\n" + e.getMessage();
                            LOG.error(errorMsg);
                            // Keep the original exception as the cause.
                            throw new IllegalStateException(errorMsg, e);
                        }
                        // Now enumerate all replicas of the block, adding any unknown hosts to
                        // hostIndex_ and the index for that host to replicaHostIdxs.
                        List<Integer> replicaHostIdxs = new ArrayList<Integer>(blockHostPorts.length);
                        for (String hostPort : blockHostPorts) {
                            // Each name is expected to have exactly the form "<ip>:<port>".
                            String[] ipPort = hostPort.split(":");
                            Preconditions.checkState(ipPort.length == 2);
                            TNetworkAddress networkAddress = new TNetworkAddress(ipPort[0],
                                    Integer.parseInt(ipPort[1]));
                            replicaHostIdxs.add(hostIndex_.getIndex(networkAddress));
                        }
                        fileDescriptor.addFileBlock(
                                new FileBlock(block.getOffset(), block.getLength(), replicaHostIdxs));
                    }
                } catch (IOException e) {
                    throw new RuntimeException(
                            "couldn't determine block locations for path '" + p + "':\n" + e.getMessage(), e);
                }
            }
        }

        if (SUPPORTS_VOLUME_ID && fs instanceof DistributedFileSystem) {
            LOG.trace("loading disk ids for: " + getFullName() + ". nodes: " + getNumNodes() + ". file system: "
                    + fsKey);
            loadDiskIds((DistributedFileSystem) fs, blockLocations, partitionToFds);
            LOG.trace("completed load of disk ids for: " + getFullName());
        }
    }
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Builds a FileBlock for the given BlockLocation. Every replica of the block is
 * resolved to its index in 'hostIndex' and flagged as cached when its host name is
 * among the block's cached hosts.
 *
 * Must be threadsafe. Access to 'hostIndex' must be protected; the lookup is done
 * while synchronizing on 'hostIndex'.
 *
 * @param loc the block location to convert
 * @param hostIndex index of known hosts, queried for the index of each replica address
 * @return a FileBlock with the offset, length and replicas of 'loc'
 * @throws IOException if the BlockLocation accessors fail
 */
private static FileBlock createFileBlock(BlockLocation loc, ListMap<TNetworkAddress> hostIndex)
        throws IOException {
    // Replica locations as returned by getNames(), parsed below by
    // BlockReplica.parseLocation().
    final String[] replicaAddresses = loc.getNames();
    // Host names of the replicas. The two arrays are paired by array index, so they
    // must have the same length.
    final String[] replicaHostNames = loc.getHosts();
    Preconditions.checkState(replicaHostNames.length == replicaAddresses.length);
    // Host names that hold a cached replica of this block.
    final Set<String> hostsWithCachedReplica = Sets.newHashSet(Arrays.asList(loc.getCachedHosts()));
    Preconditions.checkState(hostsWithCachedReplica.size() <= replicaHostNames.length);

    final int replicaCount = replicaAddresses.length;
    final List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(replicaCount);
    for (int r = 0; r < replicaCount; r++) {
        final String hostName = replicaHostNames[r];
        final TNetworkAddress address = BlockReplica.parseLocation(replicaAddresses[r]);
        Preconditions.checkState(address != null);
        address.setHdfs_host_name(hostName);

        final int hostIdx;
        synchronized (hostIndex) {
            hostIdx = hostIndex.getIndex(address);
        }
        final boolean cached = hostsWithCachedReplica.contains(hostName);
        replicas.add(new BlockReplica(hostIdx, cached));
    }
    return new FileBlock(loc.getOffset(), loc.getLength(), replicas);
}

From source file:com.cloudera.kitten.appmaster.util.HDFSFileFinder.java

License:Open Source License

/**
 * Sums, per name reported by {@link BlockLocation#getNames()}, the lengths of all
 * blocks of the files matched by the given glob.
 *
 * @param p    path or glob pattern selecting the files to inspect
 * @param conf configuration used to resolve the file system of {@code p}
 * @return a map from block location name to the total number of block bytes reported
 *         under that name; empty when the glob lookup yields nothing
 * @throws IOException if the file system cannot be queried
 */
public static Map<String, Long> getNumBytesOfGlobHeldByDatanodes(Path p, Configuration conf)
        throws IOException {
    FileSystem fs = p.getFileSystem(conf);

    HashMap<String, Long> bytesHeld = Maps.newHashMap();
    FileStatus[] matches = fs.globStatus(p);
    if (matches == null) {
        // globStatus returned no result array, so there is nothing to account for.
        return bytesHeld;
    }
    for (FileStatus f : matches) {
        // Ask for the blocks of the matched file itself, not of the glob pattern.
        BlockLocation[] bls = fs.getFileBlockLocations(f, 0, f.getLen());
        if (bls == null) {
            continue;
        }
        for (BlockLocation bl : bls) {
            long l = bl.getLength();
            for (String name : bl.getNames()) {
                Long held = bytesHeld.get(name);
                bytesHeld.put(name, held == null ? l : held + l);
            }
        }
    }

    return bytesHeld;
}