Example usage for org.apache.hadoop.fs BlockLocation getOffset

List of usage examples for org.apache.hadoop.fs BlockLocation getOffset

Introduction

On this page you can find example usage for org.apache.hadoop.fs BlockLocation getOffset.

Prototype

public long getOffset() 

Document

Get the start offset of the file associated with this block.
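
Before the per-project examples below, here is a minimal, self-contained sketch of how getOffset is typically reached: fetch the BlockLocation array for a file via FileSystem#getFileBlockLocations and read each block's start offset and length. The NameNode URI, file path, and class name are placeholders, not taken from any source file on this page.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetOffsetExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder URI; point this at your own NameNode.
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:8020"), conf);

        // Placeholder path; use a file that exists on your cluster.
        Path path = new Path("/tmp/example.txt");
        FileStatus status = fs.getFileStatus(path);

        // One BlockLocation per block, covering the byte range [0, file length).
        BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
        for (BlockLocation block : blocks) {
            // getOffset() is the block's start offset within the file;
            // getOffset() + getLength() is its exclusive end offset.
            System.out.println("block [" + block.getOffset() + ", "
                    + (block.getOffset() + block.getLength()) + ")");
        }
    }
}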

Usage

From source file:ch.cern.db.hdfs.Main.java

License:GNU General Public License

private void printBlockMetadata(BlockLocation blockLocation, String[] dataDirs) throws IOException {

    System.out.println("   Offset: " + blockLocation.getOffset());
    System.out.println("   Length: " + blockLocation.getLength());

    String[] cachedHosts = blockLocation.getCachedHosts();
    if (cachedHosts.length == 0) {
        System.out.println("   No cached hosts");
    }

    System.out.println("   Replicas:");
    VolumeId[] volumeIds = blockLocation instanceof BlockStorageLocation
            ? (((BlockStorageLocation) blockLocation).getVolumeIds())
            : null;
    String[] hosts = blockLocation.getHosts();
    String[] names = blockLocation.getNames();
    String[] topologyPaths = blockLocation.getTopologyPaths();
    for (int i = 0; i < topologyPaths.length; i++) {
        int diskId = volumeIds != null ? DistributedFileSystemMetadata.getDiskId(volumeIds[i]) : -1;

        System.out.println("      Replica (" + i + "):");
        System.out.println("         Host: " + hosts[i]);

        if (diskId == -1)
            System.out.println("         DiskId: unknown");
        else if (dataDirs != null && diskId < dataDirs.length)
            System.out.println("         Location: " + dataDirs[diskId] + " (DiskId: " + diskId + ")");
        else
            System.out.println("         DiskId: " + diskId);

        System.out.println("         Name: " + names[i]);
        System.out.println("         TopologyPaths: " + topologyPaths[i]);
    }

    if (cachedHosts.length > 0) {
        System.out.println("   Cached hosts:");
        for (String cachedHost : cachedHosts) {
            System.out.println("      Host: " + cachedHost);
        }
    }
}

From source file:co.cask.cdap.data.stream.StreamDataFileSplitter.java

License:Apache License

/**
 * Returns the index of the entry in the given blockLocations array that contains the given offset.
 *
 * @param blockLocations Array of {@link BlockLocation} to search in.
 * @param offset File offset.
 * @param startIdx Starting index for the search in the array.
 * @return The array index of the {@link BlockLocation} that contains the given offset, or {@code -1} if none does.
 */
private int getBlockIndex(BlockLocation[] blockLocations, long offset, int startIdx) {
    if (blockLocations == null) {
        return -1;
    }
    for (int i = startIdx; i < blockLocations.length; i++) {
        BlockLocation blockLocation = blockLocations[i];
        long endOffset = blockLocation.getOffset() + blockLocation.getLength();

        if (blockLocation.getOffset() <= offset && offset < endOffset) {
            return i;
        }
    }
    return -1;
}
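
As a quick sanity check of the half-open containment test this method relies on (and which the getBlockIndex variants further down repeat), here is a small sketch built on synthetic BlockLocation values; the hosts and names arrays are left empty because only offsets matter here.

import org.apache.hadoop.fs.BlockLocation;

public class BlockIndexDemo {
    public static void main(String[] args) {
        // Two synthetic 128-byte blocks covering [0, 128) and [128, 256).
        BlockLocation[] blocks = new BlockLocation[] {
                new BlockLocation(new String[0], new String[0], 0, 128),
                new BlockLocation(new String[0], new String[0], 128, 128) };

        long offset = 128; // a block boundary
        for (int i = 0; i < blocks.length; i++) {
            long endOffset = blocks[i].getOffset() + blocks[i].getLength();
            boolean contains = blocks[i].getOffset() <= offset && offset < endOffset;
            System.out.println("block " + i + " contains offset " + offset + "? " + contains);
        }
        // Prints false for block 0 and true for block 1: a boundary offset
        // belongs to the following block, so each offset maps to exactly one index.
    }
}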

From source file:com.asakusafw.runtime.directio.hadoop.BlockMap.java

License:Apache License

/**
 * Returns a list of {@link BlockInfo} for the target file.
 * @param fs the target file system
 * @param status the target file status
 * @return the computed information
 * @throws IOException if failed to compute information
 */
public static List<BlockInfo> computeBlocks(FileSystem fs, FileStatus status) throws IOException {
    BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
    List<BlockInfo> results = new ArrayList<>();
    for (BlockLocation location : locations) {
        long length = location.getLength();
        long start = location.getOffset();
        results.add(new BlockInfo(start, start + length, location.getHosts()));
    }
    return results;
}

From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License:Apache License

protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
    for (int i = 0; i < blkLocations.length; i++) {
        // is the offset inside this block?
        if ((blkLocations[i].getOffset() <= offset)
                && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
            return i;
        }
    }
    BlockLocation last = blkLocations[blkLocations.length - 1];
    long fileLength = last.getOffset() + last.getLength() - 1;
    throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")");
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * @param blkLocations
 *            The locations of the current blocks
 * @param offset
 *            The start offset of file associated with this block
 * @return The block index
 */
protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
    for (int i = 0; i < blkLocations.length; i++) {
        // is the offset inside this block?
        if ((blkLocations[i].getOffset() <= offset)
                && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
            return i;
        }
    }
    BlockLocation last = blkLocations[blkLocations.length - 1];
    long fileLength = last.getOffset() + last.getLength() - 1;
    throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")");
}

From source file:com.cloudera.GetBlockLocations.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    String url = getStringOrDie("get.block.locations.path");
    final FileSystem fs = FileSystem.get(new URI(url), conf);

    if (!fs.exists(new Path(url))) {
        System.out.println("no file at " + url);
        System.exit(1);
    }
    BlockLocation[] locs = null;
    try {
        locs = fs.getFileBlockLocations(new Path(url), 0, Long.MAX_VALUE);
    } catch (IOException e) {
        System.out.println("Error calling getFileBlockLocations(" + url + ")\n");
        e.printStackTrace(System.err);
        System.exit(1);
    }

    String prefix = "";
    for (BlockLocation loc : locs) {
        System.out.println(prefix);
        System.out.println("{");
        System.out.println("  hosts =         " + Arrays.toString(loc.getHosts()));
        System.out.println("  cachedHosts =   " + Arrays.toString(loc.getCachedHosts()));
        System.out.println("  names    =      " + Arrays.toString(loc.getNames()));
        System.out.println("  topologyPaths = " + Arrays.toString(loc.getTopologyPaths()));
        System.out.println("  offset =        " + loc.getOffset());
        System.out.println("  length =        " + loc.getLength());
        System.out.println("  corrupt =       " + loc.isCorrupt());
        System.out.println("}");
        prefix = ",";
    }
}

From source file:com.cloudera.impala.catalog.HdfsTable.java

License:Apache License

/**
 * Loads the file block metadata for the given collection of FileDescriptors.  The
 * FileDescriptors are passed as a tree, where the first level is indexed by
 * filesystem, the second level is indexed by partition location, and the leaves are
 * the list of files that exist under each directory.
 */
private void loadBlockMd(Map<FsKey, Map<String, List<FileDescriptor>>> perFsFileDescs) throws RuntimeException {
    Preconditions.checkNotNull(perFsFileDescs);
    LOG.debug("load block md for " + name_);

    for (FsKey fsEntry : perFsFileDescs.keySet()) {
        FileSystem fs = fsEntry.filesystem;
        // Store all BlockLocations so they can be reused when loading the disk IDs.
        List<BlockLocation> blockLocations = Lists.newArrayList();
        int numCachedBlocks = 0;
        Map<String, List<FileDescriptor>> partitionToFds = perFsFileDescs.get(fsEntry);
        Preconditions.checkNotNull(partitionToFds);
        // loop over all files and record their block metadata, minus volume ids
        for (String partitionDir : partitionToFds.keySet()) {
            Path partDirPath = new Path(partitionDir);
            for (FileDescriptor fileDescriptor : partitionToFds.get(partitionDir)) {
                Path p = new Path(partDirPath, fileDescriptor.getFileName());
                try {
                    FileStatus fileStatus = fs.getFileStatus(p);
                    // fileDescriptors should not contain directories.
                    Preconditions.checkArgument(!fileStatus.isDirectory());
                    BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
                    Preconditions.checkNotNull(locations);
                    blockLocations.addAll(Arrays.asList(locations));

                    // Loop over all blocks in the file.
                    for (BlockLocation block : locations) {
                        String[] blockHostPorts;
                        try {
                            blockHostPorts = block.getNames();
                        } catch (IOException e) {
                            // this shouldn't happen, getNames() doesn't throw anything
                            String errorMsg = "BlockLocation.getNames() failed:\n" + e.getMessage();
                            LOG.error(errorMsg);
                            throw new IllegalStateException(errorMsg);
                        }
                        // Now enumerate all replicas of the block, adding any unknown hosts to
                        // hostIndex_ and the index for that host to replicaHostIdxs.
                        List<Integer> replicaHostIdxs = new ArrayList<Integer>(blockHostPorts.length);
                        for (int i = 0; i < blockHostPorts.length; ++i) {
                            String[] ip_port = blockHostPorts[i].split(":");
                            Preconditions.checkState(ip_port.length == 2);
                            TNetworkAddress network_address = new TNetworkAddress(ip_port[0],
                                    Integer.parseInt(ip_port[1]));
                            replicaHostIdxs.add(hostIndex_.getIndex(network_address));
                        }
                        fileDescriptor.addFileBlock(
                                new FileBlock(block.getOffset(), block.getLength(), replicaHostIdxs));
                    }
                } catch (IOException e) {
                    throw new RuntimeException(
                            "couldn't determine block locations for path '" + p + "':\n" + e.getMessage(), e);
                }
            }
        }

        if (SUPPORTS_VOLUME_ID && fs instanceof DistributedFileSystem) {
            LOG.trace("loading disk ids for: " + getFullName() + ". nodes: " + getNumNodes() + ". file system: "
                    + fsEntry);
            loadDiskIds((DistributedFileSystem) fs, blockLocations, partitionToFds);
            LOG.trace("completed load of disk ids for: " + getFullName());
        }
    }
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Create FileBlock according to BlockLocation and hostIndex. Get host names and ports
 * from BlockLocation, and get all replicas' host id from hostIndex.
 *
 * Must be threadsafe. Access to 'hostIndex' must be protected.
 */
private static FileBlock createFileBlock(BlockLocation loc, ListMap<TNetworkAddress> hostIndex)
        throws IOException {
    // Get the location of all block replicas in ip:port format.
    String[] blockHostPorts = loc.getNames();
    // Get the hostnames for all block replicas. Used to resolve which hosts
    // contain cached data. The results are returned in the same order as
    // block.getNames() so it allows us to match a host specified as ip:port to
    // corresponding hostname using the same array index.
    String[] blockHostNames = loc.getHosts();
    Preconditions.checkState(blockHostNames.length == blockHostPorts.length);
    // Get the hostnames that contain cached replicas of this block.
    Set<String> cachedHosts = Sets.newHashSet(Arrays.asList(loc.getCachedHosts()));
    Preconditions.checkState(cachedHosts.size() <= blockHostNames.length);

    // Now enumerate all replicas of the block, adding any unknown hosts
    // to hostMap_/hostList_. The host ID (index in to the hostList_) for each
    // replica is stored in replicaHostIdxs.
    List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(blockHostPorts.length);
    for (int i = 0; i < blockHostPorts.length; ++i) {
        TNetworkAddress networkAddress = BlockReplica.parseLocation(blockHostPorts[i]);
        Preconditions.checkState(networkAddress != null);
        networkAddress.setHdfs_host_name(blockHostNames[i]);
        int idx = -1;
        synchronized (hostIndex) {
            idx = hostIndex.getIndex(networkAddress);
        }
        replicas.add(new BlockReplica(idx, cachedHosts.contains(blockHostNames[i])));
    }
    return new FileBlock(loc.getOffset(), loc.getLength(), replicas);
}

From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License:Apache License

private List<HiveSplit> createHiveSplits(String partitionName, String path, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable,
        ConnectorSession session, OptionalInt bucketNumber, TupleDomain<HiveColumnHandle> effectivePredicate,
        Map<Integer, HiveType> columnCoercions) throws IOException {
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();

    boolean forceLocalScheduling = HiveSessionProperties.isForceLocalScheduling(session);

    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            long maxBytes = maxSplitSize.toBytes();
            boolean creatingInitialSplits = false;

            if (remainingInitialSplits.get() > 0) {
                maxBytes = maxInitialSplitSize.toBytes();
                creatingInitialSplits = true;
            }

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxBytes));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                if (remainingInitialSplits.decrementAndGet() < 0 && creatingInitialSplits) {
                    creatingInitialSplits = false;
                    // recalculate the target chunk size
                    maxBytes = maxSplitSize.toBytes();
                    long remainingLength = blockLocation.getLength() - chunkOffset;
                    chunks = Math.max(1, (int) (remainingLength / maxBytes));
                    targetChunkSize = (long) Math.ceil(remainingLength * 1.0 / chunks);
                }
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(),
                        partitionName, path, blockLocation.getOffset() + chunkOffset, chunkLength, schema,
                        partitionKeys, addresses, bucketNumber,
                        forceLocalScheduling && hasRealAddress(addresses), effectivePredicate,
                        columnCoercions));

                chunkOffset += chunkLength;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    } else {
        // not splittable, use the hosts from the first block if it exists
        List<HostAddress> addresses = ImmutableList.of();
        if (blockLocations.length > 0) {
            addresses = toHostAddress(blockLocations[0].getHosts());
        }

        builder.add(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(), partitionName,
                path, start, length, schema, partitionKeys, addresses, bucketNumber,
                forceLocalScheduling && hasRealAddress(addresses), effectivePredicate, columnCoercions));
    }
    return builder.build();
}
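
To make the chunking arithmetic in the example above concrete, here is a standalone sketch with made-up numbers (a hypothetical 200 MB block and a 64 MB max split size): the block divides into chunks = max(1, 200/64) = 3 chunks of at most ceil(200 MB / 3) bytes each, and the final Math.min trims the last chunk so the chunk lengths add up to the block length exactly.

public class ChunkMath {
    public static void main(String[] args) {
        long blockLength = 200L * 1024 * 1024; // hypothetical 200 MB block
        long maxBytes = 64L * 1024 * 1024;     // hypothetical 64 MB max split size

        // Same arithmetic as createHiveSplits above.
        int chunks = Math.max(1, (int) (blockLength / maxBytes));
        long targetChunkSize = (long) Math.ceil(blockLength * 1.0 / chunks);

        // Walk the block exactly as the while loop above does.
        long chunkOffset = 0;
        while (chunkOffset < blockLength) {
            long chunkLength = Math.min(targetChunkSize, blockLength - chunkOffset);
            System.out.println("chunk [" + chunkOffset + ", " + (chunkOffset + chunkLength) + ")");
            chunkOffset += chunkLength;
        }
    }
}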

From source file:com.facebook.presto.hive.HiveSplitIterable.java

License:Apache License

private List<HiveSplit> createHiveSplits(String partitionName, FileStatus file, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable)
        throws IOException {
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();
    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxSplitSize.toBytes()));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(clientId, table.getDbName(), table.getTableName(), partitionName,
                        false, file.getPath().toString(), blockLocation.getOffset() + chunkOffset, chunkLength,
                        schema, partitionKeys, addresses));

                chunkOffset += chunkLength;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    } else {
        // not splittable, use the hosts from the first block
        builder.add(new HiveSplit(clientId, table.getDbName(), table.getTableName(), partitionName, false,
                file.getPath().toString(), start, length, schema, partitionKeys,
                toHostAddress(blockLocations[0].getHosts())));
    }
    return builder.build();
}