Example usage for org.apache.hadoop.fs BlockLocation getHosts

List of usage examples for org.apache.hadoop.fs BlockLocation getHosts

Introduction

On this page you can find usage examples for org.apache.hadoop.fs BlockLocation getHosts.

Prototype

public String[] getHosts() throws IOException 

Document

Get the list of hosts (hostname) hosting this block

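A minimal, self-contained sketch of a call to getHosts, assuming a file at the hypothetical path /tmp/example.txt on the default file system (the class name GetHostsExample is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetHostsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Connect to the default file system configured in core-site.xml.
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical path; replace with a file that exists on your cluster.
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        // One BlockLocation per block of the file.
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
        for (BlockLocation location : locations) {
            // getHosts() returns the hostname of each datanode holding a replica.
            for (String host : location.getHosts()) {
                System.out.println(host);
            }
        }
    }
}

Each BlockLocation describes one block of the file, and getHosts() returns one hostname per replica of that block, as the examples below show in context.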
Usage

From source file:HDFSFileFinder.java

License:Apache License

private static void getBlockLocationsFromHdfs() {
    StringBuilder sb = new StringBuilder();
    Configuration conf = new Configuration();
    boolean first = true;

    // make connection to hdfs
    try {
        if (verbose) {
            writer.println("DEBUG: Trying to connect to " + fsName);
        }
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path(fileName);
        FileStatus fStatus = fs.getFileStatus(file);
        status = fStatus;
        bLocations = fs.getFileBlockLocations(status, 0, status.getLen());
        //print out all block locations
        for (BlockLocation aLocation : bLocations) {
            String[] names = aLocation.getHosts();
            for (String name : names) {
                InetAddress addr = InetAddress.getByName(name);
                String host = addr.getHostName();
                int idx = host.indexOf('.');
                // Keep only the short hostname, stripping any domain suffix.
                String hostname;
                if (idx > 0) {
                    hostname = host.substring(0, idx);
                } else {
                    hostname = host;
                }
                if (first) {
                    sb.append(hostname);
                    first = false;
                } else {
                    sb.append(",").append(hostname);
                }
            }
        }
        sb.append(NEWLINE);
    } catch (IOException e) {
        writer.println("Error getting block location data from namenode");
        e.printStackTrace();
    }
    writer.print(sb.toString());
    writer.flush();
}

From source file:ch.cern.db.hdfs.DistributedFileSystemMetadata.java

License:GNU General Public License

public static HashMap<String, HashMap<Integer, Integer>> computeHostsDiskIdsCount(
        List<BlockLocation> blockLocations) throws IOException {

    HashMap<String, HashMap<Integer, Integer>> hosts_diskIds = new HashMap<>();
    for (BlockLocation blockLocation : blockLocations) {
        String[] hosts = blockLocation.getHosts();

        VolumeId[] volumeIds = null;
        if (blockLocation instanceof BlockStorageLocation)
            volumeIds = ((BlockStorageLocation) blockLocation).getVolumeIds();

        for (int i = 0; i < hosts.length; i++) {
            String host = hosts[i];
            Integer diskId = getDiskId(volumeIds != null ? volumeIds[i] : null);

            if (!hosts_diskIds.containsKey(host)) {
                HashMap<Integer, Integer> diskIds = new HashMap<>();
                diskIds.put(diskId, 1);
                hosts_diskIds.put(host, diskIds);
            } else {
                HashMap<Integer, Integer> diskIds = hosts_diskIds.get(host);
                Integer count = diskIds.get(diskId);
                if (count != null) {
                    diskIds.put(diskId, count + 1);
                } else {
                    diskIds.put(diskId, 1);
                }
            }
        }
    }

    return hosts_diskIds;
}

From source file:ch.cern.db.hdfs.Main.java

License:GNU General Public License

private void printBlockMetadata(BlockLocation blockLocation, String[] dataDirs) throws IOException {

    System.out.println("   Offset: " + blockLocation.getOffset());
    System.out.println("   Length: " + blockLocation.getLength());

    String[] cachedHosts = blockLocation.getCachedHosts();
    if (cachedHosts.length == 0) {
        System.out.println("   No cached hosts");
    }

    System.out.println("   Replicas:");
    VolumeId[] volumeIds = blockLocation instanceof BlockStorageLocation
            ? (((BlockStorageLocation) blockLocation).getVolumeIds())
            : null;
    String[] hosts = blockLocation.getHosts();
    String[] names = blockLocation.getNames();
    String[] topologyPaths = blockLocation.getTopologyPaths();
    for (int i = 0; i < topologyPaths.length; i++) {
        int diskId = volumeIds != null ? DistributedFileSystemMetadata.getDiskId(volumeIds[i]) : -1;

        System.out.println("      Replica (" + i + "):");
        System.out.println("         Host: " + hosts[i]);

        if (diskId == -1)
            System.out.println("         DiskId: unknown");
        else if (dataDirs != null && diskId < dataDirs.length)
            System.out.println("         Location: " + dataDirs[diskId] + " (DiskId: " + diskId + ")");
        else
            System.out.println("         DiskId: " + diskId);

        System.out.println("         Name: " + names[i]);
        System.out.println("         TopologyPaths: " + topologyPaths[i]);
    }

    if (cachedHosts.length > 0) {
        System.out.println("   Cached hosts:");
        for (String cachedHost : cachedHosts) {
            System.out.println("      Host: " + cachedHost);
        }
    }
}

From source file:com.asakusafw.runtime.directio.hadoop.BlockMap.java

License:Apache License

/**
 * Returns a list of {@link BlockInfo} for the target file.
 * @param fs the target file system
 * @param status the target file status
 * @return the computed information
 * @throws IOException if failed to compute information
 */
public static List<BlockInfo> computeBlocks(FileSystem fs, FileStatus status) throws IOException {
    BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
    List<BlockInfo> results = new ArrayList<>();
    for (BlockLocation location : locations) {
        long length = location.getLength();
        long start = location.getOffset();
        results.add(new BlockInfo(start, start + length, location.getHosts()));
    }
    return results;
}

From source file:com.cloudera.GetBlockLocations.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    String url = getStringOrDie("get.block.locations.path");
    final FileSystem fs = FileSystem.get(new URI(url), conf);

    if (!fs.exists(new Path(url))) {
        System.out.println("no file at " + url);
        System.exit(1);
    }
    BlockLocation[] locs = null;
    try {
        locs = fs.getFileBlockLocations(new Path(url), 0, Long.MAX_VALUE);
    } catch (IOException e) {
        System.out.println("Error calling getFileBlockLocations(" + url + ")\n");
        e.printStackTrace(System.err);
        System.exit(1);
    }

    String prefix = "";
    for (BlockLocation loc : locs) {
        System.out.println(prefix);
        System.out.println("{");
        System.out.println("  hosts =         " + Arrays.toString(loc.getHosts()));
        System.out.println("  cachedHosts =   " + Arrays.toString(loc.getCachedHosts()));
        System.out.println("  names    =      " + Arrays.toString(loc.getNames()));
        System.out.println("  topologyPaths = " + Arrays.toString(loc.getTopologyPaths()));
        System.out.println("  offset =        " + loc.getOffset());
        System.out.println("  length =        " + loc.getLength());
        System.out.println("  corrupt =       " + loc.isCorrupt());
        System.out.println("}");
        prefix = ",";
    }
}

From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java

License:Apache License

/**
 * List file status by calling fileSystem.listStatus.
 */
private static void listStatus(String dirPath) {
    Path path = new Path(dirPath);
    boolean exceptionThrown = false;
    try {
        FileSystem fs = path.getFileSystem(LoadMetadataUtil.getConf());
        FileStatus[] fileStatus = fs.listStatus(path);
        if (fs.exists(path)) {
            for (FileStatus status : fileStatus) {
                BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
                for (BlockLocation loc : locations) {
                    loc.getNames();
                    loc.getHosts();
                }
            }
        }
    } catch (IOException e) {
        exceptionThrown = true;
        LOG.error("Failed to list Status", e);
    }
    assertFalse(exceptionThrown);
}

From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java

License:Apache License

/**
 * List file status by calling abstractFileSystem.listStatusIterator.
 */
private static void listStatusIterator(String dirPath) {
    Path path = new Path(dirPath);
    boolean exceptionThrown = false;
    try {
        AbstractFileSystem fs = AbstractFileSystem.createFileSystem(path.toUri(), LoadMetadataUtil.getConf());
        RemoteIterator<FileStatus> iter = fs.listStatusIterator(path);
        while (iter.hasNext()) {
            FileStatus fileStatus = iter.next();
            BlockLocation[] locations = fs.getFileBlockLocations(fileStatus.getPath(), 0, fileStatus.getLen());
            for (BlockLocation loc : locations) {
                loc.getNames();
                loc.getHosts();
            }
        }
    } catch (IOException e) {
        exceptionThrown = true;
        LOG.error("Failed to list Status Iterator", e);
    }
    assertFalse(exceptionThrown);
}

From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java

License:Apache License

/**
 * List file status by calling fileSystem.listLocatedStatus.
 */
private static void listLocatedStatus(String dirPath) {
    Path path = new Path(dirPath);
    boolean exceptionThrown = false;
    try {
        FileSystem fs = path.getFileSystem(LoadMetadataUtil.getConf());
        RemoteIterator<LocatedFileStatus> iterator = fs.listLocatedStatus(path);
        if (fs.exists(path)) {
            while (iterator.hasNext()) {
                LocatedFileStatus fileStatus = iterator.next();
                BlockLocation[] locations = fileStatus.getBlockLocations();
                for (BlockLocation loc : locations) {
                    loc.getHosts();
                    loc.getNames();
                }
            }
        }
    } catch (IOException e) {
        exceptionThrown = true;
        LOG.error("Failed to list Located Status", e);
    }
    assertFalse(exceptionThrown);
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Create FileBlock according to BlockLocation and hostIndex. Get host names and ports
 * from BlockLocation, and get all replicas' host id from hostIndex.
 *
 * Must be threadsafe. Access to 'hostIndex' must be protected.
 */
private static FileBlock createFileBlock(BlockLocation loc, ListMap<TNetworkAddress> hostIndex)
        throws IOException {
    // Get the location of all block replicas in ip:port format.
    String[] blockHostPorts = loc.getNames();
    // Get the hostnames for all block replicas. Used to resolve which hosts
    // contain cached data. The results are returned in the same order as
    // block.getNames() so it allows us to match a host specified as ip:port to
    // corresponding hostname using the same array index.
    String[] blockHostNames = loc.getHosts();
    Preconditions.checkState(blockHostNames.length == blockHostPorts.length);
    // Get the hostnames that contain cached replicas of this block.
    Set<String> cachedHosts = Sets.newHashSet(Arrays.asList(loc.getCachedHosts()));
    Preconditions.checkState(cachedHosts.size() <= blockHostNames.length);

    // Now enumerate all replicas of the block, adding any unknown hosts
    // to hostMap_/hostList_. The host ID (index in to the hostList_) for each
    // replica is stored in replicaHostIdxs.
    List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(blockHostPorts.length);
    for (int i = 0; i < blockHostPorts.length; ++i) {
        TNetworkAddress networkAddress = BlockReplica.parseLocation(blockHostPorts[i]);
        Preconditions.checkState(networkAddress != null);
        networkAddress.setHdfs_host_name(blockHostNames[i]);
        int idx = -1;
        synchronized (hostIndex) {
            idx = hostIndex.getIndex(networkAddress);
        }
        replicas.add(new BlockReplica(idx, cachedHosts.contains(blockHostNames[i])));
    }
    return new FileBlock(loc.getOffset(), loc.getLength(), replicas);
}

From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License:Apache License

private List<HiveSplit> createHiveSplits(String partitionName, String path, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable,
        ConnectorSession session, OptionalInt bucketNumber, TupleDomain<HiveColumnHandle> effectivePredicate,
        Map<Integer, HiveType> columnCoercions) throws IOException {
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();

    boolean forceLocalScheduling = HiveSessionProperties.isForceLocalScheduling(session);

    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            long maxBytes = maxSplitSize.toBytes();
            boolean creatingInitialSplits = false;

            if (remainingInitialSplits.get() > 0) {
                maxBytes = maxInitialSplitSize.toBytes();
                creatingInitialSplits = true;
            }

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxBytes));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                if (remainingInitialSplits.decrementAndGet() < 0 && creatingInitialSplits) {
                    creatingInitialSplits = false;
                    // recalculate the target chunk size
                    maxBytes = maxSplitSize.toBytes();
                    long remainingLength = blockLocation.getLength() - chunkOffset;
                    chunks = Math.max(1, (int) (remainingLength / maxBytes));
                    targetChunkSize = (long) Math.ceil(remainingLength * 1.0 / chunks);
                }
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(),
                        partitionName, path, blockLocation.getOffset() + chunkOffset, chunkLength, schema,
                        partitionKeys, addresses, bucketNumber,
                        forceLocalScheduling && hasRealAddress(addresses), effectivePredicate,
                        columnCoercions));

                chunkOffset += chunkLength;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    } else {
        // not splittable, use the hosts from the first block if it exists
        List<HostAddress> addresses = ImmutableList.of();
        if (blockLocations.length > 0) {
            addresses = toHostAddress(blockLocations[0].getHosts());
        }

        builder.add(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(), partitionName,
                path, start, length, schema, partitionKeys, addresses, bucketNumber,
                forceLocalScheduling && hasRealAddress(addresses), effectivePredicate, columnCoercions));
    }
    return builder.build();
}