List of usage examples for org.apache.hadoop.fs.BlockLocation.getHosts()
public String[] getHosts() throws IOException
From source file:HDFSFileFinder.java
License:Apache License
private static void getBlockLocationsFromHdfs() { StringBuilder sb = new StringBuilder(); Configuration conf = new Configuration(); boolean first = true; // make connection to hdfs try {/*from w w w .j a va2 s . c o m*/ if (verbose) { writer.println("DEBUG: Trying to connect to " + fsName); } FileSystem fs = FileSystem.get(conf); Path file = new Path(fileName); FileStatus fStatus = fs.getFileStatus(file); status = fStatus; bLocations = fs.getFileBlockLocations(status, 0, status.getLen()); //print out all block locations for (BlockLocation aLocation : bLocations) { String[] names = aLocation.getHosts(); for (String name : names) { InetAddress addr = InetAddress.getByName(name); String host = addr.getHostName(); int idx = host.indexOf('.'); String hostname; if (0 < idx) { hostname = host.substring(0, host.indexOf('.')); } else { hostname = host; } if (first) { sb.append(hostname); first = false; } else { sb.append(",").append(hostname); } } } sb.append(NEWLINE); } catch (IOException e) { writer.println("Error getting block location data from namenode"); e.printStackTrace(); } writer.print(sb.toString()); writer.flush(); }
From source file:ch.cern.db.hdfs.DistributedFileSystemMetadata.java
License:GNU General Public License
/**
 * Tallies, per host, how many of the given block replicas are stored on each disk.
 *
 * <p>The disk id of a replica is obtained from {@code getDiskId}, which receives the
 * replica's volume id when the location is a {@link BlockStorageLocation} and
 * {@code null} otherwise.
 *
 * @param blockLocations block locations whose replicas are counted
 * @return map from host to a map of disk id to the number of replicas seen on that disk
 * @throws IOException if the hosts of a block location cannot be obtained
 */
public static HashMap<String, HashMap<Integer, Integer>> computeHostsDiskIdsCount(
        List<BlockLocation> blockLocations) throws IOException {
    HashMap<String, HashMap<Integer, Integer>> countsByHost = new HashMap<>();

    for (BlockLocation location : blockLocations) {
        String[] replicaHosts = location.getHosts();

        // Volume ids are only available on storage-aware locations.
        VolumeId[] replicaVolumes = null;
        if (location instanceof BlockStorageLocation) {
            replicaVolumes = ((BlockStorageLocation) location).getVolumeIds();
        }

        for (int replica = 0; replica < replicaHosts.length; replica++) {
            String host = replicaHosts[replica];
            Integer diskId = getDiskId(replicaVolumes == null ? null : replicaVolumes[replica]);

            // Fetch the per-host tally, creating it the first time the host is seen.
            HashMap<Integer, Integer> countsByDisk = countsByHost.get(host);
            if (countsByDisk == null) {
                countsByDisk = new HashMap<>();
                countsByHost.put(host, countsByDisk);
            }

            Integer seenSoFar = countsByDisk.get(diskId);
            countsByDisk.put(diskId, seenSoFar == null ? 1 : seenSoFar + 1);
        }
    }

    return countsByHost;
}
From source file:ch.cern.db.hdfs.Main.java
License:GNU General Public License
/**
 * Prints a description of one block to standard output: offset, length, every replica
 * (host, disk, name, topology path) and the hosts holding a cached copy.
 *
 * @param blockLocation the block to describe; disk ids are only shown when it is a
 *        {@link BlockStorageLocation}
 * @param dataDirs data directories indexed by disk id, used to print a location instead
 *        of a bare disk id; may be {@code null}
 * @throws IOException if the block location metadata cannot be read
 */
private void printBlockMetadata(BlockLocation blockLocation, String[] dataDirs) throws IOException {
    System.out.println(" Offset: " + blockLocation.getOffset());
    System.out.println(" Length: " + blockLocation.getLength());

    String[] cachedHosts = blockLocation.getCachedHosts();
    boolean anyCached = cachedHosts.length > 0;
    if (!anyCached) {
        System.out.println(" No cached hosts");
    }

    System.out.println(" Replicas:");

    VolumeId[] volumeIds = null;
    if (blockLocation instanceof BlockStorageLocation) {
        volumeIds = ((BlockStorageLocation) blockLocation).getVolumeIds();
    }

    String[] hosts = blockLocation.getHosts();
    String[] names = blockLocation.getNames();
    String[] topologyPaths = blockLocation.getTopologyPaths();

    // The number of replicas printed is driven by the topology paths.
    for (int replica = 0; replica < topologyPaths.length; replica++) {
        int diskId = -1;
        if (volumeIds != null) {
            diskId = DistributedFileSystemMetadata.getDiskId(volumeIds[replica]);
        }

        System.out.println(" Replica (" + replica + "):");
        System.out.println(" Host: " + hosts[replica]);

        String diskLine;
        if (diskId == -1) {
            diskLine = " DiskId: unknown";
        } else if (dataDirs != null && diskId < dataDirs.length) {
            diskLine = " Location: " + dataDirs[diskId] + " (DiskId: " + diskId + ")";
        } else {
            diskLine = " DiskId: " + diskId;
        }
        System.out.println(diskLine);

        System.out.println(" Name: " + names[replica]);
        System.out.println(" TopologyPaths: " + topologyPaths[replica]);
    }

    if (anyCached) {
        System.out.println(" Cached hosts:");
        for (int i = 0; i < cachedHosts.length; i++) {
            System.out.println(" Host: " + cachedHosts[i]);
        }
    }
}
From source file:com.asakusafw.runtime.directio.hadoop.BlockMap.java
License:Apache License
/**
 * Computes the {@link BlockInfo} entries of the target file, one per block location
 * reported by the file system for the whole length of the file.
 * @param fs the file system that holds the target file
 * @param status the target file status
 * @return the computed information
 * @throws IOException if failed to compute information
 */
public static List<BlockInfo> computeBlocks(FileSystem fs, FileStatus status) throws IOException {
    BlockLocation[] blockLocations = fs.getFileBlockLocations(status, 0, status.getLen());
    List<BlockInfo> blocks = new ArrayList<>();
    for (int i = 0; i < blockLocations.length; i++) {
        BlockLocation current = blockLocations[i];
        long begin = current.getOffset();
        long end = begin + current.getLength();
        blocks.add(new BlockInfo(begin, end, current.getHosts()));
    }
    return blocks;
}
From source file:com.cloudera.GetBlockLocations.java
License:Apache License
/**
 * Prints the block locations of a single file to standard output.
 *
 * <p>The file URL is obtained through {@code getStringOrDie("get.block.locations.path")}.
 * The process exits with status 1 when no file exists at that URL or when the block
 * locations cannot be fetched. Each block is printed as a brace-delimited record;
 * records after the first are preceded by a line containing a comma, and the first
 * record is preceded by an empty line.
 *
 * @param args unused
 * @throws Exception if the URL is malformed or the file system cannot be reached
 */
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    String url = getStringOrDie("get.block.locations.path");
    final FileSystem fs = FileSystem.get(new URI(url), conf);

    boolean present = fs.exists(new Path(url));
    if (!present) {
        System.out.println("no file at " + url);
        System.exit(1);
    }

    BlockLocation[] blocks = null;
    try {
        blocks = fs.getFileBlockLocations(new Path(url), 0, Long.MAX_VALUE);
    } catch (IOException e) {
        System.out.println("Error calling getFileBlockLocations(" + url + ")\n");
        e.printStackTrace(System.err);
        System.exit(1);
    }

    for (int i = 0; i < blocks.length; i++) {
        BlockLocation block = blocks[i];
        // Separator line: empty before the first record, a comma before the others.
        System.out.println(i == 0 ? "" : ",");
        System.out.println("{");
        System.out.println(" hosts = " + Arrays.toString(block.getHosts()));
        System.out.println(" cachedHosts = " + Arrays.toString(block.getCachedHosts()));
        System.out.println(" names = " + Arrays.toString(block.getNames()));
        System.out.println(" topologyPaths = " + Arrays.toString(block.getTopologyPaths()));
        System.out.println(" offset = " + block.getOffset());
        System.out.println(" length = " + block.getLength());
        System.out.println(" corrupt = " + block.isCorrupt());
        System.out.println("}");
    }
}
From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java
License:Apache License
/**
 * Lists the directory with {@code FileSystem.listStatus} and, if the directory exists,
 * fetches the block locations of every entry and reads the names and hosts of each
 * block. Fails the test when any of these calls throws an {@link IOException}.
 */
private static void listStatus(String dirPath) {
    Path dir = new Path(dirPath);
    boolean failed = false;
    try {
        FileSystem fileSystem = dir.getFileSystem(LoadMetadataUtil.getConf());
        FileStatus[] entries = fileSystem.listStatus(dir);
        // The existence check deliberately stays after the listing call.
        if (fileSystem.exists(dir)) {
            for (int i = 0; i < entries.length; i++) {
                FileStatus entry = entries[i];
                BlockLocation[] blocks = fileSystem.getFileBlockLocations(entry, 0, entry.getLen());
                for (int b = 0; b < blocks.length; b++) {
                    // Results are discarded; only the calls themselves matter here.
                    blocks[b].getNames();
                    blocks[b].getHosts();
                }
            }
        }
    } catch (IOException e) {
        failed = true;
        LOG.error("Failed to list Status", e);
    }
    assertFalse(failed);
}
From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java
License:Apache License
/** * List file status by calling abstractFileSystem.listStatusIterator. *///from w w w. j a va2 s .co m private static void listStatusIterator(String dirPath) { Path path = new Path(dirPath); boolean exceptionThrown = false; try { AbstractFileSystem fs = AbstractFileSystem.createFileSystem(path.toUri(), LoadMetadataUtil.getConf()); RemoteIterator<FileStatus> iter = fs.listStatusIterator(path); while (iter.hasNext()) { FileStatus fileStatus = iter.next(); BlockLocation[] locations = fs.getFileBlockLocations(fileStatus.getPath(), 0, fileStatus.getLen()); for (BlockLocation loc : locations) { loc.getNames(); loc.getHosts(); } } } catch (IOException e) { exceptionThrown = true; LOG.error("Failed to list Status Iterator", e); } assertFalse(exceptionThrown); }
From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java
License:Apache License
/** * List file status by calling fileSystem.listLocatedStatus. */// w w w. ja v a 2 s. c o m private static void listLocatedStatus(String dirPath) { Path path = new Path(dirPath); boolean exceptionThrown = false; try { FileSystem fs = path.getFileSystem(LoadMetadataUtil.getConf()); RemoteIterator<LocatedFileStatus> iterator = fs.listLocatedStatus(path); if (fs.exists(path)) { while (iterator.hasNext()) { LocatedFileStatus fileStatus = iterator.next(); BlockLocation[] locations = fileStatus.getBlockLocations(); for (BlockLocation loc : locations) { loc.getHosts(); loc.getNames(); } } } } catch (IOException e) { exceptionThrown = true; LOG.error("Failed to list Located Status", e); } assertFalse(exceptionThrown); }
From source file:com.cloudera.impala.util.LoadMetadataUtil.java
License:Apache License
/** * Create FileBlock according to BlockLocation and hostIndex. Get host names and ports * from BlockLocation, and get all replicas' host id from hostIndex. * * Must be threadsafe. Access to 'hostIndex' must be protected. *//*from w w w. j av a 2 s. c om*/ private static FileBlock createFileBlock(BlockLocation loc, ListMap<TNetworkAddress> hostIndex) throws IOException { // Get the location of all block replicas in ip:port format. String[] blockHostPorts = loc.getNames(); // Get the hostnames for all block replicas. Used to resolve which hosts // contain cached data. The results are returned in the same order as // block.getNames() so it allows us to match a host specified as ip:port to // corresponding hostname using the same array index. String[] blockHostNames = loc.getHosts(); Preconditions.checkState(blockHostNames.length == blockHostPorts.length); // Get the hostnames that contain cached replicas of this block. Set<String> cachedHosts = Sets.newHashSet(Arrays.asList(loc.getCachedHosts())); Preconditions.checkState(cachedHosts.size() <= blockHostNames.length); // Now enumerate all replicas of the block, adding any unknown hosts // to hostMap_/hostList_. The host ID (index in to the hostList_) for each // replica is stored in replicaHostIdxs. List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(blockHostPorts.length); for (int i = 0; i < blockHostPorts.length; ++i) { TNetworkAddress networkAddress = BlockReplica.parseLocation(blockHostPorts[i]); Preconditions.checkState(networkAddress != null); networkAddress.setHdfs_host_name(blockHostNames[i]); int idx = -1; synchronized (hostIndex) { idx = hostIndex.getIndex(networkAddress); } replicas.add(new BlockReplica(idx, cachedHosts.contains(blockHostNames[i]))); } return new FileBlock(loc.getOffset(), loc.getLength(), replicas); }
From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java
License:Apache License
private List<HiveSplit> createHiveSplits(String partitionName, String path, BlockLocation[] blockLocations, long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable, ConnectorSession session, OptionalInt bucketNumber, TupleDomain<HiveColumnHandle> effectivePredicate, Map<Integer, HiveType> columnCoercions) throws IOException { ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder(); boolean forceLocalScheduling = HiveSessionProperties.isForceLocalScheduling(session); if (splittable) { for (BlockLocation blockLocation : blockLocations) { // get the addresses for the block List<HostAddress> addresses = toHostAddress(blockLocation.getHosts()); long maxBytes = maxSplitSize.toBytes(); boolean creatingInitialSplits = false; if (remainingInitialSplits.get() > 0) { maxBytes = maxInitialSplitSize.toBytes(); creatingInitialSplits = true; }// www. j ava 2 s. c o m // divide the block into uniform chunks that are smaller than the max split size int chunks = Math.max(1, (int) (blockLocation.getLength() / maxBytes)); // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks); long chunkOffset = 0; while (chunkOffset < blockLocation.getLength()) { if (remainingInitialSplits.decrementAndGet() < 0 && creatingInitialSplits) { creatingInitialSplits = false; // recalculate the target chunk size maxBytes = maxSplitSize.toBytes(); long remainingLength = blockLocation.getLength() - chunkOffset; chunks = Math.max(1, (int) (remainingLength / maxBytes)); targetChunkSize = (long) Math.ceil(remainingLength * 1.0 / chunks); } // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above) long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset); builder.add(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(), 
partitionName, path, blockLocation.getOffset() + chunkOffset, chunkLength, schema, partitionKeys, addresses, bucketNumber, forceLocalScheduling && hasRealAddress(addresses), effectivePredicate, columnCoercions)); chunkOffset += chunkLength; } checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks"); } } else { // not splittable, use the hosts from the first block if it exists List<HostAddress> addresses = ImmutableList.of(); if (blockLocations.length > 0) { addresses = toHostAddress(blockLocations[0].getHosts()); } builder.add(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(), partitionName, path, start, length, schema, partitionKeys, addresses, bucketNumber, forceLocalScheduling && hasRealAddress(addresses), effectivePredicate, columnCoercions)); } return builder.build(); }