Java tutorial: BlockStorageLocationUtil from Apache Hadoop HDFS
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.BlockStorageLocation;
import org.apache.hadoop.fs.HdfsVolumeId;
import org.apache.hadoop.fs.VolumeId;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.security.token.Token;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.htrace.core.SpanId;
import org.apache.htrace.core.TraceScope;
import org.apache.htrace.core.Tracer;

@InterfaceAudience.Private
@InterfaceStability.Unstable
class BlockStorageLocationUtil {

  static final Log LOG = LogFactory.getLog(BlockStorageLocationUtil.class);

  /**
   * Create a list of {@link VolumeBlockLocationCallable} corresponding to a set
   * of datanodes and blocks. The blocks must all correspond to the same
   * block pool.
   *
   * @param datanodeBlocks
   *          Map of datanodes to block replicas at each datanode
   * @return callables Used to query each datanode for location information on
   *         the block replicas at the datanode
   */
  private static List<VolumeBlockLocationCallable> createVolumeBlockLocationCallables(
      Configuration conf, Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks,
      int timeout, boolean connectToDnViaHostname, Tracer tracer,
      SpanId parentSpanId) {

    if (datanodeBlocks.isEmpty()) {
      return Lists.newArrayList();
    }

    // Construct the callables, one per datanode
    List<VolumeBlockLocationCallable> callables = new ArrayList<>();
    for (Map.Entry<DatanodeInfo, List<LocatedBlock>> entry :
        datanodeBlocks.entrySet()) {
      // Construct RPC parameters
      DatanodeInfo datanode = entry.getKey();
      List<LocatedBlock> locatedBlocks = entry.getValue();
      if (locatedBlocks.isEmpty()) {
        continue;
      }

      // Ensure that the blocks all are from the same block pool.
      String poolId = locatedBlocks.get(0).getBlock().getBlockPoolId();
      for (LocatedBlock lb : locatedBlocks) {
        if (!poolId.equals(lb.getBlock().getBlockPoolId())) {
          throw new IllegalArgumentException(
              "All blocks to be queried must be in the same block pool: "
                  + locatedBlocks.get(0).getBlock() + " and " + lb
                  + " are from different pools.");
        }
      }

      long[] blockIds = new long[locatedBlocks.size()];
      int i = 0;
      List<Token<BlockTokenIdentifier>> dnTokens =
          new ArrayList<>(locatedBlocks.size());
      for (LocatedBlock b : locatedBlocks) {
        blockIds[i++] = b.getBlock().getBlockId();
        dnTokens.add(b.getBlockToken());
      }
      VolumeBlockLocationCallable callable = new VolumeBlockLocationCallable(
          conf, datanode, poolId, blockIds, dnTokens, timeout,
          connectToDnViaHostname, tracer, parentSpanId);
      callables.add(callable);
    }
    return callables;
  }

  /**
   * Queries datanodes for the blocks specified in <code>datanodeBlocks</code>,
   * making one RPC to each datanode. These RPCs are made in parallel using a
   * threadpool.
   *
   * @param datanodeBlocks
   *          Map of datanodes to the blocks present on the DN
   * @return metadatas Map of datanodes to block metadata of the DN
   * @throws InvalidBlockTokenException
   *           if client does not have read access on a requested block
   */
  static Map<DatanodeInfo, HdfsBlocksMetadata> queryDatanodesForHdfsBlocksMetadata(
      Configuration conf, Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks,
      int poolsize, int timeoutMs, boolean connectToDnViaHostname,
      Tracer tracer, SpanId parentSpanId) throws InvalidBlockTokenException {

    List<VolumeBlockLocationCallable> callables =
        createVolumeBlockLocationCallables(conf, datanodeBlocks, timeoutMs,
            connectToDnViaHostname, tracer, parentSpanId);

    // Use a thread pool to execute the Callables in parallel
    List<Future<HdfsBlocksMetadata>> futures = new ArrayList<>();
    ExecutorService executor = new ScheduledThreadPoolExecutor(poolsize);
    try {
      futures = executor.invokeAll(callables, timeoutMs, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
      // Swallow the exception here, because we can return partial results
    }
    executor.shutdown();

    Map<DatanodeInfo, HdfsBlocksMetadata> metadatas =
        Maps.newHashMapWithExpectedSize(datanodeBlocks.size());
    // Fill in metadatas with results from DN RPCs, where possible
    for (int i = 0; i < futures.size(); i++) {
      VolumeBlockLocationCallable callable = callables.get(i);
      DatanodeInfo datanode = callable.getDatanodeInfo();
      Future<HdfsBlocksMetadata> future = futures.get(i);
      try {
        HdfsBlocksMetadata metadata = future.get();
        metadatas.put(callable.getDatanodeInfo(), metadata);
      } catch (CancellationException e) {
        LOG.info("Cancelled while waiting for datanode "
            + datanode.getIpcAddr(false) + ": " + e.toString());
      } catch (ExecutionException e) {
        Throwable t = e.getCause();
        if (t instanceof InvalidBlockTokenException) {
          LOG.warn("Invalid access token when trying to retrieve "
              + "information from datanode " + datanode.getIpcAddr(false));
          throw (InvalidBlockTokenException) t;
        } else if (t instanceof UnsupportedOperationException) {
          LOG.info("Datanode " + datanode.getIpcAddr(false) + " does not support"
              + " required #getHdfsBlocksMetadata() API");
          throw (UnsupportedOperationException) t;
        } else {
          LOG.info("Failed to query block locations on datanode "
              + datanode.getIpcAddr(false) + ": " + t);
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("Could not fetch information from datanode", t);
        }
      } catch (InterruptedException e) {
        // Shouldn't happen, because invokeAll waits for all Futures to be ready
        LOG.info("Interrupted while fetching HdfsBlocksMetadata");
      }
    }

    return metadatas;
  }

  /**
   * Group the per-replica {@link VolumeId} info returned from
   * {@link DFSClient#queryDatanodesForHdfsBlocksMetadata(Map)} to be
   * associated with the corresponding {@link LocatedBlock}.
   *
   * @param blocks
   *          Original LocatedBlock array
   * @param metadatas
   *          VolumeId information for the replicas on each datanode
   * @return blockVolumeIds per-replica VolumeId information associated with the
   *         parent LocatedBlock
   */
  static Map<LocatedBlock, List<VolumeId>> associateVolumeIdsWithBlocks(
      List<LocatedBlock> blocks,
      Map<DatanodeInfo, HdfsBlocksMetadata> metadatas) {

    // Initialize mapping of ExtendedBlock to LocatedBlock.
    // Used to associate results from DN RPCs to the parent LocatedBlock
    Map<Long, LocatedBlock> blockIdToLocBlock = new HashMap<Long, LocatedBlock>();
    for (LocatedBlock b : blocks) {
      blockIdToLocBlock.put(b.getBlock().getBlockId(), b);
    }

    // Initialize the mapping of blocks -> list of VolumeIds, one per replica
    // This is filled out with real values from the DN RPCs
    Map<LocatedBlock, List<VolumeId>> blockVolumeIds = new HashMap<>();
    for (LocatedBlock b : blocks) {
      ArrayList<VolumeId> l = new ArrayList<>(b.getLocations().length);
      for (int i = 0; i < b.getLocations().length; i++) {
        l.add(null);
      }
      blockVolumeIds.put(b, l);
    }

    // Iterate through the list of metadatas (one per datanode).
    // For each metadata, if it's valid, insert its volume location information
    // into the Map returned to the caller
    for (Map.Entry<DatanodeInfo, HdfsBlocksMetadata> entry :
        metadatas.entrySet()) {
      DatanodeInfo datanode = entry.getKey();
      HdfsBlocksMetadata metadata = entry.getValue();
      // Check if metadata is valid
      if (metadata == null) {
        continue;
      }
      long[] metaBlockIds = metadata.getBlockIds();
      List<byte[]> metaVolumeIds = metadata.getVolumeIds();
      List<Integer> metaVolumeIndexes = metadata.getVolumeIndexes();
      // Add VolumeId for each replica in the HdfsBlocksMetadata
      for (int j = 0; j < metaBlockIds.length; j++) {
        int volumeIndex = metaVolumeIndexes.get(j);
        long blockId = metaBlockIds[j];
        // Skip if block wasn't found, or not a valid index into metaVolumeIds
        // Also skip if the DN responded with a block we didn't ask for
        if (volumeIndex == Integer.MAX_VALUE
            || volumeIndex >= metaVolumeIds.size()
            || !blockIdToLocBlock.containsKey(blockId)) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("No data for block " + blockId);
          }
          continue;
        }
        // Get the VolumeId by indexing into the list of VolumeIds
        // provided by the datanode
        byte[] volumeId = metaVolumeIds.get(volumeIndex);
        HdfsVolumeId id = new HdfsVolumeId(volumeId);
        // Find out which index we are in the LocatedBlock's replicas
        LocatedBlock locBlock = blockIdToLocBlock.get(blockId);
        DatanodeInfo[] dnInfos = locBlock.getLocations();
        int index = -1;
        for (int k = 0; k < dnInfos.length; k++) {
          if (dnInfos[k].equals(datanode)) {
            index = k;
            break;
          }
        }
        if (index < 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Datanode responded with a block volume id we did"
                + " not request, omitting.");
          }
          continue;
        }
        // Place VolumeId at the same index as the DN's index in the list of
        // replicas
        List<VolumeId> volumeIds = blockVolumeIds.get(locBlock);
        volumeIds.set(index, id);
      }
    }
    return blockVolumeIds;
  }

  /**
   * Helper method to combine a list of {@link LocatedBlock} with associated
   * {@link VolumeId} information to form a list of
   * {@link BlockStorageLocation}.
   */
  static BlockStorageLocation[] convertToVolumeBlockLocations(
      List<LocatedBlock> blocks,
      Map<LocatedBlock, List<VolumeId>> blockVolumeIds) throws IOException {
    // Construct the final return value of VolumeBlockLocation[]
    BlockLocation[] locations = DFSUtil.locatedBlocks2Locations(blocks);
    List<BlockStorageLocation> volumeBlockLocs =
        new ArrayList<>(locations.length);
    for (int i = 0; i < locations.length; i++) {
      LocatedBlock locBlock = blocks.get(i);
      List<VolumeId> volumeIds = blockVolumeIds.get(locBlock);
      BlockStorageLocation bsLoc = new BlockStorageLocation(locations[i],
          volumeIds.toArray(new VolumeId[0]));
      volumeBlockLocs.add(bsLoc);
    }
    return volumeBlockLocs.toArray(new BlockStorageLocation[] {});
  }

  /**
   * Callable that sets up an RPC proxy to a datanode and queries it for
   * volume location information for a list of ExtendedBlocks.
   */
  private static class VolumeBlockLocationCallable
      implements Callable<HdfsBlocksMetadata> {

    private final Configuration configuration;
    private final int timeout;
    private final DatanodeInfo datanode;
    private final String poolId;
    private final long[] blockIds;
    private final List<Token<BlockTokenIdentifier>> dnTokens;
    private final boolean connectToDnViaHostname;
    private final Tracer tracer;
    private final SpanId parentSpanId;

    VolumeBlockLocationCallable(Configuration configuration,
        DatanodeInfo datanode, String poolId, long[] blockIds,
        List<Token<BlockTokenIdentifier>> dnTokens, int timeout,
        boolean connectToDnViaHostname, Tracer tracer, SpanId parentSpanId) {
      this.configuration = configuration;
      this.timeout = timeout;
      this.datanode = datanode;
      this.poolId = poolId;
      this.blockIds = blockIds;
      this.dnTokens = dnTokens;
      this.connectToDnViaHostname = connectToDnViaHostname;
      this.tracer = tracer;
      this.parentSpanId = parentSpanId;
    }

    public DatanodeInfo getDatanodeInfo() {
      return datanode;
    }

    @Override
    public HdfsBlocksMetadata call() throws Exception {
      HdfsBlocksMetadata metadata = null;
      // Create the RPC proxy and make the RPC
      ClientDatanodeProtocol cdp = null;
      TraceScope scope = tracer.newScope("getHdfsBlocksMetadata", parentSpanId);
      try {
        cdp = DFSUtil.createClientDatanodeProtocolProxy(datanode, configuration,
            timeout, connectToDnViaHostname);
        metadata = cdp.getHdfsBlocksMetadata(poolId, blockIds, dnTokens);
      } catch (IOException e) {
        // Bubble this up to the caller, handle with the Future
        throw e;
      } finally {
        scope.close();
        if (cdp != null) {
          RPC.stopProxy(cdp);
        }
      }
      return metadata;
    }
  }
}
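
Putting the pieces together: the class and its methods are package-private, so their real consumer lives in the same org.apache.hadoop.hdfs package (DFSClient uses them when an application asks for block storage locations). The sketch below is a minimal, hypothetical caller that shows the intended three-step flow: group LocatedBlocks by datanode, query the datanodes in parallel, then fold the answers back into BlockStorageLocation results. The class name BlockStorageLocationExample, the method fetchStorageLocations, and the pool-size/timeout values are assumptions made for illustration only; DFSClient derives the equivalent values from its configuration.

package org.apache.hadoop.hdfs;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockStorageLocation;
import org.apache.hadoop.fs.VolumeId;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.htrace.core.SpanId;
import org.apache.htrace.core.Tracer;

/** Hypothetical caller, for illustration only; DFSClient is the real one. */
class BlockStorageLocationExample {

  /**
   * Resolve per-replica volume information for blocks the caller has already
   * located (e.g. via a NameNode getBlockLocations call). The tracer and
   * parent span id are supplied by the caller, as DFSClient does.
   */
  static BlockStorageLocation[] fetchStorageLocations(Configuration conf,
      List<LocatedBlock> blocks, Tracer tracer, SpanId parentSpanId)
      throws IOException {
    // 1. Group the blocks by the datanodes holding a replica, since the
    //    utility issues one RPC per datanode.
    Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks = new HashMap<>();
    for (LocatedBlock b : blocks) {
      for (DatanodeInfo dn : b.getLocations()) {
        List<LocatedBlock> l = datanodeBlocks.get(dn);
        if (l == null) {
          l = new ArrayList<>();
          datanodeBlocks.put(dn, l);
        }
        l.add(b);
      }
    }

    // 2. Query every datanode in parallel. The pool size (10) and timeout
    //    (60000 ms) are illustrative values, not Hadoop defaults.
    Map<DatanodeInfo, HdfsBlocksMetadata> metadatas =
        BlockStorageLocationUtil.queryDatanodesForHdfsBlocksMetadata(conf,
            datanodeBlocks, 10, 60000, false, tracer, parentSpanId);

    // 3. Re-associate the per-datanode answers with the original blocks,
    //    then convert them to the public BlockStorageLocation type.
    Map<LocatedBlock, List<VolumeId>> blockVolumeIds =
        BlockStorageLocationUtil.associateVolumeIdsWithBlocks(blocks, metadatas);
    return BlockStorageLocationUtil.convertToVolumeBlockLocations(blocks,
        blockVolumeIds);
  }
}

Note the failure behaviour this design buys: because queryDatanodesForHdfsBlocksMetadata swallows per-datanode timeouts and RPC errors (except invalid block tokens and unsupported operations), the caller still gets partial results, with null VolumeIds for replicas whose datanode did not answer.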