Usage examples for `org.apache.hadoop.hdfs.DFSClient#getBlockLocations`:
public BlockLocation[] getBlockLocations(String src, long start, long length) throws IOException
From source file: com.pinterest.terrapin.controller.ControllerUtil.java
License: Apache License
/** * Builds the helix ideal state for HDFS directory by finding the locations of HDFS blocks and * creating an ideal state assignment based on those. * * @param hdfsClient The HDFS client object. * @param hdfsDir The HDFS directory containing the various files. * @param resourceName The name of the Helix resource for which the ideal state is being created. * @param partitioner The partitioner type, used for extracting helix partition names from * HDFS files./*from w w w. java 2 s. c o m*/ * @param numReplicas The number of replicas for each partition. * @param enableZkCompression Whether data in zk is kept compressed. * @return The ideal state as computed based on HDFS block placement. * @throws ControllerException */ public static IdealState buildIdealStateForHdfsDir(DFSClient hdfsClient, String hdfsDir, String resourceName, PartitionerType partitioner, int numReplicas, boolean enableZkCompression) throws ControllerException { List<HdfsFileStatus> fileList; try { fileList = TerrapinUtil.getHdfsFileList(hdfsClient, hdfsDir); } catch (IOException e) { throw new ControllerException("Exception while listing files in " + hdfsDir, ControllerErrorCode.HDFS_ERROR); } // Mapping from file to HDFS block locations. 
Map<Integer, Set<String>> hdfsBlockMapping = Maps.newHashMapWithExpectedSize(fileList.size()); for (HdfsFileStatus fileStatus : fileList) { Integer partitionName = TerrapinUtil.extractPartitionName(fileStatus.getLocalName(), partitioner); if (partitionName == null) { LOG.info("Skipping " + fileStatus.getLocalName() + " for " + hdfsDir); continue; } String fullName = fileStatus.getFullName(hdfsDir); BlockLocation[] locations = null; try { locations = hdfsClient.getBlockLocations(fullName, 0, fileStatus.getLen()); } catch (Exception e) { throw new ControllerException("Exception while getting block locations " + e.getMessage(), ControllerErrorCode.HDFS_ERROR); } Set<String> instanceSet = Sets.newHashSetWithExpectedSize(3); BlockLocation firstLocation = locations[0]; String[] hosts = null; try { hosts = firstLocation.getHosts(); } catch (IOException e) { throw new ControllerException("Exception while getting hosts " + e.getMessage(), ControllerErrorCode.HDFS_ERROR); } for (String host : hosts) { instanceSet.add(host); } hdfsBlockMapping.put(partitionName, instanceSet); } // Assign helix partitions for the resource - which is the HDFS directory. int bucketSize = TerrapinUtil.getBucketSize(hdfsBlockMapping.size(), enableZkCompression); CustomModeISBuilder idealStateBuilder = new CustomModeISBuilder(resourceName); for (Map.Entry<Integer, Set<String>> mapping : hdfsBlockMapping.entrySet()) { // Make partitions globally unique String partitionName = null; // This is needed because of the way helix parses partition numbers for buckets. 
if (bucketSize > 0) { partitionName = resourceName + "_" + mapping.getKey(); } else { partitionName = resourceName + "$" + mapping.getKey(); } Set<String> instanceSet = mapping.getValue(); for (String instance : instanceSet) { idealStateBuilder.assignInstanceAndState(partitionName, TerrapinUtil.getHelixInstanceFromHDFSHost(instance), "ONLINE"); } } idealStateBuilder.setStateModel("OnlineOffline"); idealStateBuilder.setNumReplica(numReplicas); idealStateBuilder.setNumPartitions(hdfsBlockMapping.size()); IdealState is = idealStateBuilder.build(); if (bucketSize > 0) { is.setBucketSize(bucketSize); } is.setRebalanceMode(IdealState.RebalanceMode.CUSTOMIZED); if (enableZkCompression) { TerrapinUtil.compressIdealState(is); } return is; }
From source file: com.pinterest.terrapin.controller.ControllerUtilTest.java
License: Apache License
public void testBuildIdealStateForHdfsDirHelper(boolean zkCompression, int numPartitions) throws Exception { String hdfsDir = Constants.HDFS_DATA_DIR + "/fileset"; DFSClient dfsClient = mock(DFSClient.class); // Create three hosts in the clusters. List<BlockLocation> locations = ImmutableList .of(new BlockLocation(new String[] { "host1", "host2" }, new String[] { "host1", "host2" }, 0, 0)); HdfsFileStatus[] fileStatuses = new HdfsFileStatus[numPartitions]; for (int i = 0; i < numPartitions; ++i) { fileStatuses[i] = PowerMockito.mock(HdfsFileStatus.class); String localName = TerrapinUtil.formatPartitionName(i); when(fileStatuses[i].getLocalName()).thenReturn(localName); when(fileStatuses[i].getFullName(eq(hdfsDir))).thenReturn(hdfsDir + "/" + localName); when(fileStatuses[i].getLen()).thenReturn(1000L); BlockLocation[] locationArray = new BlockLocation[1]; locations.subList(0, 1).toArray(locationArray); when(dfsClient.getBlockLocations(eq(fileStatuses[i].getFullName(hdfsDir)), anyLong(), anyLong())) .thenReturn(locationArray); }// www. j a v a 2 s. co m when(dfsClient.listPaths(eq(hdfsDir), any(byte[].class))).thenReturn(new DirectoryListing(fileStatuses, 0)); IdealState is = ControllerUtil.buildIdealStateForHdfsDir(dfsClient, hdfsDir, "resource", PartitionerType.CASCADING, 2, zkCompression); assertEquals(numPartitions, is.getNumPartitions()); assertEquals("resource", is.getResourceName()); for (int i = 0; i < numPartitions; ++i) { String partition; if (numPartitions > 1000 && !zkCompression) { partition = "resource_" + i; } else { partition = "resource$" + i; } assertEquals(Sets.newHashSet("host1", "host2"), is.getInstanceSet(partition)); } assertEquals("OnlineOffline", is.getStateModelDefRef()); if (zkCompression) { assertTrue(is.getRecord().getBooleanField("enableCompression", false)); } assertEquals(IdealState.RebalanceMode.CUSTOMIZED, is.getRebalanceMode()); }