List of usage examples for org.apache.hadoop.fs FileSystem getFileBlockLocations
public BlockLocation[] getFileBlockLocations(Path p, long start, long len) throws IOException
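Before the per-project examples below, a minimal sketch of the common call pattern (the class name and path are hypothetical, not from any of the projects listed here): look up the file's FileStatus, ask for the block locations covering the whole byte range, and inspect each block's offset, length, and hosts.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLocationExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/data/input.txt"); // hypothetical path
        FileSystem fs = path.getFileSystem(conf);

        // Fetch the file's status, then request the locations of every
        // block in the byte range [0, len).
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());

        for (BlockLocation location : locations) {
            System.out.println("offset=" + location.getOffset()
                    + " length=" + location.getLength()
                    + " hosts=" + Arrays.toString(location.getHosts()));
        }
    }
}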
From source file:org.bgi.flexlab.gaea.data.mapreduce.input.cram.GaeaCombineCramFileRecordReader.java
License:Open Source License
protected boolean initializeNextRecordReader() throws IOException {
    if (currentReader != null) {
        currentReader.close();
        currentReader = null;
    }

    // if all chunks have been processed, nothing more to do.
    if (fileIndex == split.getNumPaths()) {
        return false;
    }

    // get a record reader for the fileIndex-th chunk
    try {
        Configuration conf = context.getConfiguration();
        currentReader = new GaeaCramRecordReader();

        Path path = split.getPath(fileIndex);
        long length = split.getLength(fileIndex);
        FileSystem fs = path.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] blkLocations = fs.getFileBlockLocations(status, 0, length);
        currentReader.initialize(new FileSplit(path, 0, length, blkLocations[0].getHosts()), context);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    fileIndex++;
    return true;
}
From source file:org.cdlib.was.weari.pig.ArcListInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());

    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
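The loop above relies on FileInputFormat's inherited getBlockIndex to map each split's start offset back to the block containing it, so the split can be scheduled near that block's replicas. A rough sketch of that lookup, as an approximation rather than the exact Hadoop implementation:

// Approximate behavior of FileInputFormat.getBlockIndex: return the index
// of the block whose byte range contains the given offset.
static int getBlockIndexSketch(BlockLocation[] blocks, long offset) {
    for (int i = 0; i < blocks.length; i++) {
        long blockStart = blocks[i].getOffset();
        long blockEnd = blockStart + blocks[i].getLength();
        if (offset >= blockStart && offset < blockEnd) {
            return i;
        }
    }
    throw new IllegalArgumentException("Offset " + offset + " is outside of file");
}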
From source file:org.commoncrawl.util.NodeAffinityMaskBuilder.java
License:Open Source License
public static String buildNodeAffinityMask(FileSystem fileSystem, Path partFileDirectory,
        Map<Integer, String> optionalRootMapHint, Set<String> excludedNodeList, int maxReducersPerNode,
        boolean skipBalance) throws IOException {

    TreeMap<Integer, String> partitionToNodeMap = new TreeMap<Integer, String>();
    FileStatus paths[] = fileSystem.globStatus(new Path(partFileDirectory, "part-*"));
    if (paths.length == 0) {
        throw new IOException("Invalid source Path:" + partFileDirectory);
    }

    Multimap<String, Integer> inverseMap = TreeMultimap.create();
    Map<Integer, List<String>> paritionToDesiredCandidateList = new TreeMap<Integer, List<String>>();

    // iterate paths
    for (FileStatus path : paths) {
        String currentFile = path.getPath().getName();
        int partitionNumber;
        try {
            if (currentFile.startsWith("part-r")) {
                partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-r-".length())).intValue();
            } else {
                partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-".length())).intValue();
            }
        } catch (ParseException e) {
            throw new IOException("Invalid Part Name Encountered:" + currentFile);
        }

        // get block locations
        BlockLocation locations[] = fileSystem.getFileBlockLocations(path, 0, path.getLen());

        // if the passed-in root map is not null, validate that all blocks for the
        // current file reside on the desired node
        if (optionalRootMapHint != null) {
            // the host all blocks should reside on
            String desiredHost = optionalRootMapHint.get(partitionNumber);

            ArrayList<String> misplacedBlocks = new ArrayList<String>();
            // ok walk all blocks
            for (BlockLocation location : locations) {
                boolean found = false;
                for (String host : location.getHosts()) {
                    if (host.compareTo(desiredHost) == 0) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    misplacedBlocks.add("Block At:" + location.getOffset() + " for File:" + path.getPath()
                            + " did not contain desired location:" + desiredHost);
                }
            }

            // ok pass test at a certain threshold
            if (misplacedBlocks.size() != 0
                    && ((float) misplacedBlocks.size() / (float) locations.length) > .50f) {
                LOG.error("Misplaced Blocks Exceed Threshold");
                for (String misplacedBlock : misplacedBlocks) {
                    LOG.error(misplacedBlock);
                }
                // TODO: SKIP THIS STEP FOR NOW ???
                // throw new IOException("Misplaced Blocks Exceed Threshold!");
            }
            partitionToNodeMap.put(partitionNumber, desiredHost);
        } else {
            if (excludedNodeList != null) {
                // LOG.info("Exclued Node List is:" + Lists.newArrayList(excludedNodeList).toString());
            }
            // ok ask file system for block locations
            TreeMap<String, Integer> nodeToBlockCount = new TreeMap<String, Integer>();

            for (BlockLocation location : locations) {
                for (String host : location.getHosts()) {
                    if (excludedNodeList == null || !excludedNodeList.contains(host)) {
                        Integer nodeHitCount = nodeToBlockCount.get(host);
                        if (nodeHitCount == null) {
                            nodeToBlockCount.put(host, 1);
                        } else {
                            nodeToBlockCount.put(host, nodeHitCount.intValue() + 1);
                        }
                    }
                }
            }

            if (nodeToBlockCount.size() == 0) {
                throw new IOException("No valid nodes found for partition number:" + path);
            }

            Map.Entry<String, Integer> entries[] = nodeToBlockCount.entrySet().toArray(new Map.Entry[0]);
            Arrays.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
                    return o1.getValue().intValue() < o2.getValue().intValue() ? 1
                            : o1.getValue().intValue() == o2.getValue().intValue() ? 0 : -1;
                }
            });

            // build a list of nodes by priority ...
            List<String> nodesByPriority = Lists.transform(Lists.newArrayList(entries),
                    new Function<Map.Entry<String, Integer>, String>() {
                        @Override
                        public String apply(Entry<String, Integer> entry) {
                            return entry.getKey();
                        }
                    });

            // stash it away ...
            paritionToDesiredCandidateList.put(partitionNumber, nodesByPriority);
            // LOG.info("Mapping Partition:" + partitionNumber + " To Node:" + entries[0].getKey()
            //         + " BlockCount" + entries[0].getValue().intValue());
            partitionToNodeMap.put(partitionNumber, entries[0].getKey());
            // store the inverse mapping ...
            inverseMap.put(entries[0].getKey(), partitionNumber);
        }
    }

    if (skipBalance) {
        // walk partition map to make sure everything is assigned ...
        /*
        for (String node : inverseMap.keys()) {
            if (inverseMap.get(node).size() > maxReducersPerNode) {
                throw new IOException("Node:" + node + " has too many partitions! ("
                        + inverseMap.get(node).size());
            }
        }
        */
    }

    // now if optional root map hint is null
    if (optionalRootMapHint == null && !skipBalance) {
        // figure out if there is an imbalance
        int avgRegionsPerNode = (int) Math.floor((float) paths.length / (float) inverseMap.keySet().size());
        int maxRegionsPerNode = (int) Math.ceil((float) paths.length / (float) inverseMap.keySet().size());

        LOG.info("Attempting to ideally balance nodes. Avg paritions per node:" + avgRegionsPerNode);

        // two passes ..
        for (int pass = 0; pass < 2; ++pass) {
            LOG.info("Pass:" + pass);
            // iterate nodes ...
            for (String node : ImmutableSet.copyOf(inverseMap.keySet())) {
                // get partitions in map
                Collection<Integer> paritions = ImmutableList.copyOf(inverseMap.get(node));
                // if partition count exceeds the desired average ...
                if (paritions.size() > maxRegionsPerNode) {
                    // first pass, assign based on preference
                    if (pass == 0) {
                        LOG.info("Node:" + node + " parition count:" + paritions.size() + " exceeds avg:"
                                + avgRegionsPerNode);
                        // walk partitions trying to find a node to discard the partition to
                        for (int partition : paritions) {
                            for (String candidate : paritionToDesiredCandidateList.get(partition)) {
                                if (!candidate.equals(node)) {
                                    // see if this candidate has room ..
                                    if (inverseMap.get(candidate).size() < avgRegionsPerNode) {
                                        LOG.info("REASSIGNING parition:" + partition + " from Node:" + node
                                                + " to Node:" + candidate);
                                        // found match reassign it ...
                                        inverseMap.remove(node, partition);
                                        inverseMap.put(candidate, partition);
                                        break;
                                    }
                                }
                            }
                            // break out if we reach our desired number of partitions for this node
                            if (inverseMap.get(node).size() == avgRegionsPerNode)
                                break;
                        }
                    }
                    // second pass ... assign based on least loaded node ...
                    else {
                        int desiredRelocations = paritions.size() - maxRegionsPerNode;
                        LOG.info("Desired Relocation for node:" + node + ":" + desiredRelocations
                                + " partitions:" + paritions.size());
                        for (int i = 0; i < desiredRelocations; ++i) {
                            String leastLoadedNode = null;
                            int leastLoadedNodePartitionCount = 0;

                            for (String candidateNode : inverseMap.keySet()) {
                                if (leastLoadedNode == null || inverseMap.get(candidateNode)
                                        .size() < leastLoadedNodePartitionCount) {
                                    leastLoadedNode = candidateNode;
                                    leastLoadedNodePartitionCount = inverseMap.get(candidateNode).size();
                                }
                            }

                            int bestPartition = -1;
                            int bestParitionOffset = -1;

                            for (int candidateParition : inverseMap.get(node)) {
                                int offset = 0;
                                for (String nodeCandidate : paritionToDesiredCandidateList
                                        .get(candidateParition)) {
                                    if (nodeCandidate.equals(leastLoadedNode)) {
                                        if (bestPartition == -1 || bestParitionOffset > offset) {
                                            bestPartition = candidateParition;
                                            bestParitionOffset = offset;
                                        }
                                        break;
                                    }
                                    offset++;
                                }
                            }

                            if (bestPartition == -1) {
                                bestPartition = Iterables.get(inverseMap.get(node), 0);
                            }

                            LOG.info("REASSIGNING parition:" + bestPartition + " from Node:" + node
                                    + " to Node:" + leastLoadedNode);

                            // found match reassign it ...
                            inverseMap.remove(node, bestPartition);
                            inverseMap.put(leastLoadedNode, bestPartition);
                        }
                    }
                }
            }
        }

        LOG.info("Rebuilding parition to node map based on ideal balance");
        for (String node : inverseMap.keySet()) {
            LOG.info("Node:" + node + " has:" + inverseMap.get(node).size() + " partitions:"
                    + inverseMap.get(node).toString());
        }

        partitionToNodeMap.clear();
        for (Map.Entry<String, Integer> entry : inverseMap.entries()) {
            partitionToNodeMap.put(entry.getValue(), entry.getKey());
        }
    }

    StringBuilder builder = new StringBuilder();
    int itemCount = 0;
    for (Map.Entry<Integer, String> entry : partitionToNodeMap.entrySet()) {
        if (itemCount++ != 0)
            builder.append("\t");
        builder.append(entry.getKey().intValue() + "," + entry.getValue());
    }

    return builder.toString();
}
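The builder returns the affinity mask as tab-separated "partition,host" pairs (see the final StringBuilder loop). A hypothetical helper, not part of NodeAffinityMaskBuilder, that parses such a mask back into a map:

import java.util.Map;
import java.util.TreeMap;

// Hypothetical inverse of the StringBuilder loop above: split the mask on
// tabs, then split each item into a partition number and a host name.
static Map<Integer, String> parseAffinityMask(String mask) {
    Map<Integer, String> partitionToNode = new TreeMap<Integer, String>();
    for (String item : mask.split("\t")) {
        String[] parts = item.split(",", 2);
        partitionToNode.put(Integer.parseInt(parts[0]), parts[1]);
    }
    return partitionToNode;
}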
From source file:org.hedera.io.input.WikiRevisionInputFormat.java
License:Apache License
/**
 * This code is copied from StreamWikiDumpNewInputFormat.java by Yusuke Matsubara.
 * Thanks to Tu Meteora for adjusting the code to the new mapreduce framework
 *
 * @param jc the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext jc, FileStatus file, long splitSize) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path path = file.getPath();

    LOG.info("Splitting file " + path.getName());

    Configuration conf = jc.getConfiguration();
    configure(conf);

    long length = file.getLen();
    FileSystem fs = file.getPath().getFileSystem(conf);
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);

    if ((length != 0) && isSplitable(jc, path)) {
        long bytesRemaining = length;
        SeekableInputStream in = SeekableInputStream.getInstance(path, 0, length, fs, this.compressionCodecs);
        SplitCompressionInputStream is = in.getSplitCompressionInputStream();
        long start = 0;
        long skip = 0;
        if (is != null) {
            start = is.getAdjustedStart();
            length = is.getAdjustedEnd();
            is.close();
            in = null;
        }

        FileSplit split = null;
        Set<Long> processedPageEnds = new HashSet<Long>();
        float factor = conf.getFloat(KEY_SKIP_FACTOR, 1.2F);

        READLOOP: while (((double) bytesRemaining) / splitSize > factor && bytesRemaining > 0) {
            // prepare matcher
            ByteMatcher matcher;
            {
                long st = Math.min(start + skip + splitSize, length - 1);
                split = makeSplit(path, st, Math.min(splitSize, length - st), blkLocations);
                if (in != null)
                    in.close();
                if (split.getLength() <= 1) {
                    break;
                }
                in = SeekableInputStream.getInstance(split, fs, this.compressionCodecs);
            }
            matcher = new ByteMatcher(in);

            // read until the next page end in the look-ahead split
            while (!matcher.readUntilMatch(END_PAGE_TAG, null, split.getStart() + split.getLength(), null)) {
                if (matcher.getPos() >= length || split.getLength() == length - split.getStart())
                    break READLOOP;
                split = makeSplit(path, split.getStart(),
                        Math.min(split.getLength() + splitSize, length - split.getStart()), blkLocations);
            }

            if (matcher.getLastUnmatchPos() > 0 && matcher.getPos() > matcher.getLastUnmatchPos()
                    && !processedPageEnds.contains(matcher.getPos())) {
                splits.add(makeSplit(path, start, matcher.getPos() - start, blkLocations));
                processedPageEnds.add(matcher.getPos());
                long newstart = Math.max(matcher.getLastUnmatchPos(), start);
                bytesRemaining = length - newstart;
                start = newstart;
                skip = 0;
            } else {
                skip = matcher.getPos() - start;
            }
        }

        if (bytesRemaining > 0 && !processedPageEnds.contains(length)) {
            splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts()));
        }
        if (in != null)
            in.close();
    } else if (length != 0) {
        splits.add(makeSplit(path, 0, length, blkLocations));
    } else {
        // Create empty hosts array for zero length files
        splits.add(makeSplit(path, 0, length, new String[0]));
    }
    return splits;
}
From source file:org.imageterrier.hadoop.mapreduce.PositionAwareSequenceFileInputFormat.java
License:Mozilla Public License
/**
 * Generate the list of files and make them into FileSplits.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    int splitnum = 0;

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new PositionAwareSplitWrapper<FileSplit>(new FileSplit(path,
                        length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()), splitnum++));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new PositionAwareSplitWrapper<FileSplit>(new FileSplit(path,
                        length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()), splitnum++));
            }
        } else if (length != 0) {
            splits.add(new PositionAwareSplitWrapper<FileSplit>(
                    new FileSplit(path, 0, length, blkLocations[0].getHosts()), splitnum++));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new PositionAwareSplitWrapper<FileSplit>(new FileSplit(path, 0, length, new String[0]),
                    splitnum++));
        }
    }

    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
From source file:org.mrgeo.hdfs.ingest.format.IngestImageSplittingInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException {
    final List<InputSplit> splits = new LinkedList<InputSplit>();
    // mapred.input.dir
    final Path[] inputs = FileInputFormat.getInputPaths(context);

    final Configuration conf = context.getConfiguration();

    int tilesize = -1;
    try {
        // metadata = HadoopUtils.getMetadata(conf);
        Map<String, MrsImagePyramidMetadata> meta = HadoopUtils.getMetadata(context.getConfiguration());
        if (!meta.isEmpty()) {
            MrsImagePyramidMetadata metadata = meta.values().iterator().next();
            tilesize = metadata.getTilesize();
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }

    if (tilesize < 0) {
        tilesize = conf.getInt("tilesize", -1);
        if (tilesize < 1) {
            throw new MrsImageException(
                    "Error, no \"tilesize\" or \"metadata\" parameter in configuration, tilesize needs to be calculated & set before map/reduce");
        }
    }

    final int zoomlevel = conf.getInt("zoomlevel", -1);

    // get the tilesize in bytes (default to 3 band, 1 byte per band)
    final long tilebytes = conf.getLong("tilebytes", tilesize * tilesize * 3 * 1);

    if (zoomlevel < 1) {
        throw new MrsImageException(
                "Error, no \"zoomlevel\" parameter in configuration, zoomlevel needs to be calculated & set before map/reduce");
    }

    // get the spill buffer percent, then take 95% of it for extra padding...
    double spillpct = conf.getFloat("io.sort.spill.percent", (float) 0.8) * 0.95;
    long spillsize = (long) (conf.getFloat("io.sort.mb", 200) * spillpct) * 1024 * 1024;
    log.info("Spill size for splitting is: " + spillsize + "b");

    Map<String, Bounds> lookup = new HashMap<>();

    final String adhocname = conf.get(IngestImageDriver.INGEST_BOUNDS_LOCATION, null);
    if (adhocname != null) {
        AdHocDataProvider dp = DataProviderFactory.getAdHocDataProvider(adhocname,
                DataProviderFactory.AccessMode.READ, conf);
        InputStream is = dp.get(IngestImageDriver.INGEST_BOUNDS_FILE);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));

        String line;
        while ((line = reader.readLine()) != null) {
            String[] data = line.split("\\|");
            if (data.length == 2) {
                lookup.put(data[0], Bounds.fromDelimitedString(data[1]));
            }
        }
        is.close();
    }
    // log.info("Creating splits for: " + output.toString());
    for (final Path input : inputs) {
        final FileSystem fs = HadoopFileUtils.getFileSystem(conf, input);
        LongRectangle bounds = null;

        if (lookup.containsKey(input.toString())) {
            Bounds b = lookup.get(input.toString());
            bounds = TMSUtils.boundsToTile(b.getTMSBounds(), zoomlevel, tilesize).toLongRectangle();
        } else {
            log.info("  reading: " + input.toString());
            log.info("  zoomlevel: " + zoomlevel);

            final AbstractGridCoverage2DReader reader = GeotoolsRasterUtils.openImage(input.toString());

            if (reader != null) {
                try {
                    bounds = GeotoolsRasterUtils.calculateTiles(reader, tilesize, zoomlevel);
                } finally {
                    try {
                        GeotoolsRasterUtils.closeStreamFromReader(reader);
                    } catch (Exception e) {
                        e.printStackTrace();
                        throw new IOException(e);
                    }
                }
            }
        }

        if (bounds != null) {
            final long minTx = bounds.getMinX();
            final long maxTx = bounds.getMaxX();
            final long minTy = bounds.getMinY();
            final long maxTy = bounds.getMaxY();

            final long width = bounds.getWidth();
            final long height = bounds.getHeight();

            final long totaltiles = width * height;

            final FileStatus status = fs.getFileStatus(input);

            // for now, we'll just use the 1st block location for the split.
            // we can get more sophisticated later...
            final BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, 0);

            String location = null;
            if (blocks.length > 0) {
                final String hosts[] = blocks[0].getHosts();
                if (hosts.length > 0) {
                    location = hosts[0];
                }
            }

            // long filelen = status.getLen();
            final long totalbytes = totaltiles * tilebytes;

            // if uncompressed tile sizes are greater than the spillsize, break it
            // into pieces
            if (totalbytes > spillsize) {
                final long numsplits = (totalbytes / spillsize) + 1;

                final long splitrange = (totaltiles / numsplits);
                long leftovers = totaltiles - (numsplits * splitrange);

                long start = 0;
                long end = 0;

                for (int i = 0; i < numsplits; i++) {
                    end = start + splitrange;
                    if (leftovers > 0) {
                        end++;
                        leftovers--;
                    }

                    final long sy = (start / width);
                    final long sx = (start - (sy * width));
                    // since the tile range is inclusive, calculate with end-1
                    final long ey = ((end - 1) / width);
                    final long ex = ((end - 1) - (ey * width));

                    // System.out.println("start: " + start + " end: " + end);
                    // System.out.println("  sx: " + sx + " sy: " + sy);
                    // System.out.println("  ex: " + ex + " ey: " + ey);

                    splits.add(new IngestImageSplit(input.toString(), minTx + sx, minTx + ex, minTy + sy,
                            minTy + ey, (end - start), bounds, zoomlevel, tilesize, location));

                    start = end;
                }
            } else {
                splits.add(new IngestImageSplit(input.toString(), minTx, maxTx, minTy, maxTy,
                        (maxTx + 1 - minTx) * (maxTy + 1 - minTy), bounds, zoomlevel, tilesize, location));
            }
        }
    }

    return splits;
}
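The split-range arithmetic above distributes totaltiles across numsplits splits, handing the leftovers (totaltiles modulo numsplits) one each to the first splits. A small standalone check of that math with hypothetical numbers:

public class SplitRangeCheck {
    public static void main(String[] args) {
        // Hypothetical numbers: 10 tiles in 3 splits -> splitrange = 3,
        // leftovers = 1, yielding half-open ranges [0,4), [4,7), [7,10).
        long totaltiles = 10;
        long numsplits = 3;
        long splitrange = totaltiles / numsplits;
        long leftovers = totaltiles - (numsplits * splitrange);

        long start = 0;
        for (int i = 0; i < numsplits; i++) {
            long end = start + splitrange;
            if (leftovers > 0) {
                end++;
                leftovers--;
            }
            System.out.println("[" + start + "," + end + ")");
            start = end;
        }
    }
}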
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public void setrep(long secondsToWait, boolean recursive, short replication, String... uris) {
    Assert.isTrue(replication >= 1, "Replication must be >=1");

    List<Path> waitList = (secondsToWait >= 0 ? new ArrayList<Path>() : null);

    try {
        for (String uri : uris) {
            Path srcPath = new Path(uri);
            FileSystem srcFs = getFS(srcPath);
            Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
            for (Path src : srcs) {
                setrep(replication, recursive, srcFs, src, waitList);
            }
        }

        if (waitList != null) {
            boolean waitUntilDone = (secondsToWait == 0);
            long timeLeft = TimeUnit.SECONDS.toMillis(secondsToWait);

            for (Path path : waitList) {
                FileSystem srcFs = getFS(path);
                FileStatus status = srcFs.getFileStatus(path);
                long len = status.getLen();

                boolean done = false;

                while (!done) {
                    BlockLocation[] locations = srcFs.getFileBlockLocations(status, 0, len);
                    int i = 0;
                    for (; i < locations.length && locations[i].getHosts().length == replication; i++) {
                    }
                    done = (i == locations.length);

                    if (!done && (waitUntilDone || timeLeft > 5000)) {
                        try {
                            // sleep for 10s
                            Thread.sleep(10000);
                        } catch (InterruptedException e) {
                            return;
                        }
                        timeLeft -= 10000;
                    }
                }
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot set replication " + ex.getMessage(), ex);
    }
}
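The wait loop's per-file test can be read as a predicate: a file is done once every block reports exactly the target number of replica hosts. A hypothetical helper, not part of FsShell, distilling that check:

import java.io.IOException;

import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;

// Hypothetical helper: true once every block of the file reports the
// target number of replica hosts.
static boolean isFullyReplicated(FileSystem fs, FileStatus status, short replication) throws IOException {
    BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
    for (BlockLocation location : locations) {
        if (location.getHosts().length != replication) {
            return false;
        }
    }
    return true;
}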
From source file:org.springframework.data.hadoop.store.split.AbstractSplitterTests.java
License:Apache License
protected static Path mockWithFileSystem(int blockCount, long blockSize, long extraBlockSize) throws Exception {
    final ArrayList<BlockLocation> blocks = new ArrayList<BlockLocation>();
    long offset = 0;
    int i = 0;
    for (; i < blockCount; i++) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                blockSize));
        offset += blockSize;
    }

    // extra just means that we add a non full last block
    if (extraBlockSize > 0 && extraBlockSize < blockSize) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                extraBlockSize));
        offset += extraBlockSize;
    }

    FileStatus mStatus = mock(FileStatus.class);
    Path mPath = mock(Path.class);
    FileSystem mFs = mock(FileSystem.class);
    when(mStatus.getLen()).thenReturn(offset);
    when(mStatus.getBlockSize()).thenReturn(blockSize);
    when(mFs.getFileStatus(mPath)).thenReturn(mStatus);

    when(mFs.getFileBlockLocations((FileStatus) any(), anyLong(), anyLong()))
            .thenAnswer(new Answer<BlockLocation[]>() {
                @Override
                public BlockLocation[] answer(InvocationOnMock invocation) throws Throwable {
                    Object[] arguments = invocation.getArguments();
                    return findBlocks(blocks, (Long) arguments[1], (Long) arguments[2]);
                }
            });
    when(mPath.getFileSystem((Configuration) any())).thenReturn(mFs);
    return mPath;
}
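The mocked answer delegates to a findBlocks helper that this excerpt does not show. A plausible sketch, assuming it returns the fake blocks overlapping the requested byte range [start, start + len):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.BlockLocation;

// Plausible reconstruction of the unshown findBlocks helper: keep every
// mocked block whose byte range overlaps [start, start + len).
static BlockLocation[] findBlocks(List<BlockLocation> blocks, long start, long len) {
    List<BlockLocation> matched = new ArrayList<BlockLocation>();
    for (BlockLocation block : blocks) {
        if (block.getOffset() < start + len && block.getOffset() + block.getLength() > start) {
            matched.add(block);
        }
    }
    return matched.toArray(new BlockLocation[0]);
}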
From source file:org.springframework.data.hadoop.store.split.SlopBlockSplitter.java
License:Apache License
@Override
public List<Split> getSplits(Path path) throws IOException {
    List<Split> splits = new ArrayList<Split>();

    FileSystem fs = path.getFileSystem(getConfiguration());
    FileStatus status = fs.getFileStatus(path);
    long length = status.getLen();
    BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, length);

    long blockSize = status.getBlockSize();
    long splitSize = computeSplitSize(blockSize, getMinSplitSize(), getMaxSplitSize());

    long remaining = length;
    // carve full-size splits while the remainder exceeds `slop` times the split size
    while (((double) remaining) / splitSize > slop) {
        int i = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, splitSize, blocks[i].getHosts()));
        remaining -= splitSize;
    }
    // the final, possibly smaller remainder becomes the last split
    if (remaining != 0) {
        int blkIndex = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, remaining, blocks[blkIndex].getHosts()));
    }

    return splits;
}
From source file:org.springframework.yarn.batch.partition.HdfsSplitBatchPartitionHandler.java
License:Apache License
@Override
protected Map<StepExecution, ContainerRequestHint> createResourceRequestData(Set<StepExecution> stepExecutions)
        throws Exception {
    Map<StepExecution, ContainerRequestHint> requests = new HashMap<StepExecution, ContainerRequestHint>();

    for (StepExecution execution : stepExecutions) {
        String fileName = execution.getExecutionContext().getString("fileName");
        long splitStart = execution.getExecutionContext().getLong("splitStart");
        long splitLength = execution.getExecutionContext().getLong("splitLength");

        log.debug("Creating request data for stepExecution=" + execution + " with fileName=" + fileName
                + " splitStart=" + splitStart + " splitLength=" + splitLength);

        FileSystem fs = FileSystem.get(configuration);
        Path path = new Path(execution.getExecutionContext().getString("fileName"));
        HashSet<String> hostsSet = new HashSet<String>();

        BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(path, splitStart, splitLength);
        for (BlockLocation blockLocation : fileBlockLocations) {
            for (String host : blockLocation.getHosts()) {
                hostsSet.add(host);
            }
            log.debug("block: " + blockLocation + " topologypaths="
                    + StringUtils.arrayToCommaDelimitedString(blockLocation.getTopologyPaths()));
        }

        String[] hosts = hostsSet.toArray(new String[0]);
        String[] racks = new String[0];

        // hints only for hosts
        requests.put(execution, new ContainerRequestHint(execution, null, hosts, racks, null));
    }

    return requests;
}