Example usage for org.apache.hadoop.fs FileSystem getFileBlockLocations

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem.getFileBlockLocations.

Prototype

public BlockLocation[] getFileBlockLocations(Path p, long start, long len) throws IOException 

Document

Return an array containing the hostnames, offsets, and sizes of portions of the given file.
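
Before the project examples below, here is a minimal, self-contained sketch of a typical call. The class name BlockLocationExample and the path /tmp/example.txt are hypothetical placeholders chosen for illustration; only the Hadoop API calls themselves come from the prototype above.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLocationExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // hypothetical input file
        FileSystem fs = path.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(path);

        // Ask for the block locations covering the whole file (start = 0, len = file length)
        BlockLocation[] locations = fs.getFileBlockLocations(path, 0, status.getLen());

        for (BlockLocation location : locations) {
            System.out.println("offset=" + location.getOffset()
                    + " length=" + location.getLength()
                    + " hosts=" + String.join(",", location.getHosts()));
        }
    }
}

Most of the examples that follow pass a FileStatus instead of a Path; FileSystem provides an equivalent getFileBlockLocations(FileStatus file, long start, long len) overload, as those examples show.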

Usage

From source file:org.apache.drill.exec.store.parquet.metadata.Metadata.java

License:Apache License

/**
 * Get the host affinity for a row group.
 *
 * @param fileStatus the parquet file
 * @param fs         the file system used to look up block locations
 * @param start      the start of the row group
 * @param length     the length of the row group
 * @return host affinity for the row group
 */
private Map<String, Float> getHostAffinity(FileStatus fileStatus, FileSystem fs, long start, long length)
        throws IOException {
    BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, start, length);
    Map<String, Float> hostAffinityMap = Maps.newHashMap();
    for (BlockLocation blockLocation : blockLocations) {
        for (String host : blockLocation.getHosts()) {
            Float currentAffinity = hostAffinityMap.get(host);
            float blockStart = blockLocation.getOffset();
            float blockEnd = blockStart + blockLocation.getLength();
            float rowGroupEnd = start + length;
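            // The affinity contribution is the fraction of the row group that falls within this block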
            Float newAffinity = (blockLocation.getLength() - (blockStart < start ? start - blockStart : 0)
                    - (blockEnd > rowGroupEnd ? blockEnd - rowGroupEnd : 0)) / length;
            if (currentAffinity != null) {
                hostAffinityMap.put(host, currentAffinity + newAffinity);
            } else {
                hostAffinityMap.put(host, newAffinity);
            }
        }
    }
    return hostAffinityMap;
}

From source file:org.apache.giraph.io.formats.GiraphFileInputFormat.java

License:Apache License

/**
 * Common method for generating the list of vertex/edge input splits.
 *
 * @param job The job
 * @param files Array of FileStatus objects for vertex/edge input files
 * @return The list of vertex/edge input splits
 * @throws IOException
 */
private List<InputSplit> getSplits(JobContext job, List<FileStatus> files) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();

    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
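            // Carve off splitSize-byte splits while the remainder exceeds SPLIT_SLOP times the
            // split size; whatever is left (at most SPLIT_SLOP * splitSize bytes) becomes the final split.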
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            //Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }
    return splits;
}

From source file:org.apache.hama.bsp.FileInputFormat.java

License:Apache License

/**
 * Splits files returned by {@link #listStatus(BSPJob)} when they're too big. <br/>
 * numSplits will be ignored by the framework.
 */
@Override
public InputSplit[] getSplits(BSPJob job, int numSplits) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    FileStatus[] files = listStatus(job);

    /*
     * TODO: This does not consider data locality. When the numSplits
     * (user-defined) is equal to or smaller than the number of DFS splits, we
     * should assign multiple splits to a task.
     */

    // take the short circuit path if we have already partitioned
    // if (numSplits == files.length) {
    // for (FileStatus file : files) {
    // if (file != null) {
    // splits.add(new FileSplit(file.getPath(), 0, file.getLen(),
    // new String[0]));
    // }
    // }
    // return splits.toArray(new FileSplit[splits.size()]);
    // }

    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Save the number of input files in the job-conf
    job.getConfiguration().setLong("bsp.input.files", files.length);

    LOG.debug("Total # of splits: " + splits.size());
    return splits.toArray(new InputSplit[splits.size()]);
}

From source file:org.apache.parquet.hadoop.ParquetInputFormat.java

License:Apache License

List<ParquetInputSplit> getSplits(Configuration configuration, List<Footer> footers, long maxSplitSize,
        long minSplitSize, ReadContext readContext) throws IOException {
    List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();
    Filter filter = ParquetInputFormat.getFilter(configuration);

    long rowGroupsDropped = 0;
    long totalRowGroups = 0;

    for (Footer footer : footers) {
        final Path file = footer.getFile();
        LOG.debug(file);
        FileSystem fs = file.getFileSystem(configuration);
        FileStatus fileStatus = fs.getFileStatus(file);
        ParquetMetadata parquetMetaData = footer.getParquetMetadata();
        List<BlockMetaData> blocks = parquetMetaData.getBlocks();

        List<BlockMetaData> filteredBlocks;

        totalRowGroups += blocks.size();
        filteredBlocks = RowGroupFilter.filterRowGroups(filter, blocks,
                parquetMetaData.getFileMetaData().getSchema());
        rowGroupsDropped += blocks.size() - filteredBlocks.size();

        if (filteredBlocks.isEmpty()) {
            continue;
        }

        BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        splits.addAll(generateSplits(filteredBlocks, fileBlockLocations, fileStatus,
                readContext.getRequestedSchema().toString(), readContext.getReadSupportMetadata(), minSplitSize,
                maxSplitSize));
    }

    if (rowGroupsDropped > 0 && totalRowGroups > 0) {
        int percentDropped = (int) ((((double) rowGroupsDropped) / totalRowGroups) * 100);
        LOG.info("Dropping " + rowGroupsDropped + " row groups that do not pass filter predicate! ("
                + percentDropped + "%)");
    } else {
        LOG.info("There were no row groups that could be dropped due to filter predicates");
    }
    return splits;
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputSplitFormat.java

License:Apache License

/**
 * This function returns the sample splits based on the chunk size
 * and the sampling rate.
 * @param job the job context
 * @return the list of sample input splits
 * @throws IOException
 */
public List<InputSplit> getSplitsSample(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    float rate = job.getConfiguration().getFloat(PigConfiguration.PIG_H2IRG_ROLLUP_RATE, 0);
    double maximumSamplingSize = 0;
    String inputFile = job.getConfiguration().get("pig.input.dirs", "");
    if (inputFile != "") {
        Path pPivot = new Path(inputFile);
        FileSystem fs = FileSystem.get(job.getConfiguration());
        FileStatus stt = fs.getFileStatus(pPivot);
        long fileLength = stt.getLen();
        maximumSamplingSize = fileLength * rate;
    }

    ArrayList<ArrayList<InputSplit>> splitArray = null;

    int noOfSizes = 0;
    String splitSizes[] = null;
    long preVariableSizes[] = null;
    String variableSplit = job.getConfiguration().get("pig.h2irg.rollup.variablesplit", "");
    if (variableSplit.equals("")) {
        noOfSizes = 4;
        preVariableSizes = new long[4];
        preVariableSizes[0] = 256;
        preVariableSizes[1] = 512;
        preVariableSizes[2] = 1024;
        preVariableSizes[3] = 2048;
    } else {
        splitSizes = variableSplit.split(",");
        noOfSizes = splitSizes.length;
        preVariableSizes = new long[noOfSizes];
        for (int i = 0; i < noOfSizes; i++) {
            preVariableSizes[i] = Long.parseLong(splitSizes[i]);
        }
    }

    long postVariableSizes[] = new long[noOfSizes];
    long oneKB = 1024;
    for (int i = 0; i < noOfSizes; i++) {
        postVariableSizes[i] = preVariableSizes[i] * oneKB;
    }

    splitArray = new ArrayList<ArrayList<InputSplit>>();
    for (int i = 0; i < noOfSizes + 1; i++) {
        ArrayList<InputSplit> single = new ArrayList<InputSplit>();
        splitArray.add(single);
    }

    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long bytesRemaining = length;
            long totalBytes = 0;
            int count = noOfSizes;
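            // For each candidate split size, carve splits of that size until roughly
            // 1/noOfSizes of the file has been covered, then move on to the next size.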
            for (int i = 0; i < noOfSizes; i++) {
                while (totalBytes < length / noOfSizes) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    addSplit(splitArray.get(i), new FileSplit(path, length - bytesRemaining,
                            postVariableSizes[i], blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= postVariableSizes[i];
                    totalBytes += postVariableSizes[i];
                }
                count--;
                bytesRemaining = count * (length / noOfSizes);
                totalBytes = 0;
            }

            if (bytesRemaining != 0) {
                addSplit(splitArray.get(splitArray.size() - 1), new FileSplit(path, length - bytesRemaining,
                        bytesRemaining, blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            addSplit(splitArray.get(splitArray.size() - 1),
                    new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            addSplit(splitArray.get(splitArray.size() - 1), new FileSplit(path, 0, length, new String[0]));
        }
    }

    for (int i = 0; i < noOfSizes; i++) {
        log.info("Total # of " + postVariableSizes[i] + " splits: " + splitArray.get(i).size());
        Collections.shuffle(splitArray.get(i));
    }

    List<InputSplit> splitsReturn = new ArrayList<InputSplit>();

    for (int i = 0; i < noOfSizes; i++) {
        int noSampleSplit = (int) Math.ceil(rate * splitArray.get(i).size());
        if (noSampleSplit == 0)
            noSampleSplit = 1;
        for (int j = 0; j < noSampleSplit; j++) {
            splitsReturn.add(splitArray.get(i).get(j));
        }
    }

    log.info("Total # of sampling splits: " + splitsReturn.size());

    return splitsReturn;
}

From source file:org.apache.rya.accumulo.mr.AccumuloHDFSFileInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
    //read the params from AccumuloInputFormat
    Configuration conf = jobContext.getConfiguration();
    Instance instance = MRUtils.AccumuloProps.getInstance(jobContext);
    String user = MRUtils.AccumuloProps.getUsername(jobContext);
    AuthenticationToken password = MRUtils.AccumuloProps.getPassword(jobContext);
    String table = MRUtils.AccumuloProps.getTablename(jobContext);
    ArgumentChecker.notNull(instance);
    ArgumentChecker.notNull(table);

    //find the files necessary
    try {
        Connector connector = instance.getConnector(user, password);
        TableOperations tos = connector.tableOperations();
        String tableId = tos.tableIdMap().get(table);
        Scanner scanner = connector.createScanner("accumulo.metadata", Authorizations.EMPTY); //TODO: auths?
        scanner.setRange(new Range(new Text(tableId + "\u0000"), new Text(tableId + "\uFFFD")));
        scanner.fetchColumnFamily(new Text("file"));
        List<String> files = new ArrayList<String>();
        List<InputSplit> fileSplits = new ArrayList<InputSplit>();
        for (Map.Entry<Key, Value> entry : scanner) {
            String file = entry.getKey().getColumnQualifier().toString();
            Path path = new Path(file);
            FileSystem fs = path.getFileSystem(conf);
            FileStatus fileStatus = fs.getFileStatus(path);
            long len = fileStatus.getLen();
            BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(fileStatus, 0, len);
            files.add(file);
            fileSplits.add(new FileSplit(path, 0, len, fileBlockLocations[0].getHosts()));
        }
        System.out.println(files);
        return fileSplits;
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file:org.apache.solr.store.hdfs.HdfsLocalityReporter.java

License:Apache License

/**
 * Update the cached block locations for the given directory. This includes deleting any files that no longer exist in
 * the file system and adding any new files that have shown up.
 *
 * @param dir
 *          The directory to refresh
 * @throws IOException
 *           If there is a problem getting info from HDFS
 */
private void refreshDirectory(HdfsDirectory dir) throws IOException {
    Map<FileStatus, BlockLocation[]> directoryCache = cache.get(dir);
    Set<FileStatus> cachedStatuses = directoryCache.keySet();

    FileSystem fs = dir.getFileSystem();
    FileStatus[] statuses = fs.listStatus(dir.getHdfsDirPath());
    List<FileStatus> statusList = Arrays.asList(statuses);

    logger.debug("Updating locality information for: {}", statusList);

    // Keep only the files that still exist
    cachedStatuses.retainAll(statusList);

    // Fill in missing entries in the cache
    for (FileStatus status : statusList) {
        if (!status.isDirectory() && !directoryCache.containsKey(status)) {
            BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
            directoryCache.put(status, locations);
        }
    }
}

From source file:org.apache.sysml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java

License:Apache License

/**
 * Get the list of hostnames where the input split is located.
 */
@Override
public String[] getLocations() throws IOException {
    //Timing time = new Timing();
    //time.start();

    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = IOUtilFunctions.getFileSystem(getPath(), job);

    //read task string
    LongWritable key = new LongWritable();
    Text value = new Text();
    RecordReader<LongWritable, Text> reader = null;
    try {
        reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
        reader.next(key, value);
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }

    //parse task
    Task t = Task.parseCompactString(value.toString());

    //get all locations
    HashMap<String, Integer> hosts = new HashMap<>();

    if (t.getType() == TaskType.SET) {
        for (IntObject val : t.getIterations()) {
            String fname = _fname + "/" + String.valueOf(((val.getLongValue() - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    } else //TaskType.RANGE
    {
        //since this is a serial process, we use just the first iteration
        //as a heuristic for location information
        long lFrom = t.getIterations().get(0).getLongValue();
        long lTo = t.getIterations().get(1).getLongValue();
        for (long li : new long[] { lFrom, lTo }) {
            String fname = _fname + "/" + String.valueOf(((li - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    }

    //majority consensus on top host
    return getTopHosts(hosts);
}

From source file:org.apache.tajo.storage.AbstractStorageManager.java

License:Apache License

/**
 * Generate the list of files and make them into FileSplits.
 *
 * @throws IOException
 */
public List<FileFragment> getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs)
        throws IOException {
    // generate splits

    List<FileFragment> splits = Lists.newArrayList();
    List<FileFragment> volumeSplits = Lists.newArrayList();
    List<BlockLocation> blockLocations = Lists.newArrayList();

    for (Path p : inputs) {
        FileSystem fs = p.getFileSystem(conf);
        ArrayList<FileStatus> files = Lists.newArrayList();
        if (fs.isFile(p)) {
            files.addAll(Lists.newArrayList(fs.getFileStatus(p)));
        } else {
            files.addAll(listStatus(p));
        }

        int previousSplitSize = splits.size();
        for (FileStatus file : files) {
            Path path = file.getPath();
            long length = file.getLen();
            if (length > 0) {
                // Get locations of blocks of file
                BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
                boolean splittable = isSplittable(meta, schema, path, file);
                if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) {

                    if (splittable) {
                        for (BlockLocation blockLocation : blkLocations) {
                            volumeSplits.add(makeSplit(tableName, path, blockLocation));
                        }
                        blockLocations.addAll(Arrays.asList(blkLocations));

                    } else { // Non splittable
                        long blockSize = blkLocations[0].getLength();
                        if (blockSize >= length) {
                            blockLocations.addAll(Arrays.asList(blkLocations));
                            for (BlockLocation blockLocation : blkLocations) {
                                volumeSplits.add(makeSplit(tableName, path, blockLocation));
                            }
                        } else {
                            splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                        }
                    }

                } else {
                    if (splittable) {

                        long minSize = Math.max(getMinSplitSize(), 1);

                        long blockSize = file.getBlockSize(); // the s3n REST API reports a block size, but only one block location is returned
                        long splitSize = Math.max(minSize, blockSize);
                        long bytesRemaining = length;

                        // for s3
                        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize,
                                    blkLocations[blkIndex].getHosts()));
                            bytesRemaining -= splitSize;
                        }
                        if (bytesRemaining > 0) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining,
                                    blkLocations[blkIndex].getHosts()));
                        }
                    } else { // Non splittable
                        splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                    }
                }
            } else {
                //for zero length files
                splits.add(makeSplit(tableName, path, 0, length));
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize));
        }
    }

    // Combine original fileFragments with new VolumeId information
    setVolumeMeta(volumeSplits, blockLocations);
    splits.addAll(volumeSplits);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}

From source file:org.apache.tajo.storage.FileStorageManager.java

License:Apache License

/**
 * Generate the list of files and make them into FileSplits.
 *
 * @throws IOException
 */
public List<Fragment> getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs)
        throws IOException {
    // generate splits

    List<Fragment> splits = Lists.newArrayList();
    List<Fragment> volumeSplits = Lists.newArrayList();
    List<BlockLocation> blockLocations = Lists.newArrayList();

    for (Path p : inputs) {
        FileSystem fs = p.getFileSystem(conf);

        ArrayList<FileStatus> files = Lists.newArrayList();
        if (fs.isFile(p)) {
            files.addAll(Lists.newArrayList(fs.getFileStatus(p)));
        } else {
            files.addAll(listStatus(p));
        }

        int previousSplitSize = splits.size();
        for (FileStatus file : files) {
            Path path = file.getPath();
            long length = file.getLen();
            if (length > 0) {
                // Get locations of blocks of file
                BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
                boolean splittable = isSplittable(meta, schema, path, file);
                if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) {

                    if (splittable) {
                        for (BlockLocation blockLocation : blkLocations) {
                            volumeSplits.add(makeSplit(tableName, path, blockLocation));
                        }
                        blockLocations.addAll(Arrays.asList(blkLocations));

                    } else { // Non splittable
                        long blockSize = blkLocations[0].getLength();
                        if (blockSize >= length) {
                            blockLocations.addAll(Arrays.asList(blkLocations));
                            for (BlockLocation blockLocation : blkLocations) {
                                volumeSplits.add(makeSplit(tableName, path, blockLocation));
                            }
                        } else {
                            splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                        }
                    }

                } else {
                    if (splittable) {

                        long minSize = Math.max(getMinSplitSize(), 1);

                        long blockSize = file.getBlockSize(); // the s3n REST API reports a block size, but only one block location is returned
                        long splitSize = Math.max(minSize, blockSize);
                        long bytesRemaining = length;

                        // for s3
                        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize,
                                    blkLocations[blkIndex].getHosts()));
                            bytesRemaining -= splitSize;
                        }
                        if (bytesRemaining > 0) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining,
                                    blkLocations[blkIndex].getHosts()));
                        }
                    } else { // Non splittable
                        splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                    }
                }
            } else {
                //for zero length files
                splits.add(makeSplit(tableName, path, 0, length));
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize));
        }
    }

    // Combine original fileFragments with new VolumeId information
    setVolumeMeta(volumeSplits, blockLocations);
    splits.addAll(volumeSplits);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}