Example usage for org.apache.hadoop.fs FileSystem getFileBlockLocations

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem#getFileBlockLocations.

Prototype

public BlockLocation[] getFileBlockLocations(Path p, long start, long len) throws IOException 

Document

Returns an array containing the hostnames, offsets, and sizes of portions of the given file.
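
Before the project-specific examples below, here is a minimal, hedged sketch of calling this method directly. The path and configuration are placeholders; it assumes a reachable HDFS (or other Hadoop-compatible) filesystem is configured on the classpath.

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PrintBlockLocations {
    public static void main(String[] args) throws Exception {
        // Placeholder path: replace with a file that exists on your cluster.
        Path file = new Path("/tmp/example.txt");
        FileSystem fs = FileSystem.get(new Configuration());

        // Ask for the whole file by using its actual length as the range.
        FileStatus status = fs.getFileStatus(file);
        BlockLocation[] locations = fs.getFileBlockLocations(file, 0, status.getLen());
        for (BlockLocation loc : locations) {
            System.out.println("offset=" + loc.getOffset() + " length=" + loc.getLength()
                    + " hosts=" + Arrays.toString(loc.getHosts()));
        }
        fs.close();
    }
}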

Usage

From source file: com.cloudera.GetBlockLocations.java

License: Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    String url = getStringOrDie("get.block.locations.path");
    final FileSystem fs = FileSystem.get(new URI(url), conf);

    if (!fs.exists(new Path(url))) {
        System.out.println("no file at " + url);
        System.exit(1);
    }
    BlockLocation[] locs = null;
    try {
        locs = fs.getFileBlockLocations(new Path(url), 0, Long.MAX_VALUE);
    } catch (IOException e) {
        System.out.println("Error calling getFileBlockLocations(" + url + ")\n");
        e.printStackTrace(System.err);
        System.exit(1);
    }

    String prefix = "";
    for (BlockLocation loc : locs) {
        System.out.println(prefix);
        System.out.println("{");
        System.out.println("  hosts =         " + Arrays.toString(loc.getHosts()));
        System.out.println("  cachedHosts =   " + Arrays.toString(loc.getCachedHosts()));
        System.out.println("  names    =      " + Arrays.toString(loc.getNames()));
        System.out.println("  topologyPaths = " + Arrays.toString(loc.getTopologyPaths()));
        System.out.println("  offset =        " + loc.getOffset());
        System.out.println("  length =        " + loc.getLength());
        System.out.println("  corrupt =       " + loc.isCorrupt());
        System.out.println("}");
        prefix = ",";
    }
}

From source file: com.cloudera.impala.catalog.HdfsTable.java

License: Apache License

/**
 * Loads the file block metadata for the given collection of FileDescriptors.  The
 * FileDescriptors are passed as a tree, where the first level is indexed by
 * filesystem, the second level is indexed by partition location, and the leaves are
 * the list of files that exist under each directory.
 */
private void loadBlockMd(Map<FsKey, Map<String, List<FileDescriptor>>> perFsFileDescs) throws RuntimeException {
    Preconditions.checkNotNull(perFsFileDescs);
    LOG.debug("load block md for " + name_);

    for (FsKey fsEntry : perFsFileDescs.keySet()) {
        FileSystem fs = fsEntry.filesystem;
        // Store all BlockLocations so they can be reused when loading the disk IDs.
        List<BlockLocation> blockLocations = Lists.newArrayList();
        int numCachedBlocks = 0;
        Map<String, List<FileDescriptor>> partitionToFds = perFsFileDescs.get(fsEntry);
        Preconditions.checkNotNull(partitionToFds);
        // loop over all files and record their block metadata, minus volume ids
        for (String partitionDir : partitionToFds.keySet()) {
            Path partDirPath = new Path(partitionDir);
            for (FileDescriptor fileDescriptor : partitionToFds.get(partitionDir)) {
                Path p = new Path(partDirPath, fileDescriptor.getFileName());
                try {
                    FileStatus fileStatus = fs.getFileStatus(p);
                    // fileDescriptors should not contain directories.
                    Preconditions.checkArgument(!fileStatus.isDirectory());
                    BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
                    Preconditions.checkNotNull(locations);
                    blockLocations.addAll(Arrays.asList(locations));

                    // Loop over all blocks in the file.
                    for (BlockLocation block : locations) {
                        String[] blockHostPorts;
                        try {
                            blockHostPorts = block.getNames();
                        } catch (IOException e) {
                            // Not expected in practice: getNames() declares IOException
                            // but does not throw it for HDFS block locations.
                            String errorMsg = "BlockLocation.getNames() failed:\n" + e.getMessage();
                            LOG.error(errorMsg);
                            throw new IllegalStateException(errorMsg);
                        }
                        // Now enumerate all replicas of the block, adding any unknown hosts to
                        // hostIndex_ and the index for that host to replicaHostIdxs.
                        List<Integer> replicaHostIdxs = new ArrayList<Integer>(blockHostPorts.length);
                        for (int i = 0; i < blockHostPorts.length; ++i) {
                            String[] ip_port = blockHostPorts[i].split(":");
                            Preconditions.checkState(ip_port.length == 2);
                            TNetworkAddress network_address = new TNetworkAddress(ip_port[0],
                                    Integer.parseInt(ip_port[1]));
                            replicaHostIdxs.add(hostIndex_.getIndex(network_address));
                        }
                        fileDescriptor.addFileBlock(
                                new FileBlock(block.getOffset(), block.getLength(), replicaHostIdxs));
                    }
                } catch (IOException e) {
                    throw new RuntimeException(
                            "couldn't determine block locations for path '" + p + "':\n" + e.getMessage(), e);
                }
            }
        }

        if (SUPPORTS_VOLUME_ID && fs instanceof DistributedFileSystem) {
            LOG.trace("loading disk ids for: " + getFullName() + ". nodes: " + getNumNodes() + ". file system: "
                    + fsEntry);
            loadDiskIds((DistributedFileSystem) fs, blockLocations, partitionToFds);
            LOG.trace("completed load of disk ids for: " + getFullName());
        }
    }
}

From source file: com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java

License: Apache License

/**
 * List file status by calling fileSystem.listStatus.
 */
private static void listStatus(String dirPath) {
    Path path = new Path(dirPath);
    boolean exceptionThrown = false;
    try {
        FileSystem fs = path.getFileSystem(LoadMetadataUtil.getConf());
        FileStatus[] fileStatus = fs.listStatus(path);
        if (fs.exists(path)) {
            for (FileStatus status : fileStatus) {
                BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
                for (BlockLocation loc : locations) {
                    loc.getNames();
                    loc.getHosts();
                }
            }
        }
    } catch (IOException e) {
        exceptionThrown = true;
        LOG.error("Failed to list Status", e);
    }
    assertFalse(exceptionThrown);
}

From source file: com.cloudera.impala.util.LoadMetadataUtil.java

License: Apache License

/**
 * Queries the filesystem to load the file block metadata (e.g. DFS blocks) for the
 * given file. Adds the newly created block metadata and block location to the
 * perFsFileBlocks, so that the storage IDs for each block can be retrieved from
 * BlockLocation.
 *
 * Must be threadsafe. Access to 'perFsFileBlocks' and 'hostIndex' must be protected.
 */
private static void loadBlockMetadata(FileSystem fs, FileStatus file, FileDescriptor fd,
        HdfsFileFormat fileFormat, Map<FsKey, FileBlocksInfo> perFsFileBlocks, String tblName,
        ListMap<TNetworkAddress> hostIndex) {
    Preconditions.checkNotNull(fd);
    Preconditions.checkNotNull(perFsFileBlocks);
    Preconditions.checkArgument(!file.isDirectory());
    LOG.debug("load block md for " + tblName + " file " + fd.getFileName());

    if (!FileSystemUtil.hasGetFileBlockLocations(fs)) {
        synthesizeBlockMetadata(file, fd, fileFormat, hostIndex);
        return;
    }
    try {
        BlockLocation[] locations = null;
        if (file instanceof LocatedFileStatus) {
            locations = ((LocatedFileStatus) file).getBlockLocations();
        } else {
            locations = fs.getFileBlockLocations(file, 0, file.getLen());
        }
        Preconditions.checkNotNull(locations);

        // Loop over all blocks in the file.
        for (BlockLocation loc : locations) {
            Preconditions.checkNotNull(loc);
            fd.addFileBlock(createFileBlock(loc, hostIndex));
        }

        // Remember the THdfsFileBlocks and corresponding BlockLocations. Once all the
        // blocks are collected, the disk IDs will be queried in one batch per filesystem.
        FsKey fsKey = new FsKey(fs);
        synchronized (perFsFileBlocks) {
            FileBlocksInfo infos = perFsFileBlocks.get(fsKey);
            if (infos == null) {
                infos = new FileBlocksInfo();
                perFsFileBlocks.put(fsKey, infos);
            }
            infos.addBlocks(fd.getFileBlocks(), Arrays.asList(locations));
        }
    } catch (IOException e) {
        throw new RuntimeException(
                "couldn't determine block locations for path '" + file.getPath() + "':\n" + e.getMessage(), e);
    }
}
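
The LocatedFileStatus branch above skips a second round trip to the namenode when the file listing already carries block locations. The sketch below shows that pattern directly via FileSystem.listLocatedStatus; it is an illustration only, not part of the Impala source, and the directory path is a placeholder.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListWithLocations {
    public static void main(String[] args) throws Exception {
        // Placeholder directory: replace with a real path on your cluster.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // Each LocatedFileStatus already carries its BlockLocations, so no
        // per-file getFileBlockLocations() call is needed.
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(dir);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            for (BlockLocation loc : status.getBlockLocations()) {
                System.out.println(status.getPath() + " block at offset " + loc.getOffset());
            }
        }
    }
}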

From source file: com.cloudera.kitten.appmaster.util.HDFSFileFinder.java

License: Open Source License

public static Map<String, Long> getNumBytesOfGlobHeldByDatanodes(Path p, Configuration conf)
        throws IOException {
    FileSystem fs = p.getFileSystem(conf);

    HashMap<String, Long> bytesHeld = Maps.newHashMap();
    for (FileStatus f : fs.globStatus(p)) {
        // Query block locations for each matched file rather than the glob pattern itself.
        BlockLocation[] bls = fs.getFileBlockLocations(f.getPath(), 0, f.getLen());
        if (bls.length > 0) {
            for (BlockLocation bl : bls) {
                long l = bl.getLength();
                for (String name : bl.getNames()) {
                    if (bytesHeld.containsKey(name))
                        bytesHeld.put(name, bytesHeld.get(name) + l);
                    else
                        bytesHeld.put(name, l);
                }
            }
        }
    }

    return bytesHeld;
}
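
A hypothetical invocation of the helper above, assuming the HDFSFileFinder class and a reachable cluster; the glob is a placeholder.

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import com.cloudera.kitten.appmaster.util.HDFSFileFinder;

public class BytesPerDatanode {
    public static void main(String[] args) throws Exception {
        // Placeholder glob: replace with the files of interest on your cluster.
        Map<String, Long> bytesByDatanode = HDFSFileFinder.getNumBytesOfGlobHeldByDatanodes(
                new Path("/data/part-*"), new Configuration());
        for (Map.Entry<String, Long> e : bytesByDatanode.entrySet()) {
            System.out.println(e.getKey() + " holds " + e.getValue() + " bytes");
        }
    }
}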

From source file: com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License: Apache License

/** 
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
            if (isSplitable(job, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkIndex].getHosts()));
                }
            } else { // not splitable
                splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts()));
            }
        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
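
The getBlockIndex calls above map a split's starting offset to the block that contains it, so each split advertises that block's hosts for locality. The following sketch mirrors that lookup for illustration only; Hadoop's FileInputFormat already provides an equivalent protected helper.

import org.apache.hadoop.fs.BlockLocation;

final class BlockIndexLookup {
    // Returns the index of the block whose byte range [offset, offset + length)
    // contains the given file offset.
    static int blockIndexFor(BlockLocation[] blocks, long offset) {
        for (int i = 0; i < blocks.length; i++) {
            long start = blocks[i].getOffset();
            long end = start + blocks[i].getLength();
            if (offset >= start && offset < end) {
                return i;
            }
        }
        throw new IllegalArgumentException("Offset " + offset + " is outside the file's blocks");
    }
}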

From source file: com.edwardsit.spark4n6.EWFImageInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    log.setLevel(Level.DEBUG);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    BlockLocation[] blkLocations = null;
    Path path = null;
    FileSystem fs = null;
    EWFFileReader ewf = null;
    ArrayList<EWFSection.SectionPrefix> sections = null;
    Iterator<EWFSection.SectionPrefix> it = null;
    EWFSection.SectionPrefix sp = null;
    Path priorFile = null;
    long priorOffset = 0L;
    FileStatus priorFileStatus = null;
    chunkSize = new EWFSegmentFileReader(fs).DEFAULT_CHUNK_SIZE;
    long priorStart = 0L;
    int blkIndex = 0;
    for (FileStatus file : files) {
        path = file.getPath();
        fs = path.getFileSystem(job.getConfiguration());
        if (path.getName().endsWith(".E01")) {

            ewf = new EWFFileReader(fs, path);
            sections = ewf.getSectionPrefixArray();
            it = sections.iterator();
            while (it.hasNext()) {
                sp = it.next();
                if (sp.sectionType.equals(EWFSection.SectionType.TABLE_TYPE)) {
                    priorFileStatus = fs.getFileStatus(priorFile);
                    for (long i = sp.chunkCount; i > 0L; i = i - getChunksPerSplit(priorFileStatus)) {
                        if (priorFileStatus instanceof LocatedFileStatus) {
                            blkLocations = ((LocatedFileStatus) priorFileStatus).getBlockLocations();
                        } else {
                            blkLocations = fs.getFileBlockLocations(priorFileStatus, priorOffset,
                                    (getChunksPerSplit(priorFileStatus) * chunkSize));
                        }
                        blkIndex = getBlockIndex(blkLocations, priorOffset);
                        if (i > getChunksPerSplit(priorFileStatus)) {
                            log.debug("splits.add(makeSplit(" + priorFile + ", " + (priorStart * chunkSize)
                                    + ", " + (getChunksPerSplit(priorFileStatus) * chunkSize) + ", "
                                    + listHosts(blkLocations, blkIndex) + ");");
                            splits.add(makeSplit(priorFile, (priorStart * chunkSize),
                                    (getChunksPerSplit(priorFileStatus) * chunkSize),
                                    blkLocations[blkIndex].getHosts()));
                            priorStart += getChunksPerSplit(priorFileStatus);
                        } else {
                            log.debug("splits.add(makeSplit(" + priorFile + ", " + (priorStart * chunkSize)
                                    + ", " + (i * chunkSize) + ", " + listHosts(blkLocations, blkIndex) + ");");
                            splits.add(makeSplit(priorFile, (priorStart * chunkSize), (i * chunkSize),
                                    blkLocations[blkIndex].getHosts()));
                            priorStart += i;
                        }
                    }
                }
                priorFile = sp.file;
                priorOffset = sp.fileOffset;
            }
        }
    }
    return splits;
}

From source file: com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License: Apache License

private void loadPartition(HivePartitionMetadata partition) throws IOException {
    String partitionName = partition.getHivePartition().getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = partition.getHivePartition().getEffectivePredicate();

    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(session.getUser(), path);

    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (bucketHandle.isPresent()) {
            throw new PrestoException(StandardErrorCode.NOT_SUPPORTED,
                    "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }

        // TODO: This should use an iterator like the HiveFileIterator
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // get the configuration for the target path -- it may be a different hdfs instance
            Configuration targetConfiguration = hdfsEnvironment.getConfiguration(targetPath);
            JobConf targetJob = new JobConf(targetConfiguration);
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);

            for (InputSplit inputSplit : targetSplits) {
                FileSplit split = (FileSplit) inputSplit;
                FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(session.getUser(), split.getPath());
                FileStatus file = targetFilesystem.getFileStatus(split.getPath());
                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file.getPath().toString(),
                        targetFilesystem.getFileBlockLocations(file, split.getStart(), split.getLength()),
                        split.getStart(), split.getLength(), schema, partitionKeys, false, session,
                        OptionalInt.empty(), effectivePredicate, partition.getColumnCoercions()));
                if (stopped) {
                    return;
                }
            }
        }
        return;
    }

    // If only one bucket could match: load that one file
    HiveFileIterator iterator = new HiveFileIterator(path, fs, directoryLister, namenodeStats, partitionName,
            inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions());
    if (!buckets.isEmpty()) {
        int bucketCount = buckets.get(0).getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (HiveBucket bucket : buckets) {
            int bucketNumber = bucket.getBucketNumber();
            LocatedFileStatus file = list.get(bucketNumber);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketNumber),
                    effectivePredicate, partition.getColumnCoercions()));
        }

        return;
    }

    // If table is bucketed: list the directory, sort, tag with bucket id
    if (bucketHandle.isPresent()) {
        // HiveFileIterator skips hidden files automatically.
        int bucketCount = bucketHandle.get().getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (int bucketIndex = 0; bucketIndex < bucketCount; bucketIndex++) {
            LocatedFileStatus file = list.get(bucketIndex);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketIndex),
                    iterator.getEffectivePredicate(), partition.getColumnCoercions()));
        }

        return;
    }

    fileIterators.addLast(iterator);
}

From source file: com.facebook.presto.hive.HiveSplitIterable.java

License: Apache License

private void loadPartitionSplits(final HiveSplitQueue hiveSplitQueue, SuspendingExecutor suspendingExecutor)
        throws InterruptedException {
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            semaphore.acquire();
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
            Path partitionPath = hdfsEnvironment.getFileSystemWrapper().wrap(path);

            FileSystem fs = partitionPath.getFileSystem(configuration);
            final LastSplitMarkingQueue markerQueue = new LastSplitMarkingQueue(hiveSplitQueue);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, partitionPath);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit)
                            .getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    markerQueue.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(),
                                    split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false));
                }
                markerQueue.finish();
                continue;
            }

            ListenableFuture<Void> partitionFuture = new AsyncRecursiveWalker(fs, suspendingExecutor)
                    .beginWalk(partitionPath, new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations) {
                            if (bucket.isPresent()
                                    && !fileMatchesBucket(file.getPath().getName(), bucket.get())) {
                                return;
                            }

                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                markerQueue.addToQueue(createHiveSplits(partitionName, file, blockLocations, 0,
                                        file.getLen(), schema, partitionKeys, splittable));
                            } catch (IOException e) {
                                hiveSplitQueue.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result) {
                    markerQueue.finish();
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t) {
                    markerQueue.finish();
                    semaphore.release();
                }
            });
            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result) {
                hiveSplitQueue.finished();
            }

            @Override
            public void onFailure(Throwable t) {
                hiveSplitQueue.fail(t);
            }
        });
    } catch (Throwable e) {
        hiveSplitQueue.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}

From source file: com.facebook.presto.hive.HiveSplitSourceProvider.java

License: Apache License

private void loadPartitionSplits(final HiveSplitSource hiveSplitSource, SuspendingExecutor suspendingExecutor,
        final ConnectorSession session) {
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

            FileSystem fs = path.getFileSystem(configuration);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, path);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit)
                            .getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(),
                                    split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false, session));
                }
                continue;
            }

            // TODO: this is currently serial across all partitions and should be done in suspendingExecutor
            if (bucket.isPresent()) {
                Optional<FileStatus> bucketFile = getBucketFile(bucket.get(), fs, path);
                if (bucketFile.isPresent()) {
                    FileStatus file = bucketFile.get();
                    BlockLocation[] blockLocations = fs.getFileBlockLocations(file, 0, file.getLen());
                    boolean splittable = isSplittable(inputFormat, fs, file.getPath());

                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations, 0,
                            file.getLen(), schema, partitionKeys, splittable, session));
                    continue;
                }
            }

            // Acquire semaphore so that we only have a fixed number of outstanding partitions being processed asynchronously
            // NOTE: there must not be any calls that throw in the space between acquiring the semaphore and setting the Future
            // callback to release it. Otherwise, we will need a try-finally block around this section.
            try {
                semaphore.acquire();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }

            ListenableFuture<Void> partitionFuture = createAsyncWalker(fs, suspendingExecutor).beginWalk(path,
                    new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations) {
                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations,
                                        0, file.getLen(), schema, partitionKeys, splittable, session));
                            } catch (IOException e) {
                                hiveSplitSource.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result) {
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t) {
                    semaphore.release();
                }
            });

            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result) {
                hiveSplitSource.finished();
            }

            @Override
            public void onFailure(Throwable t) {
                hiveSplitSource.fail(t);
            }
        });
    } catch (Throwable e) {
        hiveSplitSource.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}