Example usage for org.apache.hadoop.fs BlockLocation getLength

List of usage examples for org.apache.hadoop.fs BlockLocation getLength

Introduction

On this page you can find example usage for org.apache.hadoop.fs BlockLocation getLength.

Prototype

public long getLength() 

Document

Get the length of the block
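
Before the examples, a minimal sketch of the typical call pattern (not taken from any of the sources below; the path and class name are placeholders): ask the FileSystem for a file's block locations, then read each block's length.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLengthExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        FileStatus status = fs.getFileStatus(path);
        // one BlockLocation per block of the file, covering [0, file length)
        BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
        long total = 0;
        for (BlockLocation block : blocks) {
            // getLength() returns the number of bytes in this block
            total += block.getLength();
        }
        System.out.println("bytes covered by blocks: " + total);
    }
}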

Usage

From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License:Apache License
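
This loader splits each block into uniform chunks no larger than the maximum split size. getLength() determines the chunk count and target chunk size, bounds the chunking loop, and backs the final check that the chunks cover the block exactly.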

private List<HiveSplit> createHiveSplits(String partitionName, String path, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable,
        ConnectorSession session, OptionalInt bucketNumber, TupleDomain<HiveColumnHandle> effectivePredicate,
        Map<Integer, HiveType> columnCoercions) throws IOException {
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();

    boolean forceLocalScheduling = HiveSessionProperties.isForceLocalScheduling(session);

    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            long maxBytes = maxSplitSize.toBytes();
            boolean creatingInitialSplits = false;

            if (remainingInitialSplits.get() > 0) {
                maxBytes = maxInitialSplitSize.toBytes();
                creatingInitialSplits = true;
            }

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxBytes));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                if (remainingInitialSplits.decrementAndGet() < 0 && creatingInitialSplits) {
                    creatingInitialSplits = false;
                    // recalculate the target chunk size
                    maxBytes = maxSplitSize.toBytes();
                    long remainingLength = blockLocation.getLength() - chunkOffset;
                    chunks = Math.max(1, (int) (remainingLength / maxBytes));
                    targetChunkSize = (long) Math.ceil(remainingLength * 1.0 / chunks);
                }
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(),
                        partitionName, path, blockLocation.getOffset() + chunkOffset, chunkLength, schema,
                        partitionKeys, addresses, bucketNumber,
                        forceLocalScheduling && hasRealAddress(addresses), effectivePredicate,
                        columnCoercions));

                chunkOffset += chunkLength;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    } else {
        // not splittable, use the hosts from the first block if it exists
        List<HostAddress> addresses = ImmutableList.of();
        if (blockLocations.length > 0) {
            addresses = toHostAddress(blockLocations[0].getHosts());
        }

        builder.add(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(), partitionName,
                path, start, length, schema, partitionKeys, addresses, bucketNumber,
                forceLocalScheduling && hasRealAddress(addresses), effectivePredicate, columnCoercions));
    }
    return builder.build();
}

From source file:com.facebook.presto.hive.HiveSplitIterable.java

License:Apache License
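
An earlier variant of the same chunking scheme: getLength() sizes the uniform chunks within each block and caps the final chunk so the chunk offsets sum to exactly the block length.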

private List<HiveSplit> createHiveSplits(String partitionName, FileStatus file, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable)
        throws IOException {
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();
    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxSplitSize.toBytes()));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(clientId, table.getDbName(), table.getTableName(), partitionName,
                        false, file.getPath().toString(), blockLocation.getOffset() + chunkOffset, chunkLength,
                        schema, partitionKeys, addresses));

                chunkOffset += chunkLength;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    } else {
        // not splittable, use the hosts from the first block
        builder.add(new HiveSplit(clientId, table.getDbName(), table.getTableName(), partitionName, false,
                file.getPath().toString(), start, length, schema, partitionKeys,
                toHostAddress(blockLocations[0].getHosts())));
    }
    return builder.build();
}

From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java

License:Apache License
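
This variant produces smaller initial splits while remainingInitialSplits is positive; getLength() again drives the chunk sizing and the coverage check.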

private List<HiveSplit> createHiveSplits(String partitionName, FileStatus file, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable,
        ConnectorSession session) throws IOException {
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();
    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            long maxBytes = maxSplitSize.toBytes();

            if (remainingInitialSplits > 0) {
                maxBytes = maxInitialSplitSize.toBytes();
            }

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxBytes));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(connectorId, table.getDbName(), table.getTableName(), partitionName,
                        file.getPath().toString(), blockLocation.getOffset() + chunkOffset, chunkLength, schema,
                        partitionKeys, addresses, session));

                chunkOffset += chunkLength;
                remainingInitialSplits--;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    } else {
        // not splittable, use the hosts from the first block if it exists
        List<HostAddress> addresses = ImmutableList.of();
        if (blockLocations.length > 0) {
            addresses = toHostAddress(blockLocations[0].getHosts());
        }

        builder.add(new HiveSplit(connectorId, table.getDbName(), table.getTableName(), partitionName,
                file.getPath().toString(), start, length, schema, partitionKeys, addresses, session));
    }
    return builder.build();
}

From source file:com.facebook.presto.hive.util.InternalHiveSplitFactory.java

License:Apache License
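
Here getOffset() + getLength() gives each block's end, which is clamped against the requested [start, start + length) range; zero-width blocks are skipped unless they cover an empty slice.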

private Optional<InternalHiveSplit> createInternalHiveSplit(Path path, BlockLocation[] blockLocations,
        long start, long length, long fileSize, OptionalInt bucketNumber, boolean splittable) {
    String pathString = path.toString();
    if (!pathMatchesPredicate(pathDomain, pathString)) {
        return Optional.empty();
    }

    boolean forceLocalScheduling = this.forceLocalScheduling;

    // For empty files, some filesystems (e.g. LocalFileSystem) produce one empty block
    // while others (e.g. hdfs.DistributedFileSystem) produce no blocks.
    // Synthesize an empty block if one does not already exist.
    if (fileSize == 0 && blockLocations.length == 0) {
        blockLocations = new BlockLocation[] { new BlockLocation() };
        // Turn off forced local scheduling because the hosts list doesn't exist.
        forceLocalScheduling = false;
    }

    ImmutableList.Builder<InternalHiveBlock> blockBuilder = ImmutableList.builder();
    for (BlockLocation blockLocation : blockLocations) {
        // clamp the block range
        long blockStart = Math.max(start, blockLocation.getOffset());
        long blockEnd = Math.min(start + length, blockLocation.getOffset() + blockLocation.getLength());
        if (blockStart > blockEnd) {
            // block is outside split range
            continue;
        }
        if (blockStart == blockEnd && !(blockStart == start && blockEnd == start + length)) {
            // skip zero-width block, except in the special circumstance: slice is empty, and the block covers the empty slice interval.
            continue;
        }
        blockBuilder.add(new InternalHiveBlock(blockStart, blockEnd, getHostAddresses(blockLocation)));
    }
    List<InternalHiveBlock> blocks = blockBuilder.build();
    checkBlocks(blocks, start, length);

    if (!splittable) {
        // not splittable, use the hosts from the first block if it exists
        blocks = ImmutableList.of(new InternalHiveBlock(start, start + length, blocks.get(0).getAddresses()));
    }

    return Optional.of(new InternalHiveSplit(partitionName, pathString, start, start + length, fileSize, schema,
            partitionKeys, blocks, bucketNumber, splittable,
            forceLocalScheduling && allBlocksHaveRealAddress(blocks), columnCoercions, bucketConversion,
            s3SelectPushdownEnabled && S3SelectPushdown.isCompressionCodecSupported(inputFormat, path)));
}

From source file:com.google.mr4c.hadoop.DataLocalizer.java

License:Open Source License
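
getLength() is summed over the blocks of all input sources to obtain the total size passed to the split-host calculator.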

public List<String> localize(Collection<DataFileSource> sources) throws IOException {
    List<BlockLocation> allBlocks = new ArrayList<BlockLocation>();
    long totalSize = 0;
    for (DataFileSource src : sources) {
        BlockLocation[] blocks = src.getBlockLocation();
        allBlocks.addAll(Arrays.asList(blocks));
        for (BlockLocation block : blocks) {
            totalSize += block.getLength();
        }
    }

    return Arrays.asList(m_calc.calcSplitHosts(allBlocks.toArray(new BlockLocation[allBlocks.size()]), 0,
            totalSize, m_topo));

}

From source file:com.linkedin.cubert.io.rubix.RubixInputSplit.java

License:Open Source License
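
Each block's [offset, offset + length) range is intersected with the split's range to count the bytes relevant to each host; the hosts holding the most relevant bytes become the split's locations.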

@Override
public String[] getLocations() throws IOException, InterruptedException {
    if (hostnames == null) {
        /* Obtain the FileSystem object and get the FileStatus objects for the split */
        FileSystem fileSystem = FileSystem.get(conf);
        FileStatus fileStatus = fileSystem.getFileStatus(filename);
        /*
         * Obtain the Block locations for the split. This also provides the offset and
         * length information for each block
         */
        final BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(fileStatus, offset, length);
        /**
         * Collect all hosts in a map and populate the number of bytes to be read from
         * each host
         */
        Long l;
        Map<String, Long> hostMap = new HashMap<String, Long>();
        for (BlockLocation bl : blockLocations) {
            final long start = bl.getOffset() < offset ? offset : bl.getOffset();
            final long end = (offset + length) < (bl.getOffset() + bl.getLength()) ? offset + length
                    : bl.getOffset() + bl.getLength();
            final long nRelevantBytes = end - start;
            for (String host : bl.getHosts()) {
                hostMap.put(host, ((l = hostMap.get(host)) == null ? 0 : l) + nRelevantBytes);
            }
        }
        /* Sort them in decreasing order of maximum number of relevant bytes */
        final Set<Map.Entry<String, Long>> entries = hostMap.entrySet();
        final Map.Entry<String, Long>[] hostLengthPairs = entries.toArray(new Map.Entry[entries.size()]);

        Arrays.sort(hostLengthPairs, new Comparator<Map.Entry<String, Long>>() {
            @Override
            public int compare(Map.Entry<String, Long> e1, Map.Entry<String, Long> e2) {
                // Long.compare avoids the int overflow risk of subtracting and casting
                return Long.compare(e2.getValue(), e1.getValue());
            }
        });

        /* Populate the hostnames object */
        final int nHost = Math.min(hostLengthPairs.length, MAX_LOCATIONS);
        hostnames = new String[nHost];
        for (int i = 0; i < nHost; ++i) {
            hostnames[i] = hostLengthPairs[i].getKey();
        }
    }
    return hostnames;
}

From source file:com.mongodb.hadoop.splitter.BSONSplitter.java

License:Apache License
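
A simple scan that uses getLength() to find the index of the largest block among the given locations.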

public static int getLargestBlockIndex(final BlockLocation[] blockLocations) {
    int retVal = -1;
    if (blockLocations == null) {
        return retVal;
    }
    long max = 0;
    for (int i = 0; i < blockLocations.length; i++) {
        BlockLocation blk = blockLocations[i];
        if (blk.getLength() > max) {
            // track the running maximum so later smaller blocks don't win
            max = blk.getLength();
            retVal = i;
        }
    }
    return retVal;
}

From source file:com.ricemap.spateDB.mapred.IndexedPrism.java

License:Apache License
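
When no global index is available, every pair of blocks from the two input files is joined; getOffset() and getLength() supply the range of each FileSplit in the Cartesian product.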

@SuppressWarnings("unchecked")
@Override
public InputSplit[] getSplits(final JobConf job, int numSplits) throws IOException {
    // Get a list of all input files. There should be exactly two files.
    final Path[] inputFiles = getInputPaths(job);
    GlobalIndex<Partition>[] gIndexes = new GlobalIndex[inputFiles.length];

    BlockFilter blockFilter = null;
    try {
        Class<? extends BlockFilter> blockFilterClass = job.getClass(SpatialSite.FilterClass, null,
                BlockFilter.class);
        if (blockFilterClass != null) {
            // Get all blocks the user wants to process
            blockFilter = blockFilterClass.newInstance();
            blockFilter.configure(job);
        }
    } catch (InstantiationException e1) {
        e1.printStackTrace();
    } catch (IllegalAccessException e1) {
        e1.printStackTrace();
    }

    if (blockFilter != null) {
        // Extract global indexes from input files

        for (int i_file = 0; i_file < inputFiles.length; i_file++) {
            FileSystem fs = inputFiles[i_file].getFileSystem(job);
            gIndexes[i_file] = SpatialSite.getGlobalIndex(fs, inputFiles[i_file]);
        }
    }

    final Vector<CombineFileSplit> matchedSplits = new Vector<CombineFileSplit>();
    if (gIndexes[0] == null || gIndexes[1] == null) {
        // Join every possible pair (Cartesian product)
        BlockLocation[][] fileBlockLocations = new BlockLocation[inputFiles.length][];
        for (int i_file = 0; i_file < inputFiles.length; i_file++) {
            FileSystem fs = inputFiles[i_file].getFileSystem(job);
            FileStatus fileStatus = fs.getFileStatus(inputFiles[i_file]);
            fileBlockLocations[i_file] = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        }
        LOG.info("Doing a Cartesian product of blocks: " + fileBlockLocations[0].length + "x"
                + fileBlockLocations[1].length);
        for (BlockLocation block1 : fileBlockLocations[0]) {
            for (BlockLocation block2 : fileBlockLocations[1]) {
                FileSplit fsplit1 = new FileSplit(inputFiles[0], block1.getOffset(), block1.getLength(),
                        block1.getHosts());
                FileSplit fsplit2 = new FileSplit(inputFiles[1], block2.getOffset(), block2.getLength(),
                        block2.getHosts());
                CombineFileSplit combinedSplit = (CombineFileSplit) FileSplitUtil.combineFileSplits(job,
                        fsplit1, fsplit2);
                matchedSplits.add(combinedSplit);
            }
        }
    } else {
        // Filter block pairs by the BlockFilter
        blockFilter.selectCellPairs(gIndexes[0], gIndexes[1], new ResultCollector2<Partition, Partition>() {
            @Override
            public void collect(Partition p1, Partition p2) {
                try {
                    List<FileSplit> splits1 = new ArrayList<FileSplit>();
                    Path path1 = new Path(inputFiles[0], p1.filename);
                    splitFile(job, path1, splits1);

                    List<FileSplit> splits2 = new ArrayList<FileSplit>();
                    Path path2 = new Path(inputFiles[1], p2.filename);
                    splitFile(job, path2, splits2);

                    for (FileSplit split1 : splits1) {
                        for (FileSplit split2 : splits2) {
                            matchedSplits.add(
                                    (CombineFileSplit) FileSplitUtil.combineFileSplits(job, split1, split2));
                        }
                    }

                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        });
    }

    LOG.info("Matched " + matchedSplits.size() + " combine splits");

    // Return all matched splits
    return matchedSplits.toArray(new InputSplit[matchedSplits.size()]);
}

From source file:com.zjy.mongo.splitter.BSONSplitter.java

License:Apache License
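
getLength() delimits each block's byte range when locating the block that contains a given file offset, and fixes the file's extent for the error message.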

/**
 * Get the index of the block within the given BlockLocations that
 * contains the given offset. Raises IllegalArgumentException if the
 * offset is outside the file.
 *
 * @param blockLocations BlockLocations to search.
 * @param offset the offset into the file.
 * @return the index of the BlockLocation containing the offset.
 */
private static int getBlockIndex(final BlockLocation[] blockLocations, final long offset) {
    for (int i = 0; i < blockLocations.length; i++) {
        BlockLocation bl = blockLocations[i];
        if (bl.getOffset() <= offset && offset < bl.getOffset() + bl.getLength()) {
            return i;
        }
    }
    BlockLocation lastBlock = blockLocations[blockLocations.length - 1];
    long fileLength = lastBlock.getOffset() + lastBlock.getLength() - 1;
    throw new IllegalArgumentException(
            String.format("Offset %d is outside the file [0..%d].", offset, fileLength));
}

From source file:de.huberlin.wbi.hiway.common.Data.java

License:Apache License
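
getLength() is accumulated for every block with a replica on the container's host, measuring how much of the file's data is available locally.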

long countAvailableLocalData(Container container) throws IOException {
    BlockLocation[] blockLocations = null;

    Path hdfsLocation = getHdfsPath();
    while (blockLocations == null) {
        FileStatus fileStatus = hdfs.getFileStatus(hdfsLocation);
        blockLocations = hdfs.getFileBlockLocations(hdfsLocation, 0, fileStatus.getLen());
    }

    long sum = 0;
    for (BlockLocation blockLocation : blockLocations) {
        for (String host : blockLocation.getHosts()) {
            if (container.getNodeId().getHost().equals(host)) {
                sum += blockLocation.getLength();
                break;
            }
        }
    }
    return sum;
}