Example usage for org.apache.hadoop.fs FileSystem getFileBlockLocations

List of usage examples for org.apache.hadoop.fs FileSystem getFileBlockLocations

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.FileSystem method getFileBlockLocations.

Prototype

public BlockLocation[] getFileBlockLocations(Path p, long start, long len) throws IOException 

Document

Returns an array containing the hostnames, offsets, and sizes of the portions of the given file.
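
Before the longer, project-specific examples below, here is a minimal sketch of calling getFileBlockLocations directly. The path /tmp/example.txt and the bare Configuration are assumptions for illustration only; they do not come from any of the source files listed on this page. Note that FileSystem also provides an equivalent overload that takes a FileStatus instead of a Path, which is what most of the examples below use.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLocationExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical input path; replace with a file that exists on your cluster.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);

        // Ask for the block locations covering the whole file.
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] locations = fs.getFileBlockLocations(path, 0, status.getLen());

        // Each BlockLocation reports the offset, length and hosts of one portion of the file.
        for (BlockLocation location : locations) {
            System.out.println("offset=" + location.getOffset() + " length=" + location.getLength()
                    + " hosts=" + Arrays.toString(location.getHosts()));
        }
    }
}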

Usage

From source file:org.bgi.flexlab.gaea.data.mapreduce.input.cram.GaeaCombineCramFileRecordReader.java

License:Open Source License

protected boolean initializeNextRecordReader() throws IOException {
    if (currentReader != null) {
        currentReader.close();
        currentReader = null;
    }

    // if all chunks have been processed, nothing more to do.
    if (fileIndex == split.getNumPaths()) {
        return false;
    }

    // get a record reader for the fileIndex-th chunk
    try {
        Configuration conf = context.getConfiguration();

        currentReader = new GaeaCramRecordReader();

        Path path = split.getPath(fileIndex);
        long length = split.getLength(fileIndex);
        FileSystem fs = path.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] blkLocations = fs.getFileBlockLocations(status, 0, length);

        currentReader.initialize(new FileSplit(path, 0, length, blkLocations[0].getHosts()), context);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    fileIndex++;
    return true;
}

From source file:org.cdlib.was.weari.pig.ArcListInputFormat.java

License:Apache License

/** 
 * Generate the list of files and make them into FileSplits.
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
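        // block locations for the whole file; the hosts of each block become locality hints for the splits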
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            //Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());

    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}

From source file:org.commoncrawl.util.NodeAffinityMaskBuilder.java

License:Open Source License

public static String buildNodeAffinityMask(FileSystem fileSystem, Path partFileDirectory,
        Map<Integer, String> optionalRootMapHint, Set<String> excludedNodeList, int maxReducersPerNode,
        boolean skipBalance) throws IOException {

    TreeMap<Integer, String> partitionToNodeMap = new TreeMap<Integer, String>();
    FileStatus paths[] = fileSystem.globStatus(new Path(partFileDirectory, "part-*"));

    if (paths.length == 0) {
        throw new IOException("Invalid source Path:" + partFileDirectory);
    }

    Multimap<String, Integer> inverseMap = TreeMultimap.create();
    Map<Integer, List<String>> paritionToDesiredCandidateList = new TreeMap<Integer, List<String>>();

    // iterate paths 
    for (FileStatus path : paths) {

        String currentFile = path.getPath().getName();
        int partitionNumber;
        try {
            if (currentFile.startsWith("part-r")) {
                partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-r-".length())).intValue();
            } else {
                partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-".length())).intValue();
            }
        } catch (ParseException e) {
            throw new IOException("Invalid Part Name Encountered:" + currentFile);
        }

        // get block locations 
        BlockLocation locations[] = fileSystem.getFileBlockLocations(path, 0, path.getLen());

        // if passed in root map is not null, then validate that all blocks for the current file reside on the desired node 
        if (optionalRootMapHint != null) {
            // the host all blocks should reside on 
            String desiredHost = optionalRootMapHint.get(partitionNumber);

            ArrayList<String> misplacedBlocks = new ArrayList<String>();
            // ok walk all blocks 
            for (BlockLocation location : locations) {
                boolean found = false;
                for (String host : location.getHosts()) {
                    if (host.compareTo(desiredHost) == 0) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    misplacedBlocks.add("Block At:" + location.getOffset() + " for File:" + path.getPath()
                            + " did not contain desired location:" + desiredHost);
                }

            }
            // ok pass test at a certain threshold 
            if (misplacedBlocks.size() != 0
                    && ((float) misplacedBlocks.size() / (float) locations.length) > .50f) {
                LOG.error("Misplaced Blocks Exceed Threshold");
                for (String misplacedBlock : misplacedBlocks) {
                    LOG.error(misplacedBlock);
                }
                // TODO: SKIP THIS STEP FOR NOW ??? 
                //throw new IOException("Misplaced Blocks Exceed Threshold!");
            }
            partitionToNodeMap.put(partitionNumber, desiredHost);
        } else {
            if (excludedNodeList != null) {
                // LOG.info("Exclued Node List is:" + Lists.newArrayList(excludedNodeList).toString());
            }
            // ok ask file system for block locations
            TreeMap<String, Integer> nodeToBlockCount = new TreeMap<String, Integer>();

            for (BlockLocation location : locations) {
                for (String host : location.getHosts()) {
                    if (excludedNodeList == null || !excludedNodeList.contains(host)) {
                        Integer nodeHitCount = nodeToBlockCount.get(host);
                        if (nodeHitCount == null) {
                            nodeToBlockCount.put(host, 1);
                        } else {
                            nodeToBlockCount.put(host, nodeHitCount.intValue() + 1);
                        }
                    }
                }
            }

            if (nodeToBlockCount.size() == 0) {
                throw new IOException("No valid nodes found for partition number:" + path);
            }

            Map.Entry<String, Integer> entries[] = nodeToBlockCount.entrySet().toArray(new Map.Entry[0]);
            Arrays.sort(entries, new Comparator<Map.Entry<String, Integer>>() {

                @Override
                public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
                    return o1.getValue().intValue() < o2.getValue().intValue() ? 1
                            : o1.getValue().intValue() == o2.getValue().intValue() ? 0 : -1;
                }
            });

            // build a list of nodes by priority ... 
            List<String> nodesByPriority = Lists.transform(Lists.newArrayList(entries),
                    new Function<Map.Entry<String, Integer>, String>() {

                        @Override
                        public String apply(Entry<String, Integer> entry) {
                            return entry.getKey();
                        }
                    });

            // stash it away ... 
            paritionToDesiredCandidateList.put(partitionNumber, nodesByPriority);
            //LOG.info("Mapping Partition:" + partitionNumber + " To Node:" + entries[0].getKey() + " BlockCount" + entries[0].getValue().intValue());
            partitionToNodeMap.put(partitionNumber, entries[0].getKey());
            // store the inverse mapping ... 
            inverseMap.put(entries[0].getKey(), partitionNumber);
        }
    }

    if (skipBalance) {
        // walk partition map to make sure everything is assigned ...
        /*
        for (String node : inverseMap.keys()) { 
          if (inverseMap.get(node).size() > maxReducersPerNode) { 
            throw new IOException("Node:" + node + " has too many partitions! ("+inverseMap.get(node).size());
          }
        }
        */
    }

    // now if optional root map hint is null 
    if (optionalRootMapHint == null && !skipBalance) {
        // figure out if there is an imbalance
        int avgRegionsPerNode = (int) Math.floor((float) paths.length / (float) inverseMap.keySet().size());
        int maxRegionsPerNode = (int) Math.ceil((float) paths.length / (float) inverseMap.keySet().size());
        LOG.info("Attempting to ideally balance nodes. Avg paritions per node:" + avgRegionsPerNode);

        // two passes .. 
        for (int pass = 0; pass < 2; ++pass) {
            LOG.info("Pass:" + pass);
            // iterate nodes ... 
            for (String node : ImmutableSet.copyOf(inverseMap.keySet())) {
                // get paritions in map  
                Collection<Integer> paritions = ImmutableList.copyOf(inverseMap.get(node));
                // if parition count exceeds desired average ... 
                if (paritions.size() > maxRegionsPerNode) {
                    // first pass, assign based on preference 
                    if (pass == 0) {
                        LOG.info("Node:" + node + " parition count:" + paritions.size() + " exceeds avg:"
                                + avgRegionsPerNode);
                        // walk partitions trying to find a node to discrard the parition to 
                        for (int partition : paritions) {
                            for (String candidate : paritionToDesiredCandidateList.get(partition)) {
                                if (!candidate.equals(node)) {
                                    // see if this candidate has room ..
                                    if (inverseMap.get(candidate).size() < avgRegionsPerNode) {
                                        LOG.info("REASSIGNING parition:" + partition + " from Node:" + node
                                                + " to Node:" + candidate);
                                        // found match reassign it ... 
                                        inverseMap.remove(node, partition);
                                        inverseMap.put(candidate, partition);
                                        break;
                                    }
                                }
                            }
                            // break out if reach our desired number of paritions for this node 
                            if (inverseMap.get(node).size() == avgRegionsPerNode)
                                break;
                        }
                    }
                    // second pass ... assign based on least loaded node ... 
                    else {
                        int desiredRelocations = paritions.size() - maxRegionsPerNode;
                        LOG.info("Desired Relocation for node:" + node + ":" + desiredRelocations
                                + " partitions:" + paritions.size());
                        for (int i = 0; i < desiredRelocations; ++i) {
                            String leastLoadedNode = null;
                            int leastLoadedNodePartitionCount = 0;

                            for (String candidateNode : inverseMap.keySet()) {
                                if (leastLoadedNode == null || inverseMap.get(candidateNode)
                                        .size() < leastLoadedNodePartitionCount) {
                                    leastLoadedNode = candidateNode;
                                    leastLoadedNodePartitionCount = inverseMap.get(candidateNode).size();
                                }
                            }
                            int bestPartition = -1;
                            int bestParitionOffset = -1;

                            for (int candidateParition : inverseMap.get(node)) {
                                int offset = 0;
                                for (String nodeCandidate : paritionToDesiredCandidateList
                                        .get(candidateParition)) {
                                    if (nodeCandidate.equals(leastLoadedNode)) {
                                        if (bestPartition == -1 || bestParitionOffset > offset) {
                                            bestPartition = candidateParition;
                                            bestParitionOffset = offset;
                                        }
                                        break;
                                    }
                                    offset++;
                                }
                            }
                            if (bestPartition == -1) {
                                bestPartition = Iterables.get(inverseMap.get(node), 0);
                            }
                            LOG.info("REASSIGNING parition:" + bestPartition + " from Node:" + node
                                    + " to Node:" + leastLoadedNode);
                            // found match reassign it ... 
                            inverseMap.remove(node, bestPartition);
                            inverseMap.put(leastLoadedNode, bestPartition);
                        }
                    }
                }
            }
        }
        LOG.info("Rebuilding parition to node map based on ideal balance");
        for (String node : inverseMap.keySet()) {
            LOG.info("Node:" + node + " has:" + inverseMap.get(node).size() + " partitions:"
                    + inverseMap.get(node).toString());
        }

        partitionToNodeMap.clear();
        for (Map.Entry<String, Integer> entry : inverseMap.entries()) {
            partitionToNodeMap.put(entry.getValue(), entry.getKey());
        }
    }

    StringBuilder builder = new StringBuilder();
    int itemCount = 0;
    for (Map.Entry<Integer, String> entry : partitionToNodeMap.entrySet()) {
        if (itemCount++ != 0)
            builder.append("\t");
        builder.append(entry.getKey().intValue() + "," + entry.getValue());
    }

    return builder.toString();
}

From source file:org.hedera.io.input.WikiRevisionInputFormat.java

License:Apache License

/** 
 * This code is copied from StreamWikiDumpNewInputFormat.java by Yusuke Matsubara.
 * Thanks to Tu Meteora for adjusting the code to the new mapreduce framework
 * @param jc the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext jc, FileStatus file, long splitSize) throws IOException {

    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path path = file.getPath();

    LOG.info("Splitting file " + path.getName());

    Configuration conf = jc.getConfiguration();
    configure(conf);

    long length = file.getLen();
    FileSystem fs = file.getPath().getFileSystem(conf);
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if ((length != 0) && isSplitable(jc, path)) {
        long bytesRemaining = length;

        SeekableInputStream in = SeekableInputStream.getInstance(path, 0, length, fs, this.compressionCodecs);
        SplitCompressionInputStream is = in.getSplitCompressionInputStream();
        long start = 0;
        long skip = 0;
        if (is != null) {
            start = is.getAdjustedStart();
            length = is.getAdjustedEnd();
            is.close();
            in = null;
        }
        FileSplit split = null;
        Set<Long> processedPageEnds = new HashSet<Long>();
        float factor = conf.getFloat(KEY_SKIP_FACTOR, 1.2F);

        READLOOP: while (((double) bytesRemaining) / splitSize > factor && bytesRemaining > 0) {
            // prepare matcher
            ByteMatcher matcher;
            {
                long st = Math.min(start + skip + splitSize, length - 1);
                split = makeSplit(path, st, Math.min(splitSize, length - st), blkLocations);
                if (in != null)
                    in.close();
                if (split.getLength() <= 1) {
                    break;
                }
                in = SeekableInputStream.getInstance(split, fs, this.compressionCodecs);
            }
            matcher = new ByteMatcher(in);

            // read until the next page end in the look-ahead split
            while (!matcher.readUntilMatch(END_PAGE_TAG, null, split.getStart() + split.getLength(), null)) {
                if (matcher.getPos() >= length || split.getLength() == length - split.getStart())
                    break READLOOP;
                split = makeSplit(path, split.getStart(),
                        Math.min(split.getLength() + splitSize, length - split.getStart()), blkLocations);
            }
            if (matcher.getLastUnmatchPos() > 0 && matcher.getPos() > matcher.getLastUnmatchPos()
                    && !processedPageEnds.contains(matcher.getPos())) {
                splits.add(makeSplit(path, start, matcher.getPos() - start, blkLocations));
                processedPageEnds.add(matcher.getPos());
                long newstart = Math.max(matcher.getLastUnmatchPos(), start);
                bytesRemaining = length - newstart;
                start = newstart;
                skip = 0;
            } else {
                skip = matcher.getPos() - start;
            }
        }

        if (bytesRemaining > 0 && !processedPageEnds.contains(length)) {
            splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts()));
        }
        if (in != null)
            in.close();
    } else if (length != 0) {
        splits.add(makeSplit(path, 0, length, blkLocations));
    } else {
        //Create empty hosts array for zero length files
        splits.add(makeSplit(path, 0, length, new String[0]));
    }
    return splits;
}

From source file:org.imageterrier.hadoop.mapreduce.PositionAwareSequenceFileInputFormat.java

License:Mozilla Public License

/** 
 * Generate the list of files and make them into FileSplits.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    int splitnum = 0;

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);

                splits.add(new PositionAwareSplitWrapper<FileSplit>(new FileSplit(path, length - bytesRemaining,
                        splitSize, blkLocations[blkIndex].getHosts()), splitnum++));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new PositionAwareSplitWrapper<FileSplit>(new FileSplit(path, length - bytesRemaining,
                        bytesRemaining, blkLocations[blkLocations.length - 1].getHosts()), splitnum++));
            }
        } else if (length != 0) {
            splits.add(new PositionAwareSplitWrapper<FileSplit>(
                    new FileSplit(path, 0, length, blkLocations[0].getHosts()), splitnum++));
        } else {
            //Create empty hosts array for zero length files
            splits.add(new PositionAwareSplitWrapper<FileSplit>(new FileSplit(path, 0, length, new String[0]),
                    splitnum++));
        }
    }

    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}

From source file:org.mrgeo.hdfs.ingest.format.IngestImageSplittingInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException {
    final List<InputSplit> splits = new LinkedList<InputSplit>();
    // mapred.input.dir
    final Path[] inputs = FileInputFormat.getInputPaths(context);

    final Configuration conf = context.getConfiguration();

    int tilesize = -1;
    try {
        //metadata = HadoopUtils.getMetadata(conf);
        Map<String, MrsImagePyramidMetadata> meta = HadoopUtils.getMetadata(context.getConfiguration());
        if (!meta.isEmpty()) {
            MrsImagePyramidMetadata metadata = meta.values().iterator().next();
            tilesize = metadata.getTilesize();
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }

    if (tilesize < 0) {
        tilesize = conf.getInt("tilesize", -1);
        if (tilesize < 1) {
            throw new MrsImageException(
                    "Error, no \"tilesize\" or \"metadata\" parameter in configuration, tilesize needs to be calculated & set before map/reduce");
        }

    }

    final int zoomlevel = conf.getInt("zoomlevel", -1);

    // get the tilesize in bytes (default to 3 band, 1 byte per band)
    final long tilebytes = conf.getLong("tilebytes", tilesize * tilesize * 3 * 1);

    if (zoomlevel < 1) {
        throw new MrsImageException(
                "Error, no \"zoomlevel\" parameter in configuration, zoomlevel needs to be calculated & set before map/reduce");
    }

    // get the spill buffer percent, then take 95% of it for extra padding...
    double spillpct = conf.getFloat("io.sort.spill.percent", (float) 0.8) * 0.95;
    long spillsize = (long) (conf.getFloat("io.sort.mb", 200) * spillpct) * 1024 * 1024;
    log.info("Spill size for splitting is: " + spillsize + "b");

    Map<String, Bounds> lookup = new HashMap<>();

    final String adhocname = conf.get(IngestImageDriver.INGEST_BOUNDS_LOCATION, null);
    if (adhocname != null) {
        AdHocDataProvider dp = DataProviderFactory.getAdHocDataProvider(adhocname,
                DataProviderFactory.AccessMode.READ, conf);
        InputStream is = dp.get(IngestImageDriver.INGEST_BOUNDS_FILE);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));

        String line;
        while ((line = reader.readLine()) != null) {
            String[] data = line.split("\\|");
            if (data.length == 2) {
                lookup.put(data[0], Bounds.fromDelimitedString(data[1]));
            }
        }
        is.close();
    }
    //log.info("Creating splits for: " + output.toString());
    for (final Path input : inputs) {
        final FileSystem fs = HadoopFileUtils.getFileSystem(conf, input);
        LongRectangle bounds = null;

        if (lookup.containsKey(input.toString())) {
            Bounds b = lookup.get(input.toString());
            bounds = TMSUtils.boundsToTile(b.getTMSBounds(), zoomlevel, tilesize).toLongRectangle();
        } else {
            log.info("  reading: " + input.toString());
            log.info("    zoomlevel: " + zoomlevel);

            final AbstractGridCoverage2DReader reader = GeotoolsRasterUtils.openImage(input.toString());

            if (reader != null) {
                try {
                    bounds = GeotoolsRasterUtils.calculateTiles(reader, tilesize, zoomlevel);
                } finally {
                    try {
                        GeotoolsRasterUtils.closeStreamFromReader(reader);
                    } catch (Exception e) {
                        e.printStackTrace();
                        throw new IOException(e);
                    }
                }
            }
        }

        if (bounds != null) {
            final long minTx = bounds.getMinX();
            final long maxTx = bounds.getMaxX();
            final long minTy = bounds.getMinY();
            final long maxTy = bounds.getMaxY();

            final long width = bounds.getWidth();
            final long height = bounds.getHeight();

            final long totaltiles = width * height;

            final FileStatus status = fs.getFileStatus(input);

            // for now, we'll just use the 1st block location for the split.
            // we can get more sophisticated later...
            final BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, 0);

            String location = null;
            if (blocks.length > 0) {
                final String hosts[] = blocks[0].getHosts();
                if (hosts.length > 0) {
                    location = hosts[0];
                }
            }

            // long filelen = status.getLen();
            final long totalbytes = totaltiles * tilebytes;

            // if uncompressed tile sizes are greater than the spillsize, break it
            // into pieces
            if (totalbytes > spillsize) {
                final long numsplits = (totalbytes / spillsize) + 1;

                final long splitrange = (totaltiles / numsplits);
                long leftovers = totaltiles - (numsplits * splitrange);

                long start = 0;
                long end = 0;

                for (int i = 0; i < numsplits; i++) {
                    end = start + splitrange;
                    if (leftovers > 0) {
                        end++;
                        leftovers--;
                    }

                    final long sy = (start / width);
                    final long sx = (start - (sy * width));

                    // since the tile range is inclusive, calculate with end-1
                    final long ey = ((end - 1) / width);
                    final long ex = ((end - 1) - (ey * width));

                    // System.out.println("start: " + start + " end: " + end);
                    // System.out.println("  sx: " + sx + " sy: " + sy);
                    // System.out.println("  ex: " + ex + " ey: " + ey);
                    splits.add(new IngestImageSplit(input.toString(), minTx + sx, minTx + ex, minTy + sy,
                            minTy + ey, (end - start), bounds, zoomlevel, tilesize, location));

                    start = end;
                }
            } else {
                splits.add(new IngestImageSplit(input.toString(), minTx, maxTx, minTy, maxTy,
                        (maxTx + 1 - minTx) * (maxTy + 1 - minTy), bounds, zoomlevel, tilesize, location));
            }
        }
    }

    return splits;
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public void setrep(long secondsToWait, boolean recursive, short replication, String... uris) {
    Assert.isTrue(replication >= 1, "Replication must be >=1");

    List<Path> waitList = (secondsToWait >= 0 ? new ArrayList<Path>() : null);

    try {
        for (String uri : uris) {
            Path srcPath = new Path(uri);
            FileSystem srcFs = getFS(srcPath);
            Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
            for (Path src : srcs) {
                setrep(replication, recursive, srcFs, src, waitList);
            }
        }

        if (waitList != null) {
            boolean waitUntilDone = (secondsToWait == 0);
            long timeLeft = TimeUnit.SECONDS.toMillis(secondsToWait);

            for (Path path : waitList) {
                FileSystem srcFs = getFS(path);
                FileStatus status = srcFs.getFileStatus(path);
                long len = status.getLen();

                boolean done = false;

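                // poll block locations until every block reports the requested number of replica hosts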
                while (!done) {
                    BlockLocation[] locations = srcFs.getFileBlockLocations(status, 0, len);
                    int i = 0;
                    for (; i < locations.length && locations[i].getHosts().length == replication; i++) {
                    }
                    done = (i == locations.length);

                    if (!done && (waitUntilDone || timeLeft > 5000)) {
                        try {
                            // sleep for 10s
                            Thread.sleep(10000);
                        } catch (InterruptedException e) {
                            return;
                        }
                        timeLeft -= 10000;
                    }
                }
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot set replication " + ex.getMessage(), ex);
    }
}

From source file:org.springframework.data.hadoop.store.split.AbstractSplitterTests.java

License:Apache License

protected static Path mockWithFileSystem(int blockCount, long blockSize, long extraBlockSize) throws Exception {
    final ArrayList<BlockLocation> blocks = new ArrayList<BlockLocation>();
    long offset = 0;
    int i = 0;
    for (; i < blockCount; i++) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                blockSize));
        offset += blockSize;
    }

    // extra just means that we add a non full last block
    if (extraBlockSize > 0 && extraBlockSize < blockSize) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                extraBlockSize));
        offset += extraBlockSize;
    }

    FileStatus mStatus = mock(FileStatus.class);
    Path mPath = mock(Path.class);
    FileSystem mFs = mock(FileSystem.class);
    when(mStatus.getLen()).thenReturn(offset);
    when(mStatus.getBlockSize()).thenReturn(blockSize);
    when(mFs.getFileStatus(mPath)).thenReturn(mStatus);

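    // stub getFileBlockLocations to answer from the synthetic block list, using the requested offset and length arguments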
    when(mFs.getFileBlockLocations((FileStatus) any(), anyLong(), anyLong()))
            .thenAnswer(new Answer<BlockLocation[]>() {

                @Override
                public BlockLocation[] answer(InvocationOnMock invocation) throws Throwable {
                    Object[] arguments = invocation.getArguments();
                    return findBlocks(blocks, (Long) arguments[1], (Long) arguments[2]);
                }
            });

    when(mPath.getFileSystem((Configuration) any())).thenReturn(mFs);
    return mPath;
}

From source file:org.springframework.data.hadoop.store.split.SlopBlockSplitter.java

License:Apache License

@Override
public List<Split> getSplits(Path path) throws IOException {
    List<Split> splits = new ArrayList<Split>();

    FileSystem fs = path.getFileSystem(getConfiguration());
    FileStatus status = fs.getFileStatus(path);

    long length = status.getLen();
    BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, length);

    long blockSize = status.getBlockSize();
    long splitSize = computeSplitSize(blockSize, getMinSplitSize(), getMaxSplitSize());

    long remaining = length;
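    // emit full splitSize splits while the remainder is more than 'slop' times the split size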
    while (((double) remaining) / splitSize > slop) {
        int i = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, splitSize, blocks[i].getHosts()));
        remaining -= splitSize;
    }

    if (remaining != 0) {
        int blkIndex = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, remaining, blocks[blkIndex].getHosts()));
    }

    return splits;
}

From source file:org.springframework.yarn.batch.partition.HdfsSplitBatchPartitionHandler.java

License:Apache License

@Override
protected Map<StepExecution, ContainerRequestHint> createResourceRequestData(Set<StepExecution> stepExecutions)
        throws Exception {
    Map<StepExecution, ContainerRequestHint> requests = new HashMap<StepExecution, ContainerRequestHint>();

    for (StepExecution execution : stepExecutions) {
        String fileName = execution.getExecutionContext().getString("fileName");
        long splitStart = execution.getExecutionContext().getLong("splitStart");
        long splitLength = execution.getExecutionContext().getLong("splitLength");

        log.debug("Creating request data for stepExecution=" + execution + " with fileName=" + fileName
                + " splitStart=" + splitStart + " splitLength=" + splitLength);

        FileSystem fs = FileSystem.get(configuration);
        Path path = new Path(execution.getExecutionContext().getString("fileName"));

        HashSet<String> hostsSet = new HashSet<String>();

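        // collect the hosts that hold blocks of this split range; they are used below as container locality hints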
        BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(path, splitStart, splitLength);
        for (BlockLocation blockLocation : fileBlockLocations) {
            for (String host : blockLocation.getHosts()) {
                hostsSet.add(host);
            }
            log.debug("block: " + blockLocation + " topologypaths="
                    + StringUtils.arrayToCommaDelimitedString(blockLocation.getTopologyPaths()));
        }

        String[] hosts = hostsSet.toArray(new String[0]);
        String[] racks = new String[0];
        // hints only for hosts
        requests.put(execution, new ContainerRequestHint(execution, null, hosts, racks, null));
    }

    return requests;
}