Example usage for org.apache.hadoop.fs FileSystem getFileBlockLocations

List of usage examples for org.apache.hadoop.fs FileSystem getFileBlockLocations

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.FileSystem method getFileBlockLocations.

Prototype

public BlockLocation[] getFileBlockLocations(Path p, long start, long len) throws IOException 

Document

Returns an array containing the hostnames, offsets, and sizes of the portions of the given file.
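
Before the longer, project-specific examples below, here is a minimal sketch of calling getFileBlockLocations directly. The path /tmp/example.txt and the bare Configuration are assumptions for illustration only; they do not come from any of the source files listed on this page. Note that FileSystem also provides an equivalent overload that takes a FileStatus instead of a Path, which is what most of the examples below use.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLocationExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical input path; replace with a file that exists on your cluster.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);

        // Ask for the block locations covering the whole file.
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] locations = fs.getFileBlockLocations(path, 0, status.getLen());

        // Each BlockLocation reports the offset, length and hosts of one portion of the file.
        for (BlockLocation location : locations) {
            System.out.println("offset=" + location.getOffset() + " length=" + location.getLength()
                    + " hosts=" + Arrays.toString(location.getHosts()));
        }
    }
}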

Usage

From source file:org.bgi.flexlab.gaea.data.mapreduce.input.cram.GaeaCombineCramFileRecordReader.java

License:Open Source License

protected boolean initializeNextRecordReader() throws IOException {
    if (currentReader != null) {
        currentReader.close();
        currentReader = null;
    }

    // if all chunks have been processed, nothing more to do.
    if (fileIndex == split.getNumPaths()) {
        return false;
    }

    // get a record reader for the fileIndex-th chunk
    try {
        Configuration conf = context.getConfiguration();

        currentReader = new GaeaCramRecordReader();

        Path path = split.getPath(fileIndex);
        long length = split.getLength(fileIndex);
        FileSystem fs = path.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] blkLocations = fs.getFileBlockLocations(status, 0, length);

        currentReader.initialize(new FileSplit(path, 0, length, blkLocations[0].getHosts()), context);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    fileIndex++;
    return true;
}

From source file:org.cdlib.was.weari.pig.ArcListInputFormat.java

License:Apache License

/** 
 * Generate the list of files and make them into FileSplits.
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
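        // block locations for the whole file; the hosts of each block become locality hints for the splits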
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            //Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());

    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}

From source file:org.commoncrawl.util.NodeAffinityMaskBuilder.java

License:Open Source License

public static String buildNodeAffinityMask(FileSystem fileSystem, Path partFileDirectory,
        Map<Integer, String> optionalRootMapHint, Set<String> excludedNodeList, int maxReducersPerNode,
        boolean skipBalance) throws IOException {

    TreeMap<Integer, String> partitionToNodeMap = new TreeMap<Integer, String>();
    FileStatus paths[] = fileSystem.globStatus(new Path(partFileDirectory, "part-*"));

    if (paths.length == 0) {
        throw new IOException("Invalid source Path:" + partFileDirectory);
    }

    Multimap<String, Integer> inverseMap = TreeMultimap.create();
    Map<Integer, List<String>> paritionToDesiredCandidateList = new TreeMap<Integer, List<String>>();

    // iterate paths 
    for (FileStatus path : paths) {

        String currentFile = path.getPath().getName();
        int partitionNumber;
        try {
            if (currentFile.startsWith("part-r")) {
                partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-r-".length())).intValue();
            } else {
                partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-".length())).intValue();
            }
        } catch (ParseException e) {
            throw new IOException("Invalid Part Name Encountered:" + currentFile);
        }

        // get block locations 
        BlockLocation locations[] = fileSystem.getFileBlockLocations(path, 0, path.getLen());

        // if passed in root map is not null, then validate that all blocks for the current file reside on the desired node 
        if (optionalRootMapHint != null) {
            // the host all blocks should reside on 
            String desiredHost = optionalRootMapHint.get(partitionNumber);

            ArrayList<String> misplacedBlocks = new ArrayList<String>();
            // ok walk all blocks 
            for (BlockLocation location : locations) {
                boolean found = false;
                for (String host : location.getHosts()) {
                    if (host.compareTo(desiredHost) == 0) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    misplacedBlocks.add("Block At:" + location.getOffset() + " for File:" + path.getPath()
                            + " did not contain desired location:" + desiredHost);
                }

            }
            // ok pass test at a certain threshold 
            if (misplacedBlocks.size() != 0
                    && ((float) misplacedBlocks.size() / (float) locations.length) > .50f) {
                LOG.error("Misplaced Blocks Exceed Threshold");
                for (String misplacedBlock : misplacedBlocks) {
                    LOG.error(misplacedBlock);
                }
                // TODO: SKIP THIS STEP FOR NOW ??? 
                //throw new IOException("Misplaced Blocks Exceed Threshold!");
            }
            partitionToNodeMap.put(partitionNumber, desiredHost);
        } else {
            if (excludedNodeList != null) {
                // LOG.info("Exclued Node List is:" + Lists.newArrayList(excludedNodeList).toString());
            }
            // ok ask file system for block locations
            TreeMap<String, Integer> nodeToBlockCount = new TreeMap<String, Integer>();

            for (BlockLocation location : locations) {
                for (String host : location.getHosts()) {
                    if (excludedNodeList == null || !excludedNodeList.contains(host)) {
                        Integer nodeHitCount = nodeToBlockCount.get(host);
                        if (nodeHitCount == null) {
                            nodeToBlockCount.put(host, 1);
                        } else {
                            nodeToBlockCount.put(host, nodeHitCount.intValue() + 1);
                        }
                    }
                }
            }

            if (nodeToBlockCount.size() == 0) {
                throw new IOException("No valid nodes found for partition number:" + path);
            }

            Map.Entry<String, Integer> entries[] = nodeToBlockCount.entrySet().toArray(new Map.Entry[0]);
            Arrays.sort(entries, new Comparator<Map.Entry<String, Integer>>() {

                @Override
                public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
                    return o1.getValue().intValue() < o2.getValue().intValue() ? 1
                            : o1.getValue().intValue() == o2.getValue().intValue() ? 0 : -1;
                }
            });

            // build a list of nodes by priority ... 
            List<String> nodesByPriority = Lists.transform(Lists.newArrayList(entries),
                    new Function<Map.Entry<String, Integer>, String>() {

                        @Override
                        public String apply(Entry<String, Integer> entry) {
                            return entry.getKey();
                        }
                    });

            // stash it away ... 
            paritionToDesiredCandidateList.put(partitionNumber, nodesByPriority);
            //LOG.info("Mapping Partition:" + partitionNumber + " To Node:" + entries[0].getKey() + " BlockCount" + entries[0].getValue().intValue());
            partitionToNodeMap.put(partitionNumber, entries[0].getKey());
            // store the inverse mapping ... 
            inverseMap.put(entries[0].getKey(), partitionNumber);
        }
    }

    if (skipBalance) {
        // walk partition map to make sure everything is assigned ...
        /*
        for (String node : inverseMap.keys()) { 
          if (inverseMap.get(node).size() > maxReducersPerNode) { 
            throw new IOException("Node:" + node + " has too many partitions! ("+inverseMap.get(node).size());
          }
        }
        */
    }

    // now if optional root map hint is null 
    if (optionalRootMapHint == null && !skipBalance) {
        // figure out if there is an imbalance
        int avgRegionsPerNode = (int) Math.floor((float) paths.length / (float) inverseMap.keySet().size());
        int maxRegionsPerNode = (int) Math.ceil((float) paths.length / (float) inverseMap.keySet().size());
        LOG.info("Attempting to ideally balance nodes. Avg paritions per node:" + avgRegionsPerNode);

        // two passes .. 
        for (int pass = 0; pass < 2; ++pass) {
            LOG.info("Pass:" + pass);
            // iterate nodes ... 
            for (String node : ImmutableSet.copyOf(inverseMap.keySet())) {
                // get paritions in map  
                Collection<Integer> paritions = ImmutableList.copyOf(inverseMap.get(node));
                // if parition count exceeds desired average ... 
                if (paritions.size() > maxRegionsPerNode) {
                    // first pass, assign based on preference 
                    if (pass == 0) {
                        LOG.info("Node:" + node + " parition count:" + paritions.size() + " exceeds avg:"
                                + avgRegionsPerNode);
                        // walk partitions trying to find a node to discrard the parition to 
                        for (int partition : paritions) {
                            for (String candidate : paritionToDesiredCandidateList.get(partition)) {
                                if (!candidate.equals(node)) {
                                    // see if this candidate has room ..
                                    if (inverseMap.get(candidate).size() < avgRegionsPerNode) {
                                        LOG.info("REASSIGNING parition:" + partition + " from Node:" + node
                                                + " to Node:" + candidate);
                                        // found match reassign it ... 
                                        inverseMap.remove(node, partition);
                                        inverseMap.put(candidate, partition);
                                        break;
                                    }
                                }
                            }
                            // break out if reach our desired number of paritions for this node 
                            if (inverseMap.get(node).size() == avgRegionsPerNode)
                                break;
                        }
                    }
                    // second pass ... assign based on least loaded node ... 
                    else {
                        int desiredRelocations = paritions.size() - maxRegionsPerNode;
                        LOG.info("Desired Relocation for node:" + node + ":" + desiredRelocations
                                + " partitions:" + paritions.size());
                        for (int i = 0; i < desiredRelocations; ++i) {
                            String leastLoadedNode = null;
                            int leastLoadedNodePartitionCount = 0;

                            for (String candidateNode : inverseMap.keySet()) {
                                if (leastLoadedNode == null || inverseMap.get(candidateNode)
                                        .size() < leastLoadedNodePartitionCount) {
                                    leastLoadedNode = candidateNode;
                                    leastLoadedNodePartitionCount = inverseMap.get(candidateNode).size();
                                }
                            }
                            int bestPartition = -1;
                            int bestParitionOffset = -1;

                            for (int candidateParition : inverseMap.get(node)) {
                                int offset = 0;
                                for (String nodeCandidate : paritionToDesiredCandidateList
                                        .get(candidateParition)) {
                                    if (nodeCandidate.equals(leastLoadedNode)) {
                                        if (bestPartition == -1 || bestParitionOffset > offset) {
                                            bestPartition = candidateParition;
                                            bestParitionOffset = offset;
                                        }
                                        break;
                                    }
                                    offset++;
                                }
                            }
                            if (bestPartition == -1) {
                                bestPartition = Iterables.get(inverseMap.get(node), 0);
                            }
                            LOG.info("REASSIGNING parition:" + bestPartition + " from Node:" + node
                                    + " to Node:" + leastLoadedNode);
                            // found match reassign it ... 
                            inverseMap.remove(node, bestPartition);
                            inverseMap.put(leastLoadedNode, bestPartition);
                        }
                    }
                }
            }
        }
        LOG.info("Rebuilding parition to node map based on ideal balance");
        for (String node : inverseMap.keySet()) {
            LOG.info("Node:" + node + " has:" + inverseMap.get(node).size() + " partitions:"
                    + inverseMap.get(node).toString());
        }

        partitionToNodeMap.clear();
        for (Map.Entry<String, Integer> entry : inverseMap.entries()) {
            partitionToNodeMap.put(entry.getValue(), entry.getKey());
        }
    }

    StringBuilder builder = new StringBuilder();
    int itemCount = 0;
    for (Map.Entry<Integer, String> entry : partitionToNodeMap.entrySet()) {
        if (itemCount++ != 0)
            builder.append("\t");
        builder.append(entry.getKey().intValue() + "," + entry.getValue());
    }

    return builder.toString();
}

From source file:org.hedera.io.input.WikiRevisionInputFormat.java

License:Apache License

/** 
 * This code is copied from StreamWikiDumpNewInputFormat.java by Yusuke Matsubara.
 * Thanks to Tu Meteora for adjusting the code to the new mapreduce framework
 * @param jc the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext jc, FileStatus file, long splitSize) throws IOException {

    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path path = file.getPath();

    LOG.info("Splitting file " + path.getName());

    Configuration conf = jc.getConfiguration();
    configure(conf);

    long length = file.getLen();
    FileSystem fs = file.getPath().getFileSystem(conf);
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if ((length != 0) && isSplitable(jc, path)) {
        long bytesRemaining = length;

        SeekableInputStream in = SeekableInputStream.getInstance(path, 0, length, fs, this.compressionCodecs);
        SplitCompressionInputStream is = in.getSplitCompressionInputStream();
        long start = 0;
        long skip = 0;
        if (is != null) {
            start = is.getAdjustedStart();
            length = is.getAdjustedEnd();
            is.close();
            in = null;
        }
        FileSplit split = null;
        Set<Long> processedPageEnds = new HashSet<Long>();
        float factor = conf.getFloat(KEY_SKIP_FACTOR, 1.2F);

        READLOOP: while (((double) bytesRemaining) / splitSize > factor && bytesRemaining > 0) {
            // prepare matcher
            ByteMatcher matcher;
            {
                long st = Math.min(start + skip + splitSize, length - 1);
                split = makeSplit(path, st, Math.min(splitSize, length - st), blkLocations);
                if (in != null)
                    in.close();
                if (split.getLength() <= 1) {
                    break;
                }
                in = SeekableInputStream.getInstance(split, fs, this.compressionCodecs);
            }
            matcher = new ByteMatcher(in);

            // read until the next page end in the look-ahead split
            while (!matcher.readUntilMatch(END_PAGE_TAG, null, split.getStart() + split.getLength(), null)) {
                if (matcher.getPos() >= length || split.getLength() == length - split.getStart())
                    break READLOOP;
                split = makeSplit(path, split.getStart(),
                        Math.min(split.getLength() + splitSize, length - split.getStart()), blkLocations);
            }
            if (matcher.getLastUnmatchPos() > 0 && matcher.getPos() > matcher.getLastUnmatchPos()
                    && !processedPageEnds.contains(matcher.getPos())) {
                splits.add(makeSplit(path, start, matcher.getPos() - start, blkLocations));
                processedPageEnds.add(matcher.getPos());
                long newstart = Math.max(matcher.getLastUnmatchPos(), start);
                bytesRemaining = length - newstart;
                start = newstart;
                skip = 0;
            } else {
                skip = matcher.getPos() - start;
            }
        }

        if (bytesRemaining > 0 && !processedPageEnds.contains(length)) {
            splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts()));
        }
        if (in != null)
            in.close();
    } else if (length != 0) {
        splits.add(makeSplit(path, 0, length, blkLocations));
    } else {
        //Create empty hosts array for zero length files
        splits.add(makeSplit(path, 0, length, new String[0]));
    }
    return splits;
}

From source file:org.imageterrier.hadoop.mapreduce.PositionAwareSequenceFileInputFormat.java

License:Mozilla Public License

/** 
 * Generate the list of files and make them into FileSplits.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    int splitnum = 0;

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);

                splits.add(new PositionAwareSplitWrapper<FileSplit>(new FileSplit(path, length - bytesRemaining,
                        splitSize, blkLocations[blkIndex].getHosts()), splitnum++));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new PositionAwareSplitWrapper<FileSplit>(new FileSplit(path, length - bytesRemaining,
                        bytesRemaining, blkLocations[blkLocations.length - 1].getHosts()), splitnum++));
            }
        } else if (length != 0) {
            splits.add(new PositionAwareSplitWrapper<FileSplit>(
                    new FileSplit(path, 0, length, blkLocations[0].getHosts()), splitnum++));
        } else {
            //Create empty hosts array for zero length files
            splits.add(new PositionAwareSplitWrapper<FileSplit>(new FileSplit(path, 0, length, new String[0]),
                    splitnum++));
        }
    }

    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}

From source file:org.mrgeo.hdfs.ingest.format.IngestImageSplittingInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException {
    final List<InputSplit> splits = new LinkedList<InputSplit>();
    // mapred.input.dir
    final Path[] inputs = FileInputFormat.getInputPaths(context);

    final Configuration conf = context.getConfiguration();

    int tilesize = -1;
    try {
        //metadata = HadoopUtils.getMetadata(conf);
        Map<String, MrsImagePyramidMetadata> meta = HadoopUtils.getMetadata(context.getConfiguration());
        if (!meta.isEmpty()) {
            MrsImagePyramidMetadata metadata = meta.values().iterator().next();
            tilesize = metadata.getTilesize();
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }

    if (tilesize < 0) {
        tilesize = conf.getInt("tilesize", -1);
        if (tilesize < 1) {
            throw new MrsImageException(
                    "Error, no \"tilesize\" or \"metadata\" parameter in configuration, tilesize needs to be calculated & set before map/reduce");
        }

    }

    final int zoomlevel = conf.getInt("zoomlevel", -1);

    // get the tilesize in bytes (default to 3 band, 1 byte per band)
    final long tilebytes = conf.getLong("tilebytes", tilesize * tilesize * 3 * 1);

    if (zoomlevel < 1) {
        throw new MrsImageException(
                "Error, no \"zoomlevel\" parameter in configuration, zoomlevel needs to be calculated & set before map/reduce");
    }

    // get the spill buffer percent, then take 95% of it for extra padding...
    double spillpct = conf.getFloat("io.sort.spill.percent", (float) 0.8) * 0.95;
    long spillsize = (long) (conf.getFloat("io.sort.mb", 200) * spillpct) * 1024 * 1024;
    log.info("Spill size for splitting is: " + spillsize + "b");

    Map<String, Bounds> lookup = new HashMap<>();

    final String adhocname = conf.get(IngestImageDriver.INGEST_BOUNDS_LOCATION, null);
    if (adhocname != null) {
        AdHocDataProvider dp = DataProviderFactory.getAdHocDataProvider(adhocname,
                DataProviderFactory.AccessMode.READ, conf);
        InputStream is = dp.get(IngestImageDriver.INGEST_BOUNDS_FILE);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));

        String line;
        while ((line = reader.readLine()) != null) {
            String[] data = line.split("\\|");
            if (data.length == 2) {
                lookup.put(data[0], Bounds.fromDelimitedString(data[1]));
            }
        }
        is.close();
    }
    //log.info("Creating splits for: " + output.toString());
    for (final Path input : inputs) {
        final FileSystem fs = HadoopFileUtils.getFileSystem(conf, input);
        LongRectangle bounds = null;

        if (lookup.containsKey(input.toString())) {
            Bounds b = lookup.get(input.toString());
            bounds = TMSUtils.boundsToTile(b.getTMSBounds(), zoomlevel, tilesize).toLongRectangle();
        } else {
            log.info("  reading: " + input.toString());
            log.info("    zoomlevel: " + zoomlevel);

            final AbstractGridCoverage2DReader reader = GeotoolsRasterUtils.openImage(input.toString());

            if (reader != null) {
                try {
                    bounds = GeotoolsRasterUtils.calculateTiles(reader, tilesize, zoomlevel);
                } finally {
                    try {
                        GeotoolsRasterUtils.closeStreamFromReader(reader);
                    } catch (Exception e) {
                        e.printStackTrace();
                        throw new IOException(e);
                    }
                }
            }
        }

        if (bounds != null) {
            final long minTx = bounds.getMinX();
            final long maxTx = bounds.getMaxX();
            final long minTy = bounds.getMinY();
            final long maxTy = bounds.getMaxY();

            final long width = bounds.getWidth();
            final long height = bounds.getHeight();

            final long totaltiles = width * height;

            final FileStatus status = fs.getFileStatus(input);

            // for now, we'll just use the 1st block location for the split.
            // we can get more sophisticated later...
            final BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, 0);

            String location = null;
            if (blocks.length > 0) {
                final String hosts[] = blocks[0].getHosts();
                if (hosts.length > 0) {
                    location = hosts[0];
                }
            }

            // long filelen = status.getLen();
            final long totalbytes = totaltiles * tilebytes;

            // if uncompressed tile sizes are greater than the spillsize, break it
            // into pieces
            if (totalbytes > spillsize) {
                final long numsplits = (totalbytes / spillsize) + 1;

                final long splitrange = (totaltiles / numsplits);
                long leftovers = totaltiles - (numsplits * splitrange);

                long start = 0;
                long end = 0;

                for (int i = 0; i < numsplits; i++) {
                    end = start + splitrange;
                    if (leftovers > 0) {
                        end++;
                        leftovers--;
                    }

                    final long sy = (start / width);
                    final long sx = (start - (sy * width));

                    // since the tile range is inclusive, calculate with end-1
                    final long ey = ((end - 1) / width);
                    final long ex = ((end - 1) - (ey * width));

                    // System.out.println("start: " + start + " end: " + end);
                    // System.out.println("  sx: " + sx + " sy: " + sy);
                    // System.out.println("  ex: " + ex + " ey: " + ey);
                    splits.add(new IngestImageSplit(input.toString(), minTx + sx, minTx + ex, minTy + sy,
                            minTy + ey, (end - start), bounds, zoomlevel, tilesize, location));

                    start = end;
                }
            } else {
                splits.add(new IngestImageSplit(input.toString(), minTx, maxTx, minTy, maxTy,
                        (maxTx + 1 - minTx) * (maxTy + 1 - minTy), bounds, zoomlevel, tilesize, location));
            }
        }
    }

    return splits;
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

public void setrep(long secondsToWait, boolean recursive, short replication, String... uris) {
    Assert.isTrue(replication >= 1, "Replication must be >=1");

    List<Path> waitList = (secondsToWait >= 0 ? new ArrayList<Path>() : null);

    try {
        for (String uri : uris) {
            Path srcPath = new Path(uri);
            FileSystem srcFs = getFS(srcPath);
            Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
            for (Path src : srcs) {
                setrep(replication, recursive, srcFs, src, waitList);
            }
        }

        if (waitList != null) {
            boolean waitUntilDone = (secondsToWait == 0);
            long timeLeft = TimeUnit.SECONDS.toMillis(secondsToWait);

            for (Path path : waitList) {
                FileSystem srcFs = getFS(path);
                FileStatus status = srcFs.getFileStatus(path);
                long len = status.getLen();

                boolean done = false;

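                // poll block locations until every block reports the requested number of replica hosts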
                while (!done) {
                    BlockLocation[] locations = srcFs.getFileBlockLocations(status, 0, len);
                    int i = 0;
                    for (; i < locations.length && locations[i].getHosts().length == replication; i++) {
                    }
                    done = (i == locations.length);

                    if (!done && (waitUntilDone || timeLeft > 5000)) {
                        try {
                            // sleep for 10s
                            Thread.sleep(10000);
                        } catch (InterruptedException e) {
                            return;
                        }
                        timeLeft -= 10000;
                    }
                }
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot set replication " + ex.getMessage(), ex);
    }
}

From source file:org.springframework.data.hadoop.store.split.AbstractSplitterTests.java

License:Apache License

protected static Path mockWithFileSystem(int blockCount, long blockSize, long extraBlockSize) throws Exception {
    final ArrayList<BlockLocation> blocks = new ArrayList<BlockLocation>();
    long offset = 0;
    int i = 0;
    for (; i < blockCount; i++) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                blockSize));
        offset += blockSize;
    }

    // extra just means that we add a non full last block
    if (extraBlockSize > 0 && extraBlockSize < blockSize) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                extraBlockSize));
        offset += extraBlockSize;
    }

    FileStatus mStatus = mock(FileStatus.class);
    Path mPath = mock(Path.class);
    FileSystem mFs = mock(FileSystem.class);
    when(mStatus.getLen()).thenReturn(offset);
    when(mStatus.getBlockSize()).thenReturn(blockSize);
    when(mFs.getFileStatus(mPath)).thenReturn(mStatus);

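    // stub getFileBlockLocations to answer from the synthetic block list, using the requested offset and length arguments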
    when(mFs.getFileBlockLocations((FileStatus) any(), anyLong(), anyLong()))
            .thenAnswer(new Answer<BlockLocation[]>() {

                @Override
                public BlockLocation[] answer(InvocationOnMock invocation) throws Throwable {
                    Object[] arguments = invocation.getArguments();
                    return findBlocks(blocks, (Long) arguments[1], (Long) arguments[2]);
                }
            });

    when(mPath.getFileSystem((Configuration) any())).thenReturn(mFs);
    return mPath;
}

From source file:org.springframework.data.hadoop.store.split.SlopBlockSplitter.java

License:Apache License

@Override
public List<Split> getSplits(Path path) throws IOException {
    List<Split> splits = new ArrayList<Split>();

    FileSystem fs = path.getFileSystem(getConfiguration());
    FileStatus status = fs.getFileStatus(path);

    long length = status.getLen();
    BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, length);

    long blockSize = status.getBlockSize();
    long splitSize = computeSplitSize(blockSize, getMinSplitSize(), getMaxSplitSize());

    long remaining = length;
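    // emit full splitSize splits while the remainder is more than 'slop' times the split size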
    while (((double) remaining) / splitSize > slop) {
        int i = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, splitSize, blocks[i].getHosts()));
        remaining -= splitSize;
    }

    if (remaining != 0) {
        int blkIndex = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, remaining, blocks[blkIndex].getHosts()));
    }

    return splits;
}

From source file:org.springframework.yarn.batch.partition.HdfsSplitBatchPartitionHandler.java

License:Apache License

@Override
protected Map<StepExecution, ContainerRequestHint> createResourceRequestData(Set<StepExecution> stepExecutions)
        throws Exception {
    Map<StepExecution, ContainerRequestHint> requests = new HashMap<StepExecution, ContainerRequestHint>();

    for (StepExecution execution : stepExecutions) {
        String fileName = execution.getExecutionContext().getString("fileName");
        long splitStart = execution.getExecutionContext().getLong("splitStart");
        long splitLength = execution.getExecutionContext().getLong("splitLength");

        log.debug("Creating request data for stepExecution=" + execution + " with fileName=" + fileName
                + " splitStart=" + splitStart + " splitLength=" + splitLength);

        FileSystem fs = FileSystem.get(configuration);
        Path path = new Path(execution.getExecutionContext().getString("fileName"));

        HashSet<String> hostsSet = new HashSet<String>();

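        // collect the hosts that hold blocks of this split range; they are used below as container locality hints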
        BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(path, splitStart, splitLength);
        for (BlockLocation blockLocation : fileBlockLocations) {
            for (String host : blockLocation.getHosts()) {
                hostsSet.add(host);
            }
            log.debug("block: " + blockLocation + " topologypaths="
                    + StringUtils.arrayToCommaDelimitedString(blockLocation.getTopologyPaths()));
        }

        String[] hosts = hostsSet.toArray(new String[0]);
        String[] racks = new String[0];
        // hints only for hosts
        requests.put(execution, new ContainerRequestHint(execution, null, hosts, racks, null));
    }

    return requests;
}