Example usage for org.apache.hadoop.fs FileStatus getBlockSize

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getBlockSize().

Prototype

public long getBlockSize() 

Document

Get the block size of the file.
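
A minimal sketch of calling getBlockSize(), assuming a default configuration and a hypothetical path that already exists on the file system:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockSizeExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical setup: default configuration and an example path.
        FileSystem fs = FileSystem.get(new Configuration());
        Path path = new Path("/user/example/data.txt");

        // getBlockSize() reports the block size (in bytes) recorded for this file.
        FileStatus status = fs.getFileStatus(path);
        System.out.println("Block size: " + status.getBlockSize() + " bytes");
    }
}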

Usage

From source file:org.opencloudengine.garuda.model.HdfsFileInfo.java

License:Open Source License

public HdfsFileInfo(FileStatus fileStatus, ContentSummary contentSummary) {
    this.fullyQualifiedPath = fileStatus.getPath().toUri().getPath();
    this.filename = isEmpty(getFilename(fullyQualifiedPath)) ? getDirectoryName(fullyQualifiedPath)
            : getFilename(fullyQualifiedPath);
    this.length = fileStatus.getLen();
    this.path = getPath(fullyQualifiedPath);
    this.directory = fileStatus.isDirectory();
    this.file = !fileStatus.isDirectory();
    this.owner = fileStatus.getOwner();
    this.group = fileStatus.getGroup();
    this.blockSize = fileStatus.getBlockSize();
    this.replication = fileStatus.getReplication();
    this.modificationTime = fileStatus.getModificationTime();
    if (contentSummary != null) {
        this.spaceConsumed = contentSummary.getSpaceConsumed();
        this.quota = contentSummary.getQuota();
        this.spaceQuota = contentSummary.getSpaceQuota();
        this.directoryCount = contentSummary.getDirectoryCount();
        this.fileCount = contentSummary.getFileCount();
    }
    this.accessTime = fileStatus.getAccessTime();
    this.permission = fileStatus.getPermission().toString();
}
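
A hedged usage sketch for this constructor, assuming fs is an already initialized FileSystem and using a hypothetical path; the getter in the last line is an assumption, not taken from the class:

    FileSystem fs = FileSystem.get(new Configuration());
    Path target = new Path("/user/example/data.txt");          // hypothetical path
    FileStatus status = fs.getFileStatus(target);
    ContentSummary summary = fs.getContentSummary(target);
    HdfsFileInfo info = new HdfsFileInfo(status, summary);
    System.out.println(info.getBlockSize());                    // assumed getter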

From source file:org.openflamingo.fs.hdfs.HdfsFileInfo.java

License:Apache License

/**
 * Constructs HdfsFileInfo from an HDFS FileStatus.
 *
 * @param fileStatus HDFS File Status
 */
public HdfsFileInfo(FileStatus fileStatus) {
    this.fullyQualifiedPath = fileStatus.getPath().toUri().getPath();
    this.filename = StringUtils.isEmpty(FileUtils.getFilename(fullyQualifiedPath))
            ? FileUtils.getDirectoryName(fullyQualifiedPath)
            : FileUtils.getFilename(fullyQualifiedPath);
    this.length = fileStatus.getLen();
    this.path = FileUtils.getPath(fullyQualifiedPath);
    this.directory = fileStatus.isDir();
    this.file = !fileStatus.isDir();
    this.owner = fileStatus.getOwner();
    this.group = fileStatus.getGroup();
    this.blockSize = fileStatus.getBlockSize();
    this.replication = fileStatus.getReplication();
    this.modificationTime = fileStatus.getModificationTime();
    this.accessTime = fileStatus.getAccessTime();
    this.setText(this.filename);
    this.setLeaf(file ? true : false);
    this.setCls(directory ? "folder" : "file");
    this.setId(fullyQualifiedPath);
    this.permission = fileStatus.getPermission().toString();
}

From source file:org.openflamingo.fs.hdfs.HdfsFileSystemProvider.java

License:Apache License

@Override
public FileInfo getFileInfo(String path) {
    try {
        FileStatus fileStatus = fs.getFileStatus(new Path(path));
        HdfsFileInfo hdfsFileInfo = new HdfsFileInfo(fileStatus);

        ContentSummary summary = fs.getContentSummary(new Path(path));
        hdfsFileInfo.setBlockSize(fileStatus.getBlockSize());
        hdfsFileInfo.setReplication(fileStatus.getReplication());
        hdfsFileInfo.setDirectoryCount(summary.getDirectoryCount());
        hdfsFileInfo.setFileCount(summary.getFileCount());
        hdfsFileInfo.setQuota(summary.getQuota());
        hdfsFileInfo.setSpaceQuota(summary.getSpaceQuota());
        hdfsFileInfo.setSpaceConsumed(StringUtils.byteDesc(summary.getSpaceConsumed()));
        hdfsFileInfo.setLength(summary.getLength());

        return hdfsFileInfo;
    } catch (Exception ex) {
        throw new FileSystemException(bundle.message("S_FS", "CANNOT_GET_FILE_INFO", path), ex);
    }
}

From source file:org.smartfrog.services.hadoop.operations.dfs.DfsPathExistsImpl.java

License:Open Source License

/**
 * Check that a path exists.
 *
 * @throws SmartFrogLivenessException if it does not, or it is the wrong type/size
 */
private void checkPathExists() throws SmartFrogLivenessException {
    String filename = getPathName() + " in " + dfs.toString();
    try {
        if (!doesPathExist()) {
            throw new SmartFrogLivenessException("Missing path " + filename);
        }
        FileStatus status = dfs.getFileStatus(getPath());
        if (verbose) {
            sfLog().info("Path " + getPath() + " size " + status.getLen() + " last modified:"
                    + status.getModificationTime());
        }
        if (status.isDir()) {
            //it is a directory. Run the directory checks

            FileStatus[] statuses = dfs.listStatus(getPath());
            if (statuses == null) {
                throw new SmartFrogLivenessException("Unable to list the status of " + filename);
            }
            int fileCount = statuses.length;
            StringBuilder filenames = new StringBuilder();

            long totalFileSize = 0;
            for (FileStatus fstat : statuses) {
                totalFileSize += fstat.getLen();
                filenames.append(fstat.getPath() + "\t").append('\t').append(fstat.getBlockSize()).append("\n");
                filenames.append('\n');
                if (verbose) {
                    sfLog().info(fstat.getPath() + "\t" + fstat.getBlockSize() + "\n");
                }
            }

            if (!canBeDir) {
                throw new SmartFrogLivenessException("Expected a file, got a directory: " + filename
                        + " containing " + fileCount + " file(s):\n" + filenames);
            }
            if (fileCount < minFileCount) {
                throw new SmartFrogLivenessException("Not enough files under " + filename + " required "
                        + minFileCount + " found " + fileCount + " :\n" + filenames);
            }
            if (maxFileCount >= 0 && fileCount > maxFileCount) {
                throw new SmartFrogLivenessException("Too many files under " + filename + " maximum "
                        + maxFileCount + " found " + fileCount + " :\n" + filenames);
            }
            if (totalFileSize < minTotalFileSize) {
                throw new SmartFrogLivenessException("not enough file content " + filename + " required "
                        + minTotalFileSize + " found " + totalFileSize + " :\n" + filenames);
            }
            if (maxTotalFileSize >= 0 && totalFileSize > maxTotalFileSize) {
                throw new SmartFrogLivenessException("too much enough file content " + filename + " maximum "
                        + minTotalFileSize + " found " + totalFileSize + " :\n" + filenames);
            }
        } else {
            if (!canBeFile) {
                throw new SmartFrogLivenessException("Not allowed to be a file: " + filename);
            }
            long size = status.getLen();
            if (size < minFileSize) {
                throw new SmartFrogLivenessException("File " + filename + " is too small at " + size
                        + " bytes for the minimum size " + minFileSize);
            }
            if (maxFileSize >= 0 && size > maxFileSize) {
                throw new SmartFrogLivenessException("File " + filename + " is too big at " + size
                        + " bytes for the maximum size " + maxFileSize);
            }
            short replication = status.getReplication();
            if (replication < minReplication) {
                throw new SmartFrogLivenessException("File  " + filename + " has a replication factor of"
                        + replication + " which is less than the minimum value of " + minReplication);
            }
            if (maxReplication >= 0 && replication > maxReplication) {
                throw new SmartFrogLivenessException("File  " + filename + " has a replication factor of"
                        + replication + " which is less than the maximum value of " + maxReplication);
            }
        }
    } catch (IOException e) {
        throw new SmartFrogLivenessException("Missing path " + filename, e);
    }
}

From source file:org.springframework.data.hadoop.store.split.AbstractSplitterTests.java

License:Apache License

protected static Path mockWithFileSystem(int blockCount, long blockSize, long extraBlockSize) throws Exception {
    final ArrayList<BlockLocation> blocks = new ArrayList<BlockLocation>();
    long offset = 0;
    int i = 0;
    for (; i < blockCount; i++) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                blockSize));
        offset += blockSize;
    }

    // extra just means that we add a non full last block
    if (extraBlockSize > 0 && extraBlockSize < blockSize) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                extraBlockSize));
        offset += extraBlockSize;
    }

    FileStatus mStatus = mock(FileStatus.class);
    Path mPath = mock(Path.class);
    FileSystem mFs = mock(FileSystem.class);
    when(mStatus.getLen()).thenReturn(offset);
    when(mStatus.getBlockSize()).thenReturn(blockSize);
    when(mFs.getFileStatus(mPath)).thenReturn(mStatus);

    when(mFs.getFileBlockLocations((FileStatus) any(), anyLong(), anyLong()))
            .thenAnswer(new Answer<BlockLocation[]>() {

                @Override
                public BlockLocation[] answer(InvocationOnMock invocation) throws Throwable {
                    Object[] arguments = invocation.getArguments();
                    return findBlocks(blocks, (Long) arguments[1], (Long) arguments[2]);
                }
            });

    when(mPath.getFileSystem((Configuration) any())).thenReturn(mFs);
    return mPath;
}

From source file:org.springframework.data.hadoop.store.split.SlopBlockSplitter.java

License:Apache License

@Override
public List<Split> getSplits(Path path) throws IOException {
    List<Split> splits = new ArrayList<Split>();

    FileSystem fs = path.getFileSystem(getConfiguration());
    FileStatus status = fs.getFileStatus(path);

    long length = status.getLen();
    BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, length);

    long blockSize = status.getBlockSize();
    long splitSize = computeSplitSize(blockSize, getMinSplitSize(), getMaxSplitSize());

    long remaining = length;
    while (((double) remaining) / splitSize > slop) {
        int i = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, splitSize, blocks[i].getHosts()));
        remaining -= splitSize;
    }

    if (remaining != 0) {
        int blkIndex = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, remaining, blocks[blkIndex].getHosts()));
    }

    return splits;
}
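
The computeSplitSize helper is not shown above; a sketch of the conventional Hadoop formula (an assumption about this class, not copied from its source) clamps the block size between the configured minimum and maximum split sizes:

    // Typical split-size computation: clamp blockSize between minSize and maxSize.
    protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
        return Math.max(minSize, Math.min(maxSize, blockSize));
    }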

From source file:org.terrier.structures.indexing.singlepass.hadoop.BitPostingIndexInputFormat.java

License:Mozilla Public License

/** Returns the block size of the specified file. Overriding this is only recommended for testing. */
protected long getBlockSize(Path path, FileStatus fss) {
    return fss.getBlockSize();
}

From source file:org.terrier.structures.indexing.singlepass.hadoop.MultiFileCollectionInputFormat.java

License:Mozilla Public License

@SuppressWarnings("unchecked")
@Override
/**
 * Splits the input collection into sets of files, where each map task
 * gets about the same number of files.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {

    Path[] paths = FileInputFormat.getInputPaths(job);
    // HADOOP-1818: Manage splits only if there are paths
    if (paths.length == 0) {
        return new InputSplit[0];
    }

    if (numSplits > paths.length) {
        numSplits = paths.length;
    } else if (numSplits < 1) {
        numSplits = 1;
    }
    logger.info("Allocating " + paths.length + " files across " + numSplits + " map tasks");
    List<PositionAwareSplit<CombineFileSplit>> splits = new ArrayList<PositionAwareSplit<CombineFileSplit>>(
            numSplits);
    final int numPaths = paths.length;
    long[] lengths = new long[numPaths];
    TObjectLongHashMap<String>[] locations = (TObjectLongHashMap<String>[]) Array
            .newInstance(TObjectLongHashMap.class, numPaths);
    final FileSystem fs = FileSystem.get(job);
    for (int i = 0; i < paths.length; i++) {
        final FileStatus fss = fs.getFileStatus(paths[i]);
        lengths[i] = fss.getLen();
        final TObjectLongHashMap<String> location2size = locations[i] = new TObjectLongHashMap<String>();
        final long normalblocksize = fss.getBlockSize();
        for (long offset = 0; offset < lengths[i]; offset += normalblocksize) {
            final long blocksize = Math.min(offset + normalblocksize, lengths[i]);
            final BlockLocation[] blockLocations = fs.getFileBlockLocations(fss, offset, blocksize);
            for (BlockLocation bl : blockLocations) {
                for (String host : bl.getHosts()) {
                    location2size.adjustOrPutValue(host, blocksize, blocksize);
                }
            }
        }
    }

    //we need to over-estimate using ceil, to ensure that the last split is not /too/ big
    final int numberOfFilesPerSplit = (int) Math.ceil((double) paths.length / (double) numSplits);

    int pathsUsed = 0;
    int splitnum = 0;
    CombineFileSplit mfs;
    // for each split except the last one (which may be smaller than numberOfFilesPerSplit)
    while (pathsUsed < numPaths) {
        /* calculate split size for this task - usually numberOfFilesPerSplit, but
         * less than this for the last split */
        final int splitSizeForThisSplit = numberOfFilesPerSplit + pathsUsed > numPaths ? numPaths - pathsUsed
                : numberOfFilesPerSplit;
        //arrays of information for split
        Path[] splitPaths = new Path[splitSizeForThisSplit];
        long[] splitLengths = new long[splitSizeForThisSplit];
        long[] splitStarts = new long[splitSizeForThisSplit];
        final TObjectLongHashMap<String> allLocationsForSplit = new TObjectLongHashMap<String>();
        String[] splitLocations = null; //final recommended locations for this split.
        for (int i = 0; i < splitSizeForThisSplit; i++) {
            locations[pathsUsed + i].forEachEntry(new TObjectLongProcedure<String>() {
                public boolean execute(String a, long b) {
                    allLocationsForSplit.adjustOrPutValue(a, b, b);
                    return true;
                }
            });
            if (allLocationsForSplit.size() <= 3) {
                splitLocations = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]);
            } else {
                String[] hosts = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]);
                Arrays.sort(hosts, new Comparator<String>() {
                    public int compare(String o1, String o2) {
                        long diffamount = allLocationsForSplit.get(o1) - allLocationsForSplit.get(o2);
                        if (diffamount > 0) {
                            return -1;
                        } else if (diffamount < 0) {
                            return 1;
                        }
                        return 0;
                    }
                });
                splitLocations = new String[3];
                System.arraycopy(hosts, 0, splitLocations, 0, 3);
            }
        }

        //copy information for this split
        System.arraycopy(lengths, pathsUsed, splitLengths, 0, splitSizeForThisSplit);
        System.arraycopy(paths, pathsUsed, splitPaths, 0, splitSizeForThisSplit);
        //count the number of paths consumed
        pathsUsed += splitSizeForThisSplit;

        //make the actual split object
        //logger.info("New split of size " + splitSizeForThisSplit);
        mfs = new CombineFileSplit(job, splitPaths, splitStarts, splitLengths, splitLocations);
        splits.add(new PositionAwareSplit<CombineFileSplit>(mfs, splitnum));
        splitnum++;
    }

    if (!(pathsUsed == paths.length)) {
        throw new IOException("Number of used paths does not equal total available paths!");
    }
    return splits.toArray(new PositionAwareSplit[splits.size()]);
}

From source file:org.terrier.structures.indexing.singlepass.hadoop.TestBitPostingIndexInputFormat.java

License:Mozilla Public License

protected BitPostingIndexInputFormat makeInputFormat(JobConf jc, Index index, final long blockSize)
        throws Exception {
    BitPostingIndexInputFormat bpiif;
    if (blockSize == 0) {
        bpiif = new BitPostingIndexInputFormat();
    } else {
        bpiif = new BitPostingIndexInputFormat() {
            @Override
            protected long getBlockSize(Path path, FileStatus fss) {
                System.err.println("Forcing blocksize of file " + path + " (size=" + fss.getLen()
                        + " actualBlocksize=" + fss.getBlockSize() + ") to " + blockSize + " bytes");
                return blockSize;
            }
        };
    }
    BitPostingIndexInputFormat.setStructures(jc, "direct", "document");
    HadoopUtility.toHConfiguration(index, jc);
    return bpiif;
}

From source file:org.wikimedia.wikihadoop.StreamWikiDumpInputFormat.java

License:Apache License

/** 
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    LOG.info("StreamWikiDumpInputFormat.getSplits job=" + job + " n=" + numSplits);
    InputSplit[] oldSplits = super.getSplits(job, numSplits);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    FileStatus[] files = listStatus(job);
    // Save the number of input files for metrics/loadgen
    job.setLong(NUM_INPUT_FILES, files.length);
    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    long minSize = job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1);
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    for (FileStatus file : files) {
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        long blockSize = file.getBlockSize();
        long splitSize = computeSplitSize(goalSize, minSize, blockSize);
        LOG.info(String.format("goalsize=%d splitsize=%d blocksize=%d", goalSize, splitSize, blockSize));
        //System.err.println(String.format("goalsize=%d splitsize=%d blocksize=%d", goalSize, splitSize, blockSize));
        for (InputSplit x : getSplits(job, file, pageBeginPattern, splitSize))
            splits.add(x);
    }
    System.err.println("splits=" + splits);
    return splits.toArray(new InputSplit[splits.size()]);
}
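
The older org.apache.hadoop.mapred API used above passes a goal size rather than a maximum to computeSplitSize; a sketch of the conventional formula (an assumption, not copied from this class) is:

    // Old mapred-style split size: never below minSize, and never above the goal
    // size unless the block itself is smaller.
    protected long computeSplitSize(long goalSize, long minSize, long blockSize) {
        return Math.max(minSize, Math.min(goalSize, blockSize));
    }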