Example usage for org.apache.hadoop.fs FileStatus getBlockSize

List of usage examples for org.apache.hadoop.fs FileStatus getBlockSize

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getBlockSize.

Prototype

public long getBlockSize() 

Document

Get the block size of the file.
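Below is a minimal, self-contained sketch of a call site (the path /tmp/example.txt and the fresh Configuration are placeholder assumptions, not part of the API):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // placeholder path
        // Resolve the file system that owns this path (local, HDFS, ...)
        FileSystem fs = path.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(path);
        // getBlockSize() reports the block size in bytes
        System.out.println("Block size: " + status.getBlockSize() + " bytes");
    }
}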

Usage

From source file:com.google.mr4c.sources.URIDataFileSource.java

License:Open Source License

@Override
public BlockLocation[] getBlockLocation() throws IOException {
    URI uri = ContentFactories.scrubURI(m_uri);
    FileSystem fs = FileSystem.get(uri, s_config);
    Path path = new Path(uri);
    FileStatus status = fs.getFileStatus(path);
    return fs.getFileBlockLocations(status, 0, status.getBlockSize());
}
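Note that the range passed to getFileBlockLocations above runs from 0 to getBlockSize(), so it covers only the first block of the file; a caller that wants the locations of every block would pass status.getLen() as the length instead.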

From source file:com.ibm.jaql.lang.expr.system.LsFn.java

License:Apache License

@Override
public JsonIterator iter(final Context context) throws Exception {
    JsonString glob = (JsonString) exprs[0].eval(context);
    // Configuration conf = context.getConfiguration();
    Configuration conf = new Configuration(); // TODO: get from context, incl options
    //URI uri;
    //FileSystem fs = FileSystem.get(uri, conf);
    Path inpath = new Path(glob.toString());
    FileSystem fs = inpath.getFileSystem(conf);
    //final FileStatus[] stats = fs.listStatus(path, filter);
    final FileStatus[] stats = fs.globStatus(inpath);

    if (stats == null || stats.length == 0) {
        return JsonIterator.EMPTY;
    }

    final MutableJsonDate accessTime = new MutableJsonDate();
    final MutableJsonDate modifyTime = new MutableJsonDate();
    final MutableJsonLong length = new MutableJsonLong();
    final MutableJsonLong blockSize = new MutableJsonLong();
    final MutableJsonLong replication = new MutableJsonLong();
    final MutableJsonString path = new MutableJsonString();
    final MutableJsonString owner = new MutableJsonString();
    final MutableJsonString group = new MutableJsonString();
    final MutableJsonString permission = new MutableJsonString();
    final JsonValue[] values = new JsonValue[] { accessTime, modifyTime, length, blockSize, replication, path,
            owner, group, permission };
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    rec.set(LsField.names, values, values.length, false);

    return new JsonIterator(rec) {
        int i = 0;

        @Override
        public boolean moveNext() throws Exception {
            if (i >= stats.length) {
                return false;
            }

            FileStatus stat = stats[i++];
            // fs.getUri().toString();
            long x = HadoopShim.getAccessTime(stat);
            if (x <= 0) {
                values[LsField.ACCESS_TIME.ordinal()] = null;
            } else {
                accessTime.set(x);
                values[LsField.ACCESS_TIME.ordinal()] = accessTime;
            }
            modifyTime.set(stat.getModificationTime());
            length.set(stat.getLen());
            blockSize.set(stat.getBlockSize());
            replication.set(stat.getReplication());
            path.setCopy(stat.getPath().toString());
            owner.setCopy(stat.getOwner());
            group.setCopy(stat.getGroup());
            permission.setCopy(stat.getPermission().toString());
            return true;
        }
    };
}
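A design point worth noting: the iterator reuses one set of mutable holders (accessTime, length, blockSize and the rest) for every row, so each moveNext() overwrites the previous file's values in place; a consumer that needs to keep a row must copy it before advancing.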

From source file:com.ibm.stocator.fs.swift2d.systemtests.TestSwiftFileSystemBlocksize.java

License:Apache License

@Test(timeout = SwiftTestConstants.SWIFT_TEST_TIMEOUT)
public void testBlocksizeNonZeroForFile() throws Throwable {
    Path smallfile = new Path(getBaseURI() + "/test/smallfile");
    SwiftTestUtils.writeTextFile(sFileSystem, smallfile, "blocksize", true);
    createFile(smallfile);
    FileStatus status = getFs().getFileStatus(smallfile);
    assertTrue("Zero blocksize in " + status, status.getBlockSize() != 0L);
    assertTrue("Zero replication in " + status, status.getReplication() != 0L);
}
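Object stores such as Swift have no native block concept, so the connector reports a synthetic, configured block size; the test pins this down as non-zero because a zero block size would break the split-size arithmetic that input formats perform (see getSplits below).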

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

/** 
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Stopwatch sw = Stopwatch.createStarted();
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                FileSystem fs = path.getFileSystem(job.getConfiguration());
                blkLocations = fs.getFileBlockLocations(file, 0, length);
            }
            if (isSplitable(job, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                }
            } else { // not splitable
                splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(),
                        blkLocations[0].getCachedHosts()));
            }
        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits generated by getSplits: " + splits.size() + ", TimeTaken: "
                + sw.elapsed(TimeUnit.MILLISECONDS));
    }
    return splits;
}
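SPLIT_SLOP follows the stock FileInputFormat convention (1.1 in Hadoop's implementation): full-size splits are cut only while the remainder exceeds splitSize by more than 10%, so the last split may grow to up to 1.1 times splitSize instead of leaving a tiny trailing split.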

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java

License:Apache License

private boolean mustUpdate(FileSystem sourceFS, FileStatus source, Path target) throws IOException {
    final FileStatus targetFileStatus = targetFS.getFileStatus(target);

    return syncFolders && (targetFileStatus.getLen() != source.getLen()
            || (!skipCrc && !DistCpUtils.checksumsAreEqual(sourceFS, source.getPath(), targetFS, target))
            || (source.getBlockSize() != targetFileStatus.getBlockSize()
                    && preserve.contains(FileAttribute.BLOCKSIZE)));
}
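The ordering of the checks exploits short-circuit evaluation: the cheap length comparison runs first, and the comparatively expensive checksum comparison is skipped entirely when skipCrc is set or the lengths already differ.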

From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java

License:Apache License

private static long getBlockSize(EnumSet<FileAttribute> fileAttributes, FileStatus sourceFile,
        FileSystem targetFS) {
    return fileAttributes.contains(FileAttribute.BLOCKSIZE) ? sourceFile.getBlockSize()
            : targetFS.getDefaultBlockSize();
}
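Note that the no-argument FileSystem.getDefaultBlockSize() is deprecated in later Hadoop releases in favor of getDefaultBlockSize(Path), which lets the file system pick a per-path default; newer code would pass the target path here.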

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

private static void touchFile(String path) throws Exception {
    FileSystem fs;
    DataOutputStream outputStream = null;
    GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfiguration());
    Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec);
    OutputStream compressedOut = null;
    try {
        fs = cluster.getFileSystem();
        final Path qualifiedPath = new Path(path).makeQualified(fs);
        final long blockSize = fs.getDefaultBlockSize() * 2;
        outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize);
        compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor);
        Message msg = new Message("generating test data".getBytes());
        AuditUtil.attachHeaders(msg, currentTimestamp);
        byte[] encodeMsg = Base64.encodeBase64(msg.getData().array());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        // Generate a msg with a different timestamp. Default window period is 60 sec
        AuditUtil.attachHeaders(msg, nextMinuteTimeStamp);
        encodeMsg = Base64.encodeBase64(msg.getData().array());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        compressedOut.flush();
        compressedOut.close();
        pathList.add(qualifiedPath);
        ++nFiles;

        FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
        System.out.println(fileStatus.getBlockSize());
        System.out.println(fileStatus.getReplication());
    } finally {
        // IOUtils.cleanup tolerates nulls, avoiding an NPE if create() failed before compressedOut was assigned
        IOUtils.cleanup(null, compressedOut, outputStream);
        CodecPool.returnCompressor(gzipCompressor);
    }
}
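The doubled block size and replication here appear deliberate: they guarantee the source files differ from the target file system's defaults, so the preserve/no-preserve assertions in the next test have something to detect.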

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

private void testPreserveBlockSizeAndReplicationImpl(boolean preserve) {
    try {

        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);

        Configuration configuration = context.getConfiguration();
        EnumSet<DistCpOptions.FileAttribute> fileAttributes = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
        if (preserve) {
            fileAttributes.add(DistCpOptions.FileAttribute.BLOCKSIZE);
            fileAttributes.add(DistCpOptions.FileAttribute.REPLICATION);
        }
        configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
                DistCpUtils.packAttributes(fileAttributes));

        copyMapper.setup(context);

        for (Path path : pathList) {
            final FileStatus fileStatus = fs.getFileStatus(path);
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), fileStatus,
                    context);
        }

        // Check whether block-size/replication were preserved, as dictated by the preserve flag.
        for (Path path : pathList) {
            final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
            final FileStatus source = fs.getFileStatus(path);
            final FileStatus target = fs.getFileStatus(targetPath);
            if (!source.isDir()) {
                Assert.assertTrue(preserve || source.getBlockSize() != target.getBlockSize());
                Assert.assertTrue(preserve || source.getReplication() != target.getReplication());
                Assert.assertTrue(!preserve || source.getBlockSize() == target.getBlockSize());
                Assert.assertTrue(!preserve || source.getReplication() == target.getReplication());
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}

From source file:com.jeffy.hdfs.FileMetaData.java

License:Apache License

public static void showFileStatusForFile(String path) {
    Configuration config = new Configuration();
    try {
        FileSystem fs = FileSystem.get(URI.create(path), config);
        FileStatus stat = fs.getFileStatus(new Path(path));
        System.out.println("File URI: " + stat.getPath().toUri().getPath());
        System.out.println("Is directory: " + stat.isDirectory());
        System.out.println("File length: " + stat.getLen());
        System.out.println("Modification Time: " + new Date(stat.getModificationTime()));
        System.out.println("File replications: " + stat.getReplication());
        System.out.println("File Block Size: " + (stat.getBlockSize() >>> 10 >>> 10) + " MB");
        System.out.println("File Owner: " + stat.getOwner());
        System.out.println("File Group: " + stat.getGroup());
        System.out.println("File Permission: " + stat.getPermission().toString());
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.marklogic.contentpump.CombineDocumentInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = super.getSplits(job);
    List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
    CombineDocumentSplit split = null;
    for (InputSplit file : splits) {
        Path path = ((FileSplit) file).getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FileStatus status = fs.getFileStatus(path);
        long length = status.getLen();
        long blockSize = status.getBlockSize();
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
        if (length != 0) {
            if (split == null) {
                split = new CombineDocumentSplit();
            }

            try {
                if (split.getLength() + length < splitSize || split.getLength() < minSize) {
                    split.addSplit((FileSplit) file);
                } else {
                    combinedSplits.add(split);
                    split = new CombineDocumentSplit();
                    split.addSplit((FileSplit) file);
                }
            } catch (InterruptedException e) {
                LOG.error(e);
                throw new RuntimeException(e);
            }
        }
    }
    if (split != null) {
        combinedSplits.add(split);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits: " + splits.size());
        LOG.debug("Total # of combined splits: " + combinedSplits.size());
    }

    return combinedSplits;
}
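The net effect is to pack small files into each CombineDocumentSplit until the accumulated length reaches the computed split size, so the number of map tasks scales with data volume rather than with file count.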