Usage examples for org.apache.hadoop.fs.FileStatus#getBlockSize()
Method signature: public long getBlockSize()
From source file:com.google.mr4c.sources.URIDataFileSource.java
License:Open Source License
/**
 * Looks up the file behind this source's URI and returns the block
 * locations reported by its filesystem.
 *
 * @return block locations for the byte range [0, blockSize) of the file
 * @throws IOException if the filesystem or file status cannot be resolved
 */
@Override
public BlockLocation[] getBlockLocation() throws IOException {
    final URI cleanUri = ContentFactories.scrubURI(m_uri);
    final FileSystem fileSystem = FileSystem.get(cleanUri, s_config);
    final Path filePath = new Path(cleanUri);
    final FileStatus fileStatus = fileSystem.getFileStatus(filePath);
    // NOTE(review): the queried range is one block size, not getLen(), so
    // for multi-block files only the first block's locations are returned —
    // confirm this is intentional.
    return fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getBlockSize());
}
From source file:com.ibm.jaql.lang.expr.system.LsFn.java
License:Apache License
/**
 * Expands the glob argument against its filesystem and streams one JSON
 * record per matching file with ls-style metadata: access/modification
 * time, length, block size, replication, path, owner, group, permission.
 */
@Override
public JsonIterator iter(final Context context) throws Exception {
    JsonString glob = (JsonString) exprs[0].eval(context);
    // Configuration conf = context.getConfiguration();
    Configuration conf = new Configuration(); // TODO: get from context, incl options
    //URI uri;
    //FileSystem fs = FileSystem.get(uri, conf);
    Path inpath = new Path(glob.toString());
    FileSystem fs = inpath.getFileSystem(conf);
    //final FileStatus[] stats = fs.listStatus(path, filter);
    final FileStatus[] stats = fs.globStatus(inpath);
    // globStatus may return null when nothing matches; treat both null and
    // an empty array as "no results", not an error.
    if (stats == null || stats.length == 0) {
        return JsonIterator.EMPTY;
    }
    // One mutable holder per output field. They are reused for every row:
    // the iterator below overwrites them in place instead of allocating
    // fresh values per file.
    final MutableJsonDate accessTime = new MutableJsonDate();
    final MutableJsonDate modifyTime = new MutableJsonDate();
    final MutableJsonLong length = new MutableJsonLong();
    final MutableJsonLong blockSize = new MutableJsonLong();
    final MutableJsonLong replication = new MutableJsonLong();
    final MutableJsonString path = new MutableJsonString();
    final MutableJsonString owner = new MutableJsonString();
    final MutableJsonString group = new MutableJsonString();
    final MutableJsonString permission = new MutableJsonString();
    // Order must match LsField.names below.
    final JsonValue[] values = new JsonValue[] { accessTime, modifyTime, length, blockSize, replication,
            path, owner, group, permission };
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    rec.set(LsField.names, values, values.length, false);

    return new JsonIterator(rec) {
        int i = 0; // index of the next FileStatus to emit

        @Override
        public boolean moveNext() throws Exception {
            if (i >= stats.length) {
                return false;
            }
            FileStatus stat = stats[i++];

            // fs.getUri().toString();
            long x = HadoopShim.getAccessTime(stat);
            // Access time can be unsupported/unset (<= 0); report null then.
            if (x <= 0) {
                values[LsField.ACCESS_TIME.ordinal()] = null;
            } else {
                accessTime.set(x);
                values[LsField.ACCESS_TIME.ordinal()] = accessTime;
            }
            modifyTime.set(stat.getModificationTime());
            length.set(stat.getLen());
            blockSize.set(stat.getBlockSize());
            replication.set(stat.getReplication());
            path.setCopy(stat.getPath().toString());
            owner.setCopy(stat.getOwner());
            group.setCopy(stat.getGroup());
            permission.setCopy(stat.getPermission().toString());
            return true;
        }
    };
}
From source file:com.ibm.stocator.fs.swift2d.systemtests.TestSwiftFileSystemBlocksize.java
License:Apache License
@Test(timeout = SwiftTestConstants.SWIFT_TEST_TIMEOUT) public void testBlocksizeNonZeroForFile() throws Throwable { Path smallfile = new Path(getBaseURI() + "/test/smallfile"); SwiftTestUtils.writeTextFile(sFileSystem, smallfile, "blocksize", true); createFile(smallfile);//ww w .j a v a2s.co m FileStatus status = getFs().getFileStatus(smallfile); assertTrue("Zero blocksize in " + status, status.getBlockSize() != 0L); assertTrue("Zero replication in " + status, status.getReplication() != 0L); }
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job the job context
 * @throws IOException if listing the input files or their block
 *         locations fails
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Stopwatch sw = Stopwatch.createStarted();
    // Effective split-size bounds: the format's own floor vs. the
    // job-configured minimum, and the job-configured maximum.
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            BlockLocation[] blkLocations;
            // LocatedFileStatus already carries block locations; otherwise
            // they must be fetched from the file's filesystem.
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                FileSystem fs = path.getFileSystem(job.getConfiguration());
                blkLocations = fs.getFileBlockLocations(file, 0, length);
            }
            if (isSplitable(job, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                // Carve off full-size splits while the remainder exceeds
                // SPLIT_SLOP times the split size; each split is placed on
                // the hosts of the block containing its start offset.
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                    bytesRemaining -= splitSize;
                }

                // Tail split for whatever is left over.
                if (bytesRemaining != 0) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                }
            } else { // not splitable
                // Whole file as a single split, placed on the first block's hosts.
                splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(),
                        blkLocations[0].getCachedHosts()));
            }
        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits generated by getSplits: " + splits.size() + ", TimeTaken: "
                + sw.elapsed(TimeUnit.MILLISECONDS));
    }
    return splits;
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
/**
 * Decides whether an existing target file must be re-copied.
 *
 * Only applies when sync-folders mode is on. A re-copy is required when
 * the lengths differ, when the checksums differ (unless CRC checking is
 * skipped), or when block sizes differ while BLOCKSIZE preservation is
 * requested.
 *
 * @throws IOException if the target's status or checksum cannot be read
 */
private boolean mustUpdate(FileSystem sourceFS, FileStatus source, Path target) throws IOException {
    final FileStatus onTarget = targetFS.getFileStatus(target);
    if (!syncFolders) {
        return false;
    }
    if (onTarget.getLen() != source.getLen()) {
        return true;
    }
    // Checksum comparison is the expensive check; it is skipped entirely
    // when skipCrc is set (same short-circuit order as before).
    if (!skipCrc && !DistCpUtils.checksumsAreEqual(sourceFS, source.getPath(), targetFS, target)) {
        return true;
    }
    return source.getBlockSize() != onTarget.getBlockSize()
            && preserve.contains(FileAttribute.BLOCKSIZE);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java
License:Apache License
/**
 * Picks the block size to use when writing the copy target: the source
 * file's block size when BLOCKSIZE preservation is requested, otherwise
 * the target filesystem's default.
 */
private static long getBlockSize(EnumSet<FileAttribute> fileAttributes, FileStatus sourceFile,
        FileSystem targetFS) {
    if (fileAttributes.contains(FileAttribute.BLOCKSIZE)) {
        return sourceFile.getBlockSize();
    }
    return targetFS.getDefaultBlockSize();
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
private static void touchFile(String path) throws Exception { FileSystem fs;//from ww w. ja v a 2 s. c om DataOutputStream outputStream = null; GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfiguration()); Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec); OutputStream compressedOut = null; try { fs = cluster.getFileSystem(); final Path qualifiedPath = new Path(path).makeQualified(fs); final long blockSize = fs.getDefaultBlockSize() * 2; outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize); compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor); Message msg = new Message("generating test data".getBytes()); AuditUtil.attachHeaders(msg, currentTimestamp); byte[] encodeMsg = Base64.encodeBase64(msg.getData().array()); compressedOut.write(encodeMsg); compressedOut.write("\n".getBytes()); compressedOut.write(encodeMsg); compressedOut.write("\n".getBytes()); // Genearate a msg with different timestamp. Default window period is 60sec AuditUtil.attachHeaders(msg, nextMinuteTimeStamp); encodeMsg = Base64.encodeBase64(msg.getData().array()); compressedOut.write(encodeMsg); compressedOut.write("\n".getBytes()); compressedOut.flush(); compressedOut.close(); pathList.add(qualifiedPath); ++nFiles; FileStatus fileStatus = fs.getFileStatus(qualifiedPath); System.out.println(fileStatus.getBlockSize()); System.out.println(fileStatus.getReplication()); } finally { compressedOut.close(); IOUtils.cleanup(null, outputStream); CodecPool.returnCompressor(gzipCompressor); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
private void testPreserveBlockSizeAndReplicationImpl(boolean preserve) { try {/*from www . j av a2s.c o m*/ deleteState(); createSourceData(); FileSystem fs = cluster.getFileSystem(); CopyMapper copyMapper = new CopyMapper(); StatusReporter reporter = new StubStatusReporter(); InMemoryWriter writer = new InMemoryWriter(); Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter, writer); Configuration configuration = context.getConfiguration(); EnumSet<DistCpOptions.FileAttribute> fileAttributes = EnumSet.noneOf(DistCpOptions.FileAttribute.class); if (preserve) { fileAttributes.add(DistCpOptions.FileAttribute.BLOCKSIZE); fileAttributes.add(DistCpOptions.FileAttribute.REPLICATION); } configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(), DistCpUtils.packAttributes(fileAttributes)); copyMapper.setup(context); for (Path path : pathList) { final FileStatus fileStatus = fs.getFileStatus(path); copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), fileStatus, context); } // Check that the block-size/replication aren't preserved. for (Path path : pathList) { final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH)); final FileStatus source = fs.getFileStatus(path); final FileStatus target = fs.getFileStatus(targetPath); if (!source.isDir()) { Assert.assertTrue(preserve || source.getBlockSize() != target.getBlockSize()); Assert.assertTrue(preserve || source.getReplication() != target.getReplication()); Assert.assertTrue(!preserve || source.getBlockSize() == target.getBlockSize()); Assert.assertTrue(!preserve || source.getReplication() == target.getReplication()); } } } catch (Exception e) { Assert.assertTrue("Unexpected exception: " + e.getMessage(), false); e.printStackTrace(); } }
From source file:com.jeffy.hdfs.FileMetaData.java
License:Apache License
public static void showFileStatusForFile(String path) { Configuration config = new Configuration(); try {// w w w.ja v a 2s .com FileSystem fs = FileSystem.get(URI.create(path), config); FileStatus stat = fs.getFileStatus(new Path(path)); System.out.println("File URI: " + stat.getPath().toUri().getPath()); System.out.println("Is directory: " + stat.isDirectory()); System.out.println("File length: " + stat.getLen()); System.out.println("Modification Time: " + new Date(stat.getModificationTime())); System.out.println("File replications: " + stat.getReplication()); System.out.println("File Block Size: " + (stat.getBlockSize() >>> 10 >>> 10) + " MB"); System.out.println("File Owner: " + stat.getOwner()); System.out.println("File Group: " + stat.getGroup()); System.out.println("File Permission: " + stat.getPermission().toString()); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.marklogic.contentpump.CombineDocumentInputFormat.java
License:Apache License
/**
 * Builds combined splits: asks the superclass for per-file splits, then
 * packs consecutive non-empty FileSplits into CombineDocumentSplits until
 * a combined split reaches the computed split size.
 *
 * @param job the job context
 * @throws IOException if file status or filesystem lookups fail
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = super.getSplits(job);
    List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
    // Accumulator for the combined split currently being filled; null means
    // no split has been started yet.
    CombineDocumentSplit split = null;
    for (InputSplit file : splits) {
        Path path = ((FileSplit) file).getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FileStatus status = fs.getFileStatus(path);
        long length = status.getLen();
        long blockSize = status.getBlockSize();
        // NOTE(review): splitSize is recomputed per file from that file's
        // block size, so the packing threshold can vary across files.
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
        if (length != 0) {
            if (split == null) {
                split = new CombineDocumentSplit();
            }
            try {
                // Keep adding while the accumulator is below the threshold;
                // otherwise seal it and start a new one with this file.
                if (split.getLength() + length < splitSize || split.getLength() < minSize) {
                    split.addSplit((FileSplit) file);
                } else {
                    combinedSplits.add(split);
                    split = new CombineDocumentSplit();
                    split.addSplit((FileSplit) file);
                }
            } catch (InterruptedException e) {
                LOG.error(e);
                throw new RuntimeException(e);
            }
        }
    }
    // Flush the last partially-filled combined split.
    if (split != null) {
        combinedSplits.add(split);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits: " + splits.size());
        LOG.debug("Total # of combined splits: " + combinedSplits.size());
    }
    return combinedSplits;
}