List of usage examples for org.apache.hadoop.fs.FileStatus#getBlockSize()
public long getBlockSize()
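getBlockSize() returns the block size, in bytes, with which the file is stored (HDFS typically reports 0 for directories). Before the source-file examples below, here is a minimal self-contained sketch of calling it; the path used is illustrative, not taken from any of the listed sources:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockSizeExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // "/tmp/example.txt" is an illustrative path
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        // Block size in bytes, e.g. 134217728 (128 MB) with the HDFS default
        System.out.println("block size = " + status.getBlockSize());
    }
}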
From source file:org.opencloudengine.garuda.model.HdfsFileInfo.java
License:Open Source License
public HdfsFileInfo(FileStatus fileStatus, ContentSummary contentSummary) {
    this.fullyQualifiedPath = fileStatus.getPath().toUri().getPath();
    this.filename = isEmpty(getFilename(fullyQualifiedPath)) ? getDirectoryName(fullyQualifiedPath)
            : getFilename(fullyQualifiedPath);
    this.length = fileStatus.getLen();
    this.path = getPath(fullyQualifiedPath);
    this.directory = fileStatus.isDirectory();
    this.file = !fileStatus.isDirectory();
    this.owner = fileStatus.getOwner();
    this.group = fileStatus.getGroup();
    this.blockSize = fileStatus.getBlockSize();
    this.replication = fileStatus.getReplication();
    this.modificationTime = fileStatus.getModificationTime();
    if (contentSummary != null) {
        this.spaceConsumed = contentSummary.getSpaceConsumed();
        this.quota = contentSummary.getQuota();
        this.spaceQuota = contentSummary.getSpaceQuota();
        this.directoryCount = contentSummary.getDirectoryCount();
        this.fileCount = contentSummary.getFileCount();
    }
    this.accessTime = fileStatus.getAccessTime();
    this.permission = fileStatus.getPermission().toString();
}
From source file:org.openflamingo.fs.hdfs.HdfsFileInfo.java
License:Apache License
/**
 * Constructor.
 *
 * @param fileStatus HDFS file status
 */
public HdfsFileInfo(FileStatus fileStatus) {
    this.fullyQualifiedPath = fileStatus.getPath().toUri().getPath();
    this.filename = StringUtils.isEmpty(FileUtils.getFilename(fullyQualifiedPath))
            ? FileUtils.getDirectoryName(fullyQualifiedPath)
            : FileUtils.getFilename(fullyQualifiedPath);
    this.length = fileStatus.getLen();
    this.path = FileUtils.getPath(fullyQualifiedPath);
    this.directory = fileStatus.isDir();
    this.file = !fileStatus.isDir();
    this.owner = fileStatus.getOwner();
    this.group = fileStatus.getGroup();
    this.blockSize = fileStatus.getBlockSize();
    this.replication = fileStatus.getReplication();
    this.modificationTime = fileStatus.getModificationTime();
    this.accessTime = fileStatus.getAccessTime();
    this.setText(this.filename);
    this.setLeaf(file);
    this.setCls(directory ? "folder" : "file");
    this.setId(fullyQualifiedPath);
    this.permission = fileStatus.getPermission().toString();
}
From source file:org.openflamingo.fs.hdfs.HdfsFileSystemProvider.java
License:Apache License
@Override
public FileInfo getFileInfo(String path) {
    try {
        FileStatus fileStatus = fs.getFileStatus(new Path(path));
        HdfsFileInfo hdfsFileInfo = new HdfsFileInfo(fileStatus);
        ContentSummary summary = fs.getContentSummary(new Path(path));
        hdfsFileInfo.setBlockSize(fileStatus.getBlockSize());
        hdfsFileInfo.setReplication(fileStatus.getReplication());
        hdfsFileInfo.setDirectoryCount(summary.getDirectoryCount());
        hdfsFileInfo.setFileCount(summary.getFileCount());
        hdfsFileInfo.setQuota(summary.getQuota());
        hdfsFileInfo.setSpaceQuota(summary.getSpaceQuota());
        hdfsFileInfo.setSpaceConsumed(StringUtils.byteDesc(summary.getSpaceConsumed()));
        hdfsFileInfo.setLength(summary.getLength());
        return hdfsFileInfo;
    } catch (Exception ex) {
        throw new FileSystemException(bundle.message("S_FS", "CANNOT_GET_FILE_INFO", path), ex);
    }
}
From source file:org.smartfrog.services.hadoop.operations.dfs.DfsPathExistsImpl.java
License:Open Source License
/**
 * Check that a path exists.
 *
 * @throws SmartFrogLivenessException if it does not, or it is the wrong type/size
 */
private void checkPathExists() throws SmartFrogLivenessException {
    String filename = getPathName() + " in " + dfs.toString();
    try {
        if (!doesPathExist()) {
            throw new SmartFrogLivenessException("Missing path " + filename);
        }
        FileStatus status = dfs.getFileStatus(getPath());
        if (verbose) {
            sfLog().info("Path " + getPath() + " size " + status.getLen()
                    + " last modified:" + status.getModificationTime());
        }
        if (status.isDir()) {
            //it is a directory. Run the directory checks
            FileStatus[] statuses = dfs.listStatus(getPath());
            if (statuses == null) {
                throw new SmartFrogLivenessException("Unable to list the status of " + filename);
            }
            int fileCount = statuses.length;
            StringBuilder filenames = new StringBuilder();
            long totalFileSize = 0;
            for (FileStatus fstat : statuses) {
                totalFileSize += fstat.getLen();
                filenames.append(fstat.getPath()).append('\t').append(fstat.getBlockSize()).append('\n');
                if (verbose) {
                    sfLog().info(fstat.getPath() + "\t" + fstat.getBlockSize() + "\n");
                }
            }
            if (!canBeDir) {
                throw new SmartFrogLivenessException("Expected a file, got a directory: " + filename
                        + " containing " + fileCount + " file(s):\n" + filenames);
            }
            if (fileCount < minFileCount) {
                throw new SmartFrogLivenessException("Not enough files under " + filename
                        + " required " + minFileCount + " found " + fileCount + " :\n" + filenames);
            }
            if (maxFileCount >= 0 && fileCount > maxFileCount) {
                throw new SmartFrogLivenessException("Too many files under " + filename
                        + " maximum " + maxFileCount + " found " + fileCount + " :\n" + filenames);
            }
            if (totalFileSize < minTotalFileSize) {
                throw new SmartFrogLivenessException("Not enough file content " + filename
                        + " required " + minTotalFileSize + " found " + totalFileSize + " :\n" + filenames);
            }
            if (maxTotalFileSize >= 0 && totalFileSize > maxTotalFileSize) {
                throw new SmartFrogLivenessException("Too much file content " + filename
                        + " maximum " + maxTotalFileSize + " found " + totalFileSize + " :\n" + filenames);
            }
        } else {
            if (!canBeFile) {
                throw new SmartFrogLivenessException("Not allowed to be a file: " + filename);
            }
            long size = status.getLen();
            if (size < minFileSize) {
                throw new SmartFrogLivenessException("File " + filename + " is too small at " + size
                        + " bytes for the minimum size " + minFileSize);
            }
            if (maxFileSize >= 0 && size > maxFileSize) {
                throw new SmartFrogLivenessException("File " + filename + " is too big at " + size
                        + " bytes for the maximum size " + maxFileSize);
            }
            short replication = status.getReplication();
            if (replication < minReplication) {
                throw new SmartFrogLivenessException("File " + filename + " has a replication factor of "
                        + replication + " which is less than the minimum value of " + minReplication);
            }
            if (maxReplication >= 0 && replication > maxReplication) {
                throw new SmartFrogLivenessException("File " + filename + " has a replication factor of "
                        + replication + " which is greater than the maximum value of " + maxReplication);
            }
        }
    } catch (IOException e) {
        throw new SmartFrogLivenessException("Missing path " + filename, e);
    }
}
From source file:org.springframework.data.hadoop.store.split.AbstractSplitterTests.java
License:Apache License
protected static Path mockWithFileSystem(int blockCount, long blockSize, long extraBlockSize)
        throws Exception {
    final ArrayList<BlockLocation> blocks = new ArrayList<BlockLocation>();
    long offset = 0;
    int i = 0;
    for (; i < blockCount; i++) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                blockSize));
        offset += blockSize;
    }
    // extra just means that we add a non-full last block
    if (extraBlockSize > 0 && extraBlockSize < blockSize) {
        blocks.add(new BlockLocation(new String[] { "names" + i }, new String[] { "hosts" + i }, offset,
                extraBlockSize));
        offset += extraBlockSize;
    }
    FileStatus mStatus = mock(FileStatus.class);
    Path mPath = mock(Path.class);
    FileSystem mFs = mock(FileSystem.class);
    when(mStatus.getLen()).thenReturn(offset);
    when(mStatus.getBlockSize()).thenReturn(blockSize);
    when(mFs.getFileStatus(mPath)).thenReturn(mStatus);
    when(mFs.getFileBlockLocations((FileStatus) any(), anyLong(), anyLong()))
            .thenAnswer(new Answer<BlockLocation[]>() {
                @Override
                public BlockLocation[] answer(InvocationOnMock invocation) throws Throwable {
                    Object[] arguments = invocation.getArguments();
                    return findBlocks(blocks, (Long) arguments[1], (Long) arguments[2]);
                }
            });
    when(mPath.getFileSystem((Configuration) any())).thenReturn(mFs);
    return mPath;
}
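A minimal check of what this mock reports, exercising only the behavior stubbed above (the block counts and sizes are arbitrary illustrative values; assertEquals is JUnit's):

Path path = mockWithFileSystem(3, 100, 50); // three full 100-byte blocks plus a 50-byte tail
FileSystem fs = path.getFileSystem(new Configuration());
FileStatus status = fs.getFileStatus(path);
// getLen() was stubbed to the accumulated offset, getBlockSize() to the full block size
assertEquals(350L, status.getLen());
assertEquals(100L, status.getBlockSize());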
From source file:org.springframework.data.hadoop.store.split.SlopBlockSplitter.java
License:Apache License
@Override
public List<Split> getSplits(Path path) throws IOException {
    List<Split> splits = new ArrayList<Split>();
    FileSystem fs = path.getFileSystem(getConfiguration());
    FileStatus status = fs.getFileStatus(path);
    long length = status.getLen();
    BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, length);
    long blockSize = status.getBlockSize();
    long splitSize = computeSplitSize(blockSize, getMinSplitSize(), getMaxSplitSize());
    long remaining = length;
    while (((double) remaining) / splitSize > slop) {
        int i = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, splitSize, blocks[i].getHosts()));
        remaining -= splitSize;
    }
    if (remaining != 0) {
        int blkIndex = getBlockIndex(blocks, length - remaining);
        splits.add(buildSplit(length - remaining, remaining, blocks[blkIndex].getHosts()));
    }
    return splits;
}
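The slop factor above lets the final split absorb a small remainder instead of emitting a tiny trailing split. The body of computeSplitSize is not shown in this listing; a plausible sketch, assuming the conventional clamping semantics rather than quoting the spring-data-hadoop source, is:

// Assumed behavior: clamp the block size between the configured
// minimum and maximum split sizes. Not the verbatim implementation.
protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
}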
From source file:org.terrier.structures.indexing.singlepass.hadoop.BitPostingIndexInputFormat.java
License:Mozilla Public License
/** Returns the block size of the specified file. Only recommended to override for testing. */
protected long getBlockSize(Path path, FileStatus fss) {
    return fss.getBlockSize();
}
From source file:org.terrier.structures.indexing.singlepass.hadoop.MultiFileCollectionInputFormat.java
License:Mozilla Public License
/**
 * Splits the input collection into sets of files where each Map task
 * gets about the same number of files.
 */
@SuppressWarnings("unchecked")
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Path[] paths = FileInputFormat.getInputPaths(job);
    // HADOOP-1818: Manage splits only if there are paths
    if (paths.length == 0) {
        return new InputSplit[0];
    }
    if (numSplits > paths.length) {
        numSplits = paths.length;
    } else if (numSplits < 1) {
        numSplits = 1;
    }
    logger.info("Allocating " + paths.length + " files across " + numSplits + " map tasks");
    List<PositionAwareSplit<CombineFileSplit>> splits = new ArrayList<PositionAwareSplit<CombineFileSplit>>(
            numSplits);
    final int numPaths = paths.length;
    long[] lengths = new long[numPaths];
    TObjectLongHashMap<String>[] locations = (TObjectLongHashMap<String>[]) Array
            .newInstance(TObjectLongHashMap.class, numPaths);
    final FileSystem fs = FileSystem.get(job);
    for (int i = 0; i < paths.length; i++) {
        final FileStatus fss = fs.getFileStatus(paths[i]);
        lengths[i] = fss.getLen();
        final TObjectLongHashMap<String> location2size = locations[i] = new TObjectLongHashMap<String>();
        final long normalblocksize = fss.getBlockSize();
        for (long offset = 0; offset < lengths[i]; offset += normalblocksize) {
            final long blocksize = Math.min(offset + normalblocksize, lengths[i]);
            final BlockLocation[] blockLocations = fs.getFileBlockLocations(fss, offset, blocksize);
            for (BlockLocation bl : blockLocations) {
                for (String host : bl.getHosts()) {
                    location2size.adjustOrPutValue(host, blocksize, blocksize);
                }
            }
        }
    }
    // we need to over-estimate using ceil, to ensure that the last split is not /too/ big
    // (e.g. 10 files across 4 map tasks gives ceil(10/4) = 3 files per split, leaving 1 for the last)
    final int numberOfFilesPerSplit = (int) Math.ceil((double) paths.length / (double) numSplits);
    int pathsUsed = 0;
    int splitnum = 0;
    CombineFileSplit mfs;
    // for each split except the last one (which may be smaller than numberOfFilesPerSplit)
    while (pathsUsed < numPaths) {
        /* calculate split size for this task - usually numberOfFilesPerSplit, but
         * less than this for the last split */
        final int splitSizeForThisSplit = numberOfFilesPerSplit + pathsUsed > numPaths
                ? numPaths - pathsUsed
                : numberOfFilesPerSplit;
        // arrays of information for this split
        Path[] splitPaths = new Path[splitSizeForThisSplit];
        long[] splitLengths = new long[splitSizeForThisSplit];
        long[] splitStarts = new long[splitSizeForThisSplit];
        final TObjectLongHashMap<String> allLocationsForSplit = new TObjectLongHashMap<String>();
        String[] splitLocations = null; // final recommended locations for this split
        for (int i = 0; i < splitSizeForThisSplit; i++) {
            locations[pathsUsed + i].forEachEntry(new TObjectLongProcedure<String>() {
                public boolean execute(String a, long b) {
                    allLocationsForSplit.adjustOrPutValue(a, b, b);
                    return true;
                }
            });
            if (allLocationsForSplit.size() <= 3) {
                splitLocations = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]);
            } else {
                String[] hosts = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]);
                Arrays.sort(hosts, new Comparator<String>() {
                    public int compare(String o1, String o2) {
                        long diffamount = allLocationsForSplit.get(o1) - allLocationsForSplit.get(o2);
                        if (diffamount > 0) {
                            return -1;
                        } else if (diffamount < 0) {
                            return 1;
                        }
                        return 0;
                    }
                });
                splitLocations = new String[3];
                System.arraycopy(hosts, 0, splitLocations, 0, 3);
            }
        }
        // copy information for this split
        System.arraycopy(lengths, pathsUsed, splitLengths, 0, splitSizeForThisSplit);
        System.arraycopy(paths, pathsUsed, splitPaths, 0, splitSizeForThisSplit);
        // count the number of paths consumed
        pathsUsed += splitSizeForThisSplit;
        // make the actual split object
        mfs = new CombineFileSplit(job, splitPaths, splitStarts, splitLengths, splitLocations);
        splits.add(new PositionAwareSplit<CombineFileSplit>(mfs, splitnum));
        splitnum++;
    }
    if (pathsUsed != paths.length) {
        throw new IOException("Number of used paths does not equal total available paths!");
    }
    return splits.toArray(new PositionAwareSplit[splits.size()]);
}
From source file:org.terrier.structures.indexing.singlepass.hadoop.TestBitPostingIndexInputFormat.java
License:Mozilla Public License
protected BitPostingIndexInputFormat makeInputFormat(JobConf jc, Index index, final long blockSize)
        throws Exception {
    BitPostingIndexInputFormat bpiif;
    if (blockSize == 0) {
        bpiif = new BitPostingIndexInputFormat();
    } else {
        bpiif = new BitPostingIndexInputFormat() {
            @Override
            protected long getBlockSize(Path path, FileStatus fss) {
                System.err.println("Forcing blocksize of file " + path + " (size=" + fss.getLen()
                        + " actualBlocksize=" + fss.getBlockSize() + ") to " + blockSize + " bytes");
                return blockSize;
            }
        };
    }
    BitPostingIndexInputFormat.setStructures(jc, "direct", "document");
    HadoopUtility.toHConfiguration(index, jc);
    return bpiif;
}
From source file:org.wikimedia.wikihadoop.StreamWikiDumpInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job the job context
 * @throws IOException
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    LOG.info("StreamWikiDumpInputFormat.getSplits job=" + job + " n=" + numSplits);
    InputSplit[] oldSplits = super.getSplits(job, numSplits);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    FileStatus[] files = listStatus(job);
    // Save the number of input files for metrics/loadgen
    job.setLong(NUM_INPUT_FILES, files.length);
    // compute total size
    long totalSize = 0;
    for (FileStatus file : files) {
        // check we have valid files
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    long minSize = job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1);
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    for (FileStatus file : files) {
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        long blockSize = file.getBlockSize();
        long splitSize = computeSplitSize(goalSize, minSize, blockSize);
        LOG.info(String.format("goalsize=%d splitsize=%d blocksize=%d", goalSize, splitSize, blockSize));
        for (InputSplit x : getSplits(job, file, pageBeginPattern, splitSize)) {
            splits.add(x);
        }
    }
    System.err.println("splits=" + splits);
    return splits.toArray(new InputSplit[splits.size()]);
}
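The computeSplitSize call above follows the classic old-API FileInputFormat pattern: take the block size unless the per-file goal size or the configured minimum constrains it. A sketch of that formula, matching the long-standing behavior of org.apache.hadoop.mapred.FileInputFormat:

// goalSize = totalSize / numSplits, minSize from SPLIT_MINSIZE (see above)
protected long computeSplitSize(long goalSize, long minSize, long blockSize) {
    return Math.max(minSize, Math.min(goalSize, blockSize));
}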