List of usage examples for org.apache.hadoop.fs FileStatus getBlockSize
public long getBlockSize()
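A minimal sketch of typical usage, assuming a default Configuration on the classpath; the file path is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // "/tmp/data.txt" is an illustrative path; substitute any existing file.
        FileStatus status = fs.getFileStatus(new Path("/tmp/data.txt"));
        // Block size of the file in bytes (e.g. 128 MB on default HDFS installs).
        long blockSize = status.getBlockSize();
        System.out.println("Block size: " + blockSize + " bytes");
    }
}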
From source file:io.hops.erasure_coding.TestMapReduceBlockRepairManager.java
License:Apache License
@Test
public void testCorruptedRepair() throws IOException, InterruptedException {
    DistributedFileSystem dfs = (DistributedFileSystem) getFileSystem();
    TestDfsClient testDfsClient = new TestDfsClient(getConfig());
    testDfsClient.injectIntoDfs(dfs);

    MapReduceEncodingManager encodingManager = new MapReduceEncodingManager(conf);
    Util.createRandomFile(dfs, testFile, seed, TEST_BLOCK_COUNT, DFS_TEST_BLOCK_SIZE);
    Codec.initializeCodecs(conf);
    FileStatus testFileStatus = dfs.getFileStatus(testFile);
    EncodingPolicy policy = new EncodingPolicy("src", (short) 1);
    encodingManager.encodeFile(policy, testFile, parityFile);

    // Busy waiting until the encoding is done
    while (encodingManager.computeReports().size() > 0) {
        ;
    }

    String path = testFileStatus.getPath().toUri().getPath();
    int blockToLose = new Random(seed)
            .nextInt((int) (testFileStatus.getLen() / testFileStatus.getBlockSize()));
    LocatedBlock lb = dfs.getClient().getLocatedBlocks(path, 0, Long.MAX_VALUE).get(blockToLose);
    DataNodeUtil.loseBlock(getCluster(), lb);
    List<LocatedBlock> lostBlocks = new ArrayList<LocatedBlock>();
    lostBlocks.add(lb);
    LocatedBlocks locatedBlocks = new LocatedBlocks(0, false, lostBlocks, null, true);
    testDfsClient.setMissingLocatedBlocks(locatedBlocks);
    LOG.info("Losing block " + lb.toString());
    getCluster().triggerBlockReports();

    dfs.getClient().addBlockChecksum(testFile.toUri().getPath(),
            (int) (lb.getStartOffset() / lb.getBlockSize()), 0);

    MapReduceBlockRepairManager repairManager = new MapReduceBlockRepairManager(conf);
    repairManager.repairSourceBlocks("src", testFile, parityFile);

    while (true) {
        List<Report> reports = repairManager.computeReports();
        if (reports.size() == 0) {
            break;
        }
        LOG.info(reports.get(0).getStatus());
        System.out.println("WAIT");
        Thread.sleep(1000);
    }

    try {
        FSDataInputStream in = dfs.open(testFile);
        byte[] buff = new byte[TEST_BLOCK_COUNT * DFS_TEST_BLOCK_SIZE];
        in.readFully(0, buff);
        fail("Repair succeeded with bogus checksum.");
    } catch (BlockMissingException e) {
        // Expected: the block repaired against a bogus checksum must stay missing.
    }
}
From source file:it.prz.jmatrw4spark.JMATFileInputFormat.java
License:Open Source License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // It generates the splits.
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path filePath = file.getPath();

        // Calculates the content (array of double) length in bytes.
        FileSystem fs = filePath.getFileSystem(job.getConfiguration());
        FSDataInputStream dis = fs.open(filePath);
        JMATReader _matReader = new JMATReader(dis);
        JMATInfo _matdata = _matReader.getInfo();
        long length = _matdata.dataNumOfItems * MLDataType.miDOUBLE.bytes; // Content length.
        long lContentByteOffset = dis.getPos();
        _matReader.close();
        _matReader = null;
        dis = null;

        // Zero bytes, empty file split.
        if (length <= 0) {
            // Create empty hosts array for zero length files
            splits.add(makeSplit(filePath, 0, length, new String[0]));
        }

        // Split the data.
        if (length > 0) {
            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                blkLocations = fs.getFileBlockLocations(file, lContentByteOffset, length);
            }

            boolean isSplittable = isSplitable(job, filePath);
            LOG.debug("Current file to process " + filePath.getName() + ". Splittable? " + isSplittable);
            if (isSplittable) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    long lBlockByteStart = lContentByteOffset + length - bytesRemaining;
                    int blkIndex = getBlockIndex(blkLocations, lBlockByteStart);
                    splits.add(makeSplit(filePath, lBlockByteStart, splitSize,
                            blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                } // EndWhile.

                if (bytesRemaining != 0) {
                    long lBlockByteStart = lContentByteOffset + length - bytesRemaining;
                    int blkIndex = getBlockIndex(blkLocations, lBlockByteStart);
                    splits.add(makeSplit(filePath, lBlockByteStart, bytesRemaining,
                            blkLocations[blkIndex].getHosts()));
                }
            } else { // not splitable
                splits.add(makeSplit(filePath, lContentByteOffset, length, blkLocations[0].getHosts()));
            }
        }
    } // EndFor.

    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
From source file:ml.shifu.guagua.hadoop.GuaguaMRUnitDriver.java
License:Apache License
@Override
public List<GuaguaFileSplit[]> generateWorkerSplits(String inputs) throws IOException {
    List<GuaguaFileSplit[]> splits = new ArrayList<GuaguaFileSplit[]>();
    Configuration conf = new Configuration();
    // generate splits
    List<FileStatus> files = listStatus(conf, inputs);
    for (FileStatus file : files) {
        Path path = file.getPath();
        if (isPigOrHadoopMetaFile(path)) {
            continue;
        }
        long length = file.getLen();
        if ((length != 0) && isSplitable(conf, path)) {
            long splitSize = file.getBlockSize();
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > GuaguaYarnConstants.SPLIT_SLOP) {
                splits.add(new GuaguaFileSplit[] {
                        new GuaguaFileSplit(path.toString(), length - bytesRemaining, splitSize) });
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                splits.add(new GuaguaFileSplit[] {
                        new GuaguaFileSplit(path.toString(), length - bytesRemaining, bytesRemaining) });
            }
        } else if (length != 0) {
            splits.add(new GuaguaFileSplit[] { new GuaguaFileSplit(path.toString(), 0, length) });
        }
    }
    return splits;
}
From source file:ml.shifu.guagua.mapreduce.GuaguaInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 */
protected List<InputSplit> getGuaguaSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        if (isPigOrHadoopMetaFile(path)) {
            continue;
        }
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > GuaguaMapReduceConstants.SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new GuaguaInputSplit(false, new FileSplit(path, length - bytesRemaining,
                        splitSize, blkLocations[blkIndex].getHosts())));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new GuaguaInputSplit(false, new FileSplit(path, length - bytesRemaining,
                        bytesRemaining, blkLocations[blkLocations.length - 1].getHosts())));
            }
        } else if (length != 0) {
            splits.add(new GuaguaInputSplit(false,
                    new FileSplit(path, 0, length, blkLocations[0].getHosts())));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new GuaguaInputSplit(false, new FileSplit(path, 0, length, new String[0])));
        }
    }
    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(GuaguaMapReduceConstants.NUM_INPUT_FILES, files.size());
    LOG.debug("Total # of splits: {}", splits.size());
    return splits;
}
From source file:ml.shifu.guagua.mapreduce.GuaguaMRUnitDriver.java
License:Apache License
@Override
public List<GuaguaFileSplit[]> generateWorkerSplits(String inputs) throws IOException {
    List<GuaguaFileSplit[]> splits = new ArrayList<GuaguaFileSplit[]>();
    Configuration conf = new Configuration();
    // generate splits
    List<FileStatus> files = listStatus(conf, inputs);
    for (FileStatus file : files) {
        Path path = file.getPath();
        if (isPigOrHadoopMetaFile(path)) {
            continue;
        }
        long length = file.getLen();
        if ((length != 0) && isSplitable(conf, path)) {
            long splitSize = file.getBlockSize();
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > GuaguaMapReduceConstants.SPLIT_SLOP) {
                splits.add(new GuaguaFileSplit[] {
                        new GuaguaFileSplit(path.toString(), length - bytesRemaining, splitSize) });
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                splits.add(new GuaguaFileSplit[] {
                        new GuaguaFileSplit(path.toString(), length - bytesRemaining, bytesRemaining) });
            }
        } else if (length != 0) {
            splits.add(new GuaguaFileSplit[] { new GuaguaFileSplit(path.toString(), 0, length) });
        }
    }
    return splits;
}
From source file:ml.shifu.shifu.core.mr.input.CombineInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 */
protected List<InputSplit> getVarSelectSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        if (isPigOrHadoopMetaFile(path)) {
            continue;
        }
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            // Plain double comparison is fine here; there is no precision requirement.
            while (((double) bytesRemaining) / splitSize > 1.1d) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new CombineInputSplit(new FileSplit(path, length - bytesRemaining,
                        splitSize, blkLocations[blkIndex].getHosts())));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new CombineInputSplit(new FileSplit(path, length - bytesRemaining,
                        bytesRemaining, blkLocations[blkLocations.length - 1].getHosts())));
            }
        } else if (length != 0) {
            splits.add(new CombineInputSplit(new FileSplit(path, 0, length, blkLocations[0].getHosts())));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new CombineInputSplit(new FileSplit(path, 0, length, new String[0])));
        }
    }
    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(GuaguaMapReduceConstants.NUM_INPUT_FILES, files.size());
    LOG.debug("Total # of splits: {}", splits.size());
    return splits;
}
From source file:org.apache.ambari.view.filebrowser.HdfsApi.java
License:Apache License
/**
 * Converts a Hadoop <code>FileStatus</code> object into a JSON object.
 * It replaces the <code>SCHEME://HOST:PORT</code> of the path with the
 * specified URL.
 * <p/>
 *
 * @param status
 *          Hadoop file status.
 * @return The JSON representation of the file status.
 */
public Map<String, Object> fileStatusToJSON(FileStatus status) {
    Map<String, Object> json = new LinkedHashMap<String, Object>();
    json.put("path", Path.getPathWithoutSchemeAndAuthority(status.getPath()).toString());
    json.put("replication", status.getReplication());
    json.put("isDirectory", status.isDirectory());
    json.put("len", status.getLen());
    json.put("owner", status.getOwner());
    json.put("group", status.getGroup());
    json.put("permission", permissionToString(status.getPermission()));
    json.put("accessTime", status.getAccessTime());
    json.put("modificationTime", status.getModificationTime());
    json.put("blockSize", status.getBlockSize());
    json.put("readAccess", checkAccessPermissions(status, FsAction.READ, ugi));
    json.put("writeAccess", checkAccessPermissions(status, FsAction.WRITE, ugi));
    json.put("executeAccess", checkAccessPermissions(status, FsAction.EXECUTE, ugi));
    return json;
}
From source file:org.apache.ambari.view.hive.utils.HdfsApi.java
License:Apache License
/**
 * Converts a Hadoop <code>FileStatus</code> object into a JSON object.
 * It replaces the <code>SCHEME://HOST:PORT</code> of the path with the
 * specified URL.
 * <p/>
 *
 * @param status
 *          Hadoop file status.
 * @return The JSON representation of the file status.
 */
public static Map<String, Object> fileStatusToJSON(FileStatus status) {
    Map<String, Object> json = new LinkedHashMap<String, Object>();
    json.put("path", status.getPath().toString());
    json.put("isDirectory", status.isDirectory());
    json.put("len", status.getLen());
    json.put("owner", status.getOwner());
    json.put("group", status.getGroup());
    json.put("permission", permissionToString(status.getPermission()));
    json.put("accessTime", status.getAccessTime());
    json.put("modificationTime", status.getModificationTime());
    json.put("blockSize", status.getBlockSize());
    json.put("replication", status.getReplication());
    return json;
}
From source file:org.apache.ambari.view.utils.hdfs.HdfsApi.java
License:Apache License
/**
 * Converts a Hadoop <code>FileStatus</code> object into a JSON object.
 * It replaces the <code>SCHEME://HOST:PORT</code> of the path with the
 * specified URL.
 * <p/>
 *
 * @param status
 *          Hadoop file status.
 * @return The JSON representation of the file status.
 */
public Map<String, Object> fileStatusToJSON(FileStatus status) {
    Map<String, Object> json = new LinkedHashMap<String, Object>();
    json.put("path", Path.getPathWithoutSchemeAndAuthority(status.getPath()).toString());
    json.put("replication", status.getReplication());
    json.put("isDirectory", status.isDirectory());
    json.put("len", status.getLen());
    json.put("owner", status.getOwner());
    json.put("group", status.getGroup());
    json.put("permission", permissionToString(status.getPermission()));
    json.put("accessTime", status.getAccessTime());
    json.put("modificationTime", status.getModificationTime());
    json.put("blockSize", status.getBlockSize());
    json.put("readAccess", checkAccessPermissions(status, FsAction.READ, ugi));
    json.put("writeAccess", checkAccessPermissions(status, FsAction.WRITE, ugi));
    json.put("executeAccess", checkAccessPermissions(status, FsAction.EXECUTE, ugi));
    return json;
}
From source file:org.apache.falcon.hadoop.JailedFileSystem.java
License:Apache License
@Override
public FileStatus[] listStatus(Path f) throws IOException {
    FileStatus[] fileStatuses = localFS.listStatus(toLocalPath(f));
    if (fileStatuses == null || fileStatuses.length == 0) {
        return fileStatuses;
    } else {
        FileStatus[] jailFileStatuses = new FileStatus[fileStatuses.length];
        for (int index = 0; index < fileStatuses.length; index++) {
            FileStatus status = fileStatuses[index];
            jailFileStatuses[index] = new FileStatus(status.getLen(), status.isDirectory(),
                    status.getReplication(), status.getBlockSize(), status.getModificationTime(),
                    status.getAccessTime(), status.getPermission(), status.getOwner(), status.getGroup(),
                    fromLocalPath(status.getPath()).makeQualified(this.getUri(), this.getWorkingDirectory()));
        }
        return jailFileStatuses;
    }
}