Example usage for org.apache.hadoop.fs FileStatus getBlockSize

List of usage examples for org.apache.hadoop.fs FileStatus getBlockSize

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileStatus getBlockSize.

Prototype

public long getBlockSize() 

Document

Get the block size of the file.
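
Before the examples below, here is a minimal, self-contained sketch of calling getBlockSize(); the path /tmp/example.txt and the default Configuration are hypothetical placeholders, not taken from any of the sources listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path; substitute a file that exists on your cluster.
        Path file = new Path("/tmp/example.txt");

        FileStatus status = fs.getFileStatus(file);
        long blockSize = status.getBlockSize(); // block size in bytes
        long length = status.getLen();

        // A common pattern in the examples below: derive a block or split count
        // from the file length and its block size.
        long blocks = blockSize > 0 ? (length + blockSize - 1) / blockSize : 0;
        System.out.println(file + ": blockSize=" + blockSize + ", blocks=" + blocks);
    }
}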

Usage

From source file: io.hops.erasure_coding.TestMapReduceBlockRepairManager.java

License: Apache License

@Test
public void testCorruptedRepair() throws IOException, InterruptedException {
    DistributedFileSystem dfs = (DistributedFileSystem) getFileSystem();
    TestDfsClient testDfsClient = new TestDfsClient(getConfig());
    testDfsClient.injectIntoDfs(dfs);

    MapReduceEncodingManager encodingManager = new MapReduceEncodingManager(conf);

    Util.createRandomFile(dfs, testFile, seed, TEST_BLOCK_COUNT, DFS_TEST_BLOCK_SIZE);
    Codec.initializeCodecs(conf);
    FileStatus testFileStatus = dfs.getFileStatus(testFile);
    EncodingPolicy policy = new EncodingPolicy("src", (short) 1);
    encodingManager.encodeFile(policy, testFile, parityFile);

    // Busy waiting until the encoding is done
    while (encodingManager.computeReports().size() > 0) {
        ;
    }

    String path = testFileStatus.getPath().toUri().getPath();
    int blockToLose = new Random(seed)
            .nextInt((int) (testFileStatus.getLen() / testFileStatus.getBlockSize()));
    LocatedBlock lb = dfs.getClient().getLocatedBlocks(path, 0, Long.MAX_VALUE).get(blockToLose);
    DataNodeUtil.loseBlock(getCluster(), lb);
    List<LocatedBlock> lostBlocks = new ArrayList<LocatedBlock>();
    lostBlocks.add(lb);
    LocatedBlocks locatedBlocks = new LocatedBlocks(0, false, lostBlocks, null, true);
    testDfsClient.setMissingLocatedBlocks(locatedBlocks);
    LOG.info("Loosing block " + lb.toString());
    getCluster().triggerBlockReports();

    dfs.getClient().addBlockChecksum(testFile.toUri().getPath(),
            (int) (lb.getStartOffset() / lb.getBlockSize()), 0);

    MapReduceBlockRepairManager repairManager = new MapReduceBlockRepairManager(conf);
    repairManager.repairSourceBlocks("src", testFile, parityFile);

    while (true) {
        List<Report> reports = repairManager.computeReports();
        if (reports.size() == 0) {
            break;
        }
        LOG.info(reports.get(0).getStatus());
        System.out.println("WAIT");
        Thread.sleep(1000);
    }

    try {
        FSDataInputStream in = dfs.open(testFile);
        byte[] buff = new byte[TEST_BLOCK_COUNT * DFS_TEST_BLOCK_SIZE];
        in.readFully(0, buff);
        fail("Repair succeeded with bogus checksum.");
    } catch (BlockMissingException e) {
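        // Expected: the repair used a bogus checksum, so reading the file must still fail with missing blocks.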
    }
}

From source file: it.prz.jmatrw4spark.JMATFileInputFormat.java

License: Open Source License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    //It generates the splits.
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);

    for (FileStatus file : files) {
        Path filePath = file.getPath();

        //Calculates the content (array of double) length in bytes.
        FileSystem fs = filePath.getFileSystem(job.getConfiguration());
        FSDataInputStream dis = fs.open(filePath);
        JMATReader _matReader = new JMATReader(dis);
        JMATInfo _matdata = _matReader.getInfo();

        long length = _matdata.dataNumOfItems * MLDataType.miDOUBLE.bytes; //Content length.
        long lContentByteOffset = dis.getPos();

        _matReader.close();
        _matReader = null;
        dis = null;

        //Zero bytes, empty file split.
        if (length <= 0) {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(filePath, 0, length, new String[0]));
        }

        //Split the data.
        if (length > 0) {
            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                blkLocations = fs.getFileBlockLocations(file, lContentByteOffset, length);
            }

            boolean isSplittable = isSplitable(job, filePath);
            LOG.debug("Current file to process " + filePath.getName() + ". Splittable? " + isSplittable);
            if (isSplittable) {
                long blockSize = file.getBlockSize();
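                // computeSplitSize is inherited from FileInputFormat and typically resolves to
                // Math.max(minSize, Math.min(maxSize, blockSize)), so the HDFS block size drives
                // the split size unless the min/max split settings override it.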
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    long lBlockByteStart = lContentByteOffset + length - bytesRemaining;

                    int blkIndex = getBlockIndex(blkLocations, lBlockByteStart);
                    splits.add(
                            makeSplit(filePath, lBlockByteStart, splitSize, blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                } //EndWhile.

                if (bytesRemaining != 0) {
                    long lBlockByteStart = lContentByteOffset + length - bytesRemaining;
                    int blkIndex = getBlockIndex(blkLocations, lBlockByteStart);
                    splits.add(makeSplit(filePath, lBlockByteStart, bytesRemaining,
                            blkLocations[blkIndex].getHosts()));
                }
            } else { // not splitable
                splits.add(makeSplit(filePath, lContentByteOffset, length, blkLocations[0].getHosts()));
            }
        }
    } //EndFor.

    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}

From source file: ml.shifu.guagua.hadoop.GuaguaMRUnitDriver.java

License: Apache License

@Override
public List<GuaguaFileSplit[]> generateWorkerSplits(String inputs) throws IOException {
    List<GuaguaFileSplit[]> splits = new ArrayList<GuaguaFileSplit[]>();
    Configuration conf = new Configuration();
    // generate splits
    List<FileStatus> files = listStatus(conf, inputs);
    for (FileStatus file : files) {
        Path path = file.getPath();
        if (isPigOrHadoopMetaFile(path)) {
            continue;
        }
        long length = file.getLen();
        if ((length != 0) && isSplitable(conf, path)) {
            long splitSize = file.getBlockSize();

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > GuaguaYarnConstants.SPLIT_SLOP) {
                splits.add(new GuaguaFileSplit[] {
                        new GuaguaFileSplit(path.toString(), length - bytesRemaining, splitSize) });
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new GuaguaFileSplit[] {
                        new GuaguaFileSplit(path.toString(), length - bytesRemaining, bytesRemaining) });
            }
        } else if (length != 0) {
            splits.add(new GuaguaFileSplit[] { new GuaguaFileSplit(path.toString(), 0, length) });
        }
    }
    return splits;
}

From source file: ml.shifu.guagua.mapreduce.GuaguaInputFormat.java

License: Apache License

/**
 * Generate the list of files and make them into FileSplits.
 */
protected List<InputSplit> getGuaguaSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        if (isPigOrHadoopMetaFile(path)) {
            continue;
        }
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
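            // SPLIT_SLOP (commonly 1.1, as in Hadoop's FileInputFormat) lets the last split grow
            // up to ~10% beyond splitSize instead of emitting a tiny trailing split.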
            while (((double) bytesRemaining) / splitSize > GuaguaMapReduceConstants.SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new GuaguaInputSplit(false, new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts())));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new GuaguaInputSplit(false, new FileSplit(path, length - bytesRemaining,
                        bytesRemaining, blkLocations[blkLocations.length - 1].getHosts())));
            }
        } else if (length != 0) {
            splits.add(new GuaguaInputSplit(false, new FileSplit(path, 0, length, blkLocations[0].getHosts())));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new GuaguaInputSplit(false, new FileSplit(path, 0, length, new String[0])));
        }
    }

    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(GuaguaMapReduceConstants.NUM_INPUT_FILES, files.size());

    LOG.debug("Total # of splits: {}", splits.size());
    return splits;
}

From source file: ml.shifu.guagua.mapreduce.GuaguaMRUnitDriver.java

License: Apache License

@Override
public List<GuaguaFileSplit[]> generateWorkerSplits(String inputs) throws IOException {
    List<GuaguaFileSplit[]> splits = new ArrayList<GuaguaFileSplit[]>();
    Configuration conf = new Configuration();
    // generate splits
    List<FileStatus> files = listStatus(conf, inputs);
    for (FileStatus file : files) {
        Path path = file.getPath();
        if (isPigOrHadoopMetaFile(path)) {
            continue;
        }
        long length = file.getLen();
        if ((length != 0) && isSplitable(conf, path)) {
            long splitSize = file.getBlockSize();

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > GuaguaMapReduceConstants.SPLIT_SLOP) {
                splits.add(new GuaguaFileSplit[] {
                        new GuaguaFileSplit(path.toString(), length - bytesRemaining, splitSize) });
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new GuaguaFileSplit[] {
                        new GuaguaFileSplit(path.toString(), length - bytesRemaining, bytesRemaining) });
            }
        } else if (length != 0) {
            splits.add(new GuaguaFileSplit[] { new GuaguaFileSplit(path.toString(), 0, length) });
        }
    }
    return splits;
}

From source file: ml.shifu.shifu.core.mr.input.CombineInputFormat.java

License: Apache License

/**
 * Generate the list of files and make them into FileSplits.
 */
protected List<InputSplit> getVarSelectSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        if (isPigOrHadoopMetaFile(path)) {
            continue;
        }
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            // here double comparison can be directly used because of no precision requirement
            while (((double) bytesRemaining) / splitSize > 1.1d) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new CombineInputSplit(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts())));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new CombineInputSplit(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts())));
            }
        } else if (length != 0) {
            splits.add(new CombineInputSplit(new FileSplit(path, 0, length, blkLocations[0].getHosts())));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new CombineInputSplit(new FileSplit(path, 0, length, new String[0])));
        }
    }

    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(GuaguaMapReduceConstants.NUM_INPUT_FILES, files.size());

    LOG.debug("Total # of splits: {}", splits.size());
    return splits;
}

From source file: org.apache.ambari.view.filebrowser.HdfsApi.java

License: Apache License

/**
 * Converts a Hadoop <code>FileStatus</code> object into a JSON array object.
 * It replaces the <code>SCHEME://HOST:PORT</code> of the path with the
 * specified URL.
 * <p/>
 *
 * @param status
 *          Hadoop file status.
 * @return The JSON representation of the file status.
 */

public Map<String, Object> fileStatusToJSON(FileStatus status) {
    Map<String, Object> json = new LinkedHashMap<String, Object>();
    json.put("path", Path.getPathWithoutSchemeAndAuthority(status.getPath()).toString());
    json.put("replication", status.getReplication());
    json.put("isDirectory", status.isDirectory());
    json.put("len", status.getLen());
    json.put("owner", status.getOwner());
    json.put("group", status.getGroup());
    json.put("permission", permissionToString(status.getPermission()));
    json.put("accessTime", status.getAccessTime());
    json.put("modificationTime", status.getModificationTime());
    json.put("blockSize", status.getBlockSize());
    json.put("replication", status.getReplication());
    json.put("readAccess", checkAccessPermissions(status, FsAction.READ, ugi));
    json.put("writeAccess", checkAccessPermissions(status, FsAction.WRITE, ugi));
    json.put("executeAccess", checkAccessPermissions(status, FsAction.EXECUTE, ugi));
    return json;
}

From source file: org.apache.ambari.view.hive.utils.HdfsApi.java

License: Apache License

/**
 * Converts a Hadoop <code>FileStatus</code> object into a JSON array object.
 * It replaces the <code>SCHEME://HOST:PORT</code> of the path with the
 * specified URL.
 * <p/>
 *
 * @param status
 *          Hadoop file status.
 * @return The JSON representation of the file status.
 */

public static Map<String, Object> fileStatusToJSON(FileStatus status) {
    Map<String, Object> json = new LinkedHashMap<String, Object>();
    json.put("path", status.getPath().toString());
    json.put("isDirectory", status.isDirectory());
    json.put("len", status.getLen());
    json.put("owner", status.getOwner());
    json.put("group", status.getGroup());
    json.put("permission", permissionToString(status.getPermission()));
    json.put("accessTime", status.getAccessTime());
    json.put("modificationTime", status.getModificationTime());
    json.put("blockSize", status.getBlockSize());
    json.put("replication", status.getReplication());
    return json;
}

From source file: org.apache.ambari.view.utils.hdfs.HdfsApi.java

License: Apache License

/**
 * Converts a Hadoop <code>FileStatus</code> object into a JSON array object.
 * It replaces the <code>SCHEME://HOST:PORT</code> of the path with the
 * specified URL.
 * <p/>
 *
 * @param status
 *          Hadoop file status.
 * @return The JSON representation of the file status.
 */
public Map<String, Object> fileStatusToJSON(FileStatus status) {
    Map<String, Object> json = new LinkedHashMap<String, Object>();
    json.put("path", Path.getPathWithoutSchemeAndAuthority(status.getPath()).toString());
    json.put("replication", status.getReplication());
    json.put("isDirectory", status.isDirectory());
    json.put("len", status.getLen());
    json.put("owner", status.getOwner());
    json.put("group", status.getGroup());
    json.put("permission", permissionToString(status.getPermission()));
    json.put("accessTime", status.getAccessTime());
    json.put("modificationTime", status.getModificationTime());
    json.put("blockSize", status.getBlockSize());
    json.put("replication", status.getReplication());
    json.put("readAccess", checkAccessPermissions(status, FsAction.READ, ugi));
    json.put("writeAccess", checkAccessPermissions(status, FsAction.WRITE, ugi));
    json.put("executeAccess", checkAccessPermissions(status, FsAction.EXECUTE, ugi));
    return json;
}

From source file: org.apache.falcon.hadoop.JailedFileSystem.java

License: Apache License

@Override
public FileStatus[] listStatus(Path f) throws IOException {
    FileStatus[] fileStatuses = localFS.listStatus(toLocalPath(f));
    if (fileStatuses == null || fileStatuses.length == 0) {
        return fileStatuses;
    } else {
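        // Re-wrap each status, copying length, directory flag, replication, block size,
        // times, permissions, owner, and group, and re-qualifying the path against this
        // file system's URI and working directory.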
        FileStatus[] jailFileStatuses = new FileStatus[fileStatuses.length];
        for (int index = 0; index < fileStatuses.length; index++) {
            FileStatus status = fileStatuses[index];
            jailFileStatuses[index] = new FileStatus(status.getLen(), status.isDirectory(),
                    status.getReplication(), status.getBlockSize(), status.getModificationTime(),
                    status.getAccessTime(), status.getPermission(), status.getOwner(), status.getGroup(),
                    fromLocalPath(status.getPath()).makeQualified(this.getUri(), this.getWorkingDirectory()));
        }
        return jailFileStatuses;
    }
}