Example usage for org.apache.hadoop.fs ReadOption SKIP

Introduction

In this page you can find the example usage for org.apache.hadoop.fs ReadOption SKIP_CHECKSUMS.

Prototype

ReadOption SKIP_CHECKSUMS

To view the source code for org.apache.hadoop.fs ReadOption SKIP_CHECKSUMS.

Click Source Link

Document

Skip checksums when reading.

Usage

From source file:HdfsCacheReader.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]");
        return 1;
    }/* w w w .j av a  2  s  . c  om*/

    double fileSize;
    double fileSizeInMB;
    if (args[0].equals("1g")) {
        fileSize = 1073741824.0;
        fileSizeInMB = 1024.0;
    } else if (args[0].equals("10g")) {
        fileSize = 10737418240.0;
        fileSizeInMB = 10240.0;
    } else if (args[0].equals("100g")) {
        fileSize = 107374182400.0;
        fileSizeInMB = 102400.0;
    } else if (args[0].equals("200g")) {
        fileSize = 214748364800.0;
        fileSizeInMB = 204800.0;
    } else {
        throw new IllegalArgumentException("Invalid arg: " + args[0]);
    }

    String fileName = "cacheRead-" + args[0] + "-avg.txt";
    File avgFile = new File(fileName);
    PrintWriter avgPW = new PrintWriter(avgFile);
    fileName = "cacheRead-" + args[0] + "-min.txt";
    File minFile = new File(fileName);
    PrintWriter minPW = new PrintWriter(minFile);
    fileName = "cacheRead-" + args[0] + "-max.txt";
    File maxFile = new File(fileName);
    PrintWriter maxPW = new PrintWriter(maxFile);

    int numIters = 10;
    int bufferSize = 65536;
    long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 };
    short replication[] = new short[] { 1, 4 };
    String hdfsFile = "/hdfs_test/" + args[0] + "/1.in";
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path hdfsFilePath = new Path(hdfsFile);

    for (int i = 0; i < 5; i++) { // blockSize
        for (int j = 0; j < 2; j++) { // replication
            OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]);
            byte[] buf = new byte[bufferSize];
            for (int m = 0; m < bufferSize; m += 4) {
                buf[m] = (byte) m;
            }
            double numBufPerFile = fileSize / (double) bufferSize;

            for (double m = 0.0; m < numBufPerFile; m++) {
                os.write(buf);
            }
            os.close();
            String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile
                    + " -pool hdfs_test";
            Process p = Runtime.getRuntime().exec(cmdStr);
            p.waitFor();
            String cmdOutLine = "";
            StringBuffer cmdOut = new StringBuffer();
            BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
            while ((cmdOutLine = cmdOutReader.readLine()) != null) {
                cmdOut.append(cmdOutLine + "\n");
            }
            // System.out.println (cmdOut.toString());

            long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE;
            for (int k = 0; k < numIters; k++) {
                FSDataInputStream in = fs.open(hdfsFilePath);
                ByteBuffer bbuf = null;
                ElasticByteBufferPool ebbp = new ElasticByteBufferPool();
                long startTime = System.currentTimeMillis();
                while ((bbuf = in.read(ebbp, bufferSize, EnumSet.of(ReadOption.SKIP_CHECKSUMS))) != null) {
                    in.releaseBuffer(bbuf);
                }
                long endTime = System.currentTimeMillis();
                in.close();
                long duration = (endTime - startTime);
                avg += duration;
                if (duration < min) {
                    min = duration;
                }
                if (duration > max) {
                    max = duration;
                }
            }
            // write result to output
            double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg;
            avgPW.print(avgBW);
            avgPW.print("\t");
            double minBW = fileSizeInMB * 1000.0 / (double) max;
            minPW.print(minBW);
            minPW.print("\t");
            double maxBW = fileSizeInMB * 1000.0 / (double) min;
            maxPW.print(maxBW);
            maxPW.print("\t");
            cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -removeDirectives -path " + hdfsFile;
            p = Runtime.getRuntime().exec(cmdStr);
            p.waitFor();
            cmdOutLine = "";
            cmdOut.setLength(0);
            cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
            while ((cmdOutLine = cmdOutReader.readLine()) != null) {
                cmdOut.append(cmdOutLine + "\n");
            }
            // System.out.println (cmdOut.toString());
            fs.delete(hdfsFilePath, true);
        }
        avgPW.println();
        minPW.println();
        maxPW.println();
    }
    avgPW.close();
    minPW.close();
    maxPW.close();
    return 0;
}

From source file:com.cloudera.ByteBufferRecordReader.java

License:Apache License

private void initialize(Configuration job, long splitStart, long splitLength, Path file) throws IOException {
    start = splitStart;//from ww  w  .j  ava 2  s  .c  o m
    end = start + splitLength;
    pos = start;

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    this.readStats = new ReadStatistics();
    this.bufferPool = new ElasticByteBufferPool();
    boolean skipChecksums = job.getBoolean("bytecount.skipChecksums", false);
    this.readOption = skipChecksums ? EnumSet.of(ReadOption.SKIP_CHECKSUMS) : EnumSet.noneOf(ReadOption.class);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        CompressionInputStream cIn = codec.createInputStream(fileIn, decompressor);
        filePosition = cIn;
        inputStream = cIn;
        LOG.info("Compressed input; cannot compute number of records in the split");
    } else {
        fileIn.seek(start);
        filePosition = fileIn;
        inputStream = fileIn;
        LOG.info("Split pos = " + start + " length " + splitLength);
    }
}

Example usage for org.apache.hadoop.fs ReadOption SKIP_CHECKSUMS

Introduction

Prototype

Document

Usage