List of usage examples for org.apache.hadoop.fs ReadOption SKIP_CHECKSUMS
ReadOption SKIP_CHECKSUMS
To view the source code for org.apache.hadoop.fs ReadOption SKIP_CHECKSUMS, click Source Link.
From source file:HdfsCacheReader.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]"); return 1; }/* w w w .j av a 2 s . c om*/ double fileSize; double fileSizeInMB; if (args[0].equals("1g")) { fileSize = 1073741824.0; fileSizeInMB = 1024.0; } else if (args[0].equals("10g")) { fileSize = 10737418240.0; fileSizeInMB = 10240.0; } else if (args[0].equals("100g")) { fileSize = 107374182400.0; fileSizeInMB = 102400.0; } else if (args[0].equals("200g")) { fileSize = 214748364800.0; fileSizeInMB = 204800.0; } else { throw new IllegalArgumentException("Invalid arg: " + args[0]); } String fileName = "cacheRead-" + args[0] + "-avg.txt"; File avgFile = new File(fileName); PrintWriter avgPW = new PrintWriter(avgFile); fileName = "cacheRead-" + args[0] + "-min.txt"; File minFile = new File(fileName); PrintWriter minPW = new PrintWriter(minFile); fileName = "cacheRead-" + args[0] + "-max.txt"; File maxFile = new File(fileName); PrintWriter maxPW = new PrintWriter(maxFile); int numIters = 10; int bufferSize = 65536; long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 }; short replication[] = new short[] { 1, 4 }; String hdfsFile = "/hdfs_test/" + args[0] + "/1.in"; Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path hdfsFilePath = new Path(hdfsFile); for (int i = 0; i < 5; i++) { // blockSize for (int j = 0; j < 2; j++) { // replication OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]); byte[] buf = new byte[bufferSize]; for (int m = 0; m < bufferSize; m += 4) { buf[m] = (byte) m; } double numBufPerFile = fileSize / (double) bufferSize; for (double m = 0.0; m < numBufPerFile; m++) { os.write(buf); } os.close(); String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile + " -pool hdfs_test"; Process p = Runtime.getRuntime().exec(cmdStr); p.waitFor(); String cmdOutLine = ""; StringBuffer cmdOut = new 
StringBuffer(); BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((cmdOutLine = cmdOutReader.readLine()) != null) { cmdOut.append(cmdOutLine + "\n"); } // System.out.println (cmdOut.toString()); long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE; for (int k = 0; k < numIters; k++) { FSDataInputStream in = fs.open(hdfsFilePath); ByteBuffer bbuf = null; ElasticByteBufferPool ebbp = new ElasticByteBufferPool(); long startTime = System.currentTimeMillis(); while ((bbuf = in.read(ebbp, bufferSize, EnumSet.of(ReadOption.SKIP_CHECKSUMS))) != null) { in.releaseBuffer(bbuf); } long endTime = System.currentTimeMillis(); in.close(); long duration = (endTime - startTime); avg += duration; if (duration < min) { min = duration; } if (duration > max) { max = duration; } } // write result to output double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg; avgPW.print(avgBW); avgPW.print("\t"); double minBW = fileSizeInMB * 1000.0 / (double) max; minPW.print(minBW); minPW.print("\t"); double maxBW = fileSizeInMB * 1000.0 / (double) min; maxPW.print(maxBW); maxPW.print("\t"); cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -removeDirectives -path " + hdfsFile; p = Runtime.getRuntime().exec(cmdStr); p.waitFor(); cmdOutLine = ""; cmdOut.setLength(0); cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((cmdOutLine = cmdOutReader.readLine()) != null) { cmdOut.append(cmdOutLine + "\n"); } // System.out.println (cmdOut.toString()); fs.delete(hdfsFilePath, true); } avgPW.println(); minPW.println(); maxPW.println(); } avgPW.close(); minPW.close(); maxPW.close(); return 0; }
From source file:com.cloudera.ByteBufferRecordReader.java
License:Apache License
private void initialize(Configuration job, long splitStart, long splitLength, Path file) throws IOException { start = splitStart;//from ww w .j ava 2 s .c o m end = start + splitLength; pos = start; // open the file and seek to the start of the split final FileSystem fs = file.getFileSystem(job); fileIn = fs.open(file); this.readStats = new ReadStatistics(); this.bufferPool = new ElasticByteBufferPool(); boolean skipChecksums = job.getBoolean("bytecount.skipChecksums", false); this.readOption = skipChecksums ? EnumSet.of(ReadOption.SKIP_CHECKSUMS) : EnumSet.noneOf(ReadOption.class); CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file); if (null != codec) { isCompressedInput = true; decompressor = CodecPool.getDecompressor(codec); CompressionInputStream cIn = codec.createInputStream(fileIn, decompressor); filePosition = cIn; inputStream = cIn; LOG.info("Compressed input; cannot compute number of records in the split"); } else { fileIn.seek(start); filePosition = fileIn; inputStream = fileIn; LOG.info("Split pos = " + start + " length " + splitLength); } }