Usage examples for org.apache.hadoop.fs.FileSystem.create
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize) throws IOException
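Before the collected examples, here is a minimal, self-contained sketch of how this overload is typically called. The class name, path, buffer size, replication factor, and block size are illustrative values chosen for the sketch, not taken from any of the sources below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/tmp/create-example.bin"); // hypothetical target path
        boolean overwrite = true;                        // replace the file if it exists
        int bufferSize = 4096;                           // I/O buffer size in bytes
        short replication = 1;                           // number of block replicas
        long blockSize = 128L * 1024 * 1024;             // 128 MB HDFS block size

        FSDataOutputStream out = fs.create(path, overwrite, bufferSize, replication, blockSize);
        try {
            out.write(new byte[bufferSize]);             // write one buffer of zero bytes
        } finally {
            out.close();
        }
    }
}

Passing the buffer size, replication, and block size explicitly is mainly useful when those parameters matter, as in the benchmarks below that sweep them; for ordinary writes the simpler create(Path) overload uses the filesystem defaults.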
From source file:HdfsReader.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]"); return 1; }/*from ww w . ja va 2s .co m*/ double fileSize; double fileSizeInMB; if (args[0].equals("1g")) { fileSize = 1073741824.0; fileSizeInMB = 1024.0; } else if (args[0].equals("10g")) { fileSize = 10737418240.0; fileSizeInMB = 10240.0; } else if (args[0].equals("100g")) { fileSize = 107374182400.0; fileSizeInMB = 102400.0; } else if (args[0].equals("200g")) { fileSize = 214748364800.0; fileSizeInMB = 204800.0; } else { throw new IllegalArgumentException("Invalid arg: " + args[0]); } String fileName = "read-" + args[0] + "-avg.txt"; File avgFile = new File(fileName); PrintWriter avgPW = new PrintWriter(avgFile); fileName = "read-" + args[0] + "-min.txt"; File minFile = new File(fileName); PrintWriter minPW = new PrintWriter(minFile); fileName = "read-" + args[0] + "-max.txt"; File maxFile = new File(fileName); PrintWriter maxPW = new PrintWriter(maxFile); int numIters = 10; int bufferSize = 4096; long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 }; short replication[] = new short[] { 1, 4 }; String hdfsFile = "/hdfs_test/" + args[0] + "/1.in"; Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path hdfsFilePath = new Path(hdfsFile); for (int i = 0; i < 5; i++) { // blockSize for (int j = 0; j < 2; j++) { // replication OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]); byte[] buf = new byte[bufferSize]; for (int m = 0; m < bufferSize; m += 4) { buf[m] = (byte) m; } double numBufPerFile = fileSize / (double) bufferSize; for (double m = 0.0; m < numBufPerFile; m++) { os.write(buf); } os.close(); long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE; for (int k = 0; k < numIters; k++) { InputStream is = fs.open(hdfsFilePath); long startTime = System.currentTimeMillis(); int bytesRead = is.read(buf); while (bytesRead != -1) { bytesRead = is.read(buf); } is.close(); long endTime = System.currentTimeMillis(); long duration = (endTime - startTime); avg += duration; if (duration < min) { min = duration; } if (duration > max) { max = duration; } } // write result to output double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg; avgPW.print(avgBW); avgPW.print("\t"); double minBW = fileSizeInMB * 1000.0 / (double) max; minPW.print(minBW); minPW.print("\t"); double maxBW = fileSizeInMB * 1000.0 / (double) min; maxPW.print(maxBW); maxPW.print("\t"); } avgPW.println(); minPW.println(); maxPW.println(); } avgPW.close(); minPW.close(); maxPW.close(); return 0; }
From source file:HdfsCacheReader.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]"); return 1; }/*from w w w . java2 s . co m*/ double fileSize; double fileSizeInMB; if (args[0].equals("1g")) { fileSize = 1073741824.0; fileSizeInMB = 1024.0; } else if (args[0].equals("10g")) { fileSize = 10737418240.0; fileSizeInMB = 10240.0; } else if (args[0].equals("100g")) { fileSize = 107374182400.0; fileSizeInMB = 102400.0; } else if (args[0].equals("200g")) { fileSize = 214748364800.0; fileSizeInMB = 204800.0; } else { throw new IllegalArgumentException("Invalid arg: " + args[0]); } String fileName = "cacheRead-" + args[0] + "-avg.txt"; File avgFile = new File(fileName); PrintWriter avgPW = new PrintWriter(avgFile); fileName = "cacheRead-" + args[0] + "-min.txt"; File minFile = new File(fileName); PrintWriter minPW = new PrintWriter(minFile); fileName = "cacheRead-" + args[0] + "-max.txt"; File maxFile = new File(fileName); PrintWriter maxPW = new PrintWriter(maxFile); int numIters = 10; int bufferSize = 65536; long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 }; short replication[] = new short[] { 1, 4 }; String hdfsFile = "/hdfs_test/" + args[0] + "/1.in"; Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path hdfsFilePath = new Path(hdfsFile); for (int i = 0; i < 5; i++) { // blockSize for (int j = 0; j < 2; j++) { // replication OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]); byte[] buf = new byte[bufferSize]; for (int m = 0; m < bufferSize; m += 4) { buf[m] = (byte) m; } double numBufPerFile = fileSize / (double) bufferSize; for (double m = 0.0; m < numBufPerFile; m++) { os.write(buf); } os.close(); String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile + " -pool hdfs_test"; Process p = Runtime.getRuntime().exec(cmdStr); p.waitFor(); String cmdOutLine = ""; StringBuffer cmdOut = new StringBuffer(); BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((cmdOutLine = cmdOutReader.readLine()) != null) { cmdOut.append(cmdOutLine + "\n"); } // System.out.println (cmdOut.toString()); long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE; for (int k = 0; k < numIters; k++) { FSDataInputStream in = fs.open(hdfsFilePath); ByteBuffer bbuf = null; ElasticByteBufferPool ebbp = new ElasticByteBufferPool(); long startTime = System.currentTimeMillis(); while ((bbuf = in.read(ebbp, bufferSize, EnumSet.of(ReadOption.SKIP_CHECKSUMS))) != null) { in.releaseBuffer(bbuf); } long endTime = System.currentTimeMillis(); in.close(); long duration = (endTime - startTime); avg += duration; if (duration < min) { min = duration; } if (duration > max) { max = duration; } } // write result to output double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg; avgPW.print(avgBW); avgPW.print("\t"); double minBW = fileSizeInMB * 1000.0 / (double) max; minPW.print(minBW); minPW.print("\t"); double maxBW = fileSizeInMB * 1000.0 / (double) min; maxPW.print(maxBW); maxPW.print("\t"); cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -removeDirectives -path " + hdfsFile; p = Runtime.getRuntime().exec(cmdStr); p.waitFor(); cmdOutLine = ""; cmdOut.setLength(0); cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((cmdOutLine = cmdOutReader.readLine()) != null) { cmdOut.append(cmdOutLine + "\n"); } // System.out.println (cmdOut.toString()); fs.delete(hdfsFilePath, 
true); } avgPW.println(); minPW.println(); maxPW.println(); } avgPW.close(); minPW.close(); maxPW.close(); return 0; }
From source file:SingleFileWriter.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("SingleFileWriter [fileSize ie. 1g/10g/100g]"); return 1; }/*from w w w .j av a2 s . c o m*/ double fileSize = Double.parseDouble((args[0].split("g|G"))[0]) * 1024 * 1024 * 1024; String hdfsFolder = "/hdfs_test/"; String hdfsFile = hdfsFolder + args[0]; short replication = 1; boolean overWrite = true; int bufferSize = 65536; int blockSize = 536870912; double numIters = fileSize / (double) bufferSize; /* Initialize byte buffer */ ByteBuffer buf = ByteBuffer.allocate(bufferSize); buf.order(ByteOrder.nativeOrder()); for (int k = 0; k < bufferSize / Integer.SIZE; k++) { buf.putInt(k); } buf.flip(); /* Create file on HDFS */ Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path hdfsFilePath = new Path(hdfsFile); OutputStream os = fs.create(hdfsFilePath, overWrite, bufferSize, replication, blockSize); /* Write the content of the byte buffer to the HDFS file*/ Timer t = new Timer(); t.start(0); for (long i = 0; i < numIters; i++) { os.write(buf.array()); buf.flip(); } t.end(0); os.close(); fs.delete(hdfsFilePath, true); t.dump(); return 0; }
From source file:a.TestConcatExample.java
License:Apache License
private long writeFile(FileSystem fs, Path p, int blockSize, short replication, int numBlocks)
        throws IOException {
    int bufferSize = 4096;
    FSDataOutputStream os = fs.create(p, true, bufferSize, replication, blockSize);
    int i = 0;
    byte[] data = new byte[bufferSize];
    r.nextBytes(data);
    while (i < blockSize * numBlocks) {
        os.write(data);
        i += data.length;
    }
    os.close();
    FileStatus fileStatus = fs.getFileStatus(p);
    long f1Len = fileStatus.getLen();
    assertEquals(i, f1Len);
    return f1Len;
}
From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java
License:Apache License
/**
 * @param configuration Hadoop configuration
 * @param schema the schema of the data
 * @param file the file to write to
 * @param mode file creation mode
 * @param rowGroupSize the row group size
 * @throws IOException if the file can not be created
 */
public ParquetFileWriter(Configuration configuration, MessageType schema, Path file, Mode mode,
        long rowGroupSize, int maxPaddingSize) throws IOException {
    TypeUtil.checkValidWriteSchema(schema);
    this.schema = schema;
    FileSystem fs = file.getFileSystem(configuration);
    boolean overwriteFlag = (mode == Mode.OVERWRITE);
    if (supportsBlockSize(fs)) {
        // use the default block size, unless row group size is larger
        long dfsBlockSize = Math.max(fs.getDefaultBlockSize(file), rowGroupSize);
        this.alignment = PaddingAlignment.get(dfsBlockSize, rowGroupSize, maxPaddingSize);
        this.out = fs.create(file, overwriteFlag, DFS_BUFFER_SIZE_DEFAULT, fs.getDefaultReplication(file),
                dfsBlockSize);
    } else {
        this.alignment = NoAlignment.get(rowGroupSize);
        this.out = fs.create(file, overwriteFlag);
    }
}
From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java
License:Apache License
/**
 * FOR TESTING ONLY.
 *
 * @param configuration Hadoop configuration
 * @param schema the schema of the data
 * @param file the file to write to
 * @param rowAndBlockSize the row group size
 * @throws IOException if the file can not be created
 */
ParquetFileWriter(Configuration configuration, MessageType schema, Path file, long rowAndBlockSize,
        int maxPaddingSize) throws IOException {
    FileSystem fs = file.getFileSystem(configuration);
    this.schema = schema;
    this.alignment = PaddingAlignment.get(rowAndBlockSize, rowAndBlockSize, maxPaddingSize);
    this.out = fs.create(file, true, DFS_BUFFER_SIZE_DEFAULT, fs.getDefaultReplication(file), rowAndBlockSize);
}
From source file:co.cask.cdap.common.logging.SyncTest.java
License:Apache License
@Test
@Ignore
public void testSync() throws IOException {
    FileSystem fs = FileSystem.get(config);
    // create a file and write n bytes, then sync
    Path path = new Path("/myfile");
    FSDataOutputStream out = fs.create(path, false, 4096, (short) 2, 4096L);
    int numBytes = 5000;
    for (int i = 0; i < numBytes; i++) {
        out.write((byte) i);
    }
    out.hflush();
    // verify the file is there
    Assert.assertTrue(fs.exists(path));
    // do not verify the length of the file, hflush() does not update that
    //Assert.assertEquals(numBytes, fs.getFileStatus(path).getLen());
    // read back and verify all bytes
    FSDataInputStream in = fs.open(path);
    byte[] buffer = new byte[numBytes];
    in.readFully(buffer);
    for (int i = 0; i < numBytes; i++) {
        Assert.assertEquals((byte) i, buffer[i]);
    }
    in.close();
    // now close the writer
    out.close();
}
From source file:com.cloudera.CacheTool.java
License:Apache License
public static void createFile(FileSystem fs, Path fileName, long fileLen) throws IOException {
    int bufferLen = 1024;
    assert bufferLen > 0;
    if (!fs.mkdirs(fileName.getParent())) {
        throw new IOException("Mkdirs failed to create " + fileName.getParent().toString());
    }
    FSDataOutputStream out = null;
    try {
        out = fs.create(fileName, true,
                fs.getConf().getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
                (short) 1, fs.getDefaultBlockSize(fileName));
        if (fileLen > 0) {
            byte[] toWrite = new byte[bufferLen];
            Random rb = new Random(0);
            long bytesToWrite = fileLen;
            while (bytesToWrite > 0) {
                rb.nextBytes(toWrite);
                int bytesToWriteNext = (bufferLen < bytesToWrite) ? bufferLen : (int) bytesToWrite;
                out.write(toWrite, 0, bytesToWriteNext);
                bytesToWrite -= bytesToWriteNext;
            }
        }
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
From source file:com.cloudera.recordservice.examples.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            @Override
            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader =
                            inFormat.createRecordReader(splits.get(sampleStep * idx), context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println("Got an exception while reading splits "
                            + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}
From source file:com.hadoop.hdfs_test.HdfsWriter.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("HdfsWriter [fileSize ie. 1g/10g/40g]"); return 1; }//from w ww . j av a2 s .c om double fileSize; double fileSizeInMB; if (args[0].equals("1g")) { fileSize = 1073741824.0; fileSizeInMB = 1024.0; } else if (args[0].equals("10g")) { fileSize = 10737418240.0; fileSizeInMB = 10240.0; } else if (args[0].equals("100g")) { fileSize = 107374182400.0; fileSizeInMB = 102400.0; } else if (args[0].equals("200g")) { fileSize = 214748364800.0; fileSizeInMB = 204800.0; } else { throw new IllegalArgumentException("Invalid arg: " + args[0]); } String localFolder = "/home/hduser/projects/hdfs_test/input/"; String hdfsFolder = "/hdfs_test/"; int numFiles = 5; short replication = 1; String hdfsFile; long startTime, endTime, duration = 0; long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE; String fileName = args[0] + "-avg.txt"; File avgFile = new File(fileName); PrintWriter avgPW = new PrintWriter(avgFile); fileName = args[0] + "-min.txt"; File minFile = new File(fileName); PrintWriter minPW = new PrintWriter(minFile); fileName = args[0] + "-max.txt"; File maxFile = new File(fileName); PrintWriter maxPW = new PrintWriter(maxFile); boolean overWrite = true; int bufferSize[] = new int[] { 4096, 16384, 65536, 262144 }; long blockSize[] = new long[] { 67108864, 134217728, 268435456 }; Configuration conf = getConf(); System.out.println("configured filesystem = " + conf.get(FS_PARAM_NAME)); FileSystem fs = FileSystem.get(conf); for (int i = 0; i < 4; i++) { // different buffer size for (int j = 0; j < 3; j++) { // different block size double numIters = fileSize / (double) bufferSize[i]; byte[] buf = new byte[bufferSize[i]]; for (int m = 0; m < bufferSize[i]; m += 4) { buf[m] = (byte) m; } for (int k = 1; k <= numFiles; k++) { hdfsFile = hdfsFolder + args[0] + "/" + i + ".in"; Path outputPath = new Path(hdfsFile); OutputStream os = fs.create(outputPath, overWrite, bufferSize[i], replication, blockSize[j]); startTime = System.currentTimeMillis(); for (long m = 0; m < numIters; m++) { os.write(buf); } endTime = System.currentTimeMillis(); os.close(); fs.delete(outputPath, true); duration = endTime - startTime; avg += duration; if (duration < min) { min = duration; } if (duration > max) { max = duration; } } // write result to output Double avgBW = fileSizeInMB * 1000.0 * (double) numFiles / (double) avg; avgPW.print(avgBW); avgPW.print("\t"); double minBW = fileSizeInMB * 1000.0 / (double) max; minPW.print(minBW); minPW.print("\t"); double maxBW = fileSizeInMB * 1000.0 / (double) min; maxPW.print(maxBW); maxPW.print("\t"); duration = 0; avg = 0; min = Long.MAX_VALUE; max = Long.MIN_VALUE; } avgPW.println(); minPW.println(); maxPW.println(); } //avgFile.close(); avgPW.close(); //minFile.close(); minPW.close(); //maxFile.close(); maxPW.close(); /* System.out.println ("avg: " + (fileSizeInMB*1000.0*(double)numFiles/(double)avg) + " max: " + (fileSizeInMB*1000.0)/(double)min + " min: " + (fileSizeInMB*1000.0)/(double)max); */ return 0; }