List of usage examples for org.apache.hadoop.hdfs.MiniDFSCluster.getFileSystem()
public DistributedFileSystem getFileSystem() throws IOException
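All of the examples below follow the same basic pattern: build a MiniDFSCluster from a Configuration, call getFileSystem() to obtain the cluster's DistributedFileSystem, do the work, and shut the cluster down in a finally block. The following is a minimal sketch of that pattern; the class name, path, and file contents are illustrative and not taken from any of the source files below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;

// Hypothetical class name, used only for this sketch.
public class MiniDfsGetFileSystemSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new HdfsConfiguration();
        // Start a single-datanode mini cluster and get its DistributedFileSystem.
        MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
        DistributedFileSystem fs = cluster.getFileSystem();
        try {
            Path p = new Path("/example.txt"); // illustrative path
            FSDataOutputStream out = fs.create(p);
            out.writeBytes("hello");
            out.close();
            System.out.println("file length = " + fs.getFileStatus(p).getLen());
        } finally {
            // Release resources even if an assertion or write fails,
            // mirroring the try/finally blocks in the examples below.
            fs.close();
            cluster.shutdown();
        }
    }
}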
From source file: com.linkedin.haivvreo.TestHaivvreoUtils.java
License: Apache License
@Test
public void determineSchemaCanReadSchemaFromHDFS() throws IOException, HaivvreoException {
    // TODO: Make this an integration test, mock out hdfs for the actual unit test.
    String schemaString = TestAvroObjectInspectorGenerator.RECORD_SCHEMA;
    MiniDFSCluster miniDfs = null;
    try {
        // MiniDFSCluster litters files and folders all over the place.
        System.setProperty("test.build.data", "target/test-intermediate-stuff-data/");
        miniDfs = new MiniDFSCluster(new Configuration(), 1, true, null);

        miniDfs.getFileSystem().mkdirs(new Path("/path/to/schema"));
        FSDataOutputStream out = miniDfs.getFileSystem().create(new Path("/path/to/schema/schema.avsc"));
        out.writeBytes(schemaString);
        out.close();
        String onHDFS = miniDfs.getFileSystem().getUri() + "/path/to/schema/schema.avsc";

        Schema schemaFromHDFS = HaivvreoUtils.getSchemaFromHDFS(onHDFS, miniDfs.getFileSystem().getConf());
        Schema expectedSchema = Schema.parse(schemaString);
        assertEquals(expectedSchema, schemaFromHDFS);
    } finally {
        if (miniDfs != null)
            miniDfs.shutdown();
    }
}
From source file: com.mellanox.r4h.TestFSOutputSummer.java
License: Apache License
private void doTestFSOutputSummer(String checksumType) throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, BYTES_PER_CHECKSUM);
    conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, checksumType);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
    fileSys = cluster.getFileSystem();
    try {
        Path file = new Path("try.dat");
        Random rand = new Random(seed);
        rand.nextBytes(expected);
        writeFile1(file);
        writeFile2(file);
        writeFile3(file);
    } finally {
        fileSys.close();
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestFSOutputSummer.java
License: Apache License
@Test
public void TestDFSCheckSumType() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, BYTES_PER_CHECKSUM);
    conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "NULL");
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
    fileSys = cluster.getFileSystem();
    try {
        Path file = new Path("try.dat");
        Random rand = new Random(seed);
        rand.nextBytes(expected);
        writeFile1(file);
    } finally {
        fileSys.close();
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestHFlush.java
License: Apache License
/**
 * Test hsync (with updating block length in NameNode) while no data is
 * actually written yet.
 */
@Test
public void hSyncUpdateLength_00() throws IOException {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    DistributedFileSystem fileSystem = (DistributedFileSystem) cluster.getFileSystem();

    try {
        Path path = new Path(fName);
        FSDataOutputStream stm = fileSystem.create(path, true, 4096, (short) 2,
                MiniDFSClusterBridge.getAppendTestUtil_BLOCK_SIZE());
        System.out.println("Created file " + path.toString());
        ((DFSOutputStream) stm.getWrappedStream()).hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH));
        long currentFileLength = fileSystem.getFileStatus(path).getLen();
        assertEquals(0L, currentFileLength);
        stm.close();
    } finally {
        fileSystem.close();
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestHFlush.java
License: Apache License
/**
 * The method starts new cluster with defined Configuration; creates a file
 * with specified block_size and writes 10 equal sections in it; it also calls
 * hflush/hsync after each write and throws an IOException in case of an error.
 *
 * @param conf cluster configuration
 * @param fileName of the file to be created and processed as required
 * @param block_size value to be used for the file's creation
 * @param replicas is the number of replicas
 * @param isSync hsync or hflush
 * @param syncFlags specify the semantic of the sync/flush
 * @throws IOException in case of any errors
 */
public static void doTheJob(Configuration conf, final String fileName, long block_size, short replicas,
        boolean isSync, EnumSet<SyncFlag> syncFlags) throws IOException {
    byte[] fileContent;
    final int SECTIONS = 10;

    fileContent = AppendTestUtil.initBuffer(MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE());
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(replicas).build();
    // Make sure we work with DFS in order to utilize all its functionality
    DistributedFileSystem fileSystem = (DistributedFileSystem) cluster.getFileSystem();

    FSDataInputStream is;
    try {
        Path path = new Path(fileName);
        FSDataOutputStream stm = fileSystem.create(path, false, 4096, replicas, block_size);
        System.out.println("Created file " + fileName);

        int tenth = MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE() / SECTIONS;
        int rounding = MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE() - tenth * SECTIONS;
        for (int i = 0; i < SECTIONS; i++) {
            System.out.println(
                    "Writing " + (tenth * i) + " to " + (tenth * (i + 1)) + " section to file " + fileName);
            // write to the file
            stm.write(fileContent, tenth * i, tenth);

            // Wait while hflush/hsync pushes all packets through built pipeline
            if (isSync) {
                ((DFSOutputStream) stm.getWrappedStream()).hsync(syncFlags);
            } else {
                ((DFSOutputStream) stm.getWrappedStream()).hflush();
            }

            // Check file length if updatelength is required
            if (isSync && syncFlags.contains(SyncFlag.UPDATE_LENGTH)) {
                long currentFileLength = fileSystem.getFileStatus(path).getLen();
                assertEquals("File size doesn't match for hsync/hflush with updating the length",
                        tenth * (i + 1), currentFileLength);
            }
            byte[] toRead = new byte[tenth];
            byte[] expected = new byte[tenth];
            System.arraycopy(fileContent, tenth * i, expected, 0, tenth);
            // Open the same file for read. Need to create new reader after every write operation(!)
            is = fileSystem.open(path);
            is.seek(tenth * i);
            int readBytes = is.read(toRead, 0, tenth);
            System.out.println("Has read " + readBytes);
            assertTrue("Should've get more bytes", (readBytes > 0) && (readBytes <= tenth));
            is.close();
            checkData(toRead, 0, readBytes, expected, "Partial verification");
        }
        System.out.println("Writing " + (tenth * SECTIONS) + " to " + (tenth * SECTIONS + rounding)
                + " section to file " + fileName);
        stm.write(fileContent, tenth * SECTIONS, rounding);
        stm.close();

        assertEquals("File size doesn't match ", MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE(),
                fileSystem.getFileStatus(path).getLen());
        AppendTestUtil.checkFullFile(fileSystem, path, fileContent.length, fileContent, "hflush()");
    } finally {
        fileSystem.close();
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestHFlush.java
License: Apache License
/**
 * This creates a slow writer and checks to see
 * if pipeline heartbeats work fine.
 */
@Test
public void testPipelineHeartbeat() throws Exception {
    final int DATANODE_NUM = 2;
    final int fileLen = 6;
    Configuration conf = new HdfsConfiguration();
    final int timeout = 2000;
    conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, timeout);

    final Path p = new Path("/pipelineHeartbeat/foo");
    System.out.println("p=" + p);

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_NUM).build();
    try {
        DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();

        byte[] fileContents = AppendTestUtil.initBuffer(fileLen);

        // create a new file.
        FSDataOutputStream stm = AppendTestUtil.createFile(fs, p, DATANODE_NUM);

        stm.write(fileContents, 0, 1);
        Thread.sleep(timeout);
        stm.hflush();
        System.out.println("Wrote 1 byte and hflush " + p);

        // write another byte
        Thread.sleep(timeout);
        stm.write(fileContents, 1, 1);
        stm.hflush();

        stm.write(fileContents, 2, 1);
        Thread.sleep(timeout);
        stm.hflush();

        stm.write(fileContents, 3, 1);
        Thread.sleep(timeout);

        stm.write(fileContents, 4, 1);
        stm.hflush();

        stm.write(fileContents, 5, 1);
        Thread.sleep(timeout);
        stm.close();

        // verify that entire file is good
        AppendTestUtil.checkFullFile(fs, p, fileLen, fileContents, "Failed to slowly write to a file");
    } finally {
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestHFlush.java
License: Apache License
@Test
public void testHFlushInterrupted() throws Exception {
    final int DATANODE_NUM = 2;
    final int fileLen = 6;
    byte[] fileContents = AppendTestUtil.initBuffer(fileLen);
    Configuration conf = new HdfsConfiguration();
    final Path p = new Path("/hflush-interrupted");

    System.out.println("p=" + p);

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_NUM).build();
    try {
        DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();

        // create a new file.
        FSDataOutputStream stm = AppendTestUtil.createFile(fs, p, DATANODE_NUM);

        stm.write(fileContents, 0, 2);
        Thread.currentThread().interrupt();
        try {
            stm.hflush();
            // If we made it past the hflush(), then that means that the ack made it back
            // from the pipeline before we got to the wait() call. In that case we should
            // still have interrupted status.
            assertTrue(Thread.currentThread().interrupted());
        } catch (InterruptedIOException ie) {
            System.out.println("Got expected exception during flush");
        }
        assertFalse(Thread.currentThread().interrupted());

        // Try again to flush should succeed since we no longer have interrupt status
        stm.hflush();

        // Write some more data and flush
        stm.write(fileContents, 2, 2);
        stm.hflush();

        // Write some data and close while interrupted
        stm.write(fileContents, 4, 2);
        Thread.currentThread().interrupt();
        try {
            stm.close();
            // If we made it past the close(), then that means that the ack made it back
            // from the pipeline before we got to the wait() call. In that case we should
            // still have interrupted status.
            assertTrue(Thread.currentThread().interrupted());
        } catch (InterruptedIOException ioe) {
            System.out.println("Got expected exception during close");
            // If we got the exception, we shouldn't have interrupted status anymore.
            assertFalse(Thread.currentThread().interrupted());

            // Now do a successful close.
            stm.close();
        }

        // verify that entire file is good
        AppendTestUtil.checkFullFile(fs, p, fileLen, fileContents, "Failed to deal with thread interruptions");
    } finally {
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestReadWhileWriting.java
License: Apache License
/** Test reading while writing. */
@Test
public void pipeline_02_03() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);

    // create cluster
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
    try {
        // change the lease limits.
        cluster.setLeasePeriod(SOFT_LEASE_LIMIT, HARD_LEASE_LIMIT);

        // wait for the cluster
        cluster.waitActive();
        final FileSystem fs = cluster.getFileSystem();
        final Path p = new Path(DIR, "file1");
        final int half = BLOCK_SIZE / 2;

        // a. On Machine M1, Create file. Write half block of data.
        //    Invoke DFSOutputStream.hflush() on the dfs file handle.
        //    Do not close file yet.
        {
            final FSDataOutputStream out = fs.create(p, true,
                    fs.getConf().getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), (short) 3,
                    BLOCK_SIZE);
            write(out, 0, half);

            // hflush
            ((DFSOutputStream) out.getWrappedStream()).hflush();
        }

        // b. On another machine M2, open file and verify that the half-block
        //    of data can be read successfully.
        checkFile(p, half, conf);
        MiniDFSClusterBridge.getAppendTestUtilLOG().info("leasechecker.interruptAndJoin()");
        ((DistributedFileSystem) fs).dfs.getLeaseRenewer().interruptAndJoin();

        // c. On M1, append another half block of data. Close file on M1.
        {
            // sleep to let the lease expire.
            Thread.sleep(2 * SOFT_LEASE_LIMIT);

            final UserGroupInformation current = UserGroupInformation.getCurrentUser();
            final UserGroupInformation ugi = UserGroupInformation
                    .createUserForTesting(current.getShortUserName() + "x", new String[] { "supergroup" });
            final DistributedFileSystem dfs = ugi.doAs(new PrivilegedExceptionAction<DistributedFileSystem>() {
                @Override
                public DistributedFileSystem run() throws Exception {
                    return (DistributedFileSystem) FileSystem.newInstance(conf);
                }
            });
            final FSDataOutputStream out = append(dfs, p);
            write(out, 0, half);
            out.close();
        }

        // d. On M2, open file and read 1 block of data from it. Close file.
        checkFile(p, 2 * half, conf);
    } finally {
        cluster.shutdown();
    }
}
From source file: com.mycompany.app.TestStagingDirectoryPermissions.java
License: Apache License
@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}
From source file: edu.berkeley.chukwa_xtrace.TestXtrExtract.java
License: Apache License
public void testArchiving() throws Exception {
    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true); // nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);
    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);
    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);

    Thread.sleep(1000);
    System.out.println("done!");
}