List of usage examples for org.apache.hadoop.hdfs.MiniDFSCluster.getFileSystem()
public DistributedFileSystem getFileSystem() throws IOException
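All of the examples below follow the same basic pattern: build a MiniDFSCluster from a Configuration, call getFileSystem() to obtain the cluster's DistributedFileSystem, do the work, and shut the cluster down in a finally block. The following is a minimal sketch of that pattern; the class name, path, and file contents are illustrative and not taken from any of the source files below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;

// Hypothetical class name, used only for this sketch.
public class MiniDfsGetFileSystemSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new HdfsConfiguration();
        // Start a single-datanode mini cluster and get its DistributedFileSystem.
        MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
        DistributedFileSystem fs = cluster.getFileSystem();
        try {
            Path p = new Path("/example.txt"); // illustrative path
            FSDataOutputStream out = fs.create(p);
            out.writeBytes("hello");
            out.close();
            System.out.println("file length = " + fs.getFileStatus(p).getLen());
        } finally {
            // Release resources even if an assertion or write fails,
            // mirroring the try/finally blocks in the examples below.
            fs.close();
            cluster.shutdown();
        }
    }
}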
From source file: com.linkedin.haivvreo.TestHaivvreoUtils.java
License: Apache License
@Test
public void determineSchemaCanReadSchemaFromHDFS() throws IOException, HaivvreoException {
    // TODO: Make this an integration test, mock out hdfs for the actual unit test.
    String schemaString = TestAvroObjectInspectorGenerator.RECORD_SCHEMA;
    MiniDFSCluster miniDfs = null;
    try {
        // MiniDFSCluster litters files and folders all over the place.
        System.setProperty("test.build.data", "target/test-intermediate-stuff-data/");
        miniDfs = new MiniDFSCluster(new Configuration(), 1, true, null);

        miniDfs.getFileSystem().mkdirs(new Path("/path/to/schema"));
        FSDataOutputStream out = miniDfs.getFileSystem().create(new Path("/path/to/schema/schema.avsc"));
        out.writeBytes(schemaString);
        out.close();
        String onHDFS = miniDfs.getFileSystem().getUri() + "/path/to/schema/schema.avsc";

        Schema schemaFromHDFS = HaivvreoUtils.getSchemaFromHDFS(onHDFS, miniDfs.getFileSystem().getConf());
        Schema expectedSchema = Schema.parse(schemaString);
        assertEquals(expectedSchema, schemaFromHDFS);
    } finally {
        if (miniDfs != null)
            miniDfs.shutdown();
    }
}
From source file: com.mellanox.r4h.TestFSOutputSummer.java
License: Apache License
private void doTestFSOutputSummer(String checksumType) throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, BYTES_PER_CHECKSUM);
    conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, checksumType);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
    fileSys = cluster.getFileSystem();
    try {
        Path file = new Path("try.dat");
        Random rand = new Random(seed);
        rand.nextBytes(expected);
        writeFile1(file);
        writeFile2(file);
        writeFile3(file);
    } finally {
        fileSys.close();
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestFSOutputSummer.java
License: Apache License
@Test
public void TestDFSCheckSumType() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, BYTES_PER_CHECKSUM);
    conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "NULL");
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
    fileSys = cluster.getFileSystem();
    try {
        Path file = new Path("try.dat");
        Random rand = new Random(seed);
        rand.nextBytes(expected);
        writeFile1(file);
    } finally {
        fileSys.close();
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestHFlush.java
License: Apache License
/**
 * Test hsync (with updating block length in NameNode) while no data is
 * actually written yet.
 */
@Test
public void hSyncUpdateLength_00() throws IOException {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    DistributedFileSystem fileSystem = (DistributedFileSystem) cluster.getFileSystem();

    try {
        Path path = new Path(fName);
        FSDataOutputStream stm = fileSystem.create(path, true, 4096, (short) 2,
                MiniDFSClusterBridge.getAppendTestUtil_BLOCK_SIZE());
        System.out.println("Created file " + path.toString());
        ((DFSOutputStream) stm.getWrappedStream()).hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH));
        long currentFileLength = fileSystem.getFileStatus(path).getLen();
        assertEquals(0L, currentFileLength);
        stm.close();
    } finally {
        fileSystem.close();
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestHFlush.java
License: Apache License
/**
 * The method starts new cluster with defined Configuration; creates a file
 * with specified block_size and writes 10 equal sections in it; it also calls
 * hflush/hsync after each write and throws an IOException in case of an error.
 *
 * @param conf cluster configuration
 * @param fileName of the file to be created and processed as required
 * @param block_size value to be used for the file's creation
 * @param replicas is the number of replicas
 * @param isSync hsync or hflush
 * @param syncFlags specify the semantic of the sync/flush
 * @throws IOException in case of any errors
 */
public static void doTheJob(Configuration conf, final String fileName, long block_size, short replicas,
        boolean isSync, EnumSet<SyncFlag> syncFlags) throws IOException {
    byte[] fileContent;
    final int SECTIONS = 10;

    fileContent = AppendTestUtil.initBuffer(MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE());
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(replicas).build();
    // Make sure we work with DFS in order to utilize all its functionality
    DistributedFileSystem fileSystem = (DistributedFileSystem) cluster.getFileSystem();

    FSDataInputStream is;
    try {
        Path path = new Path(fileName);
        FSDataOutputStream stm = fileSystem.create(path, false, 4096, replicas, block_size);
        System.out.println("Created file " + fileName);

        int tenth = MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE() / SECTIONS;
        int rounding = MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE() - tenth * SECTIONS;
        for (int i = 0; i < SECTIONS; i++) {
            System.out.println(
                    "Writing " + (tenth * i) + " to " + (tenth * (i + 1)) + " section to file " + fileName);
            // write to the file
            stm.write(fileContent, tenth * i, tenth);

            // Wait while hflush/hsync pushes all packets through built pipeline
            if (isSync) {
                ((DFSOutputStream) stm.getWrappedStream()).hsync(syncFlags);
            } else {
                ((DFSOutputStream) stm.getWrappedStream()).hflush();
            }

            // Check file length if updatelength is required
            if (isSync && syncFlags.contains(SyncFlag.UPDATE_LENGTH)) {
                long currentFileLength = fileSystem.getFileStatus(path).getLen();
                assertEquals("File size doesn't match for hsync/hflush with updating the length",
                        tenth * (i + 1), currentFileLength);
            }
            byte[] toRead = new byte[tenth];
            byte[] expected = new byte[tenth];
            System.arraycopy(fileContent, tenth * i, expected, 0, tenth);
            // Open the same file for read. Need to create new reader after every write operation(!)
            is = fileSystem.open(path);
            is.seek(tenth * i);
            int readBytes = is.read(toRead, 0, tenth);
            System.out.println("Has read " + readBytes);
            assertTrue("Should've get more bytes", (readBytes > 0) && (readBytes <= tenth));
            is.close();
            checkData(toRead, 0, readBytes, expected, "Partial verification");
        }
        System.out.println("Writing " + (tenth * SECTIONS) + " to " + (tenth * SECTIONS + rounding)
                + " section to file " + fileName);
        stm.write(fileContent, tenth * SECTIONS, rounding);
        stm.close();

        assertEquals("File size doesn't match ", MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE(),
                fileSystem.getFileStatus(path).getLen());
        AppendTestUtil.checkFullFile(fileSystem, path, fileContent.length, fileContent, "hflush()");
    } finally {
        fileSystem.close();
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestHFlush.java
License: Apache License
/**
 * This creates a slow writer and checks to see
 * if pipeline heartbeats work fine.
 */
@Test
public void testPipelineHeartbeat() throws Exception {
    final int DATANODE_NUM = 2;
    final int fileLen = 6;
    Configuration conf = new HdfsConfiguration();
    final int timeout = 2000;
    conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, timeout);

    final Path p = new Path("/pipelineHeartbeat/foo");
    System.out.println("p=" + p);

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_NUM).build();
    try {
        DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();

        byte[] fileContents = AppendTestUtil.initBuffer(fileLen);

        // create a new file.
        FSDataOutputStream stm = AppendTestUtil.createFile(fs, p, DATANODE_NUM);

        stm.write(fileContents, 0, 1);
        Thread.sleep(timeout);
        stm.hflush();
        System.out.println("Wrote 1 byte and hflush " + p);

        // write another byte
        Thread.sleep(timeout);
        stm.write(fileContents, 1, 1);
        stm.hflush();

        stm.write(fileContents, 2, 1);
        Thread.sleep(timeout);
        stm.hflush();

        stm.write(fileContents, 3, 1);
        Thread.sleep(timeout);

        stm.write(fileContents, 4, 1);
        stm.hflush();

        stm.write(fileContents, 5, 1);
        Thread.sleep(timeout);
        stm.close();

        // verify that entire file is good
        AppendTestUtil.checkFullFile(fs, p, fileLen, fileContents, "Failed to slowly write to a file");
    } finally {
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestHFlush.java
License: Apache License
@Test
public void testHFlushInterrupted() throws Exception {
    final int DATANODE_NUM = 2;
    final int fileLen = 6;
    byte[] fileContents = AppendTestUtil.initBuffer(fileLen);
    Configuration conf = new HdfsConfiguration();
    final Path p = new Path("/hflush-interrupted");

    System.out.println("p=" + p);

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_NUM).build();
    try {
        DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();

        // create a new file.
        FSDataOutputStream stm = AppendTestUtil.createFile(fs, p, DATANODE_NUM);

        stm.write(fileContents, 0, 2);
        Thread.currentThread().interrupt();
        try {
            stm.hflush();
            // If we made it past the hflush(), then that means that the ack made it back
            // from the pipeline before we got to the wait() call. In that case we should
            // still have interrupted status.
            assertTrue(Thread.currentThread().interrupted());
        } catch (InterruptedIOException ie) {
            System.out.println("Got expected exception during flush");
        }
        assertFalse(Thread.currentThread().interrupted());

        // Try again to flush should succeed since we no longer have interrupt status
        stm.hflush();

        // Write some more data and flush
        stm.write(fileContents, 2, 2);
        stm.hflush();

        // Write some data and close while interrupted
        stm.write(fileContents, 4, 2);
        Thread.currentThread().interrupt();
        try {
            stm.close();
            // If we made it past the close(), then that means that the ack made it back
            // from the pipeline before we got to the wait() call. In that case we should
            // still have interrupted status.
            assertTrue(Thread.currentThread().interrupted());
        } catch (InterruptedIOException ioe) {
            System.out.println("Got expected exception during close");
            // If we got the exception, we shouldn't have interrupted status anymore.
            assertFalse(Thread.currentThread().interrupted());

            // Now do a successful close.
            stm.close();
        }

        // verify that entire file is good
        AppendTestUtil.checkFullFile(fs, p, fileLen, fileContents, "Failed to deal with thread interruptions");
    } finally {
        cluster.shutdown();
    }
}
From source file: com.mellanox.r4h.TestReadWhileWriting.java
License: Apache License
/** Test reading while writing. */
@Test
public void pipeline_02_03() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);

    // create cluster
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
    try {
        // change the lease limits.
        cluster.setLeasePeriod(SOFT_LEASE_LIMIT, HARD_LEASE_LIMIT);

        // wait for the cluster
        cluster.waitActive();
        final FileSystem fs = cluster.getFileSystem();
        final Path p = new Path(DIR, "file1");
        final int half = BLOCK_SIZE / 2;

        // a. On Machine M1, Create file. Write half block of data.
        //    Invoke DFSOutputStream.hflush() on the dfs file handle.
        //    Do not close file yet.
        {
            final FSDataOutputStream out = fs.create(p, true,
                    fs.getConf().getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), (short) 3,
                    BLOCK_SIZE);
            write(out, 0, half);

            // hflush
            ((DFSOutputStream) out.getWrappedStream()).hflush();
        }

        // b. On another machine M2, open file and verify that the half-block
        //    of data can be read successfully.
        checkFile(p, half, conf);
        MiniDFSClusterBridge.getAppendTestUtilLOG().info("leasechecker.interruptAndJoin()");
        ((DistributedFileSystem) fs).dfs.getLeaseRenewer().interruptAndJoin();

        // c. On M1, append another half block of data. Close file on M1.
        {
            // sleep to let the lease expire.
            Thread.sleep(2 * SOFT_LEASE_LIMIT);

            final UserGroupInformation current = UserGroupInformation.getCurrentUser();
            final UserGroupInformation ugi = UserGroupInformation
                    .createUserForTesting(current.getShortUserName() + "x", new String[] { "supergroup" });
            final DistributedFileSystem dfs = ugi.doAs(new PrivilegedExceptionAction<DistributedFileSystem>() {
                @Override
                public DistributedFileSystem run() throws Exception {
                    return (DistributedFileSystem) FileSystem.newInstance(conf);
                }
            });
            final FSDataOutputStream out = append(dfs, p);
            write(out, 0, half);
            out.close();
        }

        // d. On M2, open file and read 1 block of data from it. Close file.
        checkFile(p, 2 * half, conf);
    } finally {
        cluster.shutdown();
    }
}
From source file: com.mycompany.app.TestStagingDirectoryPermissions.java
License: Apache License
@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}
From source file: edu.berkeley.chukwa_xtrace.TestXtrExtract.java
License: Apache License
public void testArchiving() throws Exception {
    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true); // nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);
    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);
    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);

    Thread.sleep(1000);
    System.out.println("done!");
}