Example usage for org.apache.hadoop.hdfs MiniDFSCluster getFileSystem

List of usage examples for org.apache.hadoop.hdfs MiniDFSCluster getFileSystem

Introduction

On this page you can find example usages of the org.apache.hadoop.hdfs.MiniDFSCluster method getFileSystem().

Prototype

public DistributedFileSystem getFileSystem() throws IOException 

Document

Get a client handle to the DFS cluster with a single namenode.
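
A minimal sketch of the typical pattern is shown below. It assumes a standalone test setup with the hadoop-hdfs test artifact on the classpath; the class name and file path are hypothetical. The idea is to build a single-namenode cluster, obtain the client handle via getFileSystem(), use it like any other FileSystem, and shut the cluster down in a finally block.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class MiniDfsGetFileSystemSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Start an in-process cluster with a single namenode and one datanode.
        MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
        try {
            cluster.waitActive();
            // getFileSystem() returns a DistributedFileSystem client bound to the mini cluster.
            DistributedFileSystem fs = cluster.getFileSystem();
            Path file = new Path("/example/hello.txt");
            FSDataOutputStream out = fs.create(file);
            out.writeBytes("hello, MiniDFSCluster");
            out.close();
            System.out.println("Wrote " + fs.getFileStatus(file).getLen() + " bytes to " + file);
        } finally {
            cluster.shutdown();
        }
    }
}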

Usage

From source file:com.linkedin.haivvreo.TestHaivvreoUtils.java

License:Apache License

@Test
public void determineSchemaCanReadSchemaFromHDFS() throws IOException, HaivvreoException {
    // TODO: Make this an integration test, mock out hdfs for the actual unit test.
    String schemaString = TestAvroObjectInspectorGenerator.RECORD_SCHEMA;
    MiniDFSCluster miniDfs = null;
    try {
        // MiniDFSCluster litters files and folders all over the place.
        System.setProperty("test.build.data", "target/test-intermediate-stuff-data/");
        miniDfs = new MiniDFSCluster(new Configuration(), 1, true, null);

        miniDfs.getFileSystem().mkdirs(new Path("/path/to/schema"));
        FSDataOutputStream out = miniDfs.getFileSystem().create(new Path("/path/to/schema/schema.avsc"));
        out.writeBytes(schemaString);
        out.close();
        String onHDFS = miniDfs.getFileSystem().getUri() + "/path/to/schema/schema.avsc";

        Schema schemaFromHDFS = HaivvreoUtils.getSchemaFromHDFS(onHDFS, miniDfs.getFileSystem().getConf());
        Schema expectedSchema = Schema.parse(schemaString);
        assertEquals(expectedSchema, schemaFromHDFS);
    } finally {
        if (miniDfs != null)
            miniDfs.shutdown();
    }
}

From source file:com.mellanox.r4h.TestFSOutputSummer.java

License:Apache License

private void doTestFSOutputSummer(String checksumType) throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, BYTES_PER_CHECKSUM);
    conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, checksumType);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
    fileSys = cluster.getFileSystem();
    try {
        Path file = new Path("try.dat");
        Random rand = new Random(seed);
        rand.nextBytes(expected);
        writeFile1(file);
        writeFile2(file);
        writeFile3(file);
    } finally {
        fileSys.close();
        cluster.shutdown();
    }
}

From source file:com.mellanox.r4h.TestFSOutputSummer.java

License:Apache License

@Test
public void TestDFSCheckSumType() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, BYTES_PER_CHECKSUM);
    conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "NULL");
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES).build();
    fileSys = cluster.getFileSystem();
    try {
        Path file = new Path("try.dat");
        Random rand = new Random(seed);
        rand.nextBytes(expected);
        writeFile1(file);
    } finally {
        fileSys.close();
        cluster.shutdown();
    }
}

From source file:com.mellanox.r4h.TestHFlush.java

License:Apache License

/**
 * Test hsync (with updating block length in NameNode) while no data is
 * actually written yet.
 */
@Test
public void hSyncUpdateLength_00() throws IOException {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    DistributedFileSystem fileSystem = (DistributedFileSystem) cluster.getFileSystem();

    try {
        Path path = new Path(fName);
        FSDataOutputStream stm = fileSystem.create(path, true, 4096, (short) 2,
                MiniDFSClusterBridge.getAppendTestUtil_BLOCK_SIZE());
        System.out.println("Created file " + path.toString());
        ((DFSOutputStream) stm.getWrappedStream()).hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH));
        long currentFileLength = fileSystem.getFileStatus(path).getLen();
        assertEquals(0L, currentFileLength);
        stm.close();
    } finally {
        fileSystem.close();
        cluster.shutdown();
    }
}

From source file:com.mellanox.r4h.TestHFlush.java

License:Apache License

/**
 * The method starts a new cluster with the given Configuration, creates a file
 * with the specified block_size, and writes 10 equal sections to it; it also calls
 * hflush/hsync after each write and throws an IOException in case of an error.
 *
 * @param conf cluster configuration
 * @param fileName of the file to be created and processed as required
 * @param block_size value to be used for the file's creation
 * @param replicas is the number of replicas
 * @param isSync hsync or hflush         
 * @param syncFlags specify the semantic of the sync/flush
 * @throws IOException in case of any errors
 */
public static void doTheJob(Configuration conf, final String fileName, long block_size, short replicas,
        boolean isSync, EnumSet<SyncFlag> syncFlags) throws IOException {
    byte[] fileContent;
    final int SECTIONS = 10;

    fileContent = AppendTestUtil.initBuffer(MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE());
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(replicas).build();
    // Make sure we work with DFS in order to utilize all its functionality
    DistributedFileSystem fileSystem = (DistributedFileSystem) cluster.getFileSystem();

    FSDataInputStream is;
    try {
        Path path = new Path(fileName);
        FSDataOutputStream stm = fileSystem.create(path, false, 4096, replicas, block_size);
        System.out.println("Created file " + fileName);

        int tenth = MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE() / SECTIONS;
        int rounding = MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE() - tenth * SECTIONS;
        for (int i = 0; i < SECTIONS; i++) {
            System.out.println(
                    "Writing " + (tenth * i) + " to " + (tenth * (i + 1)) + " section to file " + fileName);
            // write to the file
            stm.write(fileContent, tenth * i, tenth);

            // Wait while hflush/hsync pushes all packets through built pipeline
            if (isSync) {
                ((DFSOutputStream) stm.getWrappedStream()).hsync(syncFlags);
            } else {
                ((DFSOutputStream) stm.getWrappedStream()).hflush();
            }

            // Check file length if updatelength is required
            if (isSync && syncFlags.contains(SyncFlag.UPDATE_LENGTH)) {
                long currentFileLength = fileSystem.getFileStatus(path).getLen();
                assertEquals("File size doesn't match for hsync/hflush with updating the length",
                        tenth * (i + 1), currentFileLength);
            }
            byte[] toRead = new byte[tenth];
            byte[] expected = new byte[tenth];
            System.arraycopy(fileContent, tenth * i, expected, 0, tenth);
            // Open the same file for read. Need to create new reader after every write operation(!)
            is = fileSystem.open(path);
            is.seek(tenth * i);
            int readBytes = is.read(toRead, 0, tenth);
            System.out.println("Has read " + readBytes);
            assertTrue("Should've get more bytes", (readBytes > 0) && (readBytes <= tenth));
            is.close();
            checkData(toRead, 0, readBytes, expected, "Partial verification");
        }
        System.out.println("Writing " + (tenth * SECTIONS) + " to " + (tenth * SECTIONS + rounding)
                + " section to file " + fileName);
        stm.write(fileContent, tenth * SECTIONS, rounding);
        stm.close();

        assertEquals("File size doesn't match ", MiniDFSClusterBridge.getAppendTestUtils_FILE_SIZE(),
                fileSystem.getFileStatus(path).getLen());
        AppendTestUtil.checkFullFile(fileSystem, path, fileContent.length, fileContent, "hflush()");
    } finally {
        fileSystem.close();
        cluster.shutdown();
    }
}

From source file:com.mellanox.r4h.TestHFlush.java

License:Apache License

/** This creates a slow writer and checks to see
 * if pipeline heartbeats work fine.
 */
@Test
public void testPipelineHeartbeat() throws Exception {
    final int DATANODE_NUM = 2;
    final int fileLen = 6;
    Configuration conf = new HdfsConfiguration();
    final int timeout = 2000;
    conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, timeout);

    final Path p = new Path("/pipelineHeartbeat/foo");
    System.out.println("p=" + p);

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_NUM).build();
    try {
        DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();

        byte[] fileContents = AppendTestUtil.initBuffer(fileLen);

        // create a new file.
        FSDataOutputStream stm = AppendTestUtil.createFile(fs, p, DATANODE_NUM);

        stm.write(fileContents, 0, 1);
        Thread.sleep(timeout);
        stm.hflush();
        System.out.println("Wrote 1 byte and hflush " + p);

        // write another byte
        Thread.sleep(timeout);
        stm.write(fileContents, 1, 1);
        stm.hflush();

        stm.write(fileContents, 2, 1);
        Thread.sleep(timeout);
        stm.hflush();

        stm.write(fileContents, 3, 1);
        Thread.sleep(timeout);
        stm.write(fileContents, 4, 1);
        stm.hflush();

        stm.write(fileContents, 5, 1);
        Thread.sleep(timeout);
        stm.close();

        // verify that entire file is good
        AppendTestUtil.checkFullFile(fs, p, fileLen, fileContents, "Failed to slowly write to a file");
    } finally {
        cluster.shutdown();
    }
}

From source file:com.mellanox.r4h.TestHFlush.java

License:Apache License

@Test
public void testHFlushInterrupted() throws Exception {
    final int DATANODE_NUM = 2;
    final int fileLen = 6;
    byte[] fileContents = AppendTestUtil.initBuffer(fileLen);
    Configuration conf = new HdfsConfiguration();
    final Path p = new Path("/hflush-interrupted");

    System.out.println("p=" + p);

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_NUM).build();
    try {
        DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();

        // create a new file.
        FSDataOutputStream stm = AppendTestUtil.createFile(fs, p, DATANODE_NUM);

        stm.write(fileContents, 0, 2);
        Thread.currentThread().interrupt();
        try {
            stm.hflush();
            // If we made it past the hflush(), then that means that the ack made it back
            // from the pipeline before we got to the wait() call. In that case we should
            // still have interrupted status.
            assertTrue(Thread.currentThread().interrupted());
        } catch (InterruptedIOException ie) {
            System.out.println("Got expected exception during flush");
        }
        assertFalse(Thread.currentThread().interrupted());

        // Try again to flush should succeed since we no longer have interrupt status
        stm.hflush();

        // Write some more data and flush
        stm.write(fileContents, 2, 2);
        stm.hflush();

        // Write some data and close while interrupted

        stm.write(fileContents, 4, 2);
        Thread.currentThread().interrupt();
        try {
            stm.close();
            // If we made it past the close(), then that means that the ack made it back
            // from the pipeline before we got to the wait() call. In that case we should
            // still have interrupted status.
            assertTrue(Thread.currentThread().interrupted());
        } catch (InterruptedIOException ioe) {
            System.out.println("Got expected exception during close");
            // If we got the exception, we shouldn't have interrupted status anymore.
            assertFalse(Thread.currentThread().interrupted());

            // Now do a successful close.
            stm.close();
        }

        // verify that entire file is good
        AppendTestUtil.checkFullFile(fs, p, fileLen, fileContents, "Failed to deal with thread interruptions");
    } finally {
        cluster.shutdown();
    }
}

From source file:com.mellanox.r4h.TestReadWhileWriting.java

License:Apache License

/** Test reading while writing. */
@Test
public void pipeline_02_03() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);

    // create cluster
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
    try {
        //change the lease limits.
        cluster.setLeasePeriod(SOFT_LEASE_LIMIT, HARD_LEASE_LIMIT);

        //wait for the cluster
        cluster.waitActive();
        final FileSystem fs = cluster.getFileSystem();
        final Path p = new Path(DIR, "file1");
        final int half = BLOCK_SIZE / 2;

        //a. On Machine M1, Create file. Write half block of data.
        //   Invoke DFSOutputStream.hflush() on the dfs file handle.
        //   Do not close file yet.
        {
            final FSDataOutputStream out = fs.create(p, true,
                    fs.getConf().getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), (short) 3,
                    BLOCK_SIZE);
            write(out, 0, half);

            //hflush
            ((DFSOutputStream) out.getWrappedStream()).hflush();
        }

        //b. On another machine M2, open file and verify that the half-block
        //   of data can be read successfully.
        checkFile(p, half, conf);
        MiniDFSClusterBridge.getAppendTestUtilLOG().info("leasechecker.interruptAndJoin()");
        ((DistributedFileSystem) fs).dfs.getLeaseRenewer().interruptAndJoin();

        //c. On M1, append another half block of data.  Close file on M1.
        {
            //sleep to let the lease expire.
            Thread.sleep(2 * SOFT_LEASE_LIMIT);

            final UserGroupInformation current = UserGroupInformation.getCurrentUser();
            final UserGroupInformation ugi = UserGroupInformation
                    .createUserForTesting(current.getShortUserName() + "x", new String[] { "supergroup" });
            final DistributedFileSystem dfs = ugi.doAs(new PrivilegedExceptionAction<DistributedFileSystem>() {
                @Override
                public DistributedFileSystem run() throws Exception {
                    return (DistributedFileSystem) FileSystem.newInstance(conf);
                }
            });
            final FSDataOutputStream out = append(dfs, p);
            write(out, 0, half);
            out.close();
        }

        //d. On M2, open file and read 1 block of data from it. Close file.
        checkFile(p, 2 * half, conf);
    } finally {
        cluster.shutdown();
    }
}

From source file:com.mycompany.app.TestStagingDirectoryPermissions.java

License:Apache License

@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000l);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}

From source file:edu.berkeley.chukwa_xtrace.TestXtrExtract.java

License:Apache License

public void testArchiving() throws Exception {

    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true);//nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);
    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);
    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);

    Thread.sleep(1000);

    System.out.println("done!");
}