Example usage for org.apache.hadoop.hdfs MiniDFSCluster.Builder MiniDFSCluster.Builder

List of usage examples for org.apache.hadoop.hdfs MiniDFSCluster.Builder MiniDFSCluster.Builder

Introduction

In this page you can find the example usage for org.apache.hadoop.hdfs MiniDFSCluster.Builder MiniDFSCluster.Builder.

Prototype

public Builder(Configuration conf) 

Source Link

Usage

From source file:org.apache.sentry.tests.e2e.hdfs.TestHDFSIntegration.java

License:Apache License

private static void startDFSandYARN() throws IOException, InterruptedException {
    adminUgi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override/*from   www .j ava 2  s .com*/
        public Void run() throws Exception {
            System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, "target/test/data");
            hadoopConf = new HdfsConfiguration();
            hadoopConf.set(DFSConfigKeys.DFS_NAMENODE_INODE_ATTRIBUTES_PROVIDER_KEY,
                    SentryINodeAttributesProvider.class.getName());
            hadoopConf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true);
            hadoopConf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
            File dfsDir = assertCreateDir(new File(baseDir, "dfs"));
            hadoopConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, dfsDir.getPath());
            hadoopConf.set("hadoop.security.group.mapping", MiniDFS.PseudoGroupMappingService.class.getName());
            Configuration.addDefaultResource("test.xml");

            hadoopConf.set("sentry.authorization-provider.hdfs-path-prefixes", MANAGED_PREFIXES);
            hadoopConf.set("sentry.authorization-provider.cache-refresh-retry-wait.ms", "5000");
            hadoopConf.set("sentry.authorization-provider.cache-refresh-interval.ms",
                    String.valueOf(CACHE_REFRESH));

            hadoopConf.set("sentry.authorization-provider.cache-stale-threshold.ms",
                    String.valueOf(STALE_THRESHOLD));

            hadoopConf.set("sentry.hdfs.service.security.mode", "none");
            hadoopConf.set("sentry.hdfs.service.client.server.rpc-addresses", "localhost");
            hadoopConf.set("sentry.hdfs.service.client.server.rpc-port", String.valueOf(sentryPort));
            EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
            miniDFS = new MiniDFSCluster.Builder(hadoopConf).build();
            Path tmpPath = new Path("/tmp");
            Path hivePath = new Path("/user/hive");
            Path warehousePath = new Path(hivePath, "warehouse");
            miniDFS.getFileSystem().mkdirs(warehousePath);
            boolean directory = miniDFS.getFileSystem().isDirectory(warehousePath);
            LOGGER.info("\n\n Is dir :" + directory + "\n\n");
            LOGGER.info("\n\n DefaultFS :" + miniDFS.getFileSystem().getUri() + "\n\n");
            fsURI = miniDFS.getFileSystem().getUri().toString();
            hadoopConf.set("fs.defaultFS", fsURI);

            // Create Yarn cluster
            // miniMR = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);

            miniDFS.getFileSystem().mkdirs(tmpPath);
            miniDFS.getFileSystem().setPermission(tmpPath, FsPermission.valueOf("drwxrwxrwx"));
            miniDFS.getFileSystem().setOwner(hivePath, "hive", "hive");
            miniDFS.getFileSystem().setOwner(warehousePath, "hive", "hive");
            LOGGER.info("\n\n Owner :" + miniDFS.getFileSystem().getFileStatus(warehousePath).getOwner() + ", "
                    + miniDFS.getFileSystem().getFileStatus(warehousePath).getGroup() + "\n\n");
            LOGGER.info("\n\n Owner tmp :" + miniDFS.getFileSystem().getFileStatus(tmpPath).getOwner() + ", "
                    + miniDFS.getFileSystem().getFileStatus(tmpPath).getGroup() + ", "
                    + miniDFS.getFileSystem().getFileStatus(tmpPath).getPermission() + ", " + "\n\n");

            int dfsSafeCheckRetry = 30;
            boolean hasStarted = false;
            for (int i = dfsSafeCheckRetry; i > 0; i--) {
                if (!miniDFS.getFileSystem().isInSafeMode()) {
                    hasStarted = true;
                    LOGGER.info("HDFS safemode check num times : " + (31 - i));
                    break;
                }
            }
            if (!hasStarted) {
                throw new RuntimeException("HDFS hasnt exited safe mode yet..");
            }

            return null;
        }
    });
}

From source file:org.apache.tajo.storage.TestFileStorageManager.java

License:Apache License

@Test
public void testGetSplit() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, false);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitClusterUp();//from   w  ww.j av  a 2  s.  c  o m
    TajoConf tajoConf = new TajoConf(conf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    int testCount = 10;
    Path tablePath = new Path("/testGetSplit");
    try {
        DistributedFileSystem fs = cluster.getFileSystem();

        // Create test partitions
        List<Path> partitions = Lists.newArrayList();
        for (int i = 0; i < testCount; i++) {
            Path tmpFile = new Path(tablePath, String.valueOf(i));
            DFSTestUtil.createFile(fs, new Path(tmpFile, "tmpfile.dat"), 10, (short) 2, 0xDEADDEADl);
            partitions.add(tmpFile);
        }

        assertTrue(fs.exists(tablePath));
        FileStorageManager sm = (FileStorageManager) StorageManager.getFileStorageManager(tajoConf);
        assertEquals(fs.getUri(), sm.getFileSystem().getUri());

        Schema schema = new Schema();
        schema.addColumn("id", Type.INT4);
        schema.addColumn("age", Type.INT4);
        schema.addColumn("name", Type.TEXT);
        TableMeta meta = CatalogUtil.newTableMeta(StoreType.CSV);

        List<Fragment> splits = Lists.newArrayList();
        // Get FileFragments in partition batch
        splits.addAll(sm.getSplits("data", meta, schema, partitions.toArray(new Path[partitions.size()])));
        assertEquals(testCount, splits.size());
        // -1 is unknown volumeId
        assertEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);

        splits.clear();
        splits.addAll(sm.getSplits("data", meta, schema,
                partitions.subList(0, partitions.size() / 2).toArray(new Path[partitions.size() / 2])));
        assertEquals(testCount / 2, splits.size());
        assertEquals(1, splits.get(0).getHosts().length);
        assertEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);
        fs.close();
    } finally {
        cluster.shutdown(true);
    }
}

From source file:org.apache.tajo.storage.TestFileStorageManager.java

License:Apache License

@Test
public void testGetSplitWithBlockStorageLocationsBatching() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    cluster.waitClusterUp();//from   w  w  w  .  j a  v a 2 s  .c om

    TajoConf tajoConf = new TajoConf(conf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    int testCount = 10;
    Path tablePath = new Path("/testGetSplitWithBlockStorageLocationsBatching");
    try {
        DistributedFileSystem fs = cluster.getFileSystem();

        // Create test files
        for (int i = 0; i < testCount; i++) {
            Path tmpFile = new Path(tablePath, "tmpfile" + i + ".dat");
            DFSTestUtil.createFile(fs, tmpFile, 10, (short) 2, 0xDEADDEADl);
        }
        assertTrue(fs.exists(tablePath));
        FileStorageManager sm = (FileStorageManager) StorageManager.getFileStorageManager(tajoConf);
        assertEquals(fs.getUri(), sm.getFileSystem().getUri());

        Schema schema = new Schema();
        schema.addColumn("id", Type.INT4);
        schema.addColumn("age", Type.INT4);
        schema.addColumn("name", Type.TEXT);
        TableMeta meta = CatalogUtil.newTableMeta(StoreType.CSV);

        List<Fragment> splits = Lists.newArrayList();
        splits.addAll(sm.getSplits("data", meta, schema, tablePath));

        assertEquals(testCount, splits.size());
        assertEquals(2, splits.get(0).getHosts().length);
        assertEquals(2, ((FileFragment) splits.get(0)).getDiskIds().length);
        assertNotEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);
        fs.close();
    } finally {
        cluster.shutdown(true);
    }
}

From source file:org.apache.tajo.storage.TestFileStorageManager.java

License:Apache License

@Test
public void testStoreType() throws Exception {
    final Configuration hdfsConf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    hdfsConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    hdfsConf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    hdfsConf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(hdfsConf).numDataNodes(2).build();
    cluster.waitClusterUp();/*from  w ww  . j  a va  2 s .  c o  m*/

    TajoConf tajoConf = new TajoConf(hdfsConf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    try {
        /* Local FileSystem */
        FileStorageManager sm = (FileStorageManager) StorageManager.getStorageManager(conf, StoreType.CSV);
        assertEquals(fs.getUri(), sm.getFileSystem().getUri());

        /* Distributed FileSystem */
        sm = (FileStorageManager) StorageManager.getStorageManager(tajoConf, StoreType.CSV);
        assertNotEquals(fs.getUri(), sm.getFileSystem().getUri());
        assertEquals(cluster.getFileSystem().getUri(), sm.getFileSystem().getUri());
    } finally {
        cluster.shutdown(true);
    }
}

From source file:org.apache.tajo.storage.TestFileTablespace.java

License:Apache License

@Test
public void testGetSplit() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, false);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitClusterUp();/* ww w .j  a va  2 s . co  m*/
    TajoConf tajoConf = new TajoConf(conf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    int testCount = 10;
    Path tablePath = new Path("/testGetSplit");
    try {
        DistributedFileSystem fs = cluster.getFileSystem();

        // Create test partitions
        List<Path> partitions = Lists.newArrayList();
        for (int i = 0; i < testCount; i++) {
            Path tmpFile = new Path(tablePath, String.valueOf(i));
            DFSTestUtil.createFile(fs, new Path(tmpFile, "tmpfile.dat"), 10, (short) 2, 0xDEADDEADl);
            partitions.add(tmpFile);
        }

        assertTrue(fs.exists(tablePath));
        FileTablespace space = new FileTablespace("testGetSplit", fs.getUri());
        space.init(new TajoConf(conf));
        assertEquals(fs.getUri(), space.getUri());

        Schema schema = new Schema();
        schema.addColumn("id", Type.INT4);
        schema.addColumn("age", Type.INT4);
        schema.addColumn("name", Type.TEXT);
        TableMeta meta = CatalogUtil.newTableMeta("TEXT");

        List<Fragment> splits = Lists.newArrayList();
        // Get FileFragments in partition batch
        splits.addAll(space.getSplits("data", meta, schema, partitions.toArray(new Path[partitions.size()])));
        assertEquals(testCount, splits.size());
        // -1 is unknown volumeId
        assertEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);

        splits.clear();
        splits.addAll(space.getSplits("data", meta, schema,
                partitions.subList(0, partitions.size() / 2).toArray(new Path[partitions.size() / 2])));
        assertEquals(testCount / 2, splits.size());
        assertEquals(1, splits.get(0).getHosts().length);
        assertEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);
        fs.close();
    } finally {
        cluster.shutdown(true);
    }
}

From source file:org.apache.tajo.storage.TestFileTablespace.java

License:Apache License

@Test
public void testGetSplitWithBlockStorageLocationsBatching() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    cluster.waitClusterUp();// w w w . ja v  a2 s  .  c om

    TajoConf tajoConf = new TajoConf(conf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    int testCount = 10;
    Path tablePath = new Path("/testGetSplitWithBlockStorageLocationsBatching");
    try {
        DistributedFileSystem fs = cluster.getFileSystem();

        // Create test files
        for (int i = 0; i < testCount; i++) {
            Path tmpFile = new Path(tablePath, "tmpfile" + i + ".dat");
            DFSTestUtil.createFile(fs, tmpFile, 10, (short) 2, 0xDEADDEADl);
        }
        assertTrue(fs.exists(tablePath));

        FileTablespace sm = new FileTablespace("testGetSplitWithBlockStorageLocationsBatching", fs.getUri());
        sm.init(new TajoConf(conf));

        assertEquals(fs.getUri(), sm.getUri());

        Schema schema = new Schema();
        schema.addColumn("id", Type.INT4);
        schema.addColumn("age", Type.INT4);
        schema.addColumn("name", Type.TEXT);
        TableMeta meta = CatalogUtil.newTableMeta("TEXT");

        List<Fragment> splits = Lists.newArrayList();
        splits.addAll(sm.getSplits("data", meta, schema, tablePath));

        assertEquals(testCount, splits.size());
        assertEquals(2, splits.get(0).getHosts().length);
        assertEquals(2, ((FileFragment) splits.get(0)).getDiskIds().length);
        assertNotEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);
        fs.close();
    } finally {
        cluster.shutdown(true);
    }
}

From source file:org.apache.tajo.storage.TestFileTablespace.java

License:Apache License

@Test
public void testGetFileTablespace() throws Exception {
    final Configuration hdfsConf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    hdfsConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    hdfsConf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    hdfsConf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(hdfsConf).numDataNodes(2).build();
    cluster.waitClusterUp();//from  w w w  . j  a  v  a2  s .  c o  m
    URI uri = URI.create(cluster.getFileSystem().getUri() + "/tajo");

    Optional<Tablespace> existingTs = Optional.absent();
    try {
        /* Local FileSystem */
        FileTablespace space = TablespaceManager.getLocalFs();
        assertEquals(localFs.getUri(), space.getFileSystem().getUri());

        FileTablespace distTablespace = new FileTablespace("testGetFileTablespace", uri);
        distTablespace.init(conf);
        existingTs = TablespaceManager.addTableSpaceForTest(distTablespace);

        /* Distributed FileSystem */
        space = (FileTablespace) TablespaceManager.get(uri).get();
        assertEquals(cluster.getFileSystem().getUri(), space.getFileSystem().getUri());

        space = (FileTablespace) TablespaceManager.getByName("testGetFileTablespace").get();
        assertEquals(cluster.getFileSystem().getUri(), space.getFileSystem().getUri());

    } finally {

        if (existingTs.isPresent()) {
            TablespaceManager.addTableSpaceForTest(existingTs.get());
        }

        cluster.shutdown(true);
    }
}

From source file:org.apache.tajo.storage.TestStorageManager.java

License:Apache License

@Test
public void testGetSplit() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, false);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();

    int testCount = 10;
    Path tablePath = new Path("/testGetSplit");
    try {//from  w  ww .j  a  v  a 2 s .c om
        DistributedFileSystem fs = cluster.getFileSystem();

        // Create test partitions
        List<Path> partitions = Lists.newArrayList();
        for (int i = 0; i < testCount; i++) {
            Path tmpFile = new Path(tablePath, String.valueOf(i));
            DFSTestUtil.createFile(fs, new Path(tmpFile, "tmpfile.dat"), 10, (short) 2, 0xDEADDEADl);
            partitions.add(tmpFile);
        }

        assertTrue(fs.exists(tablePath));
        StorageManager sm = StorageManager.getStorageManager(new TajoConf(conf), tablePath);

        Schema schema = new Schema();
        schema.addColumn("id", Type.INT4);
        schema.addColumn("age", Type.INT4);
        schema.addColumn("name", Type.TEXT);
        TableMeta meta = CatalogUtil.newTableMeta(StoreType.CSV);

        List<FileFragment> splits = Lists.newArrayList();
        // Get FileFragments in partition batch
        splits.addAll(sm.getSplits("data", meta, schema, partitions.toArray(new Path[partitions.size()])));
        assertEquals(testCount, splits.size());
        // -1 is unknown volumeId
        assertEquals(-1, splits.get(0).getDiskIds()[0]);

        splits.clear();
        splits.addAll(sm.getSplits("data", meta, schema,
                partitions.subList(0, partitions.size() / 2).toArray(new Path[partitions.size() / 2])));
        assertEquals(testCount / 2, splits.size());
        assertEquals(1, splits.get(0).getHosts().length);
        assertEquals(-1, splits.get(0).getDiskIds()[0]);
        fs.close();
    } finally {
        cluster.shutdown();

        File dir = new File(testDataPath);
        dir.delete();
    }
}

From source file:org.apache.tajo.storage.TestStorageManager.java

License:Apache License

@Test
public void testGetSplitWithBlockStorageLocationsBatching() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();

    int testCount = 10;
    Path tablePath = new Path("/testGetSplitWithBlockStorageLocationsBatching");
    try {//from   w w  w. ja v a2s.c  o  m
        DistributedFileSystem fs = cluster.getFileSystem();

        // Create test files
        for (int i = 0; i < testCount; i++) {
            Path tmpFile = new Path(tablePath, "tmpfile" + i + ".dat");
            DFSTestUtil.createFile(fs, tmpFile, 10, (short) 2, 0xDEADDEADl);
        }
        assertTrue(fs.exists(tablePath));
        StorageManager sm = StorageManager.getStorageManager(new TajoConf(conf), tablePath);

        Schema schema = new Schema();
        schema.addColumn("id", Type.INT4);
        schema.addColumn("age", Type.INT4);
        schema.addColumn("name", Type.TEXT);
        TableMeta meta = CatalogUtil.newTableMeta(StoreType.CSV);

        List<FileFragment> splits = Lists.newArrayList();
        splits.addAll(sm.getSplits("data", meta, schema, tablePath));

        assertEquals(testCount, splits.size());
        assertEquals(2, splits.get(0).getHosts().length);
        assertEquals(2, splits.get(0).getDiskIds().length);
        assertNotEquals(-1, splits.get(0).getDiskIds()[0]);
        fs.close();
    } finally {
        cluster.shutdown();

        File dir = new File(testDataPath);
        dir.delete();
    }
}