List of usage examples for the org.apache.hadoop.hdfs.MiniDFSCluster constructor MiniDFSCluster(Configuration, int, boolean, String[])
@Deprecated public MiniDFSCluster(Configuration conf, int numDataNodes, boolean format, String[] racks) throws IOException
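All of the examples below call this deprecated four-argument constructor (on Hadoop 2.x the non-deprecated replacement is MiniDFSCluster.Builder, e.g. new MiniDFSCluster.Builder(conf).numDataNodes(1).build()). As a quick orientation before the examples, a minimal lifecycle sketch might look like the following; the test.build.data directory and the written file are illustrative assumptions, not taken from any of the examples.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class MiniDfsLifecycleSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // MiniDFSCluster places its name/data directories under test.build.data (path is an assumption)
    System.setProperty("test.build.data", "target/minidfs");
    // 1 datanode, format the storage directories, default rack topology
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    try {
      cluster.waitActive();                    // wait until the namenode and datanode report in
      FileSystem fs = cluster.getFileSystem(); // client FileSystem bound to the mini cluster
      FSDataOutputStream out = fs.create(new Path("/tmp/hello.txt"));
      out.writeUTF("hello");
      out.close();
      // ... exercise the code under test against fs ...
    } finally {
      cluster.shutdown();                      // stop datanodes and namenode, release ports
    }
  }
}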
From source file:org.apache.blur.HdfsMiniClusterUtil.java
License:Apache License
public static MiniDFSCluster startDfs(Configuration conf, boolean format, String path) {
  String perm;
  Path p = new Path(new File("./target").getAbsolutePath());
  try {
    FileSystem fileSystem = p.getFileSystem(conf);
    FileStatus fileStatus = fileSystem.getFileStatus(p);
    FsPermission permission = fileStatus.getPermission();
    perm = permission.getUserAction().ordinal() + "" + permission.getGroupAction().ordinal() + ""
        + permission.getOtherAction().ordinal();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  LOG.info("dfs.datanode.data.dir.perm=" + perm);
  conf.set("dfs.datanode.data.dir.perm", perm);
  System.setProperty("test.build.data", path);
  try {
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, (String[]) null);
    cluster.waitActive();
    return cluster;
  } catch (Exception e) {
    LOG.error("error opening file system", e);
    throw new RuntimeException(e);
  }
}
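The helper probes the local ./target directory's permissions and copies them into dfs.datanode.data.dir.perm so the datanode accepts whatever umask the build environment uses. The caller is responsible for teardown; a hedged usage sketch (the ./target/hdfs path and the try/finally placement are assumptions, not taken from the Blur sources):

Configuration conf = new Configuration();
MiniDFSCluster cluster = HdfsMiniClusterUtil.startDfs(conf, true, "./target/hdfs");
try {
  FileSystem fs = cluster.getFileSystem();
  // ... run assertions against fs ...
} finally {
  cluster.shutdown();  // the helper does not shut the cluster down itself
}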
From source file:org.apache.blur.MiniCluster.java
License:Apache License
public void startDfs(final Configuration conf, final boolean format, final String path, final String[] racks) {
  Thread thread = new Thread(group, new Runnable() {
    @SuppressWarnings("deprecation")
    @Override
    public void run() {
      _conf = conf;
      String perm;
      Path p = new Path(new File(path).getAbsolutePath());
      try {
        FileSystem fileSystem = p.getFileSystem(conf);
        if (!fileSystem.exists(p)) {
          if (!fileSystem.mkdirs(p)) {
            throw new RuntimeException("Could not create path [" + path + "]");
          }
        }
        FileStatus fileStatus = fileSystem.getFileStatus(p);
        FsPermission permission = fileStatus.getPermission();
        perm = permission.getUserAction().ordinal() + "" + permission.getGroupAction().ordinal() + ""
            + permission.getOtherAction().ordinal();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      LOG.info("dfs.datanode.data.dir.perm=" + perm);
      conf.set("dfs.datanode.data.dir.perm", perm);
      System.setProperty("test.build.data", path);
      try {
        if (racks == null) {
          cluster = new MiniDFSCluster(conf, 1, format, racks);
        } else {
          cluster = new MiniDFSCluster(conf, racks.length, format, racks);
        }
      } catch (Exception e) {
        LOG.error("error opening file system", e);
        throw new RuntimeException(e);
      }
    }
  });
  thread.start();
  try {
    thread.join();
    cluster.waitActive();
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
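Note how the racks array doubles as the datanode count: one datanode is started per rack entry, and a null array falls back to a single datanode. A hedged calling sketch (the rack names and the miniCluster variable are illustrative assumptions):

// miniCluster is an org.apache.blur.MiniCluster instance (assumption about how it is obtained)
// Two datanodes, one per rack entry; rack names are arbitrary topology paths
miniCluster.startDfs(new Configuration(), true, "./target/mini-dfs", new String[] { "/rack1", "/rack2" });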
From source file:org.apache.flume.sink.hdfs.TestHDFSEventSinkOnMiniCluster.java
License:Apache License
/**
 * This is a very basic test that writes one event to HDFS and reads it back.
 */
@Test
public void simpleHDFSTest() throws EventDeliveryException, IOException {
  cluster = new MiniDFSCluster(new Configuration(), 1, true, null);
  cluster.waitActive();

  String outputDir = "/flume/simpleHDFSTest";
  Path outputDirPath = new Path(outputDir);

  logger.info("Running test with output dir: {}", outputDir);

  FileSystem fs = cluster.getFileSystem();
  // ensure output directory is empty
  if (fs.exists(outputDirPath)) {
    fs.delete(outputDirPath, true);
  }

  String nnURL = getNameNodeURL(cluster);
  logger.info("Namenode address: {}", nnURL);

  Context chanCtx = new Context();
  MemoryChannel channel = new MemoryChannel();
  channel.setName("simpleHDFSTest-mem-chan");
  channel.configure(chanCtx);
  channel.start();

  Context sinkCtx = new Context();
  sinkCtx.put("hdfs.path", nnURL + outputDir);
  sinkCtx.put("hdfs.fileType", HDFSWriterFactory.DataStreamType);
  sinkCtx.put("hdfs.batchSize", Integer.toString(1));

  HDFSEventSink sink = new HDFSEventSink();
  sink.setName("simpleHDFSTest-hdfs-sink");
  sink.configure(sinkCtx);
  sink.setChannel(channel);
  sink.start();

  // create an event
  String EVENT_BODY = "yarg!";
  channel.getTransaction().begin();
  try {
    channel.put(EventBuilder.withBody(EVENT_BODY, Charsets.UTF_8));
    channel.getTransaction().commit();
  } finally {
    channel.getTransaction().close();
  }

  // store event to HDFS
  sink.process();

  // shut down flume
  sink.stop();
  channel.stop();

  // verify that it's in HDFS and that its content is what we say it should be
  FileStatus[] statuses = fs.listStatus(outputDirPath);
  Assert.assertNotNull("No files found written to HDFS", statuses);
  Assert.assertEquals("Only one file expected", 1, statuses.length);

  for (FileStatus status : statuses) {
    Path filePath = status.getPath();
    logger.info("Found file on DFS: {}", filePath);
    FSDataInputStream stream = fs.open(filePath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    String line = reader.readLine();
    logger.info("First line in file {}: {}", filePath, line);
    Assert.assertEquals(EVENT_BODY, line);
  }

  if (!KEEP_DATA) {
    fs.delete(outputDirPath, true);
  }

  cluster.shutdown();
  cluster = null;
}
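The getNameNodeURL helper is not part of this excerpt. One plausible implementation, assuming it simply builds an hdfs:// URL from the mini cluster's NameNode port (the method body below is a sketch, not the Flume source):

private static String getNameNodeURL(MiniDFSCluster cluster) {
  // assumption: the namenode listens on localhost at the port reported by the mini cluster
  return "hdfs://localhost:" + cluster.getNameNodePort();
}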
From source file:org.apache.flume.sink.hdfs.TestHDFSEventSinkOnMiniCluster.java
License:Apache License
/**
 * Writes two events in GZIP-compressed format.
 */
@Test
public void simpleHDFSGZipCompressedTest() throws EventDeliveryException, IOException {
  cluster = new MiniDFSCluster(new Configuration(), 1, true, null);
  cluster.waitActive();

  String outputDir = "/flume/simpleHDFSGZipCompressedTest";
  Path outputDirPath = new Path(outputDir);

  logger.info("Running test with output dir: {}", outputDir);

  FileSystem fs = cluster.getFileSystem();
  // ensure output directory is empty
  if (fs.exists(outputDirPath)) {
    fs.delete(outputDirPath, true);
  }

  String nnURL = getNameNodeURL(cluster);
  logger.info("Namenode address: {}", nnURL);

  Context chanCtx = new Context();
  MemoryChannel channel = new MemoryChannel();
  channel.setName("simpleHDFSTest-mem-chan");
  channel.configure(chanCtx);
  channel.start();

  Context sinkCtx = new Context();
  sinkCtx.put("hdfs.path", nnURL + outputDir);
  sinkCtx.put("hdfs.fileType", HDFSWriterFactory.CompStreamType);
  sinkCtx.put("hdfs.batchSize", Integer.toString(1));
  sinkCtx.put("hdfs.codeC", "gzip");

  HDFSEventSink sink = new HDFSEventSink();
  sink.setName("simpleHDFSTest-hdfs-sink");
  sink.configure(sinkCtx);
  sink.setChannel(channel);
  sink.start();

  // create an event
  String EVENT_BODY_1 = "yarg1";
  String EVENT_BODY_2 = "yarg2";
  channel.getTransaction().begin();
  try {
    channel.put(EventBuilder.withBody(EVENT_BODY_1, Charsets.UTF_8));
    channel.put(EventBuilder.withBody(EVENT_BODY_2, Charsets.UTF_8));
    channel.getTransaction().commit();
  } finally {
    channel.getTransaction().close();
  }

  // store event to HDFS
  sink.process();

  // shut down flume
  sink.stop();
  channel.stop();

  // verify that it's in HDFS and that its content is what we say it should be
  FileStatus[] statuses = fs.listStatus(outputDirPath);
  Assert.assertNotNull("No files found written to HDFS", statuses);
  Assert.assertEquals("Only one file expected", 1, statuses.length);

  for (FileStatus status : statuses) {
    Path filePath = status.getPath();
    logger.info("Found file on DFS: {}", filePath);
    FSDataInputStream stream = fs.open(filePath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(stream)));
    String line = reader.readLine();
    logger.info("First line in file {}: {}", filePath, line);
    Assert.assertEquals(EVENT_BODY_1, line);

    // The rest of this test is commented-out (will fail) for 2 reasons:
    //
    // (1) At the time of this writing, Hadoop has a bug which causes the
    // non-native gzip implementation to create invalid gzip files when
    // finish() and resetState() are called. See HADOOP-8522.
    //
    // (2) Even if HADOOP-8522 is fixed, the JDK GZipInputStream is unable
    // to read multi-member (concatenated) gzip files. See this Sun bug:
    // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4691425
    //
    //line = reader.readLine();
    //logger.info("Second line in file {}: {}", filePath, line);
    //Assert.assertEquals(EVENT_BODY_2, line);
  }

  if (!KEEP_DATA) {
    fs.delete(outputDirPath, true);
  }

  cluster.shutdown();
  cluster = null;
}
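If the commented-out second read were ever revived, the JDK GZIPInputStream limitation on concatenated members could be worked around with a decompressor that handles multi-member streams. A hedged fragment, assuming Apache Commons Compress is available on the test classpath and reusing the variables from the loop above:

// assumption: org.apache.commons:commons-compress on the classpath
// import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
FSDataInputStream stream = fs.open(filePath);
BufferedReader reader = new BufferedReader(
    new InputStreamReader(new GzipCompressorInputStream(stream, true)));  // true = read concatenated members
String first = reader.readLine();   // EVENT_BODY_1 from the first gzip member
String second = reader.readLine();  // EVENT_BODY_2 from the second gzip member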
From source file:org.apache.flume.sink.hdfs.TestHDFSEventSinkOnMiniCluster.java
License:Apache License
/**
 * Writes several events to HDFS, stops a datanode part-way through, and
 * verifies that the sink keeps writing (rolling new files) while the
 * pipeline is under-replicated.
 */
@Test
public void underReplicationTest() throws EventDeliveryException, IOException {
  Configuration conf = new Configuration();
  conf.set("dfs.replication", String.valueOf(3));
  cluster = new MiniDFSCluster(conf, 3, true, null);
  cluster.waitActive();

  String outputDir = "/flume/underReplicationTest";
  Path outputDirPath = new Path(outputDir);

  logger.info("Running test with output dir: {}", outputDir);

  FileSystem fs = cluster.getFileSystem();
  // ensure output directory is empty
  if (fs.exists(outputDirPath)) {
    fs.delete(outputDirPath, true);
  }

  String nnURL = getNameNodeURL(cluster);
  logger.info("Namenode address: {}", nnURL);

  Context chanCtx = new Context();
  MemoryChannel channel = new MemoryChannel();
  channel.setName("simpleHDFSTest-mem-chan");
  channel.configure(chanCtx);
  channel.start();

  Context sinkCtx = new Context();
  sinkCtx.put("hdfs.path", nnURL + outputDir);
  sinkCtx.put("hdfs.fileType", HDFSWriterFactory.DataStreamType);
  sinkCtx.put("hdfs.batchSize", Integer.toString(1));

  HDFSEventSink sink = new HDFSEventSink();
  sink.setName("simpleHDFSTest-hdfs-sink");
  sink.configure(sinkCtx);
  sink.setChannel(channel);
  sink.start();

  // create an event
  channel.getTransaction().begin();
  try {
    channel.put(EventBuilder.withBody("yarg 1", Charsets.UTF_8));
    channel.put(EventBuilder.withBody("yarg 2", Charsets.UTF_8));
    channel.put(EventBuilder.withBody("yarg 3", Charsets.UTF_8));
    channel.put(EventBuilder.withBody("yarg 4", Charsets.UTF_8));
    channel.put(EventBuilder.withBody("yarg 5", Charsets.UTF_8));
    channel.put(EventBuilder.withBody("yarg 5", Charsets.UTF_8));
    channel.getTransaction().commit();
  } finally {
    channel.getTransaction().close();
  }

  // store events to HDFS
  logger.info("Running process(). Create new file.");
  sink.process(); // create new file;
  logger.info("Running process(). Same file.");
  sink.process();

  // kill a datanode
  logger.info("Killing datanode #1...");
  cluster.stopDataNode(0);

  // there is a race here.. the client may or may not notice that the
  // datanode is dead before it next sync()s.
  // so, this next call may or may not roll a new file.
  logger.info("Running process(). Create new file? (racy)");
  sink.process();

  logger.info("Running process(). Create new file.");
  sink.process();

  logger.info("Running process(). Create new file.");
  sink.process();

  logger.info("Running process(). Create new file.");
  sink.process();

  // shut down flume
  sink.stop();
  channel.stop();

  // verify that it's in HDFS and that its content is what we say it should be
  FileStatus[] statuses = fs.listStatus(outputDirPath);
  Assert.assertNotNull("No files found written to HDFS", statuses);

  for (FileStatus status : statuses) {
    Path filePath = status.getPath();
    logger.info("Found file on DFS: {}", filePath);
    FSDataInputStream stream = fs.open(filePath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    String line = reader.readLine();
    logger.info("First line in file {}: {}", filePath, line);
    Assert.assertTrue(line.startsWith("yarg"));
  }

  Assert.assertTrue("4 or 5 files expected, found " + statuses.length,
      statuses.length == 4 || statuses.length == 5);
  System.out.println("There are " + statuses.length + " files.");

  if (!KEEP_DATA) {
    fs.delete(outputDirPath, true);
  }

  cluster.shutdown();
  cluster = null;
}
From source file:org.apache.flume.sink.hdfs.TestHDFSEventSinkOnMiniCluster.java
License:Apache License
/**
 * Writes many events to HDFS while a datanode is stopped part-way through
 * and verifies the expected number of rolled files.
 */
@Ignore("This test is flakey and causes tests to fail pretty often.")
@Test
public void maxUnderReplicationTest() throws EventDeliveryException, IOException {
  Configuration conf = new Configuration();
  conf.set("dfs.replication", String.valueOf(3));
  cluster = new MiniDFSCluster(conf, 3, true, null);
  cluster.waitActive();

  String outputDir = "/flume/underReplicationTest";
  Path outputDirPath = new Path(outputDir);

  logger.info("Running test with output dir: {}", outputDir);

  FileSystem fs = cluster.getFileSystem();
  // ensure output directory is empty
  if (fs.exists(outputDirPath)) {
    fs.delete(outputDirPath, true);
  }

  String nnURL = getNameNodeURL(cluster);
  logger.info("Namenode address: {}", nnURL);

  Context chanCtx = new Context();
  MemoryChannel channel = new MemoryChannel();
  channel.setName("simpleHDFSTest-mem-chan");
  channel.configure(chanCtx);
  channel.start();

  Context sinkCtx = new Context();
  sinkCtx.put("hdfs.path", nnURL + outputDir);
  sinkCtx.put("hdfs.fileType", HDFSWriterFactory.DataStreamType);
  sinkCtx.put("hdfs.batchSize", Integer.toString(1));

  HDFSEventSink sink = new HDFSEventSink();
  sink.setName("simpleHDFSTest-hdfs-sink");
  sink.configure(sinkCtx);
  sink.setChannel(channel);
  sink.start();

  // create an event
  channel.getTransaction().begin();
  try {
    for (int i = 0; i < 50; i++) {
      channel.put(EventBuilder.withBody("yarg " + i, Charsets.UTF_8));
    }
    channel.getTransaction().commit();
  } finally {
    channel.getTransaction().close();
  }

  // store events to HDFS
  logger.info("Running process(). Create new file.");
  sink.process(); // create new file;
  logger.info("Running process(). Same file.");
  sink.process();

  // kill a datanode
  logger.info("Killing datanode #1...");
  cluster.stopDataNode(0);

  // there is a race here.. the client may or may not notice that the
  // datanode is dead before it next sync()s.
  // so, this next call may or may not roll a new file.
  logger.info("Running process(). Create new file? (racy)");
  sink.process();

  for (int i = 3; i < 50; i++) {
    logger.info("Running process().");
    sink.process();
  }

  // shut down flume
  sink.stop();
  channel.stop();

  // verify that it's in HDFS and that its content is what we say it should be
  FileStatus[] statuses = fs.listStatus(outputDirPath);
  Assert.assertNotNull("No files found written to HDFS", statuses);

  for (FileStatus status : statuses) {
    Path filePath = status.getPath();
    logger.info("Found file on DFS: {}", filePath);
    FSDataInputStream stream = fs.open(filePath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    String line = reader.readLine();
    logger.info("First line in file {}: {}", filePath, line);
    Assert.assertTrue(line.startsWith("yarg"));
  }

  System.out.println("There are " + statuses.length + " files.");
  Assert.assertEquals("31 files expected, found " + statuses.length, 31, statuses.length);

  if (!KEEP_DATA) {
    fs.delete(outputDirPath, true);
  }

  cluster.shutdown();
  cluster = null;
}
From source file:org.apache.hcatalog.MiniCluster.java
License:Apache License
private void setupMiniDfsAndMrClusters() {
  try {
    final int dataNodes = 1; // number of data nodes
    final int taskTrackers = 1; // number of task tracker nodes
    Configuration config = new Configuration();

    // Builds and starts the mini dfs and mapreduce clusters
    System.setProperty("hadoop.log.dir", ".");
    m_dfs = new MiniDFSCluster(config, dataNodes, true, null);
    m_fileSys = m_dfs.getFileSystem();
    m_mr = new MiniMRCluster(taskTrackers, m_fileSys.getUri().toString(), 1);

    // Create the configuration hadoop-site.xml file
    File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/");
    conf_dir.mkdirs();
    File conf_file = new File(conf_dir, "hadoop-site.xml");

    // Write the necessary config info to hadoop-site.xml
    m_conf = m_mr.createJobConf();
    m_conf.setInt("mapred.submit.replication", 1);
    m_conf.set("dfs.datanode.address", "0.0.0.0:0");
    m_conf.set("dfs.datanode.http.address", "0.0.0.0:0");
    m_conf.writeXml(new FileOutputStream(conf_file));

    // Set the system properties needed by Pig
    System.setProperty("cluster", m_conf.get("mapred.job.tracker"));
    System.setProperty("namenode", m_conf.get("fs.default.name"));
    System.setProperty("junit.hadoop.conf", conf_dir.getPath());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
From source file:org.apache.hive.hcatalog.MiniCluster.java
License:Apache License
private void setupMiniDfsAndMrClusters() {
  try {
    final int dataNodes = 1; // number of data nodes
    final int taskTrackers = 1; // number of task tracker nodes
    Configuration config = new Configuration();

    // Builds and starts the mini dfs and mapreduce clusters
    if (System.getProperty("hadoop.log.dir") == null) {
      System.setProperty("hadoop.log.dir", "target/tmp/logs/");
    }
    m_dfs = new MiniDFSCluster(config, dataNodes, true, null);
    m_fileSys = m_dfs.getFileSystem();
    m_mr = new MiniMRCluster(taskTrackers, m_fileSys.getUri().toString(), 1);

    // Create the configuration hadoop-site.xml file
    File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/");
    conf_dir.mkdirs();
    File conf_file = new File(conf_dir, "hadoop-site.xml");

    // Write the necessary config info to hadoop-site.xml
    m_conf = m_mr.createJobConf();
    m_conf.setInt("mapred.submit.replication", 1);
    m_conf.set("dfs.datanode.address", "0.0.0.0:0");
    m_conf.set("dfs.datanode.http.address", "0.0.0.0:0");
    m_conf.writeXml(new FileOutputStream(conf_file));

    // Set the system properties needed by Pig
    System.setProperty("cluster", m_conf.get("mapred.job.tracker"));
    System.setProperty("namenode", m_conf.get("fs.default.name"));
    System.setProperty("junit.hadoop.conf", conf_dir.getPath());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
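Downstream test code can then pick up the generated configuration through the junit.hadoop.conf system property. How Pig actually consumes these properties is not shown in this excerpt; a hedged fragment of what a client might do with them:

// Load the hadoop-site.xml written by setupMiniDfsAndMrClusters()
Configuration clientConf = new Configuration();
clientConf.addResource(new Path(System.getProperty("junit.hadoop.conf"), "hadoop-site.xml"));
FileSystem fs = FileSystem.get(clientConf);  // resolves to the mini DFS via fs.default.name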
From source file:org.apache.ivory.cleanup.LogCleanupServiceTest.java
License:Apache License
@BeforeClass
public void setup() throws Exception {
  conf.set("hadoop.log.dir", "/tmp");
  this.dfsCluster = new MiniDFSCluster(conf, 1, true, null);
  fs = dfsCluster.getFileSystem();
  storeEntity(EntityType.CLUSTER, "testCluster");

  conf = new Configuration();
  System.setProperty("test.build.data", "target/tdfs/data" + System.currentTimeMillis());
  this.targetDfsCluster = new MiniDFSCluster(conf, 1, true, null);
  storeEntity(EntityType.CLUSTER, "backupCluster");

  storeEntity(EntityType.FEED, "impressionFeed");
  storeEntity(EntityType.FEED, "clicksFeed");
  storeEntity(EntityType.FEED, "imp-click-join1");
  storeEntity(EntityType.FEED, "imp-click-join2");
  storeEntity(EntityType.PROCESS, "sample");

  Process process = ConfigurationStore.get().get(EntityType.PROCESS, "sample");
  Process otherProcess = (Process) process.clone();
  otherProcess.setName("sample2");
  otherProcess.setFrequency(new Frequency("days(1)"));
  ConfigurationStore.get().remove(EntityType.PROCESS, otherProcess.getName());
  ConfigurationStore.get().publish(EntityType.PROCESS, otherProcess);

  fs.mkdirs(instanceLogPath);
  fs.mkdirs(instanceLogPath1);
  fs.mkdirs(instanceLogPath2);
  fs.mkdirs(instanceLogPath3);
  fs.mkdirs(instanceLogPath4);

  // fs.setTimes won't work on dirs
  fs.createNewFile(new Path(instanceLogPath, "oozie.log"));
  fs.createNewFile(new Path(instanceLogPath, "pigAction_SUCCEEDED.log"));

  tfs = targetDfsCluster.getFileSystem();
  fs.mkdirs(feedInstanceLogPath);
  fs.mkdirs(feedInstanceLogPath1);
  tfs.mkdirs(feedInstanceLogPath);
  tfs.mkdirs(feedInstanceLogPath1);
  fs.createNewFile(new Path(feedInstanceLogPath, "oozie.log"));
  tfs.createNewFile(new Path(feedInstanceLogPath, "oozie.log"));

  Thread.sleep(61000);
}
From source file:org.apache.ivory.cluster.util.EmbeddedCluster.java
License:Apache License
private static EmbeddedCluster createClusterAsUser(String name, boolean withMR) throws IOException {
  EmbeddedCluster cluster = new EmbeddedCluster();

  File target = new File("webapp/target");
  if (!target.exists()) {
    target = new File("target");
    System.setProperty("test.build.data", "target/" + name + "/data");
  } else {
    System.setProperty("test.build.data", "webapp/target/" + name + "/data");
  }

  String user = System.getProperty("user.name");
  cluster.conf.set("hadoop.log.dir", "/tmp");
  cluster.conf.set("hadoop.proxyuser.oozie.groups", "*");
  cluster.conf.set("hadoop.proxyuser.oozie.hosts", "127.0.0.1");
  cluster.conf.set("hadoop.proxyuser.hdfs.groups", "*");
  cluster.conf.set("hadoop.proxyuser.hdfs.hosts", "127.0.0.1");
  cluster.conf.set("mapreduce.jobtracker.kerberos.principal", "");
  cluster.conf.set("dfs.namenode.kerberos.principal", "");

  cluster.dfsCluster = new MiniDFSCluster(cluster.conf, 1, true, null);
  String hdfsUrl = cluster.conf.get("fs.default.name");
  LOG.info("Cluster Namenode = " + hdfsUrl);

  if (withMR) {
    System.setProperty("hadoop.log.dir", "/tmp");
    System.setProperty("org.apache.hadoop.mapred.TaskTracker", "/tmp");
    cluster.conf.set("org.apache.hadoop.mapred.TaskTracker", "/tmp");
    cluster.conf.set("mapreduce.jobtracker.staging.root.dir", "/user");

    Path path = new Path("/tmp/hadoop-" + user, "mapred");
    FileSystem.get(cluster.conf).mkdirs(path);
    FileSystem.get(cluster.conf).setPermission(path, new FsPermission((short) 511));

    cluster.mrCluster = new MiniMRCluster(1, hdfsUrl, 1);
    Configuration mrConf = cluster.mrCluster.createJobConf();
    cluster.conf.set("mapred.job.tracker", mrConf.get("mapred.job.tracker"));
    cluster.conf.set("mapred.job.tracker.http.address", mrConf.get("mapred.job.tracker.http.address"));
    LOG.info("Cluster JobTracker = " + cluster.conf.get("mapred.job.tracker"));
  }

  cluster.buildClusterObject(name);
  return cluster;
}
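This excerpt only covers startup; a symmetric teardown would stop the MR cluster before the DFS cluster. A hypothetical sketch (EmbeddedCluster may already expose such a method in the Ivory sources; the method name shutdown() here is an assumption):

public void shutdown() {
  if (mrCluster != null) {
    mrCluster.shutdown();   // stop the jobtracker and tasktrackers first
  }
  if (dfsCluster != null) {
    dfsCluster.shutdown();  // then stop datanodes and the namenode
  }
}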