List of usage examples for the org.apache.hadoop.mapred.MiniMRCluster constructor
public MiniMRCluster(int jobTrackerPort, int taskTrackerPort, int numTaskTrackers, String namenode, int numDir, String[] racks) throws IOException
From source file:cascading.platform.hadoop.HadoopPlatform.java
License:Open Source License
/**
 * Lazily initializes the shared Hadoop configuration and file system used by this platform.
 *
 * <p>The method is a no-op when {@code configuration} has already been set. Otherwise one of
 * three modes is selected:
 * <ul>
 *   <li>{@code isUseCluster()} is false: a local {@link JobConf} bound to {@code file:///} and
 *       the {@code local} job tracker;</li>
 *   <li>the {@code mapred.jar} system property is set: a remote cluster, optionally taking
 *       {@code fs.default.name} and {@code mapred.job.tracker} from system properties;</li>
 *   <li>otherwise: an in-process {@link MiniDFSCluster} plus {@link MiniMRCluster}.</li>
 * </ul>
 * Finally the mapper/reducer counts and the global/platform properties are applied to the
 * resulting job configuration.
 *
 * @throws IOException if the file system or one of the mini clusters cannot be created
 */
@Override
public synchronized void setUp() throws IOException {
    // Already initialized by an earlier call.
    if (configuration != null) {
        return;
    }

    if (!isUseCluster()) {
        LOG.info("not using cluster");
        configuration = new JobConf();

        // enforce the local file system in local mode
        configuration.set("fs.default.name", "file:///");
        configuration.set("mapred.job.tracker", "local");
        // The value is set unconditionally. The former "re-read and set again if empty"
        // check could never fire because the value written here is always non-empty.
        configuration.set("mapreduce.jobtracker.staging.root.dir",
                System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir"))) {
            System.setProperty("hadoop.log.dir", "cascading-hadoop/build/test/log");
        }

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir"))) {
            System.setProperty("hadoop.tmp.dir", "cascading-hadoop/build/test/tmp");
        }

        // Best effort: the result is deliberately ignored, as in the original code.
        new File(System.getProperty("hadoop.log.dir")).mkdirs();

        JobConf conf = new JobConf();
        String remoteJar = System.getProperty("mapred.jar");

        if (!Util.isEmpty(remoteJar)) {
            LOG.info("using a remote cluster with jar: {}", remoteJar);
            conf.setJar(remoteJar);
            configuration = conf;

            // Optional overrides taken from system properties, applied only when present.
            for (String key : new String[]{"fs.default.name", "mapred.job.tracker"}) {
                String value = System.getProperty(key);

                if (!Util.isEmpty(value)) {
                    LOG.info("using {}={}", key, value);
                    configuration.set(key, value);
                }
            }

            configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies

            fileSys = FileSystem.get(configuration);
        } else {
            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();
            mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1, null, null, conf);

            configuration = mr.createJobConf();
        }

        // Settings shared by the remote and the mini cluster modes.
        JobConf clusterConf = (JobConf) configuration;
        clusterConf.set("mapred.child.java.opts", "-Xmx512m");
        clusterConf.setInt("mapred.job.reuse.jvm.num.tasks", -1);
        clusterConf.setInt("jobclient.completion.poll.interval", 50);
        clusterConf.setInt("jobclient.progress.monitor.poll.interval", 50);
        clusterConf.setMapSpeculativeExecution(false);
        clusterConf.setReduceSpeculativeExecution(false);
    }

    JobConf jobConf = (JobConf) configuration;
    jobConf.setNumMapTasks(numMappers);
    jobConf.setNumReduceTasks(numReducers);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null) {
        globalProperties.put("log4j.logger", logger);
    }

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    HadoopPlanner.copyProperties(jobConf, globalProperties); // copy any external properties

    HadoopPlanner.copyJobConf(properties, jobConf); // put all properties on the jobconf
}
From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java
License:Open Source License
/**
 * Runs the word-count job three times against a {@link MiniMRCluster} backed by the local
 * file system ({@code file:///}), with {@link LzoCodec} registered as the compression codec.
 * The cluster is always shut down, even when a run fails.
 *
 * @throws Exception if the cluster cannot be started or a word-count run fails
 */
public void testWithLocal() throws Exception {
    // Flag pairs handed to runWordCount, in the order they are executed.
    final boolean[][] flagPairs = {
        {false, false},
        {false, true},
        {true, false},
    };

    MiniMRCluster cluster = null;
    try {
        JobConf clusterConf = new JobConf();
        clusterConf.set("mapred.queue.names", "default");
        cluster = new MiniMRCluster(2, "file:///", 3, null, null, clusterConf);

        Configuration jobConf = cluster.createJobConf();
        jobConf.set("io.compression.codecs", LzoCodec.class.getName());

        for (boolean[] flags : flagPairs) {
            runWordCount(jobConf, flags[0], flags[1]);
        }
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
From source file:org.apache.ambari.servicemonitor.utils.DFSUtils.java
License:Apache License
/**
 * Creates a {@link MiniMRCluster} with three task trackers that runs against the given
 * file system.
 *
 * <p>Side effects: sets the {@code hadoop.log.dir} system property to
 * {@code <java.io.tmpdir>/mrcluster/logs}, and writes the job history location and the
 * default file system name into {@code conf}.
 *
 * @param conf  job configuration; it is modified and then handed to the cluster
 * @param fsURI URI of the file system the cluster should use
 * @return the newly constructed cluster
 * @throws IOException if the cluster cannot be created
 */
public static MiniMRCluster createMRCluster(JobConf conf, String fsURI) throws IOException {
    final String logRoot = System.getProperty("java.io.tmpdir") + "/mrcluster/logs";
    final String historyLocation = "file:///" + logRoot + "/history";

    System.setProperty("hadoop.log.dir", logRoot);
    conf.set("hadoop.job.history.location", historyLocation);
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fsURI);

    final int taskTrackers = 3;
    final int dirsPerTracker = 1;
    return new MiniMRCluster(taskTrackers, fsURI, dirsPerTracker, null, null, conf);
}
From source file:org.apache.hcatalog.mapreduce.TestHCatMultiOutputFormat.java
License:Apache License
@BeforeClass public static void setup() throws Exception { String testDir = System.getProperty("test.data.dir", "./"); testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/"; workDir = new File(new File(testDir).getCanonicalPath()); FileUtil.fullyDelete(workDir);/*from w ww. ja va 2 s. co m*/ workDir.mkdirs(); warehousedir = new Path(workDir + "/warehouse"); // Run hive metastore server t = new Thread(new RunMS()); t.start(); // LocalJobRunner does not work with mapreduce OutputCommitter. So need // to use MiniMRCluster. MAPREDUCE-2350 Configuration conf = new Configuration(true); conf.set("yarn.scheduler.capacity.root.queues", "default"); conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); FileSystem fs = FileSystem.get(conf); System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf)); mrConf = mrCluster.createJobConf(); fs.mkdirs(warehousedir); initializeSetup(); }
From source file:org.apache.hcatalog.mapreduce.TestHCatPartitionPublish.java
License:Apache License
@BeforeClass public static void setup() throws Exception { Configuration conf = new Configuration(true); conf.set("yarn.scheduler.capacity.root.queues", "default"); conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); fs = FileSystem.get(conf);//from w w w .j a v a 2 s . c om System.setProperty("hadoop.log.dir", new File(fs.getWorkingDirectory().toString(), "/logs").getAbsolutePath()); // LocalJobRunner does not work with mapreduce OutputCommitter. So need // to use MiniMRCluster. MAPREDUCE-2350 mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf)); mrConf = mrCluster.createJobConf(); if (isServerRunning) { return; } MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge()); isServerRunning = true; securityManager = System.getSecurityManager(); System.setSecurityManager(new NoExitSecurityManager()); hcatConf = new HiveConf(TestHCatPartitionPublish.class); hcatConf.set("hive.metastore.local", "false"); hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); msc = new HiveMetaStoreClient(hcatConf, null); System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); }
From source file:org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java
License:Apache License
@BeforeClass public static void setup() throws IOException { createWorkDir();/*from w w w .ja v a 2 s . co m*/ Configuration conf = new Configuration(true); conf.set("yarn.scheduler.capacity.root.queues", "default"); conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); fs = FileSystem.get(conf); System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); // LocalJobRunner does not work with mapreduce OutputCommitter. So need // to use MiniMRCluster. MAPREDUCE-2350 mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf)); mrConf = mrCluster.createJobConf(); }
From source file:org.apache.hive.hcatalog.mapreduce.TestHCatMultiOutputFormat.java
License:Apache License
@BeforeClass public static void setup() throws Exception { System.clearProperty("mapred.job.tracker"); String testDir = System.getProperty("test.tmp.dir", "./"); testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/"; workDir = new File(new File(testDir).getCanonicalPath()); FileUtil.fullyDelete(workDir);/*from ww w . j av a 2 s.c om*/ workDir.mkdirs(); warehousedir = new Path(System.getProperty("test.warehouse.dir")); HiveConf metastoreConf = new HiveConf(); metastoreConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, warehousedir.toString()); // Run hive metastore server msPort = MetaStoreUtils.startMetaStore(metastoreConf); // LocalJobRunner does not work with mapreduce OutputCommitter. So need // to use MiniMRCluster. MAPREDUCE-2350 Configuration conf = new Configuration(true); conf.set("yarn.scheduler.capacity.root.queues", "default"); conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); FileSystem fs = FileSystem.get(conf); System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf)); mrConf = mrCluster.createJobConf(); initializeSetup(); warehousedir.getFileSystem(conf).mkdirs(warehousedir); }
From source file:org.apache.hive.hcatalog.mapreduce.TestHCatPartitionPublish.java
License:Apache License
@BeforeClass public static void setup() throws Exception { File workDir = handleWorkDir(); conf.set("yarn.scheduler.capacity.root.queues", "default"); conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); fs = FileSystem.get(conf);//from w ww . j a v a2 s. co m System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); // LocalJobRunner does not work with mapreduce OutputCommitter. So need // to use MiniMRCluster. MAPREDUCE-2350 mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf)); mrConf = mrCluster.createJobConf(); if (isServerRunning) { return; } msPort = MetaStoreUtils.findFreePort(); MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge()); Thread.sleep(10000); isServerRunning = true; securityManager = System.getSecurityManager(); System.setSecurityManager(new NoExitSecurityManager()); hcatConf = new HiveConf(TestHCatPartitionPublish.class); hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); hcatConf.setTimeVar(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, 120, TimeUnit.SECONDS); hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); msc = new HiveMetaStoreClient(hcatConf); System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); }
From source file:org.apache.hive.hcatalog.mapreduce.TestMultiOutputFormat.java
License:Apache License
@BeforeClass public static void setup() throws IOException { createWorkDir();//from w w w . j a v a2 s. co m Configuration conf = new Configuration(true); conf.set("yarn.scheduler.capacity.root.queues", "default"); conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); fs = FileSystem.get(conf); System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); // LocalJobRunner does not work with mapreduce OutputCommitter. So need // to use MiniMRCluster. MAPREDUCE-2350 mrConf = new JobConf(conf); mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, mrConf); }
From source file:org.apache.kylin.storage.minicluster.HiveMiniClusterTest.java
License:Apache License
protected void startHiveMiniCluster() { //Create and configure location for hive to dump junk in target folder try {//from w ww . j a v a2s.co m FileUtils.forceMkdir(HIVE_BASE_DIR); FileUtils.forceMkdir(HIVE_SCRATCH_DIR); FileUtils.forceMkdir(HIVE_LOCAL_SCRATCH_DIR); FileUtils.forceMkdir(HIVE_LOGS_DIR); FileUtils.forceMkdir(HIVE_TMP_DIR); FileUtils.forceMkdir(HIVE_WAREHOUSE_DIR); FileUtils.forceMkdir(HIVE_HADOOP_TMP_DIR); FileUtils.forceMkdir(HIVE_TESTDATA_DIR); } catch (IOException e1) { e1.printStackTrace(); System.exit(1); } System.setProperty("javax.jdo.option.ConnectionURL", "jdbc:derby:;databaseName=" + HIVE_METADB_DIR.getAbsolutePath() + ";create=true"); System.setProperty("hive.metastore.warehouse.dir", HIVE_WAREHOUSE_DIR.getAbsolutePath()); System.setProperty("hive.exec.scratchdir", HIVE_SCRATCH_DIR.getAbsolutePath()); System.setProperty("hive.exec.local.scratchdir", HIVE_LOCAL_SCRATCH_DIR.getAbsolutePath()); System.setProperty("hive.metastore.metadb.dir", HIVE_METADB_DIR.getAbsolutePath()); System.setProperty("test.log.dir", HIVE_LOGS_DIR.getAbsolutePath()); System.setProperty("hive.querylog.location", HIVE_TMP_DIR.getAbsolutePath()); System.setProperty("hadoop.tmp.dir", HIVE_HADOOP_TMP_DIR.getAbsolutePath()); System.setProperty("derby.stream.error.file", HIVE_BASE_DIR.getAbsolutePath() + "/derby.log"); // custom properties System.setProperty("hive.server2.long.polling.timeout", "5000"); HiveConf conf = new HiveConf(); /* Build MiniDFSCluster */ try { miniDFS = new MiniDFSCluster.Builder(conf).build(); /* Build MiniMR Cluster */ int numTaskTrackers = 1; int numTaskTrackerDirectories = 1; String[] racks = null; String[] hosts = null; miniMR = new MiniMRCluster(numTaskTrackers, miniDFS.getFileSystem().getUri().toString(), numTaskTrackerDirectories, racks, hosts, new JobConf(conf)); JobConf jobConf = miniMR.createJobConf(new JobConf(conf)); System.out.println("-------" + jobConf.get("fs.defaultFS")); System.out.println("-------" + 
miniDFS.getFileSystem().getUri().toString()); System.setProperty("mapred.job.tracker", jobConf.get("mapred.job.tracker")); } catch (IOException e) { e.printStackTrace(); System.exit(1); } }