Example usage for the org.apache.hadoop.mapred MiniMRCluster constructor

Introduction

This page collects example usages of the org.apache.hadoop.mapred MiniMRCluster constructor, drawn from open source projects.

Prototype

public MiniMRCluster(int numTaskTrackers, String namenode, int numDir, String[] racks, String[] hosts,
            JobConf conf) throws IOException
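
All of the examples below use this overload: it starts numTaskTrackers task trackers against the given namenode URI, each with numDir local directories, placed on the given racks and hosts (null for defaults), configured from the given JobConf. As a minimal sketch of the lifecycle the examples share — the class name and comments here are illustrative, not taken from any of the projects below:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;

public class MiniMRClusterLifecycle {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf();
        // 2 datanodes, format the mini DFS, default racks
        MiniDFSCluster dfs = new MiniDFSCluster(conf, 2, true, null);
        MiniMRCluster mr = null;
        try {
            FileSystem fs = dfs.getFileSystem();
            // 2 task trackers, 1 local dir each, default racks and hosts
            mr = new MiniMRCluster(2, fs.getUri().toString(), 1, null, null, conf);
            // a JobConf wired to submit jobs to the mini cluster
            JobConf jobConf = mr.createJobConf();
            // ... run test jobs with jobConf ...
        } finally {
            if (mr != null)
                mr.shutdown();
            dfs.shutdown();
        }
    }
}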

Usage

From source file: cascading.platform.hadoop.HadoopPlatform.java

License: Open Source License

@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        LOG.info("not using cluster");
        configuration = new JobConf();

        // enforce the local file system in local mode
        configuration.set("fs.default.name", "file:///");
        configuration.set("mapred.job.tracker", "local");
        configuration.set("mapreduce.jobtracker.staging.root.dir",
                System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        String stagingDir = configuration.get("mapreduce.jobtracker.staging.root.dir");

        if (Util.isEmpty(stagingDir))
            configuration.set("mapreduce.jobtracker.staging.root.dir",
                    System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "cascading-hadoop/build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "cascading-hadoop/build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // return value ignored

        JobConf conf = new JobConf();

        if (!Util.isEmpty(System.getProperty("mapred.jar"))) {
            LOG.info("using a remote cluster with jar: {}", System.getProperty("mapred.jar"));
            configuration = conf;

            ((JobConf) configuration).setJar(System.getProperty("mapred.jar"));

            if (!Util.isEmpty(System.getProperty("fs.default.name"))) {
                LOG.info("using {}={}", "fs.default.name", System.getProperty("fs.default.name"));
                configuration.set("fs.default.name", System.getProperty("fs.default.name"));
            }

            if (!Util.isEmpty(System.getProperty("mapred.job.tracker"))) {
                LOG.info("using {}={}", "mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                configuration.set("mapred.job.tracker", System.getProperty("mapred.job.tracker"));
            }

            configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies
            fileSys = FileSystem.get(configuration);
        } else {
            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();
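            // 4 task trackers against the mini DFS, 1 local dir per tracker, default racks and hosts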
            mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1, null, null, conf);

            configuration = mr.createJobConf();
        }

        //      jobConf.set( "mapred.map.max.attempts", "1" );
        //      jobConf.set( "mapred.reduce.max.attempts", "1" );
        configuration.set("mapred.child.java.opts", "-Xmx512m");
        configuration.setInt("mapred.job.reuse.jvm.num.tasks", -1);
        configuration.setInt("jobclient.completion.poll.interval", 50);
        configuration.setInt("jobclient.progress.monitor.poll.interval", 50);
        ((JobConf) configuration).setMapSpeculativeExecution(false);
        ((JobConf) configuration).setReduceSpeculativeExecution(false);
    }

    ((JobConf) configuration).setNumMapTasks(numMappers);
    ((JobConf) configuration).setNumReduceTasks(numReducers);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    HadoopPlanner.copyProperties((JobConf) configuration, globalProperties); // copy any external properties

    HadoopPlanner.copyJobConf(properties, (JobConf) configuration); // put all properties on the jobconf
}
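
The setUp() above leaves the MiniDFSCluster and MiniMRCluster running in the dfs and mr fields. A matching tearDown (a sketch, not the actual Cascading code) would shut them down in reverse order:

public synchronized void tearDown() {
    if (mr != null)
        mr.shutdown(); // stops the JobTracker and all TaskTrackers

    if (dfs != null)
        dfs.shutdown(); // stops the NameNode and all DataNodes
}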

From source file: com.hadoop.mapreduce.TestLzoLazyLoading.java

License: Open Source License

public void testWithLocal() throws Exception {
    MiniMRCluster mr = null;
    try {
        JobConf jconf = new JobConf();
        jconf.set("mapred.queue.names", "default");
        mr = new MiniMRCluster(2, "file:///", 3, null, null, jconf);
        Configuration cf = mr.createJobConf();
        cf.set("io.compression.codecs", LzoCodec.class.getName());
        runWordCount(cf, false, false);
        runWordCount(cf, false, true);
        runWordCount(cf, true, false);
    } finally {
        if (mr != null) {
            mr.shutdown();
        }
    }
}

From source file: org.apache.ambari.servicemonitor.utils.DFSUtils.java

License: Apache License

public static MiniMRCluster createMRCluster(JobConf conf, String fsURI) throws IOException {
    String logdir = System.getProperty("java.io.tmpdir") + "/mrcluster/logs";
    System.setProperty("hadoop.log.dir", logdir);
    conf.set("hadoop.job.history.location", "file:///" + logdir + "/history");
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fsURI);
    return new MiniMRCluster(3, fsURI, 1, null, null, conf);
}
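
A caller would typically pair this helper with a shutdown in a finally block. A hypothetical snippet — only createMRCluster above and the standard MiniMRCluster API are assumed:

JobConf conf = new JobConf();
MiniMRCluster mr = DFSUtils.createMRCluster(conf, "file:///");
try {
    JobConf jobConf = mr.createJobConf();
    // ... submit test jobs with jobConf ...
} finally {
    mr.shutdown();
}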

From source file: org.apache.hcatalog.mapreduce.TestHCatMultiOutputFormat.java

License: Apache License

@BeforeClass
public static void setup() throws Exception {
    String testDir = System.getProperty("test.data.dir", "./");
    testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/";
    workDir = new File(new File(testDir).getCanonicalPath());
    FileUtil.fullyDelete(workDir);
    workDir.mkdirs();

    warehousedir = new Path(workDir + "/warehouse");

    // Run hive metastore server
    t = new Thread(new RunMS());
    t.start();

    // LocalJobRunner does not work with mapreduce OutputCommitter. So need
    // to use MiniMRCluster. MAPREDUCE-2350
    Configuration conf = new Configuration(true);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");

    FileSystem fs = FileSystem.get(conf);
    System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath());
    mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf));
    mrConf = mrCluster.createJobConf();
    fs.mkdirs(warehousedir);

    initializeSetup();
}

From source file: org.apache.hcatalog.mapreduce.TestHCatPartitionPublish.java

License: Apache License

@BeforeClass
public static void setup() throws Exception {
    Configuration conf = new Configuration(true);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");

    fs = FileSystem.get(conf);
    System.setProperty("hadoop.log.dir",
            new File(fs.getWorkingDirectory().toString(), "/logs").getAbsolutePath());
    // LocalJobRunner does not work with mapreduce OutputCommitter. So need
    // to use MiniMRCluster. MAPREDUCE-2350
    mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf));
    mrConf = mrCluster.createJobConf();

    if (isServerRunning) {
        return;
    }

    MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge());
    isServerRunning = true;
    securityManager = System.getSecurityManager();
    System.setSecurityManager(new NoExitSecurityManager());

    hcatConf = new HiveConf(TestHCatPartitionPublish.class);
    hcatConf.set("hive.metastore.local", "false");
    hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort);
    hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
    hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3);
    hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
    hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    msc = new HiveMetaStoreClient(hcatConf, null);
    System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
    System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
}

From source file: org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java

License: Apache License

@BeforeClass
public static void setup() throws IOException {
    createWorkDir();
    Configuration conf = new Configuration(true);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");

    fs = FileSystem.get(conf);
    System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath());
    // LocalJobRunner does not work with mapreduce OutputCommitter. So need
    // to use MiniMRCluster. MAPREDUCE-2350
    mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf));
    mrConf = mrCluster.createJobConf();
}

From source file: org.apache.hive.hcatalog.mapreduce.TestHCatMultiOutputFormat.java

License: Apache License

@BeforeClass
public static void setup() throws Exception {
    System.clearProperty("mapred.job.tracker");
    String testDir = System.getProperty("test.tmp.dir", "./");
    testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/";
    workDir = new File(new File(testDir).getCanonicalPath());
    FileUtil.fullyDelete(workDir);
    workDir.mkdirs();

    warehousedir = new Path(System.getProperty("test.warehouse.dir"));

    HiveConf metastoreConf = new HiveConf();
    metastoreConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, warehousedir.toString());

    // Run hive metastore server
    msPort = MetaStoreUtils.startMetaStore(metastoreConf);
    // LocalJobRunner does not work with mapreduce OutputCommitter. So need
    // to use MiniMRCluster. MAPREDUCE-2350
    Configuration conf = new Configuration(true);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");

    FileSystem fs = FileSystem.get(conf);
    System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath());
    mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf));
    mrConf = mrCluster.createJobConf();

    initializeSetup();

    warehousedir.getFileSystem(conf).mkdirs(warehousedir);
}

From source file: org.apache.hive.hcatalog.mapreduce.TestHCatPartitionPublish.java

License: Apache License

@BeforeClass
public static void setup() throws Exception {
    File workDir = handleWorkDir();
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem");

    fs = FileSystem.get(conf);
    System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath());
    // LocalJobRunner does not work with mapreduce OutputCommitter. So need
    // to use MiniMRCluster. MAPREDUCE-2350
    mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf));
    mrConf = mrCluster.createJobConf();

    if (isServerRunning) {
        return;
    }

    msPort = MetaStoreUtils.findFreePort();

    MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge());
    Thread.sleep(10000);
    isServerRunning = true;
    securityManager = System.getSecurityManager();
    System.setSecurityManager(new NoExitSecurityManager());

    hcatConf = new HiveConf(TestHCatPartitionPublish.class);
    hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort);
    hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
    hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3);
    hcatConf.setTimeVar(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, 120, TimeUnit.SECONDS);
    hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
    hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    msc = new HiveMetaStoreClient(hcatConf);
    System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
    System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
}

From source file: org.apache.hive.hcatalog.mapreduce.TestMultiOutputFormat.java

License: Apache License

@BeforeClass
public static void setup() throws IOException {
    createWorkDir();
    Configuration conf = new Configuration(true);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");

    fs = FileSystem.get(conf);
    System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath());
    // LocalJobRunner does not work with mapreduce OutputCommitter. So need
    // to use MiniMRCluster. MAPREDUCE-2350
    mrConf = new JobConf(conf);
    mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, mrConf);
}

From source file: org.apache.kylin.storage.minicluster.HiveMiniClusterTest.java

License: Apache License

protected void startHiveMiniCluster() {
    //Create and configure location for hive to dump junk in target folder
    try {
        FileUtils.forceMkdir(HIVE_BASE_DIR);
        FileUtils.forceMkdir(HIVE_SCRATCH_DIR);
        FileUtils.forceMkdir(HIVE_LOCAL_SCRATCH_DIR);
        FileUtils.forceMkdir(HIVE_LOGS_DIR);
        FileUtils.forceMkdir(HIVE_TMP_DIR);
        FileUtils.forceMkdir(HIVE_WAREHOUSE_DIR);
        FileUtils.forceMkdir(HIVE_HADOOP_TMP_DIR);
        FileUtils.forceMkdir(HIVE_TESTDATA_DIR);
    } catch (IOException e1) {
        e1.printStackTrace();
        System.exit(1);
    }

    System.setProperty("javax.jdo.option.ConnectionURL",
            "jdbc:derby:;databaseName=" + HIVE_METADB_DIR.getAbsolutePath() + ";create=true");
    System.setProperty("hive.metastore.warehouse.dir", HIVE_WAREHOUSE_DIR.getAbsolutePath());
    System.setProperty("hive.exec.scratchdir", HIVE_SCRATCH_DIR.getAbsolutePath());
    System.setProperty("hive.exec.local.scratchdir", HIVE_LOCAL_SCRATCH_DIR.getAbsolutePath());
    System.setProperty("hive.metastore.metadb.dir", HIVE_METADB_DIR.getAbsolutePath());
    System.setProperty("test.log.dir", HIVE_LOGS_DIR.getAbsolutePath());
    System.setProperty("hive.querylog.location", HIVE_TMP_DIR.getAbsolutePath());
    System.setProperty("hadoop.tmp.dir", HIVE_HADOOP_TMP_DIR.getAbsolutePath());
    System.setProperty("derby.stream.error.file", HIVE_BASE_DIR.getAbsolutePath() + "/derby.log");

    // custom properties
    System.setProperty("hive.server2.long.polling.timeout", "5000");

    HiveConf conf = new HiveConf();

    /* Build MiniDFSCluster */
    try {
        miniDFS = new MiniDFSCluster.Builder(conf).build();

        /* Build MiniMR Cluster */
        int numTaskTrackers = 1;
        int numTaskTrackerDirectories = 1;
        String[] racks = null;
        String[] hosts = null;
        miniMR = new MiniMRCluster(numTaskTrackers, miniDFS.getFileSystem().getUri().toString(),
                numTaskTrackerDirectories, racks, hosts, new JobConf(conf));
        JobConf jobConf = miniMR.createJobConf(new JobConf(conf));
        System.out.println("-------" + jobConf.get("fs.defaultFS"));
        System.out.println("-------" + miniDFS.getFileSystem().getUri().toString());
        System.setProperty("mapred.job.tracker", jobConf.get("mapred.job.tracker"));
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}