Example usage for org.apache.hadoop.mapred.jobcontrol.JobControl.getWaitingJobs()

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.jobcontrol.JobControl.getWaitingJobs().

Prototype

public ArrayList<Job> getWaitingJobs() 
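
A minimal sketch (not taken from the sources below) of the typical pattern: jobs added to a JobControl start out in the WAITING state, so getWaitingJobs() can be used to inspect their configuration before a thread runs the group. The group and job names here are illustrative.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;

public class WaitingJobsExample {
    public static void main(String[] args) throws Exception {
        JobControl control = new JobControl("example-group"); // group name is illustrative

        JobConf conf = new JobConf();
        conf.setJobName("example-job");
        control.addJob(new Job(conf)); // newly added jobs are in the WAITING state

        // inspect the queued jobs before any of them are launched
        for (Job job : control.getWaitingJobs()) {
            System.out.println(job.getJobName() + " -> "
                    + job.getJobConf().getNumReduceTasks() + " reducer(s)");
        }
    }
}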

Usage

From source file: org.apache.pig.test.TestGroupConstParallelMR.java

License: Apache License

@Override
public void checkGroupConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);

    JobControl jobControl = jcc.compile(mrPlan, "Test");
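    // the compiled jobs have not been run, so the first one can be inspected via getWaitingJobs()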
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    assertEquals("parallism", 1, parallel);
}

From source file: org.apache.pig.test.TestGroupConstParallelMR.java

License: Apache License

@Override
public void checkGroupNonConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);

    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    assertEquals("parallism", 100, parallel);
}

From source file: org.apache.pig.test.TestJobControlCompiler.java

License: Apache License

/**
 * Specifically tests that REGISTERED jars get added to the distributed cache.
 * @throws Exception
 */
@Test
public void testJarAddedToDistributedCache() throws Exception {

    // creating a jar with a UDF *not* in the current classloader
    File tmpFile = File.createTempFile("Some_", ".jar");
    tmpFile.deleteOnExit();
    String className = createTestJar(tmpFile);
    final String testUDFFileName = className + ".class";

    // JobControlCompiler setup
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE);
    PigContext pigContext = pigServer.getPigContext();
    pigContext.connect();
    pigContext.addJar(tmpFile.getAbsolutePath());
    JobControlCompiler jobControlCompiler = new JobControlCompiler(pigContext, CONF);
    MROperPlan plan = new MROperPlan();
    MapReduceOper mro = new MapReduceOper(new OperatorKey());
    mro.UDFs = new HashSet<String>();
    mro.UDFs.add(className + "()");
    plan.add(mro);

    // compiling the job
    JobControl jobControl = jobControlCompiler.compile(plan, "test");
    JobConf jobConf = jobControl.getWaitingJobs().get(0).getJobConf();

    // verifying the jar gets on distributed cache
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(jobConf);
    Assert.assertEquals("size for " + Arrays.toString(fileClassPaths), 8, fileClassPaths.length);
    Path distributedCachePath = fileClassPaths[0];
    Assert.assertEquals("ends with jar name: " + distributedCachePath, distributedCachePath.getName(),
            tmpFile.getName());
    // a Hadoop bug requires that the path not contain hdfs://hostname in front
    Assert.assertTrue("starts with /: " + distributedCachePath,
            distributedCachePath.toString().startsWith("/"));
    Assert.assertTrue("jar pushed to distributed cache should contain testUDF",
            jarContainsFileNamed(new File(fileClassPaths[0].toUri().getPath()), testUDFFileName));
}

From source file: org.apache.pig.test.TestJobControlCompiler.java

License: Apache License

@Test
public void testAddArchiveToDistributedCache() throws IOException {
    final File textFile = File.createTempFile("file", ".txt");
    textFile.deleteOnExit();

    final List<File> zipArchives = createFiles(".zip");
    zipArchives.add(textFile);
    final List<File> tarArchives = createFiles(".tgz", ".tar.gz", ".tar");

    final PigServer pigServer = new PigServer(ExecType.MAPREDUCE);
    final PigContext pigContext = pigServer.getPigContext();
    pigContext.connect();
    pigContext.getProperties().put("pig.streaming.ship.files", StringUtils.join(zipArchives, ","));
    pigContext.getProperties().put("pig.streaming.cache.files", StringUtils.join(tarArchives, ","));
    final JobControlCompiler jobControlCompiler = new JobControlCompiler(pigContext, CONF);

    final MROperPlan plan = new MROperPlan();
    plan.add(new MapReduceOper(new OperatorKey()));

    final JobControl jobControl = jobControlCompiler.compile(plan, "test");
    final JobConf jobConf = jobControl.getWaitingJobs().get(0).getJobConf();

    URI[] uris = DistributedCache.getCacheFiles(jobConf);
    int sizeTxt = 0;
    for (int i = 0; i < uris.length; i++) {
        if (uris[i].toString().endsWith(".txt")) {
            sizeTxt++;
        }
    }
    Assert.assertTrue(sizeTxt == 1);
    assertFilesInDistributedCache(DistributedCache.getCacheArchives(jobConf), 4, ".zip", ".tgz", ".tar.gz",
            ".tar");
}

From source file: org.apache.pig.test.TestJobSubmission.java

License: Apache License

@Test
public void testDefaultParallel() throws Throwable {
    pc.defaultParallel = 100;

    String query = "a = load 'input';" + "b = group a by $0;" + "store b into 'output';";
    PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);

    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    assertEquals(100, parallel);
    Util.assertParallelValues(100, -1, -1, 100, job.getJobConf());

    pc.defaultParallel = -1;
}

From source file: org.apache.pig.test.TestJobSubmission.java

License: Apache License

@Test
public void testReducerNumEstimation() throws Exception {
    // use the estimation
    Configuration conf = HBaseConfiguration.create(new Configuration());
    HBaseTestingUtility util = new HBaseTestingUtility(conf);
    int clientPort = util.startMiniZKCluster().getClientPort();
    util.startMiniHBaseCluster(1, 1);

    String query = "a = load '/passwd';" + "b = group a by $0;" + "store b into 'output';";
    PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");
    pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jc = jcc.compile(mrPlan, "Test");
    Job job = jc.getWaitingJobs().get(0);
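    // expected estimate: reducers = min(ceil(inputBytes / bytesPerReducer), maxReducers),
    // with bytesPerReducer = 100 and maxReducers = 10 as set above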
    long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
            10);

    Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

    // using the PARALLEL keyword overrides the estimated reducer number
    query = "a = load '/passwd';" + "b = group a by $0 PARALLEL 2;" + "store b into 'output';";
    pp = Util.buildPp(ps, query);
    mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    jcc = new JobControlCompiler(pc, conf);
    jc = jcc.compile(mrPlan, "Test");
    job = jc.getWaitingJobs().get(0);

    Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

    final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
    util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

    // the estimation doesn't take effect for non-DFS inputs, such as HBase, or when the files don't exist
    query = "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');"
            + "b = group a by $0 ;" + "store b into 'output';";
    pp = Util.buildPp(ps, query);
    mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    jcc = new JobControlCompiler(pc, conf);
    jc = jcc.compile(mrPlan, "Test");
    job = jc.getWaitingJobs().get(0);

    Util.assertParallelValues(-1, -1, -1, 1, job.getJobConf());

    util.deleteTable(Bytes.toBytesBinary("test_table"));
    // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()
    // here instead.
    MiniHBaseCluster hbc = util.getHBaseCluster();
    if (hbc != null) {
        hbc.shutdown();
        hbc.join();
    }
    util.shutdownMiniZKCluster();
}

From source file: org.apache.pig.test.TestJobSubmission.java

License: Apache License

@Test
public void testReducerNumEstimationForOrderBy() throws Exception {
    // use the estimation
    pc.getProperties().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getProperties().setProperty("pig.exec.reducers.max", "10");

    String query = "a = load '/passwd';" + "b = order a by $0;" + "store b into 'output';";
    PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);

    MROperPlan mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, query);

    assertEquals(2, mrPlan.size());

    // first job uses a single reducer for the sampling
    Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

    // Simulate the first job having run so estimation kicks in.
    MapReduceOper sort = mrPlan.getLeaves().get(0);
    jcc.updateMROpPlan(jobControl.getReadyJobs());
    FileLocalizer.create(sort.getQuantFile(), pc);
    jobControl = jcc.compile(mrPlan, query);

    sort = mrPlan.getLeaves().get(0);
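    // same estimate as before: min(ceil(inputBytes / bytesPerReducer), maxReducers)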
    long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
            10);
    assertEquals(reducer, sort.getRequestedParallelism());

    // the second job estimates reducers
    Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());

    // using the PARALLEL keyword overrides the estimated reducer number
    query = "a = load '/passwd';" + "b = order a by $0 PARALLEL 2;" + "store b into 'output';";
    pp = Util.buildPp(ps, query);

    mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);

    assertEquals(2, mrPlan.size());

    sort = mrPlan.getLeaves().get(0);
    assertEquals(2, sort.getRequestedParallelism());

    // the estimation doesn't take effect for non-DFS inputs, such as HBase, or when the files don't exist
    query = "a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');"
            + "b = order a by $0 ;" + "store b into 'output';";
    pp = Util.buildPp(ps, query);

    mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
    assertEquals(2, mrPlan.size());

    sort = mrPlan.getLeaves().get(0);

    // the requested parallelism will be -1 if the user sets neither default_parallel nor PARALLEL
    // and the estimation doesn't take effect; the MR framework will eventually set it to 1.
    assertEquals(-1, sort.getRequestedParallelism());

    // test order by with three jobs (after optimization)
    query = "a = load '/passwd';" + "b = foreach a generate $0, $1, $2;" + "c = order b by $0;"
            + "store c into 'output';";
    pp = Util.buildPp(ps, query);

    mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
    assertEquals(3, mrPlan.size());

    // Simulate the first 2 jobs having run so estimation kicks in.
    sort = mrPlan.getLeaves().get(0);
    FileLocalizer.create(sort.getQuantFile(), pc);

    jobControl = jcc.compile(mrPlan, query);
    Util.copyFromLocalToCluster(cluster, "test/org/apache/pig/test/data/passwd",
            ((POLoad) sort.mapPlan.getRoots().get(0)).getLFile().getFileName());

    // First job is just a foreach with projection (a map-only job), so the estimate is ignored
    Util.assertParallelValues(-1, -1, -1, 0, jobControl.getWaitingJobs().get(0).getJobConf());

    jcc.updateMROpPlan(jobControl.getReadyJobs());
    jobControl = jcc.compile(mrPlan, query);
    jcc.updateMROpPlan(jobControl.getReadyJobs());

    // Second job is a sampler, which requests and gets 1 reducer
    Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

    jobControl = jcc.compile(mrPlan, query);
    sort = mrPlan.getLeaves().get(0);
    assertEquals(reducer, sort.getRequestedParallelism());

    // Third job is the order-by, which uses the estimated number of reducers
    Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());
}

From source file: org.apache.pig.test.TestJobSubmissionMR.java

License: Apache License

@Override
public void checkDefaultParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);

    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    assertEquals(100, parallel);
    Util.assertParallelValues(100, -1, -1, 100, job.getJobConf());
}