List of usage examples for org.apache.hadoop.mapred.jobcontrol.JobControl.getWaitingJobs()
public ArrayList<Job> getWaitingJobs()
From source file:org.apache.pig.test.TestGroupConstParallelMR.java
License:Apache License
/**
 * Builds the MapReduce plan for the given physical plan, compiles it into a
 * {@link JobControl} and asserts that the first waiting job is configured
 * with exactly one reduce task.
 *
 * @param pp physical plan to turn into a MapReduce plan
 * @param pc Pig context whose properties are validated and converted into the job configuration
 * @throws Exception if building, validating or compiling the plan fails
 */
@Override
public void checkGroupConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan mapReducePlan = Util.buildMRPlan(pp, pc);

    // Validate the Pig properties before they are converted into a Hadoop configuration.
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration jobConfiguration = ConfigurationUtil.toConfiguration(pc.getProperties());

    JobControl compiledJobs = new JobControlCompiler(pc, jobConfiguration).compile(mapReducePlan, "Test");
    Job firstWaitingJob = compiledJobs.getWaitingJobs().get(0);

    assertEquals("parallism", 1, firstWaitingJob.getJobConf().getNumReduceTasks());
}
From source file:org.apache.pig.test.TestGroupConstParallelMR.java
License:Apache License
/**
 * Builds the MapReduce plan for the given physical plan, compiles it into a
 * {@link JobControl} and asserts that the first waiting job is configured
 * with 100 reduce tasks.
 *
 * @param pp physical plan to turn into a MapReduce plan
 * @param pc Pig context whose properties are validated and converted into the job configuration
 * @throws Exception if building, validating or compiling the plan fails
 */
@Override
public void checkGroupNonConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan mapReducePlan = Util.buildMRPlan(pp, pc);

    // Validate the Pig properties before they are converted into a Hadoop configuration.
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration jobConfiguration = ConfigurationUtil.toConfiguration(pc.getProperties());

    JobControl compiledJobs = new JobControlCompiler(pc, jobConfiguration).compile(mapReducePlan, "Test");
    Job firstWaitingJob = compiledJobs.getWaitingJobs().get(0);

    assertEquals("parallism", 100, firstWaitingJob.getJobConf().getNumReduceTasks());
}
From source file:org.apache.pig.test.TestJobControlCompiler.java
License:Apache License
/** * specifically tests that REGISTERED jars get added to distributed cache * @throws Exception/*from w w w .j av a2s .c om*/ */ @Test public void testJarAddedToDistributedCache() throws Exception { // creating a jar with a UDF *not* in the current classloader File tmpFile = File.createTempFile("Some_", ".jar"); tmpFile.deleteOnExit(); String className = createTestJar(tmpFile); final String testUDFFileName = className + ".class"; // JobControlCompiler setup PigServer pigServer = new PigServer(ExecType.MAPREDUCE); PigContext pigContext = pigServer.getPigContext(); pigContext.connect(); pigContext.addJar(tmpFile.getAbsolutePath()); JobControlCompiler jobControlCompiler = new JobControlCompiler(pigContext, CONF); MROperPlan plan = new MROperPlan(); MapReduceOper mro = new MapReduceOper(new OperatorKey()); mro.UDFs = new HashSet<String>(); mro.UDFs.add(className + "()"); plan.add(mro); // compiling the job JobControl jobControl = jobControlCompiler.compile(plan, "test"); JobConf jobConf = jobControl.getWaitingJobs().get(0).getJobConf(); // verifying the jar gets on distributed cache Path[] fileClassPaths = DistributedCache.getFileClassPaths(jobConf); Assert.assertEquals("size for " + Arrays.toString(fileClassPaths), 8, fileClassPaths.length); Path distributedCachePath = fileClassPaths[0]; Assert.assertEquals("ends with jar name: " + distributedCachePath, distributedCachePath.getName(), tmpFile.getName()); // hadoop bug requires path to not contain hdfs://hotname in front Assert.assertTrue("starts with /: " + distributedCachePath, distributedCachePath.toString().startsWith("/")); Assert.assertTrue("jar pushed to distributed cache should contain testUDF", jarContainsFileNamed(new File(fileClassPaths[0].toUri().getPath()), testUDFFileName)); }
From source file:org.apache.pig.test.TestJobControlCompiler.java
License:Apache License
@Test public void testAddArchiveToDistributedCache() throws IOException { final File textFile = File.createTempFile("file", ".txt"); textFile.deleteOnExit();// w w w.ja va 2 s . c o m final List<File> zipArchives = createFiles(".zip"); zipArchives.add(textFile); final List<File> tarArchives = createFiles(".tgz", ".tar.gz", ".tar"); final PigServer pigServer = new PigServer(ExecType.MAPREDUCE); final PigContext pigContext = pigServer.getPigContext(); pigContext.connect(); pigContext.getProperties().put("pig.streaming.ship.files", StringUtils.join(zipArchives, ",")); pigContext.getProperties().put("pig.streaming.cache.files", StringUtils.join(tarArchives, ",")); final JobControlCompiler jobControlCompiler = new JobControlCompiler(pigContext, CONF); final MROperPlan plan = new MROperPlan(); plan.add(new MapReduceOper(new OperatorKey())); final JobControl jobControl = jobControlCompiler.compile(plan, "test"); final JobConf jobConf = jobControl.getWaitingJobs().get(0).getJobConf(); URI[] uris = DistributedCache.getCacheFiles(jobConf); int sizeTxt = 0; for (int i = 0; i < uris.length; i++) { if (uris[i].toString().endsWith(".txt")) { sizeTxt++; } } Assert.assertTrue(sizeTxt == 1); assertFilesInDistributedCache(DistributedCache.getCacheArchives(jobConf), 4, ".zip", ".tgz", ".tar.gz", ".tar"); }
From source file:org.apache.pig.test.TestJobSubmission.java
License:Apache License
@Test public void testDefaultParallel() throws Throwable { pc.defaultParallel = 100;// w ww. j a v a 2 s .c om String query = "a = load 'input';" + "b = group a by $0;" + "store b into 'output';"; PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); PhysicalPlan pp = Util.buildPp(ps, query); MROperPlan mrPlan = Util.buildMRPlan(pp, pc); ConfigurationValidator.validatePigProperties(pc.getProperties()); Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties()); JobControlCompiler jcc = new JobControlCompiler(pc, conf); JobControl jobControl = jcc.compile(mrPlan, "Test"); Job job = jobControl.getWaitingJobs().get(0); int parallel = job.getJobConf().getNumReduceTasks(); assertEquals(100, parallel); Util.assertParallelValues(100, -1, -1, 100, job.getJobConf()); pc.defaultParallel = -1; }
From source file:org.apache.pig.test.TestJobSubmission.java
License:Apache License
@Test public void testReducerNumEstimation() throws Exception { // use the estimation Configuration conf = HBaseConfiguration.create(new Configuration()); HBaseTestingUtility util = new HBaseTestingUtility(conf); int clientPort = util.startMiniZKCluster().getClientPort(); util.startMiniHBaseCluster(1, 1);/*from ww w .j av a 2s.c o m*/ String query = "a = load '/passwd';" + "b = group a by $0;" + "store b into 'output';"; PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); PhysicalPlan pp = Util.buildPp(ps, query); MROperPlan mrPlan = Util.buildMRPlan(pp, pc); pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100"); pc.getConf().setProperty("pig.exec.reducers.max", "10"); pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort)); ConfigurationValidator.validatePigProperties(pc.getProperties()); conf = ConfigurationUtil.toConfiguration(pc.getProperties()); JobControlCompiler jcc = new JobControlCompiler(pc, conf); JobControl jc = jcc.compile(mrPlan, "Test"); Job job = jc.getWaitingJobs().get(0); long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10); Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf()); // use the PARALLEL key word, it will override the estimated reducer number query = "a = load '/passwd';" + "b = group a by $0 PARALLEL 2;" + "store b into 'output';"; pp = Util.buildPp(ps, query); mrPlan = Util.buildMRPlan(pp, pc); pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100"); pc.getConf().setProperty("pig.exec.reducers.max", "10"); ConfigurationValidator.validatePigProperties(pc.getProperties()); conf = ConfigurationUtil.toConfiguration(pc.getProperties()); jcc = new JobControlCompiler(pc, conf); jc = jcc.compile(mrPlan, "Test"); job = jc.getWaitingJobs().get(0); Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf()); final byte[] COLUMNFAMILY = Bytes.toBytes("pig"); 
util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY); // the estimation won't take effect when it apply to non-dfs or the files doesn't exist, such as hbase query = "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" + "b = group a by $0 ;" + "store b into 'output';"; pp = Util.buildPp(ps, query); mrPlan = Util.buildMRPlan(pp, pc); pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100"); pc.getConf().setProperty("pig.exec.reducers.max", "10"); ConfigurationValidator.validatePigProperties(pc.getProperties()); conf = ConfigurationUtil.toConfiguration(pc.getProperties()); jcc = new JobControlCompiler(pc, conf); jc = jcc.compile(mrPlan, "Test"); job = jc.getWaitingJobs().get(0); Util.assertParallelValues(-1, -1, -1, 1, job.getJobConf()); util.deleteTable(Bytes.toBytesBinary("test_table")); // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster() // here instead. MiniHBaseCluster hbc = util.getHBaseCluster(); if (hbc != null) { hbc.shutdown(); hbc.join(); } util.shutdownMiniZKCluster(); }
From source file:org.apache.pig.test.TestJobSubmission.java
License:Apache License
@Test public void testReducerNumEstimationForOrderBy() throws Exception { // use the estimation pc.getProperties().setProperty("pig.exec.reducers.bytes.per.reducer", "100"); pc.getProperties().setProperty("pig.exec.reducers.max", "10"); String query = "a = load '/passwd';" + "b = order a by $0;" + "store b into 'output';"; PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); PhysicalPlan pp = Util.buildPp(ps, query); MROperPlan mrPlan = Util.buildMRPlanWithOptimizer(pp, pc); Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties()); JobControlCompiler jcc = new JobControlCompiler(pc, conf); JobControl jobControl = jcc.compile(mrPlan, query); assertEquals(2, mrPlan.size());// ww w. j a v a2 s. co m // first job uses a single reducer for the sampling Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf()); // Simulate the first job having run so estimation kicks in. MapReduceOper sort = mrPlan.getLeaves().get(0); jcc.updateMROpPlan(jobControl.getReadyJobs()); FileLocalizer.create(sort.getQuantFile(), pc); jobControl = jcc.compile(mrPlan, query); sort = mrPlan.getLeaves().get(0); long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10); assertEquals(reducer, sort.getRequestedParallelism()); // the second job estimates reducers Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf()); // use the PARALLEL key word, it will override the estimated reducer number query = "a = load '/passwd';" + "b = order a by $0 PARALLEL 2;" + "store b into 'output';"; pp = Util.buildPp(ps, query); mrPlan = Util.buildMRPlanWithOptimizer(pp, pc); assertEquals(2, mrPlan.size()); sort = mrPlan.getLeaves().get(0); assertEquals(2, sort.getRequestedParallelism()); // the estimation won't take effect when it apply to non-dfs or the files doesn't exist, such as hbase query = "a = load 'hbase://passwd' using 
org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" + "b = order a by $0 ;" + "store b into 'output';"; pp = Util.buildPp(ps, query); mrPlan = Util.buildMRPlanWithOptimizer(pp, pc); assertEquals(2, mrPlan.size()); sort = mrPlan.getLeaves().get(0); // the requested parallel will be -1 if users don't set any of default_parallel, paralllel // and the estimation doesn't take effect. MR framework will finally set it to 1. assertEquals(-1, sort.getRequestedParallelism()); // test order by with three jobs (after optimization) query = "a = load '/passwd';" + "b = foreach a generate $0, $1, $2;" + "c = order b by $0;" + "store c into 'output';"; pp = Util.buildPp(ps, query); mrPlan = Util.buildMRPlanWithOptimizer(pp, pc); assertEquals(3, mrPlan.size()); // Simulate the first 2 jobs having run so estimation kicks in. sort = mrPlan.getLeaves().get(0); FileLocalizer.create(sort.getQuantFile(), pc); jobControl = jcc.compile(mrPlan, query); Util.copyFromLocalToCluster(cluster, "test/org/apache/pig/test/data/passwd", ((POLoad) sort.mapPlan.getRoots().get(0)).getLFile().getFileName()); //First job is just foreach with projection, mapper-only job, so estimate gets ignored Util.assertParallelValues(-1, -1, -1, 0, jobControl.getWaitingJobs().get(0).getJobConf()); jcc.updateMROpPlan(jobControl.getReadyJobs()); jobControl = jcc.compile(mrPlan, query); jcc.updateMROpPlan(jobControl.getReadyJobs()); //Second job is a sampler, which requests and gets 1 reducer Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf()); jobControl = jcc.compile(mrPlan, query); sort = mrPlan.getLeaves().get(0); assertEquals(reducer, sort.getRequestedParallelism()); //Third job is the order, which uses the estimated number of reducers Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf()); }
From source file:org.apache.pig.test.TestJobSubmissionMR.java
License:Apache License
@Override public void checkDefaultParallelResult(PhysicalPlan pp, PigContext pc) throws Exception { MROperPlan mrPlan = Util.buildMRPlan(pp, pc); ConfigurationValidator.validatePigProperties(pc.getProperties()); Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties()); JobControlCompiler jcc = new JobControlCompiler(pc, conf); JobControl jobControl = jcc.compile(mrPlan, "Test"); Job job = jobControl.getWaitingJobs().get(0); int parallel = job.getJobConf().getNumReduceTasks(); assertEquals(100, parallel);//from w w w. ja va 2 s. co m Util.assertParallelValues(100, -1, -1, 100, job.getJobConf()); }