org.apache.tez.mapreduce.TestMRRJobs.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tez.mapreduce.TestMRRJobs.java

Source

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.tez.mapreduce;

import java.io.File;
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.RandomTextWriterJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.tez.mapreduce.examples.MRRSleepJob;
import org.apache.tez.mapreduce.hadoop.MRJobConfig;
import org.apache.tez.test.MiniTezCluster;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestMRRJobs {

    private static final Log LOG = LogFactory.getLog(TestMRRJobs.class);

    protected static MiniTezCluster mrrTezCluster;
    protected static MiniDFSCluster dfsCluster;

    private static Configuration conf = new Configuration();
    private static FileSystem remoteFs;

    private static String TEST_ROOT_DIR = "target" + Path.SEPARATOR + TestMRRJobs.class.getName() + "-tmpDir";

    private static final String OUTPUT_ROOT_DIR = "/tmp" + Path.SEPARATOR + TestMRRJobs.class.getSimpleName();

    @BeforeClass
    public static void setup() throws IOException {
        try {
            conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
            conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
            dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).format(true).racks(null).build();
            remoteFs = dfsCluster.getFileSystem();
        } catch (IOException io) {
            throw new RuntimeException("problem starting mini dfs cluster", io);
        }

        if (!(new File(MiniTezCluster.APPJAR)).exists()) {
            LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
            return;
        }

        if (mrrTezCluster == null) {
            mrrTezCluster = new MiniTezCluster(TestMRRJobs.class.getName(), 1, 1, 1);
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS
            conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");
            conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0l);
            mrrTezCluster.init(conf);
            mrrTezCluster.start();
        }

    }

    @AfterClass
    public static void tearDown() {
        if (mrrTezCluster != null) {
            mrrTezCluster.stop();
            mrrTezCluster = null;
        }
        if (dfsCluster != null) {
            dfsCluster.shutdown();
            dfsCluster = null;
        }
    }

    @Test(timeout = 60000)
    public void testMRRSleepJob() throws IOException, InterruptedException, ClassNotFoundException {
        LOG.info("\n\n\nStarting testMRRSleepJob().");

        if (!(new File(MiniTezCluster.APPJAR)).exists()) {
            LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
            return;
        }

        Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

        MRRSleepJob sleepJob = new MRRSleepJob();
        sleepJob.setConf(sleepConf);

        Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

        job.setJarByClass(MRRSleepJob.class);
        job.setMaxMapAttempts(1); // speed up failures
        job.submit();
        String trackingUrl = job.getTrackingURL();
        String jobId = job.getJobID().toString();
        boolean succeeded = job.waitForCompletion(true);
        Assert.assertTrue(succeeded);
        Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
        Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
                trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

        // FIXME once counters and task progress can be obtained properly
        // TODO use dag client to test counters and task progress?
        // what about completed jobs?
    }

    @Test(timeout = 60000)
    public void testRandomWriter() throws IOException, InterruptedException, ClassNotFoundException {

        LOG.info("\n\n\nStarting testRandomWriter().");
        if (!(new File(MiniTezCluster.APPJAR)).exists()) {
            LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
            return;
        }

        RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
        mrrTezCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
        mrrTezCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
        Job job = randomWriterJob.createJob(mrrTezCluster.getConfig());
        Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setSpeculativeExecution(false);
        job.setJarByClass(RandomTextWriterJob.class);
        job.setMaxMapAttempts(1); // speed up failures
        job.submit();
        String trackingUrl = job.getTrackingURL();
        String jobId = job.getJobID().toString();
        boolean succeeded = job.waitForCompletion(true);
        Assert.assertTrue(succeeded);
        Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
        Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
                trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

        // Make sure there are three files in the output-dir

        RemoteIterator<FileStatus> iterator = FileContext.getFileContext(mrrTezCluster.getConfig())
                .listStatus(outputDir);
        int count = 0;
        while (iterator.hasNext()) {
            FileStatus file = iterator.next();
            if (!file.getPath().getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
                count++;
            }
        }
        Assert.assertEquals("Number of part files is wrong!", 3, count);

    }

    @Test(timeout = 60000)
    public void testFailingJob() throws IOException, InterruptedException, ClassNotFoundException {

        LOG.info("\n\n\nStarting testFailingJob().");

        if (!(new File(MiniTezCluster.APPJAR)).exists()) {
            LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
            return;
        }

        Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

        MRRSleepJob sleepJob = new MRRSleepJob();
        sleepJob.setConf(sleepConf);

        Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

        job.setJarByClass(MRRSleepJob.class);
        job.setMaxMapAttempts(1); // speed up failures
        job.getConfiguration().setBoolean(MRRSleepJob.MAP_FATAL_ERROR, true);
        job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "*");

        job.submit();
        boolean succeeded = job.waitForCompletion(true);
        Assert.assertFalse(succeeded);
        Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());

        // FIXME once counters and task progress can be obtained properly
        // TODO verify failed task diagnostics
    }

    @Test(timeout = 60000)
    public void testFailingAttempt() throws IOException, InterruptedException, ClassNotFoundException {

        LOG.info("\n\n\nStarting testFailingAttempt().");

        if (!(new File(MiniTezCluster.APPJAR)).exists()) {
            LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
            return;
        }

        Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

        MRRSleepJob sleepJob = new MRRSleepJob();
        sleepJob.setConf(sleepConf);

        Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

        job.setJarByClass(MRRSleepJob.class);
        job.setMaxMapAttempts(3); // speed up failures
        job.getConfiguration().setBoolean(MRRSleepJob.MAP_THROW_ERROR, true);
        job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "0");

        job.submit();
        boolean succeeded = job.waitForCompletion(true);
        Assert.assertTrue(succeeded);
        Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());

        // FIXME once counters and task progress can be obtained properly
        // TODO verify failed task diagnostics
    }

    @Test(timeout = 60000)
    public void testMRRSleepJobWithCompression() throws IOException, InterruptedException, ClassNotFoundException {
        LOG.info("\n\n\nStarting testMRRSleepJobWithCompression().");

        if (!(new File(MiniTezCluster.APPJAR)).exists()) {
            LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
            return;
        }

        Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

        MRRSleepJob sleepJob = new MRRSleepJob();
        sleepJob.setConf(sleepConf);

        Job job = sleepJob.createJob(1, 1, 2, 1, 1, 1, 1, 1, 1, 1);

        job.setJarByClass(MRRSleepJob.class);
        job.setMaxMapAttempts(1); // speed up failures

        // enable compression
        job.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        job.getConfiguration().set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, DefaultCodec.class.getName());

        job.submit();
        String trackingUrl = job.getTrackingURL();
        String jobId = job.getJobID().toString();
        boolean succeeded = job.waitForCompletion(true);
        Assert.assertTrue(succeeded);
        Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
        Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
                trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

        // FIXME once counters and task progress can be obtained properly
        // TODO use dag client to test counters and task progress?
        // what about completed jobs?

    }

    /*
    //@Test (timeout = 60000)
    public void testMRRSleepJobWithSecurityOn() throws IOException,
        InterruptedException, ClassNotFoundException {
        
      LOG.info("\n\n\nStarting testMRRSleepJobWithSecurityOn().");
        
      if (!(new File(MiniMRRTezCluster.APPJAR)).exists()) {
        return;
      }
        
      mrrTezCluster.getConfig().set(
    CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
    "kerberos");
      mrrTezCluster.getConfig().set(YarnConfiguration.RM_KEYTAB, "/etc/krb5.keytab");
      mrrTezCluster.getConfig().set(YarnConfiguration.NM_KEYTAB, "/etc/krb5.keytab");
      mrrTezCluster.getConfig().set(YarnConfiguration.RM_PRINCIPAL,
    "rm/sightbusy-lx@LOCALHOST");
      mrrTezCluster.getConfig().set(YarnConfiguration.NM_PRINCIPAL,
    "nm/sightbusy-lx@LOCALHOST");
        
      UserGroupInformation.setConfiguration(mrrTezCluster.getConfig());
        
      // Keep it in here instead of after RM/NM as multiple user logins happen in
      // the same JVM.
      UserGroupInformation user = UserGroupInformation.getCurrentUser();
        
      LOG.info("User name is " + user.getUserName());
      for (Token<? extends TokenIdentifier> str : user.getTokens()) {
        LOG.info("Token is " + str.encodeToUrlString());
      }
      user.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {
    MRRSleepJob sleepJob = new MRRSleepJob();
    sleepJob.setConf(mrrTezCluster.getConfig());
    Job job = sleepJob.createJob(3, 0, 10000, 1, 0, 0);
    // //Job with reduces
    // Job job = sleepJob.createJob(3, 2, 10000, 1, 10000, 1);
    job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    job.waitForCompletion(true);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    Assert.assertTrue("Tracking URL was " + trackingUrl +
                      " but didn't Match Job ID " + jobId ,
      trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
    return null;
        }
      });
        
      // TODO later:  add explicit "isUber()" checks of some sort
    }
    */

}