List of usage examples for org.apache.hadoop.mapreduce.Job.getConfiguration()
public Configuration getConfiguration()
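Before the per-project examples below, here is a minimal, self-contained sketch of the pattern they all share: obtain the job's live Configuration with getConfiguration() and set properties on it before submission. The class name and property keys here are illustrative only and are not taken from any of the projects listed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "getConfiguration example");
        job.setJarByClass(GetConfigurationSketch.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // getConfiguration() returns the Configuration backing this job;
        // properties set here are visible to tasks via context.getConfiguration().
        job.getConfiguration().set("example.custom.property", "some-value");   // illustrative key
        job.getConfiguration().setLong("example.total.bytes", 1024L * 1024L);  // illustrative key

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}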
From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java
License:Apache License
public void testUniformSizeDistCp() throws Exception {
    try {
        clearState();
        Path sourcePath = new Path(SOURCE_PATH).makeQualified(cluster.getFileSystem());
        List<Path> sources = new ArrayList<Path>();
        sources.add(sourcePath);

        Path targetPath = new Path(TARGET_PATH).makeQualified(cluster.getFileSystem());
        DistCpOptions options = new DistCpOptions(sources, targetPath);
        options.setOutPutDirectory(counterOutputPath);
        options.setAtomicCommit(true);
        options.setBlocking(false);

        Job job = new DistCp(configuration, options).execute();
        Path workDir = CopyOutputFormat.getWorkingDirectory(job);
        Path finalDir = CopyOutputFormat.getCommitDirectory(job);

        while (!job.isComplete()) {
            if (cluster.getFileSystem().exists(workDir)) {
                break;
            }
        }
        job.waitForCompletion(true);

        Assert.assertFalse(cluster.getFileSystem().exists(workDir));
        Assert.assertTrue(cluster.getFileSystem().exists(finalDir));
        Assert.assertFalse(cluster.getFileSystem()
                .exists(new Path(job.getConfiguration().get(DistCpConstants.CONF_LABEL_META_FOLDER))));
        verifyResults();
    } catch (Exception e) {
        LOG.error("Exception encountered", e);
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
protected Job createJob(Path inputPath, long totalSize) throws IOException {
    String jobName = getName();
    Configuration conf = currentCluster.getHadoopConf();
    conf.set(ConduitConstants.AUDIT_ENABLED_KEY, System.getProperty(ConduitConstants.AUDIT_ENABLED_KEY));
    Job job = new Job(conf);
    job.setJobName(jobName);

    // DistributedCache.addFileToClassPath(inputFormatJarDestPath, job.getConfiguration());
    job.getConfiguration().set("tmpjars",
            inputFormatJarDestPath.toString() + "," + auditUtilJarDestPath.toString());
    LOG.debug("Adding file [" + inputFormatJarDestPath + "] to distributed cache");

    job.setInputFormatClass(UniformSizeInputFormat.class);
    Class<? extends Mapper<Text, FileStatus, NullWritable, Text>> mapperClass = getMapperClass();
    job.setJarByClass(mapperClass);
    job.setMapperClass(mapperClass);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    // setting identity reducer
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, tmpCounterOutputPath);

    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
    job.getConfiguration().set(LOCALSTREAM_TMP_PATH, tmpPath.toString());
    job.getConfiguration().set(SRC_FS_DEFAULT_NAME_KEY, srcCluster.getHadoopConf().get(FS_DEFAULT_NAME_KEY));

    // set configurations needed for UniformSizeInputFormat
    int numMaps = getNumMapsForJob(totalSize);
    job.getConfiguration().setInt(DistCpConstants.CONF_LABEL_NUM_MAPS, numMaps);
    job.getConfiguration().setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalSize);
    job.getConfiguration().set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, inputPath.toString());
    LOG.info("Expected number of maps [" + numMaps + "] Total data size [" + totalSize + "]");

    return job;
}
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
private void testClusterName(String configName, String currentClusterName) throws Exception {
    ConduitConfigParser parser = new ConduitConfigParser(configName);
    ConduitConfig config = parser.getConfig();
    Set<String> streamsToProcess = new HashSet<String>();
    streamsToProcess.addAll(config.getSourceStreams().keySet());
    Set<String> clustersToProcess = new HashSet<String>();
    Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
    Cluster currentCluster = null;

    for (SourceStream sStream : config.getSourceStreams().values()) {
        for (String cluster : sStream.getSourceClusters()) {
            clustersToProcess.add(cluster);
        }
    }
    if (currentClusterName != null) {
        currentCluster = config.getClusters().get(currentClusterName);
    }
    for (String clusterName : clustersToProcess) {
        Cluster cluster = config.getClusters().get(clusterName);
        cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
        TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster,
                new NullCheckPointProvider(), streamsToProcess);
        services.add(service);
    }

    for (TestLocalStreamService service : services) {
        FileSystem fs = service.getFileSystem();
        service.preExecute();
        if (currentClusterName != null)
            Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName);

        // creating a job with empty input path
        Path tmpJobInputPath = new Path("/tmp/job/input/path");
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);

        Job testJobConf = service.createJob(tmpJobInputPath, 1000);
        testJobConf.waitForCompletion(true);

        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        Path outputCounterPath = new Path(new Path(service.getCluster().getTmpPath(), service.getName()),
                "counters");
        FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus fileSt : statuses) {
            Scanner scanner = new Scanner(fs.open(fileSt.getPath()));
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    Assert.assertEquals(4, tmp.length);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    numberOfCountersPerFile++;
                    sumOfCounterValues += numOfMsgs;
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name =" + counterNameValue
                            + "..skipping the line", e);
                }
            }
        }
        // Should have 2 counters for each file
        Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile);
        // sum of all counter values should be equal to total number of messages
        Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues);

        Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY),
                service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        if (currentCluster == null)
            Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                    testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));

        service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
    }
}
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
private Job createJob(Path inputPath) throws IOException {
    String jobName = "localstream";
    Configuration conf = cluster.getHadoopConf();
    Job job = new Job(conf);
    job.setJobName(jobName);

    KeyValueTextInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setJarByClass(CopyMapper.class);
    job.setMapperClass(CopyMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
    job.getConfiguration().set("localstream.tmp.path", tmpPath.toString());

    return job;
}
From source file:com.intel.hadoop.hbase.dot.KEY.java
License:Apache License
private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass,
        String mrTableName) throws IOException, ClassNotFoundException, InterruptedException {

    this.conf.set(KEY.INPUT_TABLE, mrTableName);
    Job job = new Job(this.conf);
    job.setJobName("Generate Data for [" + mrTableName + "]");
    job.setJarByClass(GenerateTestTable.class);

    job.setInputFormatClass(inputFormatClass);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp", "tempout");
    fs.delete(path, true);

    FileOutputFormat.setOutputPath(job, path);

    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);

    TableMapReduceUtil.addDependencyJars(job);
    // Add a Class from the hbase.jar so it gets registered too.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class);

    TableMapReduceUtil.initCredentials(job);
    job.waitForCompletion(true);
}
From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
    if (actualSeparator != null) {
        conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(mapperClass);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(mapperClass);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(PutSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}
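A driver would typically call this factory method and then submit the returned job. The sketch below is illustrative only and is not taken from the source file; it assumes the surrounding class provides createSubmittableJob as shown above and that args[0] and args[1] carry the table name and input directory.

// Hypothetical driver usage (illustrative only):
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = createSubmittableJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}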
From source file:com.intel.hibench.DFSIOWriter.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    startTime = System.currentTimeMillis();
    benchData.put(new Put(ONE, ONE, startTime));

    Job job = context.getHadoopJob();
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(Generator.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    String sizeStr = context.getRuntimeArguments().get("size");
    if (sizeStr != null) {
        LOG.info("size we get in config is : " + sizeStr);
        long totalBytes = Long.valueOf(sizeStr) * 1024 * 1024;
        job.getConfiguration().setLong(BENCH_SIZE, totalBytes);
        benchData.put(new Put(ONE, THREE, totalBytes));
    }
}
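The value written through job.getConfiguration() above is read back on the task side. This is a minimal sketch of that read, assuming BENCH_SIZE is the same constant used in beforeSubmit(); the mapper name and generic types are hypothetical and may differ from the real Generator class.

// Hypothetical mapper fragment showing the task-side read of the value set above.
public static class SizeAwareMapper extends Mapper<Text, Text, Text, Text> {
    private long totalBytes;

    @Override
    protected void setup(Context context) {
        // Same BENCH_SIZE key that beforeSubmit() wrote via job.getConfiguration();
        // the default of 0L is arbitrary. map() would use totalBytes to bound output.
        totalBytes = context.getConfiguration().getLong(BENCH_SIZE, 0L);
    }
}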
From source file:com.jbw.jobcontrol.Patent.java
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Job job1 = Job.getInstance(conf);
    job1.setJobName("test");
    job1.setJarByClass(Patent.class);

    ChainMapper.addMapper(job1, InverseMapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            conf);
    ChainMapper.addMapper(job1, CountMapper.class, Text.class, Text.class, Text.class, IntWritable.class,
            conf);
    job1.setReducerClass(IntSumReducer.class);

    Job job2 = Job.getInstance();

    ControlledJob cjob1 = new ControlledJob(job1.getConfiguration());
    ControlledJob cjob2 = new ControlledJob(job2.getConfiguration());
    cjob2.addDependingJob(cjob1);

    JobControl jc = new JobControl("process job");
    jc.addJob(cjob1);
    jc.addJob(cjob2);

    Thread t = new Thread(jc);
    t.start();
    while (true) {
        for (ControlledJob j : jc.getRunningJobList()) {
            break;
        }
        break;
    }
    return 0;
}
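Note that the loop at the end of run() exits immediately, so the method returns before the controlled jobs finish. A more conventional pattern (illustrative only, not from the source file) is to poll the JobControl until all jobs are done and then stop its thread:

// Illustrative alternative to the loop above: wait for both controlled jobs to finish.
while (!jc.allFinished()) {
    Thread.sleep(1000);
}
jc.stop();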
From source file:com.junz.hadoop.custom.SytsLogInputFormat.java
License:Apache License
public static void setStartId(Job job, long id) {
    job.getConfiguration().setLong(START_ID_PROPERTY, id);
}
From source file:com.junz.hadoop.custom.SytsLogInputFormat.java
License:Apache License
public static void setNumberOfIds(Job job, long number) {
    job.getConfiguration().setLong(NUMBER_LOG_PROPERTY, number);
}
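These static setters write job-scoped properties through getConfiguration(); the consuming side of such a custom InputFormat usually reads the same keys back from a Configuration. The getters below are a hypothetical counterpart, not taken from the source file; only the property constants mirror the setters above.

// Hypothetical counterpart getters that the InputFormat could use at split time.
public static long getStartId(Configuration conf) {
    return conf.getLong(START_ID_PROPERTY, 0L);
}

public static long getNumberOfIds(Configuration conf) {
    return conf.getLong(NUMBER_LOG_PROPERTY, 0L);
}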