Example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks

List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException 

Source Link

Document

Set the number of reduce tasks for the job.

Usage

From source file:com.bizosys.oneline.maintenance.Import.java

License:Apache License

/**
 * Sets up the actual job.//from   w  ww  .  ja v a2 s .c o m
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Importer.class);
    System.out.println("Input Dir:" + inputDir);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Importer.class);
    // No reducers.  Just write straight to table.  Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}

From source file:com.blackberry.logdriver.util.Cat.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }/*w  w w .  j av  a 2  s .  co m*/

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.FastSearch.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }//  w  w w  .  j a v a  2s  . c  o m

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(FastSearch.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.Grep.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }//from w  w w. j a  v  a2 s .  com

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String regex = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] regex input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    regex = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Grep.class);
    jobConf.setIfUnset("mapred.job.name", "Grep Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(GrepMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }

}

From source file:com.blackberry.logdriver.util.MultiSearch.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }/* ww w . ja  v a2 s  . c o m*/

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.Search.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }//from   ww w . jav  a  2s  .  c o m

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Search.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", searchString);

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This/*from   w  w  w  . jav a  2 s.co m*/
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match ZHFileOutputFormat's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *     ZPutSortReducer)</li>
 * </ul> 
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(ZHFileOutputFormat.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(ZPutSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    partitionsPath.makeQualified(fs);
    URI cacheUri;
    try {
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    LOG.info("Incremental table output configured.");
}

From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This//ww  w  . j a  v a  2 s. co  m
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the
 * DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of
 * regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat's
 * requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either
 * KeyValueSortReducer or PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either
 * KeyValue or Put before running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table,
        Class<? extends HFileOutputFormatBase> hfileOutputFormatBase) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(hfileOutputFormatBase);

    // Based on the configured map output class, set the correct reducer to
    // properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    // TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}

From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityLoad.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);/*from  www. j a  va  2  s.  c o  m*/
    final Job job = new Job(getConf(), jobName);

    if (-1 == options.test.numRows) {
        options.test.numRows = job.getConfiguration().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
    }

    job.setJarByClass(this.getClass());

    job.setInputFormatClass(DataLoadInputFormat.class);
    DataLoadInputFormat.setTabletServers(job,
            options.connection.getConnector().instanceOperations().getTabletServers());
    DataLoadInputFormat.setNumRows(job, options.test.numRows);
    DataLoadInputFormat.setNumQualifiersPerFamily(job, options.test.qualifiers);

    job.getConfiguration().set(VISIBILITY, new String(options.visibility.visibility.getExpression(), "UTF-8"));

    final TableOperations ops = options.connection.getConnector().tableOperations();

    final List<String> names = options.test.getTableNamesAndConfigureThem(ops);
    for (String name : names) {
        final int numSplits = ops.getSplits(name, options.test.numRows).size();
        if (options.test.numRows > numSplits) {
            log.info("adding splits to table '" + name + "', to bring it from " + numSplits + " to "
                    + options.test.numRows + ".");
            final SortedSet<Text> splits = new TreeSet<Text>();
            // for cases where we're adding way more splits than there are currently possible servers to handle them, do a pre-pre-split
            //   N.B. If we've just created this table, there will be 0 splits because we'll just have the initial tablet.
            if (0 == numSplits || options.test.numRows / numSplits > 10) {
                log.info("splitting in two waves due to the number of splits we need to add.");
                // TODO turtles all the way down.
                final int prepre = options.test.numRows / (0 == numSplits ? 10 : numSplits * 10);
                for (int i = 0; i < prepre; i++) {
                    splits.add(new Text(new StringBuilder(Long.toString(i)).reverse().toString()));
                }
                ops.addSplits(name, splits);
                log.debug("delay 30s for splits to get assigned off host.");
                try {
                    Thread.currentThread().sleep(30 * 1000);
                } catch (InterruptedException exception) {
                    log.warn("interrupted from sleep early.");
                }
                splits.clear();
            }
            for (int i = 0; i < options.test.numRows; i++) {
                splits.add(new Text(new StringBuilder(Long.toString(i)).reverse().toString()));
            }
            ops.addSplits(name, splits);
        }
    }
    log.debug("delay 30s for splits to get assigned off host.");
    try {
        Thread.currentThread().sleep(30 * 1000);
    } catch (InterruptedException exception) {
        log.warn("interrupted from sleep early.");
    }

    job.getConfiguration().setStrings(OUTPUT_TABLE_NAMES, names.toArray(new String[0]));

    job.setMapperClass(DataLoadMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    log.info("launching map-only job to insert " + options.test.numRows + " rows of "
            + (FAMILIES.length * options.test.qualifiers) + " cells each into each of the tables " + names);
    options.output.useAccumuloOutputFormat(job);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}

From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityVerify.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);//w ww .  j  av  a  2s .  c o m
    try {
        final int totalMapSlots = getConf().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
        if (-1 == options.test.numRows) {
            options.test.numRows = totalMapSlots;
        }
        final TableOperations ops = options.connection.getConnector().tableOperations();
        final List<String> names = options.test.getTableNames(ops);
        int totalReduceSlots = getConf().getInt("mapred.reduce.tasks", 0);
        if (-1 != options.test.numReduceSlots) {
            totalReduceSlots = options.test.numReduceSlots;
        }
        if (0 == totalReduceSlots) {
            totalReduceSlots = names.size();
        }
        final int reducesPerJob = Math.max(1, totalReduceSlots / names.size());

        final List<Job> jobs = new ArrayList();
        for (String name : names) {
            final Job job = new Job(getConf(), jobName + " " + name);
            job.setJarByClass(this.getClass());
            options.input.useAccumuloInputFormat(job, name);
            job.setMapperClass(DataVerifyMapper.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            job.setReducerClass(LongSumReducer.class);
            job.setCombinerClass(LongSumReducer.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job, new Path(options.test.output, name));
            job.setNumReduceTasks(reducesPerJob);
            job.submit();
            jobs.add(job);
        }

        boolean success = true;
        final long numCellsPerRow = options.test.qualifiers * DataCompatibilityLoad.FAMILIES.length;
        final long numCellsPerFamily = options.test.qualifiers * options.test.numRows;
        for (Job job : jobs) {
            success &= job.waitForCompletion(true);
            final CounterGroup group = job.getCounters().getGroup(DataVerifyMapper.class.getName());
            if (null == group) {
                log.error("Job '" + job.getJobName() + "' doesn't have counters for the verification mapper.");
                success = false;
            } else {
                final Counter badCounter = group.findCounter(BAD_COUNTER);
                if (null != badCounter && 0 < badCounter.getValue()) {
                    log.error("Job '" + job.getJobName() + "' has " + badCounter.getValue()
                            + " entries with bad checksums.");
                    success = false;
                }
                int numRows = 0;
                int numFamilies = 0;
                for (Counter counter : group) {
                    if (counter.getName().startsWith(ROW_COUNTER_PREFIX)) {
                        numRows++;
                        if (numCellsPerRow != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerRow + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    } else if (counter.getName().startsWith(FAMILY_COUNTER_PREFIX)) {
                        numFamilies++;
                        if (numCellsPerFamily != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerFamily + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    }
                }
                if (options.test.numRows != numRows) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have " + options.test.numRows
                            + " rows, but has " + numRows);
                    success = false;
                }
                if (DataCompatibilityLoad.FAMILIES.length != numFamilies) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have "
                            + DataCompatibilityLoad.FAMILIES.length + " families, but has " + numFamilies);
                    success = false;
                }
            }
        }
        if (success) {
            log.info("All internal checks passed.");
        } else {
            log.info("Some checks failed. see log.");
        }
        return success ? 0 : 1;
    } finally {
        options.input.close();
    }
}