Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the key class for the map output data.

Usage

From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.CsvBulkLoadTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    HBaseConfiguration.addHbaseResources(getConf());
    Configuration conf = getConf();
    String quorum = conf.get("hbase.zookeeper.quorum");
    String clientPort = conf.get("hbase.zookeeper.property.clientPort");
    LOG.info("hbase.zookeeper.quorum=" + quorum);
    LOG.info("hbase.zookeeper.property.clientPort=" + clientPort);
    LOG.info("phoenix.query.dateFormat=" + conf.get("phoenix.query.dateFormat"));

    CommandLine cmdLine = null;/*w  ww .j  a va2 s.  c o  m*/
    try {
        cmdLine = parseOptions(args);
        LOG.info("JdbcUrl=" + getJdbcUrl(quorum + ":" + clientPort));
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
    }
    Class.forName(DriverManager.class.getName());
    Connection conn = DriverManager.getConnection(getJdbcUrl(quorum + ":" + clientPort));
    String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
    String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
    String qualifiedTableName = getQualifiedTableName(schemaName, tableName);
    List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName);

    LOG.info("tableName=" + tableName);
    LOG.info("schemaName=" + schemaName);
    LOG.info("qualifiedTableName=" + qualifiedTableName);

    configureOptions(cmdLine, importColumns, getConf());

    try {
        validateTable(conn, schemaName, tableName);
    } finally {
        conn.close();
    }

    Path inputPath = new Path(cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt()));
    Path outputPath = null;
    if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) {
        outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt()));
    } else {
        outputPath = new Path("/tmp/" + UUID.randomUUID());
    }
    LOG.info("Configuring HFile output path to {}", outputPath);

    Job job = new Job(getConf(),
            "Phoenix MapReduce import for " + getConf().get(PhoenixCsvToKeyValueMapper.TABLE_NAME_CONFKEY));

    // Allow overriding the job jar setting by using a -D system property at startup
    if (job.getJar() == null) {
        job.setJarByClass(PhoenixCsvToKeyValueMapper.class);
    }
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, inputPath);

    FileSystem.get(getConf());
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(PhoenixCsvToKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    HTable htable = new HTable(getConf(), qualifiedTableName);

    // Auto configure partitioner and reducer according to the Main Data table
    HFileOutputFormat.configureIncrementalLoad(job, htable);

    LOG.info("Running MapReduce import job from {} to {}", inputPath, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        LOG.error("Import job failed, check JobTracker for details");
        return 1;
    }

    LOG.info("Loading HFiles from {}", outputPath);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(getConf());
    loader.doBulkLoad(outputPath, htable);
    htable.close();

    LOG.info("Incremental load complete");

    LOG.info("Removing output directory {}", outputPath);
    if (!FileSystem.get(getConf()).delete(outputPath, true)) {
        LOG.error("Removing output directory {} failed", outputPath);
    }

    return 0;
}

From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java

License:Apache License

/**
 * Sets up the actual job.//from ww  w .ja  va  2s.  co m
 *
 * @param conf  The current configuration.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath,
        String tmpOutputPath) throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(CommonConstants.SEPARATOR);
    if (actualSeparator != null) {
        conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes()));
    }
    String tableNameConf = conf.get(CommonConstants.TABLE_NAME);
    if (tableNameConf == null) {
        conf.set(CommonConstants.TABLE_NAME, tableName);
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    Path inputDir = new Path(inputPath);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(MutipleColumnImportTsv.class);
    FileInputFormat.setInputPaths(job, inputDir);

    //??Dimporttsv.inputFormatInputFormat,TextInputFormat
    String inputFmtName = conf.get(CommonConstants.INPUTFORMAT,
            "org.apache.hadoop.mapreduce.lib.input.TextInputFormat");
    LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName);
    Class<? extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class);
    job.setInputFormatClass(inputFmtClass);
    job.setMapperClass(mapperClass);

    String hfileOutPath = tmpOutputPath;
    if (hfileOutPath != null) {
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        //      job.setReducerClass(MutipleColumnReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers.  Just write straight to table.  Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Sets up the actual job. importtsvmapreduce job
 *
 * @param conf  The current configuration.
 * @return The newly created job./*from ww  w  .  java  2  s  .c  om*/
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath,
        String tmpOutputPath) throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    //???BASE64?
    String actualSeparator = conf.get(CommonConstants.SEPARATOR);
    if (actualSeparator != null) {
        conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes()));
    }

    // See if a non-default Mapper was set?mapper?SingleColumnImporterMapper
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    Path inputDir = new Path(inputPath);
    //?job
    Job job = new Job(conf, NAME + "_" + tableName);
    //Set the Jar by finding where a given class came from.
    job.setJarByClass(SingleColumnImportTsv.class);
    //
    FileInputFormat.setInputPaths(job, inputDir);
    //jobinputformat

    //??Dimporttsv.inputFormatInputFormat,TextInputFormat
    //??Dimporttsv.inputFormatInputFormat,TextInputFormat
    String inputFmtName = conf.get(CommonConstants.INPUTFORMAT,
            "org.apache.hadoop.mapreduce.lib.input.TextInputFormat");
    LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName);
    Class<? extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class);
    job.setInputFormatClass(inputFmtClass);
    job.setMapperClass(mapperClass);

    //mapper
    job.setMapperClass(mapperClass);

    String hfileOutPath = tmpOutputPath;
    if (hfileOutPath != null) {
        //?
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        //reducer
        job.setReducerClass(SingleColumnReducer.class);

        Path outputDir = new Path(hfileOutPath);
        //
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(TextArrayWritable.class);
        //job?partition?outputformat?reduce
        configureIncrementalLoad(job, table);

    } else {//put
        // No reducers.  Just write straight to table.  Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}

From source file:com.airline.analytics.AirlineDelayAnalytics.java

@Override
public int run(String[] strings) throws Exception {

    Job job = Job.getInstance(getConf(), "Hadoop Airline Delay Analytics");

    job.setJarByClass(AirlineDelayAnalytics.class);

    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.airline.analytics.AirlineUniqueRoutesAnalytics.java

@Override
public int run(String[] strings) throws Exception {

    Job job = Job.getInstance(getConf(), "Hadoop Airline Orign Destination Analytics");

    job.setJarByClass(getClass());//w  ww  .  j av a 2  s . co m

    // Distributed Cache
    job.addCacheFile(new URI("/airline/codes.csv"));

    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.alectenharmsel.research.hadoop.CodeTokenizer.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MoabLicenses <input> <output>");
        System.exit(-1);/*from  ww  w  . j a  v  a  2s . c  om*/
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(CodeTokenizer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file:com.alectenharmsel.research.SrcTok.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MoabLicenses <input> <output>");
        System.exit(-1);/*from   w w  w .j  av a  2s. c  o  m*/
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(SrcTok.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(SrcTokMapper.class);
    job.setReducerClass(SrcTokReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file:com.alexholmes.hadooputils.combine.avro.mapreduce.CombineAvroKeyValueInputFormatTest.java

License:Apache License

@Test
public void testKeyValueInput() throws ClassNotFoundException, IOException, InterruptedException {
    // Create a test input file.
    File inputFile = createInputFile();

    // Configure the job input.
    Job job = new Job();
    FileInputFormat.setInputPaths(job, new Path(inputFile.getAbsolutePath()));
    job.setInputFormatClass(CombineAvroKeyValueInputFormat.class);
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.INT));
    AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.STRING));

    // Configure a mapper.
    job.setMapperClass(IndexMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Configure a reducer.
    job.setReducerClass(IndexReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(AvroValue.class);
    AvroJob.setOutputValueSchema(job, Schema.createArray(Schema.create(Schema.Type.INT)));

    // Configure the output format.
    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    Path outputPath = new Path(mTempDir.getRoot().getPath(), "out-index");
    FileOutputFormat.setOutputPath(job, outputPath);

    // Run the job.
    assertTrue(job.waitForCompletion(true));

    // Verify that the output Avro container file as the expected data.
    File avroFile = new File(outputPath.toString(), "part-r-00000.avro");
    DatumReader<GenericRecord> datumReader = new SpecificDatumReader<GenericRecord>(AvroKeyValue
            .getSchema(Schema.create(Schema.Type.STRING), Schema.createArray(Schema.create(Schema.Type.INT))));
    DataFileReader<GenericRecord> avroFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
    assertTrue(avroFileReader.hasNext());

    AvroKeyValue<CharSequence, List<Integer>> appleRecord = new AvroKeyValue<CharSequence, List<Integer>>(
            avroFileReader.next());/*from  w  ww  .j  a  va2 s .c o  m*/
    assertNotNull(appleRecord.get());
    assertEquals("apple", appleRecord.getKey().toString());
    List<Integer> appleDocs = appleRecord.getValue();
    assertEquals(3, appleDocs.size());
    assertTrue(appleDocs.contains(1));
    assertTrue(appleDocs.contains(2));
    assertTrue(appleDocs.contains(3));

    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, List<Integer>> bananaRecord = new AvroKeyValue<CharSequence, List<Integer>>(
            avroFileReader.next());
    assertNotNull(bananaRecord.get());
    assertEquals("banana", bananaRecord.getKey().toString());
    List<Integer> bananaDocs = bananaRecord.getValue();
    assertEquals(2, bananaDocs.size());
    assertTrue(bananaDocs.contains(1));
    assertTrue(bananaDocs.contains(2));

    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, List<Integer>> carrotRecord = new AvroKeyValue<CharSequence, List<Integer>>(
            avroFileReader.next());
    assertEquals("carrot", carrotRecord.getKey().toString());
    List<Integer> carrotDocs = carrotRecord.getValue();
    assertEquals(1, carrotDocs.size());
    assertTrue(carrotDocs.contains(1));

    assertFalse(avroFileReader.hasNext());
    avroFileReader.close();
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileJob.java

License:Apache License

/**
 * The driver for the MapReduce job./*  w w w  .ja v  a 2  s  . c om*/
 *
 * @param conf           configuration
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws java.io.IOException         if something went wrong
 * @throws java.net.URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final Configuration conf, final String inputDirAsString, final String outputDirAsString)
        throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {

    Job job = new Job(conf);

    job.setJarByClass(CombineSequenceFileJob.class);
    job.setJobName("seqfilecombiner");

    job.setNumReduceTasks(0);

    //        job.setMapperClass(IdentityMapper.class);

    job.setInputFormatClass(CombineSequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, inputDirAsString);
    FileOutputFormat.setOutputPath(job, new Path(outputDirAsString));

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    boolean jobResult = job.waitForCompletion(true);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    return jobResult;
}

From source file:com.aliyun.openservices.tablestore.hadoop.TableStoreOutputFormatExample.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (!parseArgs(args)) {
        printUsage();//from w w  w  . ja v  a2s. c om
        System.exit(1);
    }
    if (endpoint == null || accessKeyId == null || accessKeySecret == null || inputTable == null
            || outputTable == null) {
        printUsage();
        System.exit(1);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, TableStoreOutputFormatExample.class.getName());
    job.setMapperClass(OwnerMapper.class);
    job.setReducerClass(IntoTableReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setInputFormatClass(TableStoreInputFormat.class);
    job.setOutputFormatClass(TableStoreOutputFormat.class);

    TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(job, endpoint, instance);
    TableStoreInputFormat.addCriteria(job, fetchCriteria());
    TableStoreOutputFormat.setOutputTable(job, outputTable);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}