Example usage for org.apache.hadoop.mapred JobConf setOutputKeyClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setOutputKeyClass drawn from open-source projects.

Prototype

public void setOutputKeyClass(Class<?> theClass) 

Document

Set the key class for the job output data.
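
Before the project examples below, here is a minimal, self-contained sketch of where the call fits in an old-API job driver. The class name SetOutputKeyClassSketch and the identity passthrough job are illustrative, not taken from the examples: setOutputKeyClass declares the key type of the job's final output, and when the map output key type differs from it, that type must be declared separately with setMapOutputKeyClass (as the HadoopUrlResolution example below does).

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetOutputKeyClassSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetOutputKeyClassSketch.class);
        conf.setJobName("identity-passthrough");

        // The default TextInputFormat feeds the mapper LongWritable offsets and
        // Text lines; the identity mapper and reducer pass both through
        // unchanged, so the declared output classes must match those types.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // If the map output types differed from the final output types, they
        // would be set separately with setMapOutputKeyClass/setMapOutputValueClass.

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}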

Usage

From source file: com.cloudera.hbase.OldWordCount.java

License: Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    conf.setJobName("word count");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));
    JobClient.runJob(conf);
}

From source file: com.cloudera.recordservice.examples.mapreduce.WordCount.java

License: Apache License

public void run(String[] args) throws Exception {
    boolean useRecordService = true;
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    } else if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }
    String input = args[0].trim();
    String output = args[1];

    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount-" + (useRecordService ? "with" : "without") + "-RecordService");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    if (useRecordService) {
        conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
        RecordServiceConfig.setInput(conf, input);
    } else {
        conf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(input));
    }

    FileSystem fs = FileSystem.get(conf);
    Path outputPath = new Path(output);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);
    System.out.println("Done");
}

From source file: com.cloudera.recordservice.mapreduce.testapps.RecordCount.java

License: Apache License

public static long countRecords(String path) throws IOException {
    String output = TestUtil.getTempDirectory();
    Path inputPath = new Path(path);
    Path outputPath = new Path(output);

    JobConf conf = new JobConf(RecordCount.class);
    conf.setJobName("recordcount");

    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(LongWritable.class);

    conf.setInt("mapreduce.job.reduces", 1);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);

    // Read the result and return it. Since we set the number of reducers to 1,
    // there is always just one file containing the value.
    FileSystem fs = outputPath.getFileSystem(conf);
    FSDataInputStream resultStream = fs.open(new Path(output + "/part-00000"));
    byte[] bytes = new byte[16];
    int length = resultStream.read(bytes);
    resultStream.close();
    String result = new String(bytes, 0, length).trim();
    return Long.parseLong(result);
}

From source file: com.cloudera.recordservice.tests.TestMiniClusterController.java

License: Apache License

public static void fillInWordCountMRJobConf(JobConf conf) {
    String input = "select n_comment from tpch.nation";

    conf.setJobName("samplejob-wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    com.cloudera.recordservice.mr.RecordServiceConfig.setInputQuery(conf, input);
    setRandomOutputDir(conf);
}

From source file: com.cloudera.sqoop.orm.TestParseMethods.java

License: Apache License

public void runParseTest(String fieldTerminator, String lineTerminator, String encloser, String escape,
        boolean encloseRequired) throws IOException {

    ClassLoader prevClassLoader = null;

    String[] argv = getArgv(true, fieldTerminator, lineTerminator, encloser, escape, encloseRequired);
    runImport(argv);
    try {
        String tableClassName = getTableName();

        argv = getArgv(false, fieldTerminator, lineTerminator, encloser, escape, encloseRequired);
        SqoopOptions opts = new ImportTool().parseArguments(argv, null, null, true);

        CompilationManager compileMgr = new CompilationManager(opts);
        String jarFileName = compileMgr.getJarFilename();

        // Make sure the user's class is loaded into our address space.
        prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, tableClassName);

        JobConf job = new JobConf();
        job.setJar(jarFileName);

        // Tell the job what class we're testing.
        job.set(ReparseMapper.USER_TYPE_NAME_KEY, tableClassName);

        // use local mode in the same JVM.
        ConfigurationHelper.setJobtrackerAddr(job, "local");
        if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
            job.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
        }
        String warehouseDir = getWarehouseDir();
        Path warehousePath = new Path(warehouseDir);
        Path inputPath = new Path(warehousePath, getTableName());
        Path outputPath = new Path(warehousePath, getTableName() + "-out");

        job.setMapperClass(ReparseMapper.class);
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        JobClient.runJob(job);
    } catch (InvalidOptionsException ioe) {
        fail(ioe.toString());
    } catch (ParseException pe) {
        fail(pe.toString());
    } finally {
        if (null != prevClassLoader) {
            ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
        }
    }
}

From source file: com.cloudera.sqoop.orm.TestParseMethods.java

License: Apache License

public void testFieldSetter() throws IOException {
    ClassLoader prevClassLoader = null;

    String[] types = { "VARCHAR(32)", "VARCHAR(32)" };
    String[] vals = { "'meep'", "'foo'" };
    createTableWithColTypes(types, vals);

    String[] argv = getArgv(true, ",", "\\n", "\\\'", "\\", false);
    runImport(argv);
    try {
        String tableClassName = getTableName();

        argv = getArgv(false, ",", "\\n", "\\\'", "\\", false);
        SqoopOptions opts = new ImportTool().parseArguments(argv, null, null, true);

        CompilationManager compileMgr = new CompilationManager(opts);
        String jarFileName = compileMgr.getJarFilename();

        // Make sure the user's class is loaded into our address space.
        prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, tableClassName);

        JobConf job = new JobConf();
        job.setJar(jarFileName);

        // Tell the job what class we're testing.
        job.set(ExplicitSetMapper.USER_TYPE_NAME_KEY, tableClassName);
        job.set(ExplicitSetMapper.SET_COL_KEY, BASE_COL_NAME + "0");
        job.set(ExplicitSetMapper.SET_VAL_KEY, "this-is-a-test");

        // use local mode in the same JVM.
        ConfigurationHelper.setJobtrackerAddr(job, "local");
        if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
            job.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
        }
        String warehouseDir = getWarehouseDir();
        Path warehousePath = new Path(warehouseDir);
        Path inputPath = new Path(warehousePath, getTableName());
        Path outputPath = new Path(warehousePath, getTableName() + "-out");

        job.setMapperClass(ExplicitSetMapper.class);
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        JobClient.runJob(job);
    } catch (InvalidOptionsException ioe) {
        fail(ioe.toString());
    } catch (ParseException pe) {
        fail(pe.toString());
    } finally {
        if (null != prevClassLoader) {
            ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
        }
    }
}

From source file: com.datasalt.pangool.benchmark.urlresolution.HadoopUrlResolution.java

License: Apache License

public final static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: urlresolution <url-map> <url-register> <out>");
        System.exit(2);
    }
    JobConf job = new JobConf(conf);
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[2]), true);

    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, UrlMapClass.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, UrlRegisterMapClass.class);

    job.setJarByClass(HadoopUrlResolution.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setOutputValueGroupingComparator(GroupingComparator.class);

    job.setMapOutputKeyClass(UrlRegJoinUrlMap.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    Job j = new Job(job);
    j.setReducerClass(Reduce.class);
    j.waitForCompletion(true);
}

From source file: com.datascience.cascading.scheme.CsvScheme.java

License: Apache License

@Override
public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf conf) {
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(ListWritable.class);
    conf.setOutputFormat(CsvOutputFormat.class);
    configureWriterFormat(format, conf);
}

From source file: com.datatorrent.demos.mroperator.LineIndexer.java

License: Open Source License

/**
 * The actual main() method for our program; this is the
 * "driver" for the MapReduce job.//from   www. j a va2  s. c  o  m
 */
public static void main(String[] args) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(LineIndexer.class);

    conf.setJobName("LineIndexer");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output"));

    conf.setMapperClass(LineIndexMapper.class);
    conf.setReducerClass(LineIndexReducer.class);

    client.setConf(conf);

    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: com.datatorrent.demos.mroperator.LogCountsPerHour.java

License: Open Source License

public int run(String[] args) throws Exception {
    // Create a configuration
    Configuration conf = getConf();

    // Create a job from the default configuration that will use the WordCount class
    JobConf job = new JobConf(conf, LogCountsPerHour.class);

    // Define our input path as the first command line argument and our output path as the second
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    // Create File Input/Output formats for these paths (in the job)
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // Configure the job: name, mapper, reducer, and combiner
    job.setJobName("LogAveragePerHour");
    job.setMapperClass(LogMapClass.class);
    job.setReducerClass(LogReduce.class);
    job.setCombinerClass(LogReduce.class);

    // Configure the output
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(DateWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // Run the job
    JobClient.runJob(job);
    return 0;
}