Example usage for org.apache.hadoop.mapred JobConf setMapOutputValueClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setMapOutputValueClass.

Prototype

public void setMapOutputValueClass(Class<?> theClass) 

Document

Set the value class for the map output data.
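
This call is only needed when the mapper emits a different value type than the job's final output; if it is not set, JobConf falls back to the class configured via setOutputValueClass for the intermediate data as well. Below is a minimal sketch of such a job (the MyJob, TokenMapper and CountReducer classes are hypothetical placeholders, not taken from the examples on this page):

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(MyJob.class);
    conf.setJobName("map output value class demo");

    conf.setMapperClass(TokenMapper.class);   // hypothetical mapper emitting <Text, Text>
    conf.setReducerClass(CountReducer.class); // hypothetical reducer emitting <Text, IntWritable>

    // Intermediate (map output) types differ from the final output types,
    // so they have to be declared explicitly.
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    // Final (reduce output) types.
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}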

Usage

From source file: org.locationtech.geomesa.jobs.interop.mapred.FeatureWriterJob.java

License: Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FeatureCountJob.class);
    conf.setJobName("simple feature writing");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(ScalaSimpleFeature.class);

    conf.setMapperClass(MyMapper.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(GeoMesaInputFormat.class);
    conf.setOutputFormat(GeoMesaOutputFormat.class);

    Map<String, String> params = new HashMap<String, String>();
    params.put("instanceId", "myinstance");
    params.put("zookeepers", "zoo1,zoo2,zoo3");
    params.put("user", "myuser");
    params.put("password", "mypassword");
    params.put("tableName", "mycatalog");

    Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));

    GeoMesaInputFormat.configure(conf, params, query);

    Map<String, String> outParams = new HashMap<String, String>();
    outParams.put("instanceId", "myinstance");
    outParams.put("zookeepers", "zoo1,zoo2,zoo3");
    outParams.put("user", "myuser");
    outParams.put("password", "mypassword");
    outParams.put("tableName", "mycatalog_2");

    GeoMesaOutputFormat.configureDataStore(conf, outParams);

    JobClient.runJob(conf);
}

From source file: org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License: Open Source License

public int initJob(JobConf jobConf, String input, String output) throws Exception {
    JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);

    /**
     * Need to get all of the sample names/labels
     */
    JobConf cacheConf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);
    cacheConf.setJobName("CacheNorm2MapReduce");
    cacheConf.setNumReduceTasks(1); // Want ONE part file

    // Set up IdentityMapper
    SequenceFileInputFormat.setInputPaths(cacheConf, new Path(input));
    cacheConf.setInputFormat(SequenceFileInputFormat.class);
    cacheConf.setMapperClass(Norm2Mapper.class);
    cacheConf.setOutputKeyClass(StringDoublePairWritable.class);
    cacheConf.setOutputValueClass(SparseVectorWritable.class);

    // Set up IdentityReducer
    cacheConf.setReducerClass(IdentityReducer.class);
    cacheConf.setOutputFormat(SequenceFileOutputFormat.class);
    cacheConf.setNumReduceTasks(1);
    Path sfPath = FileUtils.createRemoteTempPath(fs, qInputPath.getParent());
    LOG.info(String.format("Generating feature vector SequenceFile path %s", sfPath.toString()));
    SequenceFileOutputFormat.setOutputPath(cacheConf, sfPath);
    JobClient.runJob(cacheConf);

    Path cachePath = new Path(sfPath.toString() + Path.SEPARATOR + "part-00000");

    // need to know the size (the reducer might be able to send this back via the Reporter, but how do we grab that info?)
    StringDoublePairWritable key = new StringDoublePairWritable();
    int size = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, cachePath, conf);
    boolean hasNext = reader.next(key);
    while (hasNext) {
        size += 1;
        hasNext = reader.next(key);
    }
    try {
        reader.close();
    } catch (IOException ioe) {
        // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
        LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
    }

    //LOG.info(String.format("Caching model file %s", qInputPath.toString()));
    URI listURI = new URI(fs.makeQualified(cachePath).toString());
    DistributedCache.addCacheFile(listURI, conf);
    LOG.info(String.format("SequenceFile cache path %s (%s) with %d labels", listURI.toString(),
            cachePath.getName(), size));
    conf.set(CACHE_PATH, cachePath.getName());
    conf.setInt(DISTANCE_MATRIX_SIZE, size);

    /**
     * Main MapReduce Task of generating dot products
     */
    LOG.info("Generating distances");
    JobConf distanceConf = new JobConf(conf, CalculateCosineDistanceMatrix.class);
    distanceConf.setJobName("DistanceMapReduce");
    // Set up distance mapper
    SequenceFileInputFormat.setInputPaths(distanceConf, new Path(input));
    distanceConf.setInputFormat(SequenceFileInputFormat.class);
    distanceConf.setMapperClass(DistanceMap.class);
    distanceConf.setMapOutputKeyClass(Text.class);
    distanceConf.setMapOutputValueClass(SparseVectorWritable.class);

    // Set up reducer to merge lower-triangle results into a single dense distance vector
    distanceConf.setReducerClass(DistanceReducer.class);
    distanceConf.setOutputKeyClass(Text.class);
    distanceConf.setOutputValueClass(DenseVectorWritable.class);
    distanceConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(distanceConf, new Path(output));
    JobClient.runJob(distanceConf);

    return 0;
}

From source file: org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java

License: Open Source License

@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java

License: Open Source License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java

License: Apache License

@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java

License: Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java

License: Apache License

@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java

License: Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: org.pentaho.hadoop.sample.wordcount.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    String hdfsHost = "localhost:9000";
    String jobTrackerHost = "localhost:9001";
    String fsPrefix = "hdfs";

    String dirInput = "/wordcount/input";
    String dirOutput = "/wordcount/output";

    if (args.length == 1 && (args[0].equals("--help") || args[0].equals("-h") || args[0].equals("/?"))) {
        System.out.println("Usage: WordCount <options>");
        System.out.println();
        System.out.println("Options:");
        System.out.println();
        System.out.println("--input=DIR                   The directory containing the input files for the");
        System.out.println("                              WordCount Hadoop job");
        System.out.println("--output=DIR                  The directory where the results of the WordCount");
        System.out.println("                              Hadoop job will be stored");
        System.out.println("--hdfsHost=HOST               The host<:port> of the HDFS service");
        System.out.println("                              e.g.- localhost:9000");
        System.out.println("--jobTrackerHost=HOST         The host<:port> of the job tracker service");
        System.out.println("                              e.g.- localhost:9001");
        System.out.println("--fsPrefix=PREFIX             The prefix to use for for the filesystem");
        System.out.println("                              e.g.- hdfs");
        System.out.println();
        System.out.println();
        System.out.println("If an option is not provided through the command prompt the following defaults");
        System.out.println("will be used:");
        System.out.println("--input='/wordcount/input'");
        System.out.println("--output='/wordcount/output'");
        System.out.println("--hdfsHost=localhost:9000");
        System.out.println("--jobTrackerHost=localhost:9001");
        System.out.println("--fsPrefix=hdfs");

    } else {
        if (args.length > 0) {
            for (String arg : args) {
                if (arg.startsWith("--input=")) {
                    dirInput = WordCount.getArgValue(arg);
                } else if (arg.startsWith("--output=")) {
                    dirOutput = WordCount.getArgValue(arg);
                } else if (arg.startsWith("--hdfsHost=")) {
                    hdfsHost = WordCount.getArgValue(arg);
                } else if (arg.startsWith("--jobTrackerHost=")) {
                    jobTrackerHost = WordCount.getArgValue(arg);
                } else if (arg.startsWith("--fsPrefix=")) {
                    fsPrefix = WordCount.getArgValue(arg);
                }
            }
        }

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("WordCount");

        String hdfsBaseUrl = fsPrefix + "://" + hdfsHost;
        conf.set("fs.default.name", hdfsBaseUrl + "/");
        if (jobTrackerHost != null && jobTrackerHost.length() > 0) {
            conf.set("mapred.job.tracker", jobTrackerHost);
        }

        FileInputFormat.setInputPaths(conf, new Path[] { new Path(hdfsBaseUrl + dirInput) });
        FileOutputFormat.setOutputPath(conf, new Path(hdfsBaseUrl + dirOutput));

        conf.setMapperClass(WordCountMapper.class);
        conf.setReducerClass(WordCountReducer.class);

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        JobClient.runJob(conf);
    }
}

From source file: org.pooledtimeseries.MeanChiSquareDistanceCalculation.java

License: Apache License

public static void main(String[] args) throws Exception {

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf(baseConf, MeanChiSquareDistanceCalculation.class);
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    conf.setJobName("mean_chi_square_calculation");

    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name- " + conf.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(DoubleWritable.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setOutputFormat(TextOutputFormat.class);

    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, args[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, args[0]);

    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    JobClient.runJob(conf);
}