Example usage for org.apache.hadoop.mapred JobConf setMapOutputValueClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setMapOutputValueClass, drawn from the source files listed below.

Prototype

public void setMapOutputValueClass(Class<?> theClass) 

Document

Set the value class for the map output data. This allows the user to specify the map output value class to be different than the final output value class.
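
The map output classes only need to be set when the intermediate types differ from the job's final output types; if they are not set, Hadoop assumes they match the classes given to setOutputKeyClass/setOutputValueClass. Below is a minimal, self-contained sketch (DistinctValueCount, FieldMapper, and DistinctReducer are illustrative names, not taken from any project on this page): the mappers emit (Text, Text) pairs while the reducers emit (Text, IntWritable), so the intermediate value class has to be declared explicitly.

import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Illustrative job: counts the distinct values seen for each key.
public class DistinctValueCount {

    // Emits (first field, second field) from tab-separated input lines.
    public static class FieldMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, Text> {
        public void map(LongWritable offset, Text line, OutputCollector<Text, Text> out,
                Reporter reporter) throws IOException {
            String[] fields = line.toString().split("\t", 2);
            if (fields.length == 2) {
                out.collect(new Text(fields[0]), new Text(fields[1]));
            }
        }
    }

    // Collapses each key's values into a count of distinct values.
    public static class DistinctReducer extends MapReduceBase
            implements Reducer<Text, Text, Text, IntWritable> {
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, IntWritable> out, Reporter reporter) throws IOException {
            Set<String> distinct = new HashSet<String>();
            while (values.hasNext()) {
                distinct.add(values.next().toString());
            }
            out.collect(key, new IntWritable(distinct.size()));
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(DistinctValueCount.class);
        conf.setJobName("distinct-value-count");

        conf.setMapperClass(FieldMapper.class);
        conf.setReducerClass(DistinctReducer.class);

        // The map output value type (Text) differs from the final output
        // value type (IntWritable), so it must be declared explicitly:
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

If the declared class does not match what the mappers actually emit, the job typically fails at runtime during the map-side sort with an IOException reporting a wrong key or value class.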

Usage

From source file: TVA.Hadoop.Samples.TestRecordReader.java

/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {

    JobConf conf = new JobConf(getConf(), TestRecordReader.class);
    conf.setJobName("TestRecordReader");

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(StandardPointFile.class);

    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(HistorianInputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);

    return 0;
}

From source file: U.CC.ExpDestFromUserCity.java

public static void main(String[] args) throws Exception {
    String inputPath = "/Volumes/MacBackUp/train4.txt";
    String outputPath = "/Volumes/MacBackUp/expedia/outDestRec/";
    //   String temp = "/Volumes/MacBackUp/expedia/temp2Exp/";

    JobClient client = new JobClient();
    JobConf conf = new JobConf(ExpDestFromUserCity.class);
    conf.setJobName("ExpediaDestHotelRec");
    conf.setMapperClass(ExpMapperDest.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(ExpReducerDest.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: U.CC.ExpediaRunner1.java

public static void main(String[] args) throws Exception {

    String inputPath = "/Volumes/MacBackUp/trainfullData2.txt";
    String outputPath = "/Volumes/MacBackUp/expedia/outputExpedia/";
    String temp = "/Volumes/MacBackUp/expedia/temp2Exp/";

    //    String inputPath = "hdfs://ec2-52-43-100-208.us-west-2.compute.amazonaws.com/home/ubuntu/hadoop/train4.txt";
    //  String outputPath = "hdfs://ec2-52-43-100-208.us-west-2.compute.amazonaws.com/home/ubuntu/hdfstmp/expedia/outputex/";
    //  String temp = "hdfs://ec2-52-43-100-208.us-west-2.compute.amazonaws.com/home/ubuntu/hdfstmp/expedia/tempex/";

    JobClient client = new JobClient();
    JobConf conf = new JobConf(ExpediaRunner1.class);
    conf.setJobName("ExpediaMapper1");
    conf.setMapperClass(ExpediaMapper2.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(ExpediaReducer2.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(temp));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Chain a second job that reads the first job's output from temp:

    JobConf conf1 = new JobConf(ExpediaRunner1.class);
    conf1.setJobName("ExpediaMapper2");
    conf1.setMapperClass(ExpediaMapper3.class);
    conf1.setMapOutputKeyClass(Text.class);
    conf1.setMapOutputValueClass(Text.class);
    conf1.setReducerClass(ExpediaReducer3.class);
    FileInputFormat.setInputPaths(conf1, new Path(temp));
    FileOutputFormat.setOutputPath(conf1, new Path(outputPath));

    //   FileInputFormat.setInputPaths(conf1, new Path(OUTPUT_PATH));
    //   FileOutputFormat.setOutputPath(conf1, new Path(args[1]));

    client.setConf(conf1);
    try {
        JobClient.runJob(conf1);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: U.CC.ExpediaViewer.java

public static void main(String[] args) throws Exception {

    String inputPath = "/Volumes/MacBackUp/mahFullOutput.txt";
    String outputPath = "/Volumes/MacBackUp/expedia/mahFullOutput_format";

    //          String inputPath = "/user/tejageetla/inputExpedia/trainfullData2.txt";
    //   String outputPath = "/user/tejageetla/expedia/Dest_lngth_UsrCityReco/";
    //   String temp = "/Volumes/MacBackUp/expedia/temp2Exp/";

    JobClient client = new JobClient();
    JobConf conf = new JobConf(ExpediaViewer.class);
    conf.setJobName("ExpediaDestHotelRecViewer");
    conf.setMapperClass(ExpedMapView.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file: U.CC.LicenseDriver1.java

public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(LicenseDriver1.class);
    conf.setJobName("License Driver");

    //conf.setOutputKeyClass(Text.class);
    //conf.setOutputValueClass(Text.class);
    conf.setMapperClass(LicenseMapper1.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);

    conf.setReducerClass(LicenseReducer1.class);
    //conf.setCombinerClass(Reducer.class);

    // take the input and output from the command line
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    //  FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: U.CC.SpeciesGraphBuilder.java

public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(SpeciesGraphBuilder.class);
    conf.setJobName("Page-rank Species Graph Builder");

    //conf.setOutputKeyClass(Text.class); 
    //conf.setOutputValueClass(Text.class); 
    conf.setMapperClass(SpeciesGraphBuilderMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); 
    //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); 

    conf.setReducerClass(SpeciesGraphBuilderReducer.class);
    //conf.setCombinerClass(SpeciesGraphBuilderReducer.class); 

    //conf.setInputPath(new Path("graph1")); 
    //conf.setOutputPath(new Path("graph2")); 
    // take the input and output from the command line
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: uk.bl.wa.hadoop.datasets.WARCDatasetGenerator.java

License: Open Source License

/**
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Store application properties where the mappers/reducers can access
    // them
    Config index_conf;
    if (this.configPath != null) {
        index_conf = ConfigFactory.parseFile(new File(this.configPath));
    } else {
        index_conf = ConfigFactory.load();
    }
    if (this.dumpConfig) {
        ConfigPrinter.print(index_conf);
        System.exit(0);
    }
    // Decide whether to apply annotations:
    // Store the properties:
    conf.set(CONFIG_PROPERTIES, index_conf.withOnlyPath("warc").root().render(ConfigRenderOptions.concise()));
    LOG.info("Loaded warc config.");
    LOG.info(index_conf.getString("warc.title"));

    // Reducer count (fall back to a default when not configured);
    // Config.getInt throws com.typesafe.config.ConfigException, not
    // NumberFormatException, when the key is missing or malformed:
    int numReducers;
    try {
        numReducers = index_conf.getInt("warc.hadoop.num_reducers");
    } catch (ConfigException e) {
        numReducers = 10;
    }

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(ArchiveFileInputFormat.class);
    conf.setMapperClass(WARCDatasetMapper.class);
    conf.setReducerClass(FrequencyCountingReducer.class);
    // This can optionally be used to suppress keys:
    // conf.setOutputFormat(KeylessTextOutputFormat.class);
    // conf.set( "map.output.key.field.separator", "" );

    // Compress the output from the maps, to cut down temp space
    // requirements between map and reduce.
    conf.setBoolean("mapreduce.map.output.compress", true); // Wrong syntax
    // for 0.20.x ?
    conf.set("mapred.compress.map.output", "true");
    // conf.set("mapred.map.output.compression.codec",
    // "org.apache.hadoop.io.compress.GzipCodec");
    // Ensure the JARs we provide take precedence over ones from Hadoop:
    conf.setBoolean("mapreduce.task.classpath.user.precedence", true);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);

    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_SUMMARY_NAME, TextOutputFormat.class, Text.class,
            Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_FFB_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, HOSTS_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, HOST_LINKS_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, GEO_SUMMARY_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FACES_NAME, TextOutputFormat.class, Text.class, Text.class);

}

From source file: uk.bl.wa.hadoop.hosts.HostsReport.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), HostsReport.class);

    log.info("Adding logs...");
    String line;
    BufferedReader br = new BufferedReader(new FileReader(args[0]));
    while ((line = br.readLine()) != null) {
        log.info("Adding " + line);
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();

    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    conf.setJarByClass(HostsReport.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapperClass(HostsReportMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCombinerClass(HostsReportReducer.class);
    conf.setReducerClass(HostsReportReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    JobClient.runJob(conf);
    return 0;
}

From source file: uk.bl.wa.hadoop.indexer.mdx.MDXSeqSampleGenerator.java

License: Open Source License

/**
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(MDXSeqSampleMapper.class);
    conf.setReducerClass(ReservoirSamplingReducer.class);
    conf.setOutputFormat(KeylessTextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);

    MultipleOutputs.addMultiNamedOutput(conf, GEO_NAME, KeylessTextOutputFormat.class, Text.class, Text.class);

    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_FFB_SAMPLE_NAME, KeylessTextOutputFormat.class,
            Text.class, Text.class);

    KeylessTextOutputFormat.setCompressOutput(conf, true);
    KeylessTextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
}

From source file: uk.bl.wa.hadoop.indexer.mdx.MDXSeqStatsGenerator.java

License: Open Source License

/**
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(MDXSeqStatsMapper.class);
    conf.setReducerClass(FrequencyCountingReducer.class);
    conf.setOutputFormat(KeylessTextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);

    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_SUMMARY_NAME, KeylessTextOutputFormat.class, Text.class,
            Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_FFB_NAME, KeylessTextOutputFormat.class, Text.class,
            Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, HOST_LINKS_NAME, KeylessTextOutputFormat.class, Text.class,
            Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, GEO_SUMMARY_NAME, KeylessTextOutputFormat.class, Text.class,
            Text.class);

    KeylessTextOutputFormat.setCompressOutput(conf, true);
    KeylessTextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
}