Example usage for org.apache.hadoop.mapred JobConf setMapOutputKeyClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.JobConf.setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) 

Document

Set the key class for the map output data.
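
setMapOutputKeyClass only needs to be called when the mapper's output key type differs from the job's final output key type; if it is not set, the map output key class defaults to whatever setOutputKeyClass declares. The following minimal sketch illustrates that case (the WordLengthMapper and LengthSumReducer classes and the word-length job itself are illustrative, not taken from any of the source files below): the mapper emits <Text, IntWritable> pairs while the reducer writes <IntWritable, Text>, so the map output classes must be declared explicitly.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class MapOutputKeyExample {

    // Emits each word keyed by itself, with its length as the value.
    public static class WordLengthMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        public void map(LongWritable offset, Text line,
                OutputCollector<Text, IntWritable> out, Reporter reporter) throws IOException {
            for (String word : line.toString().split("\\s+")) {
                if (!word.isEmpty()) {
                    out.collect(new Text(word), new IntWritable(word.length()));
                }
            }
        }
    }

    // Sums the lengths per word and inverts key and value, so the final
    // output types differ from the map output types.
    public static class LengthSumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, IntWritable, Text> {
        public void reduce(Text word, Iterator<IntWritable> lengths,
                OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
            int sum = 0;
            while (lengths.hasNext()) {
                sum += lengths.next().get();
            }
            out.collect(new IntWritable(sum), word);
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MapOutputKeyExample.class);
        conf.setJobName("MapOutputKeyExample");

        conf.setMapperClass(WordLengthMapper.class);
        conf.setReducerClass(LengthSumReducer.class);

        // What the mapper emits into the shuffle:
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);

        // What the reducer finally writes; without the two calls above,
        // these classes would be assumed for the map output as well.
        conf.setOutputKeyClass(IntWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}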

Usage

From source file: U.CC.ExpDestFromUserCity.java

public static void main(String[] args) throws Exception {
    String inputPath = "/Volumes/MacBackUp/train4.txt";
    String outputPath = "/Volumes/MacBackUp/expedia/outDestRec/";
    //   String temp = "/Volumes/MacBackUp/expedia/temp2Exp/";

    JobClient client = new JobClient();
    JobConf conf = new JobConf(ExpDestFromUserCity.class);
    conf.setJobName("ExpediaDestHotelRec");
    conf.setMapperClass(ExpMapperDest.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(ExpReducerDest.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: U.CC.ExpediaRunner1.java

public static void main(String[] args) throws Exception {

    String inputPath = "/Volumes/MacBackUp/trainfullData2.txt";
    String outputPath = "/Volumes/MacBackUp/expedia/outputExpedia/";
    String temp = "/Volumes/MacBackUp/expedia/temp2Exp/";

    //    String inputPath = "hdfs://ec2-52-43-100-208.us-west-2.compute.amazonaws.com/home/ubuntu/hadoop/train4.txt";
    //  String outputPath = "hdfs://ec2-52-43-100-208.us-west-2.compute.amazonaws.com/home/ubuntu/hdfstmp/expedia/outputex/";
    //  String temp = "hdfs://ec2-52-43-100-208.us-west-2.compute.amazonaws.com/home/ubuntu/hdfstmp/expedia/tempex/";

    JobClient client = new JobClient();
    JobConf conf = new JobConf(ExpediaRunner1.class);
    conf.setJobName("ExpediaMapper1");
    conf.setMapperClass(ExpediaMapper2.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(ExpediaReducer2.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(temp));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Job chaining: the second job reads the first job's output from the temp directory.

    JobConf conf1 = new JobConf(ExpediaRunner1.class);
    conf1.setJobName("ExpediaMapper2");
    conf1.setMapperClass(ExpediaMapper3.class);
    conf1.setMapOutputKeyClass(Text.class);
    conf1.setMapOutputValueClass(Text.class);
    conf1.setReducerClass(ExpediaReducer3.class);
    FileInputFormat.setInputPaths(conf1, new Path(temp));
    FileOutputFormat.setOutputPath(conf1, new Path(outputPath));

    //   FileInputFormat.setInputPaths(conf1, new Path(OUTPUT_PATH));
    //   FileOutputFormat.setOutputPath(conf1, new Path(args[1]));

    client.setConf(conf1);
    try {
        JobClient.runJob(conf1);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: U.CC.ExpediaViewer.java

public static void main(String[] args) throws Exception {

    String inputPath = "/Volumes/MacBackUp/mahFullOutput.txt";
    String outputPath = "/Volumes/MacBackUp/expedia/mahFullOutput_format";

    //          String inputPath = "/user/tejageetla/inputExpedia/trainfullData2.txt";
    //   String outputPath = "/user/tejageetla/expedia/Dest_lngth_UsrCityReco/";
    //   String temp = "/Volumes/MacBackUp/expedia/temp2Exp/";

    JobClient client = new JobClient();
    JobConf conf = new JobConf(ExpediaViewer.class);
    conf.setJobName("ExpediaDestHotelRecViewer");
    conf.setMapperClass(ExpedMapView.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file: U.CC.LicenseDriver1.java

public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(LicenseDriver1.class);
    conf.setJobName("License Driver");

    //conf.setOutputKeyClass(Text.class);
    //conf.setOutputValueClass(Text.class);
    conf.setMapperClass(LicenseMapper1.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);

    conf.setReducerClass(LicenseReducer1.class);
    //conf.setCombinerClass(Reducer.class);

    // take the input and output from the command line
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    //  FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: U.CC.SpeciesGraphBuilder.java

public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(SpeciesGraphBuilder.class);
    conf.setJobName("Page-rank Species Graph Builder");

    //conf.setOutputKeyClass(Text.class); 
    //conf.setOutputValueClass(Text.class); 
    conf.setMapperClass(SpeciesGraphBuilderMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); 
    //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); 

    conf.setReducerClass(SpeciesGraphBuilderReducer.class);
    //conf.setCombinerClass(SpeciesGraphBuilderReducer.class); 

    //conf.setInputPath(new Path("graph1")); 
    //conf.setOutputPath(new Path("graph2")); 
    // take the input and output from the command line
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: uk.bl.wa.hadoop.datasets.WARCDatasetGenerator.java

License:Open Source License

/**
 * @param conf
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Store application properties where the mappers/reducers can access
    // them
    Config index_conf;
    if (this.configPath != null) {
        index_conf = ConfigFactory.parseFile(new File(this.configPath));
    } else {
        index_conf = ConfigFactory.load();
    }
    if (this.dumpConfig) {
        ConfigPrinter.print(index_conf);
        System.exit(0);
    }
    // Decide whether to apply annotations:
    // Store the properties:
    conf.set(CONFIG_PROPERTIES, index_conf.withOnlyPath("warc").root().render(ConfigRenderOptions.concise()));
    LOG.info("Loaded warc config.");
    LOG.info(index_conf.getString("warc.title"));

    // Reducer count
    int numReducers = 1;
    try {
        numReducers = index_conf.getInt("warc.hadoop.num_reducers");
    } catch (com.typesafe.config.ConfigException e) {
        // Typesafe Config reports a missing or malformed value with a
        // ConfigException rather than a NumberFormatException.
        numReducers = 10;
    }

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(ArchiveFileInputFormat.class);
    conf.setMapperClass(WARCDatasetMapper.class);
    conf.setReducerClass(FrequencyCountingReducer.class);
    // This can be optionally use to suppress keys:
    // conf.setOutputFormat(KeylessTextOutputFormat.class);
    // conf.set( "map.output.key.field.separator", "" );

    // Compress the output from the maps, to cut down temp space
    // requirements between map and reduce.
    conf.setBoolean("mapreduce.map.output.compress", true); // Wrong syntax
    // for 0.20.x ?
    conf.set("mapred.compress.map.output", "true");
    // conf.set("mapred.map.output.compression.codec",
    // "org.apache.hadoop.io.compress.GzipCodec");
    // Ensure the JARs we provide take precedence over ones from Hadoop:
    conf.setBoolean("mapreduce.task.classpath.user.precedence", true);

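    // The map output types below match the final output types, so the
    // setMapOutput* calls are strictly redundant (they default to the job
    // output classes) but make the shuffle types explicit.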
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);

    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_SUMMARY_NAME, TextOutputFormat.class, Text.class,
            Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_FFB_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, HOSTS_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, HOST_LINKS_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, GEO_SUMMARY_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FACES_NAME, TextOutputFormat.class, Text.class, Text.class);

}

From source file: uk.bl.wa.hadoop.hosts.HostsReport.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), HostsReport.class);

    log.info("Adding logs...");
    String line;
    BufferedReader br = new BufferedReader(new FileReader(args[0]));
    while ((line = br.readLine()) != null) {
        log.info("Adding " + line);
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();

    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    conf.setJarByClass(HostsReport.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapperClass(HostsReportMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
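    // The combiner runs on the map output, so HostsReportReducer must both
    // consume and emit the declared map output types (Text/Text).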
    conf.setCombinerClass(HostsReportReducer.class);
    conf.setReducerClass(HostsReportReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    JobClient.runJob(conf);
    return 0;
}

From source file: uk.bl.wa.hadoop.indexer.mdx.MDXSeqSampleGenerator.java

License:Open Source License

/**
 * @param conf
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(MDXSeqSampleMapper.class);
    conf.setReducerClass(ReservoirSamplingReducer.class);
    conf.setOutputFormat(KeylessTextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);

    MultipleOutputs.addMultiNamedOutput(conf, GEO_NAME, KeylessTextOutputFormat.class, Text.class, Text.class);

    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_FFB_SAMPLE_NAME, KeylessTextOutputFormat.class,
            Text.class, Text.class);

    KeylessTextOutputFormat.setCompressOutput(conf, true);
    KeylessTextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
}

From source file: uk.bl.wa.hadoop.indexer.mdx.MDXSeqStatsGenerator.java

License:Open Source License

/**
 * @param conf
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(MDXSeqStatsMapper.class);
    conf.setReducerClass(FrequencyCountingReducer.class);
    conf.setOutputFormat(KeylessTextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);

    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_SUMMARY_NAME, KeylessTextOutputFormat.class, Text.class,
            Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_FFB_NAME, KeylessTextOutputFormat.class, Text.class,
            Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, HOST_LINKS_NAME, KeylessTextOutputFormat.class, Text.class,
            Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, GEO_SUMMARY_NAME, KeylessTextOutputFormat.class, Text.class,
            Text.class);

    KeylessTextOutputFormat.setCompressOutput(conf, true);
    KeylessTextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
}

From source file: uk.bl.wa.hadoop.indexer.mdx.WARCMDXGenerator.java

License:Open Source License

/**
 * @param conf
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Store application properties where the mappers/reducers can access
    // them
    Config index_conf;
    if (this.configPath != null) {
        LOG.info("Loading config from: " + configPath);
        index_conf = ConfigFactory.parseFile(new File(this.configPath));
    } else {
        LOG.info("Using default config: mdx");
        index_conf = ConfigFactory.load("mdx");
    }
    if (this.dumpConfig) {
        ConfigPrinter.print(index_conf);
        System.exit(0);
    }
    conf.set(CONFIG_PROPERTIES, index_conf.withOnlyPath("warc").root().render(ConfigRenderOptions.concise()));
    LOG.info("Loaded warc config: " + index_conf.getString("warc.title"));

    // Reducer count:
    int numReducers = 10;
    if (index_conf.hasPath(WARC_HADOOP_NUM_REDUCERS)) {
        numReducers = index_conf.getInt(WARC_HADOOP_NUM_REDUCERS);
    }
    if (conf.getInt(WARC_HADOOP_NUM_REDUCERS, -1) != -1) {
        LOG.info("Overriding num_reducers using Hadoop config.");
        numReducers = conf.getInt(WARC_HADOOP_NUM_REDUCERS, numReducers);
    }

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(ArchiveFileInputFormat.class);
    conf.setMapperClass(WARCMDXMapper.class);
    conf.setReducerClass(MDXReduplicatingReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    // conf.setOutputFormat(TextOutputFormat.class);
    // SequenceFileOutputFormat.setOutputCompressionType(conf,
    // CompressionType.BLOCK);
    // OR TextOutputFormat?
    // conf.set("map.output.key.field.separator", "");
    // Compress the output from the maps, to cut down temp space
    // requirements between map and reduce.
    conf.setBoolean("mapreduce.map.output.compress", true); // Wrong syntax
    // for 0.20.x ?
    conf.set("mapred.compress.map.output", "true");
    // conf.set("mapred.map.output.compression.codec",
    // "org.apache.hadoop.io.compress.GzipCodec");
    // Ensure the JARs we provide take precedence over ones from Hadoop:
    conf.setBoolean("mapreduce.task.classpath.user.precedence", true);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);
}