Usage examples for org.apache.hadoop.mapred.JobConf#setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass)
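setMapOutputValueClass declares the class of the values the mapper emits. It is needed when the map output value type differs from the job's final output value type, which otherwise serves as the default. A minimal sketch of typical usage follows; MyJob, MyMapper, and MyReducer are hypothetical placeholders for your own classes.

public static void main(String[] args) throws Exception {
    // Hypothetical job setup: MyJob, MyMapper, and MyReducer are placeholders.
    JobConf conf = new JobConf(MyJob.class);
    conf.setJobName("MapOutputTypesExample");

    conf.setMapperClass(MyMapper.class);   // emits <Text, IntWritable>
    conf.setReducerClass(MyReducer.class); // emits <Text, Text>

    // The intermediate value type (IntWritable) differs from the final
    // output value type (Text), so it must be declared explicitly:
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
}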
From source file:TVA.Hadoop.Samples.TestRecordReader.java
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 * job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), TestRecordReader.class);
    conf.setJobName("TestRecordReader");

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(StandardPointFile.class);

    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(Reduce.class);
    conf.setInputFormat(HistorianInputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file:U.CC.ExpDestFromUserCity.java
public static void main(String[] args) throws Exception {
    String inputPath = "/Volumes/MacBackUp/train4.txt";
    String outputPath = "/Volumes/MacBackUp/expedia/outDestRec/";
    // String temp = "/Volumes/MacBackUp/expedia/temp2Exp/";

    JobClient client = new JobClient();
    JobConf conf = new JobConf(ExpDestFromUserCity.class);
    conf.setJobName("ExpediaDestHotelRec");

    conf.setMapperClass(ExpMapperDest.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(ExpReducerDest.class);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:U.CC.ExpediaRunner1.java
public static void main(String[] args) throws Exception {
    String inputPath = "/Volumes/MacBackUp/trainfullData2.txt";
    String outputPath = "/Volumes/MacBackUp/expedia/outputExpedia/";
    String temp = "/Volumes/MacBackUp/expedia/temp2Exp/";
    // String inputPath = "hdfs://ec2-52-43-100-208.us-west-2.compute.amazonaws.com/home/ubuntu/hadoop/train4.txt";
    // String outputPath = "hdfs://ec2-52-43-100-208.us-west-2.compute.amazonaws.com/home/ubuntu/hdfstmp/expedia/outputex/";
    // String temp = "hdfs://ec2-52-43-100-208.us-west-2.compute.amazonaws.com/home/ubuntu/hdfstmp/expedia/tempex/";

    JobClient client = new JobClient();

    // First job: writes its output to the temp path.
    JobConf conf = new JobConf(ExpediaRunner1.class);
    conf.setJobName("ExpediaMapper1");
    conf.setMapperClass(ExpediaMapper2.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(ExpediaReducer2.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(temp));
    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Chained second job: reads the first job's output from the temp path.
    JobConf conf1 = new JobConf(ExpediaRunner1.class);
    conf1.setJobName("ExpediaMapper2");
    conf1.setMapperClass(ExpediaMapper3.class);
    conf1.setMapOutputKeyClass(Text.class);
    conf1.setMapOutputValueClass(Text.class);
    conf1.setReducerClass(ExpediaReducer3.class);
    FileInputFormat.setInputPaths(conf1, new Path(temp));
    FileOutputFormat.setOutputPath(conf1, new Path(outputPath));
    // FileInputFormat.setInputPaths(conf1, new Path(OUTPUT_PATH));
    // FileOutputFormat.setOutputPath(conf1, new Path(args[1]));
    client.setConf(conf1);
    try {
        JobClient.runJob(conf1);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:U.CC.ExpediaViewer.java
public static void main(String[] args) throws Exception {
    String inputPath = "/Volumes/MacBackUp/mahFullOutput.txt";
    String outputPath = "/Volumes/MacBackUp/expedia/mahFullOutput_format";
    // String inputPath = "/user/tejageetla/inputExpedia/trainfullData2.txt";
    // String outputPath = "/user/tejageetla/expedia/Dest_lngth_UsrCityReco/";
    // String temp = "/Volumes/MacBackUp/expedia/temp2Exp/";

    JobClient client = new JobClient();
    JobConf conf = new JobConf(ExpediaViewer.class);
    conf.setJobName("ExpediaDestHotelRecViewer");

    conf.setMapperClass(ExpedMapView.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:U.CC.LicenseDriver1.java
public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(LicenseDriver1.class);
    conf.setJobName("License Driver");

    //conf.setOutputKeyClass(Text.class);
    //conf.setOutputValueClass(Text.class);
    conf.setMapperClass(LicenseMapper1.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
    conf.setReducerClass(LicenseReducer1.class);
    //conf.setCombinerClass(Reducer.class);

    // Take the input and output from the command line.
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    // FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:U.CC.SpeciesGraphBuilder.java
public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(SpeciesGraphBuilder.class);
    conf.setJobName("Page-rank Species Graph Builder");

    //conf.setOutputKeyClass(Text.class);
    //conf.setOutputValueClass(Text.class);
    conf.setMapperClass(SpeciesGraphBuilderMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
    conf.setReducerClass(SpeciesGraphBuilderReducer.class);
    //conf.setCombinerClass(SpeciesGraphBuilderReducer.class);

    //conf.setInputPath(new Path("graph1"));
    //conf.setOutputPath(new Path("graph2"));
    // Take the input and output from the command line.
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:uk.bl.wa.hadoop.datasets.WARCDatasetGenerator.java
License:Open Source License
/**
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Store application properties where the mappers/reducers can access them
    Config index_conf;
    if (this.configPath != null) {
        index_conf = ConfigFactory.parseFile(new File(this.configPath));
    } else {
        index_conf = ConfigFactory.load();
    }
    if (this.dumpConfig) {
        ConfigPrinter.print(index_conf);
        System.exit(0);
    }
    // Decide whether to apply annotations:
    // Store the properties:
    conf.set(CONFIG_PROPERTIES, index_conf.withOnlyPath("warc").root().render(ConfigRenderOptions.concise()));
    LOG.info("Loaded warc config.");
    LOG.info(index_conf.getString("warc.title"));

    // Reducer count; default to 10 if the config key is missing or malformed.
    int numReducers = 1;
    try {
        numReducers = index_conf.getInt("warc.hadoop.num_reducers");
    } catch (ConfigException e) {
        numReducers = 10;
    }

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");
    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(ArchiveFileInputFormat.class);
    conf.setMapperClass(WARCDatasetMapper.class);
    conf.setReducerClass(FrequencyCountingReducer.class);

    // This can optionally be used to suppress keys:
    // conf.setOutputFormat(KeylessTextOutputFormat.class);
    // conf.set("map.output.key.field.separator", "");

    // Compress the output from the maps, to cut down temp space
    // requirements between map and reduce.
    conf.setBoolean("mapreduce.map.output.compress", true); // Wrong syntax for 0.20.x?
    conf.set("mapred.compress.map.output", "true");
    // conf.set("mapred.map.output.compression.codec",
    //         "org.apache.hadoop.io.compress.GzipCodec");

    // Ensure the JARs we provide take precedence over ones from Hadoop:
    conf.setBoolean("mapreduce.task.classpath.user.precedence", true);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);

    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_SUMMARY_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_FFB_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, HOSTS_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, HOST_LINKS_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, GEO_SUMMARY_NAME, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FACES_NAME, TextOutputFormat.class, Text.class, Text.class);
}
From source file:uk.bl.wa.hadoop.hosts.HostsReport.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), HostsReport.class);
    log.info("Adding logs...");
    String line;
    BufferedReader br = new BufferedReader(new FileReader(args[0]));
    while ((line = br.readLine()) != null) {
        log.info("Adding " + line);
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setJarByClass(HostsReport.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapperClass(HostsReportMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCombinerClass(HostsReportReducer.class);
    conf.setReducerClass(HostsReportReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    JobClient.runJob(conf);
    return 0;
}
From source file:uk.bl.wa.hadoop.indexer.mdx.MDXSeqSampleGenerator.java
License:Open Source License
/**
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");
    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(MDXSeqSampleMapper.class);
    conf.setReducerClass(ReservoirSamplingReducer.class);
    conf.setOutputFormat(KeylessTextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);

    MultipleOutputs.addMultiNamedOutput(conf, GEO_NAME, KeylessTextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_FFB_SAMPLE_NAME, KeylessTextOutputFormat.class, Text.class, Text.class);

    KeylessTextOutputFormat.setCompressOutput(conf, true);
    KeylessTextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
}
From source file:uk.bl.wa.hadoop.indexer.mdx.MDXSeqStatsGenerator.java
License:Open Source License
/**
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");
    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(MDXSeqStatsMapper.class);
    conf.setReducerClass(FrequencyCountingReducer.class);
    conf.setOutputFormat(KeylessTextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);

    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_SUMMARY_NAME, KeylessTextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, FORMATS_FFB_NAME, KeylessTextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, HOST_LINKS_NAME, KeylessTextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, GEO_SUMMARY_NAME, KeylessTextOutputFormat.class, Text.class, Text.class);

    KeylessTextOutputFormat.setCompressOutput(conf, true);
    KeylessTextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
}