List of usage examples for org.apache.hadoop.mapred.JobConf.setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass)
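Before the examples, a minimal sketch of the pattern most of them share: setMapOutputValueClass is only needed when the mapper's intermediate value type differs from the final output value type given to setOutputValueClass; otherwise the final value class is assumed for the map output as well. The driver below is not taken from any of the projects listed here; the class names, tokenizing logic, and input/output paths are made up for illustration.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class MapOutputValueClassSketch {

    // Mapper emits Text values ("1" per token).
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, Text> {
        public void map(LongWritable key, Text value, OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    output.collect(new Text(token), new Text("1"));
                }
            }
        }
    }

    // Reducer writes DoubleWritable values, so the final and map value classes differ.
    public static class CountReducer extends MapReduceBase
            implements Reducer<Text, Text, Text, DoubleWritable> {
        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, DoubleWritable> output,
                Reporter reporter) throws IOException {
            double count = 0;
            while (values.hasNext()) {
                count += Double.parseDouble(values.next().toString());
            }
            output.collect(key, new DoubleWritable(count));
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MapOutputValueClassSketch.class);
        conf.setJobName("MapOutputValueClassSketch");

        conf.setMapperClass(TokenMapper.class);
        conf.setReducerClass(CountReducer.class);

        // Final (reducer) output types.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(DoubleWritable.class);

        // Intermediate map output values are Text, so they must be declared explicitly;
        // without this call the framework would expect DoubleWritable from the mapper.
        conf.setMapOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

The map output key class is left at its default here because the mapper and reducer both key on Text; only the value classes diverge.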
From source file:pegasus.matvec.MatvecNaive.java
License:Apache License
protected JobConf configPass1() throws Exception {
    final JobConf conf = new JobConf(getConf(), MatvecNaive.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.set("makesym", "" + makesym);
    conf.set("transpose", "" + transpose);
    conf.set("ignore_weights", "" + ignore_weights);
    conf.setJobName("MatvecNaive_pass1");

    conf.setMapperClass(MapPass1.class);
    conf.setReducerClass(RedPass1.class);

    if (vector_path == null)
        FileInputFormat.setInputPaths(conf, edge_path);
    else
        FileInputFormat.setInputPaths(conf, edge_path, vector_path);
    FileOutputFormat.setOutputPath(conf, tempmv_path);

    conf.setNumReduceTasks(nreducer);

    // Map output values are Text, while the final (reduce) output values are DoubleWritable.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);
    conf.setMapOutputValueClass(Text.class);

    return conf;
}
From source file:pegasus.matvec.MatvecNaive.java
License:Apache License
protected JobConf configPass2() throws Exception {
    final JobConf conf = new JobConf(getConf(), MatvecNaive.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.setJobName("MatvecNaive_pass2");

    conf.setMapperClass(MapPass2.class);
    conf.setReducerClass(RedPass2.class);

    FileInputFormat.setInputPaths(conf, tempmv_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    conf.setNumReduceTasks(nreducer);

    // Here the map output values are DoubleWritable, while the final output values are Text.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(DoubleWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
From source file:pegasus.matvec.MatvecPrep.java
License:Apache License
protected JobConf configStage1(String out_prefix) throws Exception {
    final JobConf conf = new JobConf(getConf(), MatvecPrep.class);
    conf.set("block_size", "" + block_size);
    conf.set("matrix_row", "" + number_nodes);
    conf.set("out_prefix", "" + out_prefix);
    conf.set("makesym", "" + makesym);
    conf.setJobName("MatvecPrep_Stage1");

    conf.setMapperClass(MapStage1.class);
    conf.setReducerClass(RedStage1.class);

    FileSystem fs = FileSystem.get(getConf());
    fs.delete(output_path, true);

    FileInputFormat.setInputPaths(conf, edge_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    int num_reduce_tasks = nreducer;
    conf.setNumReduceTasks(num_reduce_tasks);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    return conf;
}
From source file:pegasus.pagerank.PagerankInitVector.java
License:Apache License
protected JobConf configStage1() throws Exception {
    final JobConf conf = new JobConf(getConf(), PagerankInitVector.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.setJobName("PagerankInitVector_Stage1");

    conf.setMapperClass(MapStage1.class);
    conf.setReducerClass(RedStage1.class);

    FileInputFormat.setInputPaths(conf, initial_prinput_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
From source file:ronchy.BigramCount.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BigramCount");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BigramCount.class);
    conf.setJobName("BigramCount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     * Note that these must match the Class arguments given in the mapper
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:sa.edu.kaust.fwindex.BuildIntDocVectorsForwardIndex.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }
    String inPath = args[0];
    String outPath = args[1];

    JobConf conf = new JobConf(getConf(), BuildIntDocVectorsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;

    sLogger.info("Tool: BuildIntDocVectorsIndex");

    String intDocVectorsPath = inPath;
    String forwardIndexPath = outPath;

    if (!fs.exists(new Path(intDocVectorsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("IntDocVectorsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);
    conf.setJobName("BuildIntDocVectorsForwardIndex");

    Path inputPath = new Path(intDocVectorsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(TermDF.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}
From source file:sa.edu.kaust.twitter.index.BuildPostingsForwardIndex.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(BuildPostingsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;

    sLogger.info("Tool: PostingsForwardIndex");

    String postingsPath = args[0];
    String forwardIndexPath = args[1];

    if (!fs.exists(new Path(postingsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    // delete the output directory if it exists already
    //FileSystem.get(conf).delete(new Path(forwardIndexPath), true);
    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("PostingsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);
    conf.setJobName("BuildPostingsForwardIndex");

    Path inputPath = new Path(postingsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}
From source file:sa.edu.kaust.twitter.index.BuildTweetsForwardIndex.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(BuildTweetsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;

    sLogger.info("Tool: TweetsForwardIndex");

    String postingsPath = args[0];
    String forwardIndexPath = args[1];

    if (!fs.exists(new Path(postingsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    // delete the output directory if it exists already
    //FileSystem.get(conf).delete(new Path(forwardIndexPath), true);
    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("PostingsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);
    conf.setJobName("BuildTweetsForwardIndex");

    Path inputPath = new Path(postingsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}
From source file:sg.edu.astar.dsi.mergespill.App.java
public synchronized static void doProcess(String directory, int spillNumber)
        throws IOException, InterruptedException {
    System.out.println("directory: " + directory);
    System.out.println("numberOfSpill: " + spillNumber);

    // SETUP
    JobConf job = new JobConf();
    //job.setMapOutputKeyClass(Text.class);
    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    //Class<Text> keyClass = (Class<Text>)job.getMapOutputKeyClass();
    Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass();
    Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();
    FileSystem rfs;
    CompressionCodec codec = null;
    Counters.Counter spilledRecordsCounter = null;
    rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw();

    while (!new File(directory).isDirectory()) {
        sleep(5000);
    }

    if (new File(directory).isDirectory()) {
        ArrayList<Path> spillFile = new ArrayList();
        ArrayList<Path> spillFileIndex = new ArrayList();

        App myApp;
        myApp = new App();

        myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber);

        ArrayList<SpillRecord> indexCacheList = new ArrayList<>();
        int numSpills = 0;

        Iterator itrSpillFileIndex = spillFileIndex.iterator();
        while (itrSpillFileIndex.hasNext()) {
            numSpills++;
            Path temp = (Path) itrSpillFileIndex.next();
            System.out.println(temp);
            SpillRecord sr = new SpillRecord(temp, job);
            indexCacheList.add(sr);

            System.out.println("indexFile partition size: " + sr.size());
            long startOffset = 0;
            for (int i = 0; i < sr.size(); i++) { // sr.size is the number of partitions
                IndexRecord ir = sr.getIndex(i);
                System.out.println("index[" + i + "] rawLength = " + ir.rawLength);
                System.out.println("index[" + i + "] partLength = " + ir.partLength);
                System.out.println("index[" + i + "] startOffset= " + ir.startOffset);
                startOffset = ir.startOffset;
            }
            System.out.println("========================================");
        }
        System.out.println("Number of spills: " + numSpills);

        // FinalOutputFile
        Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE");
        FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
        System.out.println("GOT HERE 1");
        Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index");

        // ONE PARTITION ONLY
        List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills);
        for (int i = 0; i < numSpills; i++) {
            IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0);
            Path temp = spillFileIndex.get(i);
            String temp1 = temp.toString();
            String temp2 = temp1.substring(0, temp1.length() - 6);
            //System.out.println(temp2);
            //System.out.println(new Path(temp2).getParent());
            //File myFile = new File(temp2);
            //System.out.println(myFile.getPath());

            Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(temp2),
                    theIndexRecord.startOffset, theIndexRecord.partLength, codec, true);
            segmentList.add(i, s);
        }
        System.out.println("GOT HERE 2");

        RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4,
                new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null,
                spilledRecordsCounter, null, TaskType.MAP);
        System.out.println("GOT HERE 3");

        // write merged output to disk
        long segmentStart = finalOut.getPos();
        FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut);
        Writer<TextDsi, IntWritable> writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut,
                TextDsi.class, IntWritable.class, codec, spilledRecordsCounter);
        System.out.println("GOT HERE 4");

        Merger.writeFile(kvIter, writer, null, job);
        writer.close();
        finalOut.close();
        System.out.println("GOT HERE 5");

        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(1);
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        System.out.println("rec.startOffset: " + rec.startOffset);
        System.out.println("rec.rawLength : " + rec.rawLength);
        System.out.println("rec.partLength : " + rec.partLength);
        spillRec.putIndex(rec, 0);
        spillRec.writeToFile(finalIndexFile, job);
        System.out.println("GOT HERE 6");
    } else {
        System.out.println("argument is not a directory! : " + directory);
    }
}
From source file:TVA.Hadoop.MapReduce.Development.Test_RecordReader_Alt.java
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *         job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), Test_RecordReader_Alt.class);
    conf.setJobName("Test_RecordReader_Alt");

    // the keys are words (strings)
    //conf.setOutputKeyClass(IntWritable.class);
    //conf.setOutputValueClass(DoubleWritable.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(StandardPointFile.class);

    conf.set("gov.tva.mapreduce.AverageFrequency.connectionstring",
            "jdbc:sqlserver://rgocdsql:1433; databaseName=PhasorMeasurementData;user=NaspiApp;password=pw4site;");
    conf.set("gov.tva.mapreduce.AverageFrequency.HistorianID", "2");

    conf.setMapperClass(MapClass.class);
    //conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(DatAware_InputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    /*
     * At this point, we need to check for a parameter that represents the id
     * of any other info we may need to view
     * --- then set the parameter in the job configuration
     * ex: conf.set( "gov.tva.AvgFreq.Company.ID", other_args.get( n ) );
     */
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);

    return 0;
}