List of usage examples for org.apache.hadoop.mapred JobConf setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass)
From source file:PDI.Hadoop.Datamining.Tools.HistorianParser.java
/** * The main driver for historian map/reduce program. Invoke this method to * submit the map/reduce job./* w w w. ja v a2s .co m*/ * * @throws IOException * When there is communication problems with the job tracker. */ public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), HistorianParser.class); JobClient jobClient = new JobClient(conf); List<String> sourcePaths = new ArrayList<String>(); String destPath = ""; String currentDate = DateUtils.getCurrentDateString(); String startTS = ""; String endTS = ""; String pointIDS = ""; String outputSize = ""; conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(StandardPointFile.class); conf.setMapperClass(MapClass.class); conf.setReducerClass(ReduceClass.class); conf.setInputFormat(HistorianInputFormat.class); conf.set("compression", "no"); conf.set("filePrefix", "devarchive_archive_"); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-startTS".equals(args[i])) { conf.set("startTS", args[++i]); startTS = args[i]; } else if ("-endTS".equals(args[i])) { conf.set("endTS", args[++i]); endTS = args[i]; } else if ("-pointIDS".equals(args[i])) { conf.set("pointIDS", args[++i]); pointIDS = args[i]; } else if ("-outputMaxSize".equals(args[i])) { conf.set("outputSize", args[++i]); outputSize = args[i]; } else if ("-sourcePATH".equals(args[i])) { String sourcePath = "" + args[++i]; if (sourcePath.indexOf(',') == -1) { sourcePaths.add(sourcePath); } else { String[] paths = sourcePath.split(","); for (int ii = 0; ii < paths.length; ii++) { sourcePaths.add(paths[ii]); } } } else if ("-destPATH".equals(args[i])) { destPath = "" + args[++i] + "/"; } else if ("-compression".equals(args[i])) { conf.set("compression", args[++i]); } else if ("-filePrefix".equals(args[i])) { conf.set("filePrefix", args[++i]); } else if 
("-v".equals(args[i])) { pdi_showVersion(); return 0; } else if ("-verbose".equals(args[i])) { this.pdi_setVerbose(true); } else if ("-h".equals(args[i])) { return printUsage(); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Check for the user input parameters if ((0 == sourcePaths.size()) || destPath.equals("") || startTS.equals("") || endTS.equals("") || pointIDS.equals("") || outputSize.equals("") || (0 == conf.get("filePrefix").length())) { System.out.println("ERROR: Wrong input parameters."); return printUsage(); } String startTime = DateUtils.unixTimestampToHumanReadableTime2(startTS); String endTime = DateUtils.unixTimestampToHumanReadableTime2(endTS); System.out.println("-------------------------------------------------------"); System.out.println("jobName : " + currentDate); System.out.println("filePrefix : " + conf.get("filePrefix")); for (int i = 0; i < sourcePaths.size(); i++) { System.out.println("sourcePath[" + i + "]: " + sourcePaths.get(i)); } System.out.println("destPath : " + destPath); System.out.println("startTS : " + startTS + " (" + startTime + ")"); System.out.println("endTS : " + endTS + " (" + endTime + ")"); System.out.println("pointIDS : " + pointIDS); System.out.println("outputMaxSize: " + outputSize + " MB"); System.out.println("compression : " + conf.get("compression")); System.out.println("-------------------------------------------------------"); PathUtils utils = new PathUtils(this.pdi_isVerbose()); if (false == utils.pdi_setRecursiveInputPaths(conf, sourcePaths, startTS, endTS)) { return -1; } // set output path to current time FileOutputFormat.setOutputPath(conf, utils.getOutputPath(destPath, currentDate)); // set jobName to current time // conf.setJobName(date.toString()); 
conf.setJobName(currentDate); JobClient.runJob(conf); // run the job // mergeAndCopyToLocal(conf, destPath); return 0; }
From source file:pegasus.column_joiner.JoinTablePegasus.java
License:Apache License
protected JobConf configPass1() throws Exception { final JobConf conf = new JobConf(getConf(), JoinTablePegasus.class); conf.set("number_tables", "" + number_tables); conf.set("join_type", "" + join_type); conf.setJobName("JoinTablePegasus"); conf.setMapperClass(MapPass1.class); conf.setReducerClass(RedPass1.class); int i = 1;//from www . j a v a 2 s.co m Iterator<Path> iter = input_paths.iterator(); while (iter.hasNext()) { Path cur_path = iter.next(); FileInputFormat.addInputPath(conf, cur_path); conf.set("path" + i, cur_path.toString()); i++; } FileOutputFormat.setOutputPath(conf, output_path); final FileSystem fs = FileSystem.get(conf); fs.delete(output_path); conf.setNumReduceTasks(nreducer); conf.setMapOutputKeyClass(IntWritable.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); return conf; }
From source file:ronchy.BigramCount.java
License:Apache License
/** * Runs this tool./*from w ww .j av a 2 s . c o m*/ */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: BigramCount"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(BigramCount.class); conf.setJobName("BigramCount"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); /** * Note that these must match the Class arguments given in the mapper */ conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:sa.edu.kaust.fwindex.BuildIntDocVectorsForwardIndex.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Takes two arguments: the path of the int doc vectors (input) and the
 * path of the forward index (recorded in the configuration under
 * {@code ForwardIndexPath}). Nothing is run when the input is missing or
 * the forward index already exists.
 *
 * @param args command-line arguments
 * @return -1 when the argument count is wrong, otherwise 0
 * @throws Exception if a file system check or the job fails
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    final String intDocVectorsPath = args[0];
    final String forwardIndexPath = args[1];

    JobConf conf = new JobConf(getConf(), BuildIntDocVectorsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    sLogger.info("Tool: BuildIntDocVectorsIndex");

    Path inputPath = new Path(intDocVectorsPath);
    if (!fs.exists(inputPath)) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("IntDocVectorsForwardIndex already exists: skipping!");
        return 0;
    }

    // Job identity and where the index goes.
    conf.set("ForwardIndexPath", forwardIndexPath);
    conf.setJobName("BuildIntDocVectorsForwardIndex");

    // Input side.
    FileInputFormat.setInputPaths(conf, inputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);

    // Task layout and child JVM heap.
    conf.setNumMapTasks(10);
    conf.setNumReduceTasks(1);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    // Intermediate types; NullOutputFormat is the job's output format.
    conf.setMapOutputKeyClass(TermDF.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}
From source file:sa.edu.kaust.twitter.index.BuildPostingsForwardIndex.java
License:Apache License
/** * Runs this tool./*from w w w. j a v a 2s .com*/ */ public int run(String[] args) throws Exception { if (args.length != 2) { printUsage(); return -1; } JobConf conf = new JobConf(BuildPostingsForwardIndex.class); FileSystem fs = FileSystem.get(conf); int mapTasks = 10; sLogger.info("Tool: PostingsForwardIndex"); String postingsPath = args[0]; String forwardIndexPath = args[1]; if (!fs.exists(new Path(postingsPath))) { sLogger.info("Error: IntDocVectors don't exist!"); return 0; } // delete the output directory if it exists already //FileSystem.get(conf).delete(new Path(forwardIndexPath), true); if (fs.exists(new Path(forwardIndexPath))) { sLogger.info("PostingsForwardIndex already exists: skipping!"); return 0; } conf.set("ForwardIndexPath", forwardIndexPath); conf.setJobName("BuildPostingsForwardIndex"); Path inputPath = new Path(postingsPath); FileInputFormat.setInputPaths(conf, inputPath); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(1); conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.setInputFormat(SequenceFileInputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapRunnerClass(MyMapRunner.class); conf.setReducerClass(MyReducer.class); JobClient.runJob(conf); return 0; }
From source file:sa.edu.kaust.twitter.index.BuildTweetsForwardIndex.java
License:Apache License
/** * Runs this tool./*from w w w . j a v a 2 s . com*/ */ public int run(String[] args) throws Exception { if (args.length != 2) { printUsage(); return -1; } JobConf conf = new JobConf(BuildTweetsForwardIndex.class); FileSystem fs = FileSystem.get(conf); int mapTasks = 10; sLogger.info("Tool: TweetsForwardIndex"); String postingsPath = args[0]; String forwardIndexPath = args[1]; if (!fs.exists(new Path(postingsPath))) { sLogger.info("Error: IntDocVectors don't exist!"); return 0; } // delete the output directory if it exists already //FileSystem.get(conf).delete(new Path(forwardIndexPath), true); if (fs.exists(new Path(forwardIndexPath))) { sLogger.info("PostingsForwardIndex already exists: skipping!"); return 0; } conf.set("ForwardIndexPath", forwardIndexPath); conf.setJobName("BuildTweetsForwardIndex"); Path inputPath = new Path(postingsPath); FileInputFormat.setInputPaths(conf, inputPath); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(1); conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.setInputFormat(SequenceFileInputFormat.class); conf.setMapOutputKeyClass(LongWritable.class); conf.setMapOutputValueClass(Text.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapRunnerClass(MyMapRunner.class); conf.setReducerClass(MyReducer.class); JobClient.runJob(conf); return 0; }
From source file:sg.edu.astar.dsi.mergespill.App.java
public synchronized static void doProcess(String directory, int spillNumber) throws IOException, InterruptedException { // TODO code application logic here System.out.println("directory: " + directory); System.out.println("numberOfSpill: " + spillNumber); //SETUP/* w w w .j av a2s . c om*/ JobConf job = new JobConf(); //job.setMapOutputKeyClass(Text.class); job.setMapOutputKeyClass(TextDsi.class); job.setMapOutputValueClass(IntWritable.class); //Class<Text> keyClass = (Class<Text>)job.getMapOutputKeyClass(); Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass(); Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass(); FileSystem rfs; CompressionCodec codec = null; Counters.Counter spilledRecordsCounter = null; rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw(); while (!new File(directory).isDirectory()) { sleep(5000); } if (new File(directory).isDirectory()) { ArrayList<Path> spillFile = new ArrayList(); ArrayList<Path> spillFileIndex = new ArrayList(); App myApp; myApp = new App(); myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber); ArrayList<SpillRecord> indexCacheList = new ArrayList<>(); int numSpills = 0; Iterator itrSpillFileIndex = spillFileIndex.iterator(); while (itrSpillFileIndex.hasNext()) { numSpills++; Path temp = (Path) itrSpillFileIndex.next(); System.out.println(temp); SpillRecord sr = new SpillRecord(temp, job); indexCacheList.add(sr); System.out.println("indexFile partition size: " + sr.size()); long startOffset = 0; for (int i = 0; i < sr.size(); i++) { //sr.size is the number of partitions IndexRecord ir = sr.getIndex(i); System.out.println("index[" + i + "] rawLength = " + ir.rawLength); System.out.println("index[" + i + "] partLength = " + ir.partLength); System.out.println("index[" + i + "] startOffset= " + ir.startOffset); startOffset = ir.startOffset; } System.out.println("========================================"); } System.out.println("Number of 
spills: " + numSpills); //FinalOutputFile Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE"); FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096); System.out.println("GOT HERE 1"); Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index"); //ONE PARTITION ONLY List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills); for (int i = 0; i < numSpills; i++) { IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0); Path temp = spillFileIndex.get(i); String temp1 = temp.toString(); String temp2 = temp1.substring(0, temp1.length() - 6); //System.out.println(temp2); //System.out.println(new Path(temp2).getParent()); //File myFile = new File(temp2); //System.out.println(myFile.getPath()); Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(temp2), theIndexRecord.startOffset, theIndexRecord.partLength, codec, true); segmentList.add(i, s); } System.out.println("GOT HERE 2"); RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4, new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null, spilledRecordsCounter, null, TaskType.MAP); System.out.println("GOT HERE 3"); //write merged output to disk long segmentStart = finalOut.getPos(); FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut); Writer<TextDsi, IntWritable> writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut, TextDsi.class, IntWritable.class, codec, spilledRecordsCounter); System.out.println("GOT HERE 4"); Merger.writeFile(kvIter, writer, null, job); writer.close(); finalOut.close(); System.out.println("GOT HERE 5"); IndexRecord rec = new IndexRecord(); final SpillRecord spillRec = new SpillRecord(1); rec.startOffset = segmentStart; rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job); rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job); 
System.out.println("rec.startOffset: " + rec.startOffset); System.out.println("rec.rawLength : " + rec.rawLength); System.out.println("rec.partLength : " + rec.partLength); spillRec.putIndex(rec, 0); spillRec.writeToFile(finalIndexFile, job); System.out.println("GOT HERE 6"); } else { System.out.println("argument is not a directory! : " + directory); } }
From source file:thinkbig.hadoop.inputformat.TestDocumentInputFormat.java
License:Open Source License
/**
 * Runs a job that reads {@code args[0]} through {@code DocumentInputFormat}
 * and writes to {@code args[1]}.
 *
 * @param args input path followed by output path; extra arguments are ignored
 * @return 0 when the job has run, -1 when fewer than two arguments are given
 * @throws Exception if the job fails
 */
@Override
public int run(String[] args) throws Exception {
    // Fail with a readable message rather than an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: TestDocumentInputFormat <input path> <output path>");
        return -1;
    }

    JobConf job = new JobConf();
    job.setInputFormat(DocumentInputFormat.class);
    job.set("docinput.prepend.key", "TRUE");
    DocumentInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapOutputKeyClass(Text.class);

    JobClient.runJob(job);
    return 0;
}
From source file:TVA.Hadoop.MapReduce.Development.Test_RecordReader_Alt.java
/**
 * The main driver for the Test_RecordReader_Alt map/reduce job.
 * Invoke this method to submit the map/reduce job.
 *
 * <p>Accepts the optional flags {@code -m <maps>} and {@code -r <reduces>};
 * exactly two other arguments must remain: the input path(s) and the
 * output path.
 *
 * @param args command-line arguments
 * @return 0 when the job has run, otherwise the value of {@code printUsage()}
 * @throws Exception if the job fails
 */
public int run(String[] args) throws Exception {
    final String connectionKey = "gov.tva.mapreduce.AverageFrequency.connectionstring";
    final String historianIdKey = "gov.tva.mapreduce.AverageFrequency.HistorianID";

    JobConf conf = new JobConf(getConf(), Test_RecordReader_Alt.class);
    conf.setJobName("Test_RecordReader_Alt");

    // Map output: IntWritable keys, StandardPointFile values.
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(StandardPointFile.class);

    // NOTE(review): this default embeds database credentials in source.
    // It is now only a fallback, so a value supplied through the
    // configuration is no longer overwritten. Move the credentials out of
    // the code.
    if (conf.get(connectionKey) == null) {
        conf.set(connectionKey,
                "jdbc:sqlserver://rgocdsql:1433; databaseName=PhasorMeasurementData;user=NaspiApp;password=pw4site;");
    }
    if (conf.get(historianIdKey) == null) {
        conf.set(historianIdKey, "2");
    }

    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(DatAware_InputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // i already points at the offending value here.
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // ++i ran past the end, so args[i - 1] is the flag lacking a value.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    /*
     * at this point, we need to check for a parameter that represents the id
     * of any other info we may need to view
     * --- then set the parameter in the job configuration
     * ex: conf.set( "gov.tva.AvgFreq.Company.ID", other_args.get( n ) );
     */

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);

    return 0;
}
From source file:TVA.Hadoop.Samples.TestRecordReader.java
/** * The main driver for word count map/reduce program. * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker./* w w w. ja v a 2s . c om*/ */ public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), TestRecordReader.class); conf.setJobName("TestRecordReader"); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(StandardPointFile.class); conf.setMapperClass(MapClass.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(HistorianInputFormat.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } FileInputFormat.setInputPaths(conf, other_args.get(0)); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }