List of usage examples for org.apache.hadoop.mapred.JobConf#setMapOutputValueClass. This method declares the class of the values emitted by map tasks; it is needed only when the intermediate value class differs from the job's final output value class, since by default Hadoop assumes the two are the same.

public void setMapOutputValueClass(Class<?> theClass)
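Before the project-specific examples, here is a minimal, self-contained sketch of the typical pattern (not drawn from any of the projects below; the TokenLengthExample class, its mapper/reducer, and the input/output arguments are illustrative assumptions). The mapper emits IntWritable values while the reducer writes Text values, so the intermediate value class must be declared explicitly with setMapOutputValueClass.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class TokenLengthExample {

    // Mapper emits (token, token length): the map output value type is IntWritable.
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> out,
                Reporter reporter) throws IOException {
            for (String token : value.toString().split("\\s+")) {
                out.collect(new Text(token), new IntWritable(token.length()));
            }
        }
    }

    // Reducer writes (token, "len=N"): the final value type is Text, different from the map output value type.
    public static class LengthReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, Text> {
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, Text> out,
                Reporter reporter) throws IOException {
            if (values.hasNext()) {
                out.collect(key, new Text("len=" + values.next().get()));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(TokenLengthExample.class);
        conf.setJobName("setMapOutputValueClass-example");
        conf.setMapperClass(TokenMapper.class);
        conf.setReducerClass(LengthReducer.class);
        // Final (reduce-side) output types:
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        // The intermediate value type differs from the final one, so declare it explicitly.
        // (The map output key type is Text in both stages, so no setMapOutputKeyClass call is needed.)
        conf.setMapOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));   // hypothetical input path argument
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));  // hypothetical output path argument
        JobClient.runJob(conf);
    }
}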
From source file:org.smartfrog.services.hadoop.benchmark.citerank.UpdateRanks.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        return usage("<input path> <output path> <number of pages> <dangling pages contribution>");
    }
    JobConf conf = createInputOutputConfiguration(args);
    conf.set(CiteRankTool.RANK_COUNT, args[2]);
    conf.set(CiteRankTool.RANK_DANGLING, args[3]);
    conf.setMapperClass(UpdateRanksMapper.class);
    conf.setReducerClass(UpdateRanksReducer.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setNumMapTasks(CiteRankTool.NUM_MAP_TASKS);
    conf.setNumReduceTasks(CiteRank.NUM_REDUCE_TASKS);
    return runJob(conf);
}
From source file:org.terrier.applications.HadoopIndexing.java
License:Mozilla Public License
/** Starts the MapReduce indexing.
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    long time = System.currentTimeMillis();
    boolean docPartitioned = false;
    int numberOfReducers = Integer
            .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26"));
    final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing");
    if (args.length == 2 && args[0].equals("-p")) {
        logger.info("Document-partitioned Mode, " + numberOfReducers + " output indices.");
        numberOfReducers = Integer.parseInt(args[1]);
        docPartitioned = true;
    } else if (args.length == 1 && args[0].equals("--merge")) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        else
            logger.error("No point merging 1 reduce task output");
        return;
    } else if (args.length == 0) {
        logger.info("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index.");
        docPartitioned = false;
        if (numberOfReducers > MAX_REDUCE) {
            logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use "
                    + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most");
        }
    } else {
        logger.fatal(usage());
        return;
    }
    if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0],
            false) instanceof BitCompressionConfiguration)) {
        logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing"
                + " - you can recompress the inverted index later using IndexRecompressor");
        return;
    }
    if (jf == null)
        throw new Exception("Could not get JobFactory from HadoopPlugin");
    final JobConf conf = jf.newJob();
    conf.setJobName("terrierIndexing");
    if (Files.exists(ApplicationSetup.TERRIER_INDEX_PATH)
            && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return;
    }
    boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING;
    if (blockIndexing) {
        conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class);
    } else {
        conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class);
    }
    FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH));
    conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX);
    conf.setMapOutputKeyClass(SplitEmittedTerm.class);
    conf.setMapOutputValueClass(MapEmittedPostingList.class);
    conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned);
    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
    } else {
        conf.setCompressMapOutput(false);
    }
    conf.setInputFormat(MultiFileCollectionInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class);
    conf.setReduceSpeculativeExecution(false);
    // parse the collection.spec
    BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC);
    String line = null;
    List<Path> paths = new ArrayList<Path>();
    while ((line = specBR.readLine()) != null) {
        if (line.startsWith("#"))
            continue;
        paths.add(new Path(line));
    }
    specBR.close();
    FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()]));
    conf.setNumReduceTasks(numberOfReducers);
    if (numberOfReducers > 1) {
        if (docPartitioned)
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class);
        else
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
    } else {
        // for JUnit tests, we seem to need to restore the original partitioner class
        conf.setPartitionerClass(HashPartitioner.class);
    }
    JobID jobId = null;
    boolean ranOK = true;
    try {
        RunningJob rj = JobClient.runJob(conf);
        jobId = rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        logger.error("Problem running job", e);
        ranOK = false;
    }
    if (jobId != null) {
        deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId);
    }
    if (ranOK) {
        if (!docPartitioned) {
            if (numberOfReducers > 1)
                mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        }
        Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH,
                docPartitioned ? numberOfReducers : 1, jf);
    }
    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
    jf.close();
}
From source file:org.terrier.structures.indexing.CompressingMetaIndexBuilder.java
License:Mozilla Public License
/**
 * reverseAsMapReduceJob
 * @param index
 * @param structureName
 * @param keys
 * @param jf
 * @throws Exception
 */
//@SuppressWarnings("deprecation")
public static void reverseAsMapReduceJob(IndexOnDisk index, String structureName, String[] keys,
        HadoopPlugin.JobFactory jf) throws Exception {
    long time = System.currentTimeMillis();
    final JobConf conf = jf.newJob();
    conf.setJobName("Reverse MetaIndex");
    conf.setMapOutputKeyClass(KeyValueTuple.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setMapperClass(MapperReducer.class);
    conf.setReducerClass(MapperReducer.class);
    conf.setNumReduceTasks(keys.length);
    conf.setPartitionerClass(KeyedPartitioner.class);
    conf.setInputFormat(CompressingMetaIndexInputFormat.class);
    conf.setReduceSpeculativeExecution(false);
    conf.set("MetaIndexInputStreamRecordReader.structureName", structureName);
    conf.setInt("CompressingMetaIndexBuilder.reverse.keyCount", keys.length);
    conf.set("CompressingMetaIndexBuilder.reverse.keys", ArrayUtils.join(keys, ","));
    conf.set("CompressingMetaIndexBuilder.forward.valueLengths",
            index.getIndexProperty("index." + structureName + ".value-lengths", ""));
    conf.set("CompressingMetaIndexBuilder.forward.keys",
            index.getIndexProperty("index." + structureName + ".key-names", ""));
    FileOutputFormat.setOutputPath(conf, new Path(index.getPath()));
    HadoopUtility.toHConfiguration(index, conf);
    conf.setOutputFormat(NullOutputFormat.class);
    try {
        RunningJob rj = JobClient.runJob(conf);
        rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        throw new Exception("Problem running job to reverse metadata", e);
    }
    // only update the index from the controlling process, so that we dont have locking/concurrency issues
    index.setIndexProperty("index." + structureName + ".reverse-key-names", ArrayUtils.join(keys, ","));
    index.flush();
    logger.info("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
}
From source file:org.warcbase.index.IndexerRunner.java
License:Apache License
@SuppressWarnings("static-access")
public int run(String[] args) throws IOException, ParseException {
    LOG.info("Initializing indexer...");
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("file").hasArg().withDescription("input file list").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("HDFS index output path")
            .create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of shards")
            .create(SHARDS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("config file (optional)")
            .create(CONFIG_OPTION));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)
            || !cmdline.hasOption(SHARDS_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String configPath = null;
    if (cmdline.hasOption(CONFIG_OPTION)) {
        configPath = cmdline.getOptionValue(CONFIG_OPTION);
    }
    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputPath = cmdline.getOptionValue(INDEX_OPTION);
    int shards = Integer.parseInt(cmdline.getOptionValue(SHARDS_OPTION));
    JobConf conf = new JobConf(getConf(), IndexerRunner.class);
    if (configPath == null) {
        LOG.info("Config not specified, using default src/main/solr/WARCIndexer.conf");
        configPath = "src/main/solr/WARCIndexer.conf";
    }
    File configFile = new File(configPath);
    if (!configFile.exists()) {
        LOG.error("Error: config does not exist!");
        System.exit(-1);
    }
    Config config = ConfigFactory.parseFile(configFile);
    conf.set(CONFIG_PROPERTIES, config.withOnlyPath("warc").root().render(ConfigRenderOptions.concise()));
    FileSystem fs = FileSystem.get(conf);
    LOG.info("HDFS index output path: " + outputPath);
    conf.set(IndexerReducer.HDFS_OUTPUT_PATH, outputPath);
    if (fs.exists(new Path(outputPath))) {
        LOG.error("Error: path exists already!");
        System.exit(-1);
    }
    LOG.info("Number of shards: " + shards);
    conf.setInt(IndexerMapper.NUM_SHARDS, shards);
    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");
    conf.setJobName(IndexerRunner.class.getSimpleName() + ": " + inputPath);
    conf.setInputFormat(ArchiveFileInputFormat.class);
    conf.setMapperClass(IndexerMapper.class);
    conf.setReducerClass(IndexerReducer.class);
    conf.setOutputFormat(NullOutputFormat.class);
    // Ensure the JARs we provide take precedence over ones from Hadoop:
    conf.setBoolean("mapreduce.job.user.classpath.first", true);
    // Also set reduce speculative execution off, avoiding duplicate submissions to Solr.
    conf.setBoolean("mapreduce.reduce.speculative", false);
    // Note that we need this to ensure FileSystem.get is thread-safe:
    // @see https://issues.apache.org/jira/browse/HDFS-925
    // @see https://mail-archives.apache.org/mod_mbox/hadoop-user/201208.mbox/%3CCA+4kjVt-QE2L83p85uELjWXiog25bYTKOZXdc1Ahun+oBSJYpQ@mail.gmail.com%3E
    conf.setBoolean("fs.hdfs.impl.disable.cache", true);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(WritableSolrRecord.class);
    conf.setNumReduceTasks(shards); // number of reducers = number of shards
    cacheSolrHome(conf, solrHomeZipName);
    JobClient.runJob(conf);
    return 0;
}
From source file:org.zuinnote.hadoop.bitcoin.example.driver.BitcoinBlockCounterDriver.java
License:Apache License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(BitcoinBlockCounterDriver.class);
    conf.setJobName("example-hadoop-bitcoin-transactioncounter-job");
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(BitcoinBlockMap.class);
    conf.setReducerClass(BitcoinBlockReducer.class);
    conf.setInputFormat(BitcoinBlockFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    /** Set as an example some of the options to configure the Bitcoin fileformat **/
    /** Find here all configuration options: https://github.com/ZuInnoTe/hadoopcryptoledger/wiki/Hadoop-File-Format **/
    conf.set("hadoopcryptoledger.bitcoinblockinputformat.filter.magic", "F9BEB4D9");
    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
}
From source file:org.zuinnote.hadoop.bitcoin.example.driver.BitcoinTransactionCounterDriver.java
License:Apache License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(BitcoinTransactionCounterDriver.class);
    conf.setJobName("example-hadoop-bitcoin-transactioninputcounter-job");
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(BitcoinTransactionMap.class);
    conf.setReducerClass(BitcoinTransactionReducer.class);
    conf.setInputFormat(BitcoinTransactionFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
}
From source file:pathmerge.linear.MergePathH1Driver.java
License:Apache License
public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
        int mergeRound, String defaultConfPath) throws IOException {
    JobConf conf = new JobConf(MergePathH1Driver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }
    conf.setJobName("Initial Path-Starting-Points Table");
    conf.setMapperClass(SNodeInitialMapper.class);
    conf.setReducerClass(SNodeInitialReducer.class);
    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(MergePathValueWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    String singlePointPath = "comSinglePath0";
    MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    conf.setOutputKeyClass(VKmerBytesWritable.class);
    conf.setOutputValueClass(MergePathValueWritable.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
    conf.setNumReduceTasks(numReducers);
    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(inputPath + "stepNext"), true);
    JobClient.runJob(conf);
    dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath),
            new Path(mergeResultPath + "/" + singlePointPath));
    int iMerge = 0;
    /*----------------------------------------------------------------------*/
    for (iMerge = 1; iMerge <= mergeRound; iMerge++) {
        // if (!dfs.exists(new Path(inputPath + "-step1")))
        //     break;
        conf = new JobConf(MergePathH1Driver.class);
        conf.setInt("sizeKmer", sizeKmer);
        conf.setInt("iMerge", iMerge);
        if (defaultConfPath != null) {
            conf.addResource(new Path(defaultConfPath));
        }
        conf.setJobName("Path Merge");
        conf.setMapperClass(MergePathH1Mapper.class);
        conf.setReducerClass(MergePathH1Reducer.class);
        conf.setMapOutputKeyClass(VKmerBytesWritable.class);
        conf.setMapOutputValueClass(MergePathValueWritable.class);
        conf.setInputFormat(SequenceFileInputFormat.class);
        String uncompSinglePath = "uncompSinglePath" + iMerge;
        String comSinglePath = "comSinglePath" + iMerge;
        String comCircle = "comCircle" + iMerge;
        MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        conf.setOutputKeyClass(VKmerBytesWritable.class);
        conf.setOutputValueClass(MergePathValueWritable.class);
        FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
        conf.setNumReduceTasks(numReducers);
        dfs.delete(new Path(outputPath), true);
        JobClient.runJob(conf);
        dfs.delete(new Path(inputPath + "stepNext"), true);
        dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
        dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
        dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
    }
}
From source file:pathmerge.log.MergePathH2Driver.java
License:Apache License
public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
        int mergeRound, String defaultConfPath) throws IOException {
    JobConf conf = new JobConf(MergePathH2Driver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }
    conf.setJobName("Initial Path-Starting-Points Table");
    conf.setMapperClass(SNodeInitialMapper.class);
    conf.setReducerClass(SNodeInitialReducer.class);
    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(MergePathValueWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    String singlePointPath = "comSinglePath0";
    MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    conf.setOutputKeyClass(VKmerBytesWritable.class);
    conf.setOutputValueClass(MergePathValueWritable.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
    conf.setNumReduceTasks(numReducers);
    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(inputPath + "stepNext"), true);
    JobClient.runJob(conf);
    dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath),
            new Path(mergeResultPath + "/" + singlePointPath));
    int iMerge = 0;
    for (iMerge = 1; iMerge <= mergeRound; iMerge++) {
        // if (!dfs.exists(new Path(inputPath + "-step1")))
        //     break;
        conf = new JobConf(MergePathH2Driver.class);
        conf.setInt("sizeKmer", sizeKmer);
        conf.setInt("iMerge", iMerge);
        if (defaultConfPath != null) {
            conf.addResource(new Path(defaultConfPath));
        }
        conf.setJobName("Path Merge");
        conf.setMapperClass(MergePathH2Mapper.class);
        conf.setReducerClass(MergePathH2Reducer.class);
        conf.setMapOutputKeyClass(VKmerBytesWritable.class);
        conf.setMapOutputValueClass(MergePathValueWritable.class);
        conf.setInputFormat(SequenceFileInputFormat.class);
        String uncompSinglePath = "uncompSinglePath" + iMerge;
        String comSinglePath = "comSinglePath" + iMerge;
        String comCircle = "comCircle" + iMerge;
        MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        conf.setOutputKeyClass(VKmerBytesWritable.class);
        conf.setOutputValueClass(MergePathValueWritable.class);
        FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
        conf.setNumReduceTasks(numReducers);
        dfs.delete(new Path(outputPath), true);
        JobClient.runJob(conf);
        dfs.delete(new Path(inputPath + "stepNext"), true);
        dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
        dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
        dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
    }
    /*
    conf = new JobConf(MergePathH2Driver.class);
    conf.setInt("sizeKmer", sizeKmer);
    conf.setInt("iMerge", iMerge);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }
    conf.setJobName("Path Merge");
    conf.setMapperClass(MergePathH2Mapper.class);
    conf.setReducerClass(MergePathH2Reducer.class);
    conf.setMapOutputKeyClass(VKmerBytesWritable.class);
    conf.setMapOutputValueClass(MergePathValueWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    String uncompSinglePath = "uncompSinglePath" + iMerge;
    String comSinglePath = "comSinglePath" + iMerge;
    String comCircle = "comCircle" + iMerge;
    MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiTextOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiTextOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiTextOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    conf.setOutputKeyClass(VKmerBytesWritable.class);
    conf.setOutputValueClass(MergePathValueWritable.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setNumReduceTasks(numReducers);
    dfs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
    dfs.delete(new Path(inputPath + "stepNext"), true);
    dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
    dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
    dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
    */
}
From source file:PDI.Hadoop.Datamining.Tools.HistorianParser.java
/**
 * The main driver for historian map/reduce program. Invoke this method to
 * submit the map/reduce job.
 *
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), HistorianParser.class);
    JobClient jobClient = new JobClient(conf);
    List<String> sourcePaths = new ArrayList<String>();
    String destPath = "";
    String currentDate = DateUtils.getCurrentDateString();
    String startTS = "";
    String endTS = "";
    String pointIDS = "";
    String outputSize = "";
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(StandardPointFile.class);
    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(ReduceClass.class);
    conf.setInputFormat(HistorianInputFormat.class);
    conf.set("compression", "no");
    conf.set("filePrefix", "devarchive_archive_");
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-startTS".equals(args[i])) {
                conf.set("startTS", args[++i]);
                startTS = args[i];
            } else if ("-endTS".equals(args[i])) {
                conf.set("endTS", args[++i]);
                endTS = args[i];
            } else if ("-pointIDS".equals(args[i])) {
                conf.set("pointIDS", args[++i]);
                pointIDS = args[i];
            } else if ("-outputMaxSize".equals(args[i])) {
                conf.set("outputSize", args[++i]);
                outputSize = args[i];
            } else if ("-sourcePATH".equals(args[i])) {
                String sourcePath = "" + args[++i];
                if (sourcePath.indexOf(',') == -1) {
                    sourcePaths.add(sourcePath);
                } else {
                    String[] paths = sourcePath.split(",");
                    for (int ii = 0; ii < paths.length; ii++) {
                        sourcePaths.add(paths[ii]);
                    }
                }
            } else if ("-destPATH".equals(args[i])) {
                destPath = "" + args[++i] + "/";
            } else if ("-compression".equals(args[i])) {
                conf.set("compression", args[++i]);
            } else if ("-filePrefix".equals(args[i])) {
                conf.set("filePrefix", args[++i]);
            } else if ("-v".equals(args[i])) {
                pdi_showVersion();
                return 0;
            } else if ("-verbose".equals(args[i])) {
                this.pdi_setVerbose(true);
            } else if ("-h".equals(args[i])) {
                return printUsage();
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Check for the user input parameters
    if ((0 == sourcePaths.size()) || destPath.equals("") || startTS.equals("") || endTS.equals("")
            || pointIDS.equals("") || outputSize.equals("") || (0 == conf.get("filePrefix").length())) {
        System.out.println("ERROR: Wrong input parameters.");
        return printUsage();
    }
    String startTime = DateUtils.unixTimestampToHumanReadableTime2(startTS);
    String endTime = DateUtils.unixTimestampToHumanReadableTime2(endTS);
    System.out.println("-------------------------------------------------------");
    System.out.println("jobName : " + currentDate);
    System.out.println("filePrefix : " + conf.get("filePrefix"));
    for (int i = 0; i < sourcePaths.size(); i++) {
        System.out.println("sourcePath[" + i + "]: " + sourcePaths.get(i));
    }
    System.out.println("destPath : " + destPath);
    System.out.println("startTS : " + startTS + " (" + startTime + ")");
    System.out.println("endTS : " + endTS + " (" + endTime + ")");
    System.out.println("pointIDS : " + pointIDS);
    System.out.println("outputMaxSize: " + outputSize + " MB");
    System.out.println("compression : " + conf.get("compression"));
    System.out.println("-------------------------------------------------------");
    PathUtils utils = new PathUtils(this.pdi_isVerbose());
    if (false == utils.pdi_setRecursiveInputPaths(conf, sourcePaths, startTS, endTS)) {
        return -1;
    }
    // set output path to current time
    FileOutputFormat.setOutputPath(conf, utils.getOutputPath(destPath, currentDate));
    // set jobName to current time
    // conf.setJobName(date.toString());
    conf.setJobName(currentDate);
    JobClient.runJob(conf); // run the job
    // mergeAndCopyToLocal(conf, destPath);
    return 0;
}
From source file:pegasus.heigen.SaxpyTextoutput.java
License:Apache License
protected JobConf configSaxpyTextoutput(Path py, Path px, Path saxpy_output, double a) throws Exception {
    final JobConf conf = new JobConf(getConf(), SaxpyTextoutput.class);
    conf.set("y_path", py.getName());
    conf.set("x_path", px.getName());
    conf.set("a", "" + a);
    conf.setJobName("SaxpyTextoutput");
    conf.setMapperClass(SaxpyTextoutput.MapStage1.class);
    conf.setReducerClass(SaxpyTextoutput.RedStage1.class);
    FileInputFormat.setInputPaths(conf, py, px);
    FileOutputFormat.setOutputPath(conf, saxpy_output);
    conf.setNumReduceTasks(nreducers);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(DoubleWritable.class);
    conf.setOutputValueClass(Text.class);
    return conf;
}