List of usage examples for org.apache.hadoop.mapreduce.Job#setInputFormatClass
public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException
From source file:bigsidemodel.AutoCoder.java
License:Apache License
/** * Runs this tool.//ww w .j a v a 2 s . c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; }*/ //String inputPath = cmdline.getOptionValue(INPUT); //String outputPath = cmdline.getOptionValue(OUTPUT); String inputPath = "qiwang321/best5-mingled-key-56x56/part*"; String outputPath = "shangfu/bigoutput"; int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? 
Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath + "0"); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); conf.setInt("num_reduce_task", reduceTasks); conf.set("sidepath", outputPath + "_side/"); Job job0 = Job.getInstance(conf); job0.setJobName(AutoCoder.class.getSimpleName()); job0.setJarByClass(AutoCoder.class); // set the path of the information of k clusters in this iteration job0.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job0, new Path(inputPath)); FileOutputFormat.setOutputPath(job0, new Path(outputPath + "0")); job0.setInputFormatClass(KeyValueTextInputFormat.class); job0.setOutputFormatClass(SequenceFileOutputFormat.class); job0.setMapOutputKeyClass(PairOfInts.class); job0.setMapOutputValueClass(DataNode.class); job0.setOutputKeyClass(PairOfInts.class); job0.setOutputValueClass(DataNode.class); job0.setMapperClass(MyMapper0.class); job0.setReducerClass(MyReducer0.class); job0.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. 
Path outputDir = new Path(outputPath + "0"); FileSystem.get(getConf()).delete(outputDir, true); long codeStart = System.currentTimeMillis(); double jobTimeSum = 0; long startTime = System.currentTimeMillis(); job0.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; //======= Job 1 LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + outputPath + "0"); LOG.info(" - output path: " + outputPath + "1"); LOG.info(" - number of reducers: " + 1); int nModel = reduceTasks; reduceTasks = 1; Job job1 = Job.getInstance(conf); job1.setJobName(AutoCoder.class.getSimpleName()); job1.setJarByClass(AutoCoder.class); // set the path of the information of k clusters in this iteration job1.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job1, new Path(outputPath + "0")); FileOutputFormat.setOutputPath(job1, new Path(outputPath + "1")); job1.setInputFormatClass(SequenceFileInputFormat.class); job1.setOutputFormatClass(SequenceFileOutputFormat.class); job1.setMapOutputKeyClass(PairOfInts.class); job1.setMapOutputValueClass(DataNode.class); job1.setOutputKeyClass(NullWritable.class); job1.setOutputValueClass(NullWritable.class); job1.setMapperClass(MyMapper1.class); job1.setReducerClass(MyReducer1.class); job1.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath + "1"); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job1.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; LOG.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds, " + jobTimeSum + " seconds."); return 0; }
From source file:boostingPL.driver.AdaBoostPLDriver.java
License:Open Source License
@Override public int run(String[] args) throws Exception { int status = commandAnalysis(args); if (status != 0) { return status; }//from w w w . j av a 2 s. c o m @SuppressWarnings("deprecation") Job job = new Job(getConf()); job.setJobName("AdaBoostPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " " + numLinesPerMap + " " + numIterations); job.setJarByClass(AdaBoostPLDriver.class); job.setInputFormatClass(NLineInputFormat.class); NLineInputFormat.addInputPath(job, dataPath); NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap); if (runModel.equals("train")) { job.setMapperClass(AdaBoostPLMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(ClassifierWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(ClassifierWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, modelPath); } else { job.setMapperClass(AdaBoostPLTestMapper.class); job.setReducerClass(AdaBoostPLTestReducer.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); } Configuration conf = job.getConfiguration(); conf.set("BoostingPL.boostingName", "AdaBoost"); conf.set("BoostingPL.numIterations", String.valueOf(numIterations)); conf.set("BoostingPL.modelPath", modelPath.toString()); if (metadataPath == null) { conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata"); } else { conf.set("BoostingPL.metadata", metadataPath.toString()); } if (outputFolder != null) { conf.set("BoostingPL.outputFolder", outputFolder.toString()); } LOG.info(StringUtils.arrayToString(args)); return job.waitForCompletion(true) == true ? 0 : -1; }
From source file:boostingPL.driver.SAMMEPLDriver.java
License:Open Source License
/**
 * Configures and runs the SAMMEPL job in either training or testing mode.
 *
 * <p>Input is read through {@code NLineInputFormat} from {@code dataPath}. In "train" mode any
 * existing {@code modelPath} is removed and the new model is written there as a sequence file.
 * In any other mode the test mapper/reducer pair runs with {@code NullOutputFormat}, and
 * {@code modelPath} is left untouched: it is still published to the tasks as
 * {@code BoostingPL.modelPath}, so deleting it before a test run would remove what the job is
 * pointed at.
 *
 * @param args command-line arguments, interpreted by {@code commandAnalysis}
 * @return the non-zero status from {@code commandAnalysis} if argument analysis fails,
 *         otherwise 0 when the job succeeds and -1 when it does not
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {
    int status = commandAnalysis(args);
    if (status != 0) {
        return status;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    job.setJobName("SAMMEPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " "
            + numLinesPerMap + " " + numIterations);
    job.setJarByClass(SAMMEPLDriver.class);

    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dataPath);
    NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap);

    if (runModel.equals("train")) {
        // Only a training run produces a model, so only a training run may replace it.
        FileSystem fs = modelPath.getFileSystem(getConf());
        if (fs.exists(modelPath)) {
            fs.delete(modelPath, true);
        }
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, modelPath);

        job.setMapperClass(AdaBoostPLMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ClassifierWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(ClassifierWritable.class);
    } else {
        job.setMapperClass(AdaBoostPLTestMapper.class);
        job.setReducerClass(AdaBoostPLTestReducer.class);

        job.setOutputFormatClass(NullOutputFormat.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
    }

    Configuration conf = job.getConfiguration();
    conf.set("BoostingPL.boostingName", "SAMME");
    conf.set("BoostingPL.numIterations", String.valueOf(numIterations));
    conf.set("BoostingPL.modelPath", modelPath.toString());
    if (metadataPath == null) {
        conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata");
    } else {
        conf.set("BoostingPL.metadata", metadataPath.toString());
    }
    if (outputFolder != null) {
        conf.set("BoostingPL.outputFolder", outputFolder.toString());
    }

    LOG.info(StringUtils.arrayToString(args));
    return job.waitForCompletion(true) ? 0 : -1;
}
From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java
/** * NQueens working folder structure /nqueens/board-{x}/partial/solution_X-4 * * @param queensSize/*from w w w.ja v a 2s . c o m*/ * @throws IOException */ private void setWorkingFolder(int queensSize, Job job) throws IOException { Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) { System.exit(0); // ja foi processado anteriormente nao processa de novo } String lastSolution = null; Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/"); Path inputPath = null; Path outputPath = null; if (fs.exists(partialSolDir)) { RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir); while (dirsFound.hasNext()) { LocatedFileStatus path = dirsFound.next(); if (lastSolution == null) { lastSolution = path.getPath().getName(); inputPath = path.getPath(); } else { String currentDir = path.getPath().getName(); if (lastSolution.compareToIgnoreCase(currentDir) < 0) { lastSolution = currentDir; inputPath = path.getPath(); } } } } int currentSolutionSet = 0; if (inputPath == null) { inputPath = new Path("/nqueens/board-" + queensSize + "/seed"); if (!fs.exists(inputPath)) { FSDataOutputStream seedFile = fs.create(inputPath, true); seedFile.writeBytes(queensSize + "#"); seedFile.close(); } } // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(TextInputFormat.class); if (lastSolution != null) { String[] solution = lastSolution.split("-"); if (solution[0].equalsIgnoreCase("solution_" + queensSize)) { currentSolutionSet = Integer.parseInt(solution[1]) + 4; if (currentSolutionSet >= queensSize) { outputPath = new Path("/nqueens/board-" + queensSize + "/final"); } else { outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-" + currentSolutionSet); } } } else { outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4"); } // Output 
FileOutputFormat.setOutputPath(job, outputPath); job.setOutputFormatClass(TextOutputFormat.class); }
From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job) throws IOException { Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path returnPath = null;/*from w w w . ja v a2 s. c om*/ if (workingFolder == null) { workingFolder = ""; } Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/"); Path inputPath = null; Path outputPath = null; String nextRunPath = "run_1"; if (fs.exists(partialSolDir)) { RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir); String lastRunPath = null; Path lastPath = null; while (dirsFound.hasNext()) { LocatedFileStatus dir = dirsFound.next(); if (dir.isDirectory()) { if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) { lastPath = dir.getPath(); lastRunPath = lastPath.getName(); } } } if (lastRunPath != null) { String[] runParts = lastRunPath.split("_"); int lastRun = Integer.parseInt(runParts[1]); nextRunPath = runParts[0] + "_" + (++lastRun); inputPath = lastPath; } } if (inputPath == null) { inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed"); if (!fs.exists(inputPath)) { FSDataOutputStream seedFile = fs.create(inputPath, true); seedFile.writeBytes(queensSize + ":"); seedFile.close(); } } else { returnPath = inputPath; } // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(TextInputFormat.class); if (isFinal) { outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final"); } else { outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath); } // Output FileOutputFormat.setOutputPath(job, outputPath); job.setOutputFormatClass(TextOutputFormat.class); return returnPath; }
From source file:br.ufpr.inf.hpath.HPath.java
License:Apache License
/** * Execute the XPath query as a Hadoop job * @param xpath_query XPath query submitted by the user via cli. * @param inputFile XML file which has all data. * @param outputFile Query's result is stored in this file. * @throws Exception//from w w w .ja v a 2s . c o m */ public static void main(String[] args) throws Exception { if (args.length < 1) { System.out.println("USAGE: hpath [xpath_query] [input_file] [<output_dir>]"); System.exit(-1); } System.out.println("***************"); System.out.println(" Query -> " + args[2]); System.out.println(" Input -> " + args[0]); System.out.println(" Output -> " + args[1]); System.out.println("***************"); String xpath_query = args[2]; String inputFile = args[0]; String outputFile = args[1]; String tag = ""; // tag = getFisrtQueryTag(xpath_query); tag = getLastQueryTag(xpath_query); Configuration conf = new Configuration(); conf.set("xmlinput.start", "<" + tag); conf.set("xmlinput.end", "</" + tag + ">"); conf.set("xpath.query", xpath_query); @SuppressWarnings("deprecation") Job job = new Job(conf, "HPath"); FileSystem fs = FileSystem.get(conf); Path inFile = new Path(inputFile); Path outFile = new Path(outputFile); if (!fs.exists(inFile)) { System.out.println("error: Input file not found."); System.exit(-1); } if (!fs.isFile(inFile)) { System.out.println("error: Input should be a file."); System.exit(-1); } if (fs.exists(outFile)) { System.out.println("error: Output already exists."); System.exit(-1); } job.setJarByClass(HPath.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(XmlItemInputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, inFile); FileOutputFormat.setOutputPath(job, outFile); job.waitForCompletion(true); }
From source file:bulkload.ImportTsv.java
License:Apache License
/** * Sets up the actual job./*from w w w.jav a2s.c o m*/ * * @param conf * The current configuration. * @param args * The command line parameters. * @return The newly created job. * @throws IOException * When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { Job job = null; try (Connection connection = ConnectionFactory.createConnection(conf)) { try (Admin admin = connection.getAdmin()) { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. String actualSeparator = conf.get(SEPARATOR_CONF_KEY); if (actualSeparator != null) { conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes())); } TableName tableName = TableName.valueOf(args[0]); if (!admin.tableExists(tableName)) { String errorMsg = format("Table '%s' does not exist.", tableName); LOG.error(errorMsg); throw new TableNotFoundException(errorMsg); } Path inputDir = new Path(args[1]); String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString()); job = Job.getInstance(conf, jobName); job.setJarByClass(TsvImporter.class); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(TsvImporter.class); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); if (hfileOutPath != null) { try (HTable table = (HTable) connection.getTable(tableName)) { Path outputDir = new Path(hfileOutPath); FileSystem fs = FileSystem.get(conf); if (fs.exists(outputDir)) { if (!fs.delete(outputDir, true)) { throw new IllegalStateException("delete path:" + outputDir + " failed"); } } FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); job.setReducerClass(PutSortReducer.class); HFileOutputFormat2.configureIncrementalLoad(job, table, table); } } else { // No reducers. Just write straight to table. 
Call // initTableReducerJob // to set up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job); job.setNumReduceTasks(0); // TableMapReduceUtil.addDependencyJars(job); // TableMapReduceUtil.addDependencyJars(job.getConfiguration(), // com.google.common.base.Function.class /* Guava used by TsvParser */); } // Workaround to remove unnecessary hadoop dependencies String[] jars = job.getConfiguration().get("tmpjars").split(",", -1); StringBuilder filteredJars = new StringBuilder(); for (String j : jars) { String[] parts = j.split("/", -1); String fileName = parts[parts.length - 1]; if (fileName.indexOf("hadoop-") != 0) { filteredJars.append(j); filteredJars.append(","); } } job.getConfiguration().set("tmpjars", filteredJars.toString()); } } return job; }
From source file:byte_import.HexastoreBulkImport.java
License:Open Source License
public Job createSubmittableJob(String[] args) { TABLE_NAME = args[1];// w ww. ja v a 2s . c om Job job = null; try { job = new Job(new Configuration(), NAME); job.setJarByClass(HexastoreBulkImport.class); job.setMapperClass(sampler.TotalOrderPrep.Map.class); job.setReducerClass(Reduce.class); job.setCombinerClass(Combiner.class); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(ImmutableBytesWritable.class); job.setPartitionerClass(TotalOrderPartitioner.class); //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000")); TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000")); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(HFileOutputFormat.class); Path out = new Path("out"); FileOutputFormat.setOutputPath(job, out); Configuration conf = new Configuration(); FileSystem fs; try { fs = FileSystem.get(conf); if (fs.exists(out)) { fs.delete(out, true); } } catch (IOException e) { e.printStackTrace(); } HBaseAdmin hadmin = new HBaseAdmin(conf); HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats"); HColumnDescriptor family = new HColumnDescriptor("size"); desc.addFamily(family); conf.setInt("zookeeper.session.timeout", 600000); if (hadmin.tableExists(TABLE_NAME + "_stats")) { //hadmin.disableTable(TABLE_NAME+"_stats"); //hadmin.deleteTable(TABLE_NAME+"_stats"); } else { hadmin.createTable(desc); } FileInputFormat.setInputPaths(job, new Path(args[0])); //job.getConfiguration().setInt("mapred.map.tasks", 18); job.getConfiguration().set("h2rdf.tableName", TABLE_NAME); job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); job.getConfiguration().setInt("io.sort.mb", 100); 
job.getConfiguration().setInt("io.file.buffer.size", 131072); job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1); //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864); job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432); job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5); job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5); //job.getConfiguration().setInt("io.sort.mb", 100); } catch (IOException e2) { e2.printStackTrace(); } return job; }
From source file:ca.uwaterloo.cs.bigdata2017w.assignment4.BuildPersonalizedPageRankRecords.java
License:Apache License
/** * Runs this tool.//from w w w . j a va 2 s. co m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption( OptionBuilder.withArgName("sources").hasArg().withDescription("source nodes").create(SOURCES)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); String sourcesString = cmdline.getOptionValue(SOURCES); String[] sources = sourcesString.split(","); for (int i = 0; i < sources.length; i++) { sources[i] = sources[i].trim(); } LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName()); LOG.info(" - inputDir: " + inputPath); LOG.info(" - outputDir: " + outputPath); LOG.info(" - numNodes: " + n); LOG.info(" - use sources: " + sourcesString); Configuration conf = getConf(); conf.setInt(NODE_CNT_FIELD, n); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); conf.setStrings(SOURCES, sources); Job job = Job.getInstance(conf); 
job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath); job.setJarByClass(BuildPersonalizedPageRankRecords.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); job.setMapperClass(MyMapper.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
From source file:ca.uwaterloo.iss4e.hadoop.meterperfile.ThreelMain.java
License:Open Source License
public int run(String[] args) throws IOException { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.meterperfile.ThreelMain <input> <output>"); System.exit(2);/* ww w . ja va 2s. c o m*/ } conf.set("mapreduce.input.fileinputformat.split.maxsize", "100"); Job job = new Job(conf, "ThreelMain"); job.setJarByClass(ThreelMain.class); job.setInputFormatClass(UnsplitableTextInputFormat.class); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(0); // job.setOutputKeyClass(LongWritable.class); //job.setOutputValueClass(Text.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.out.println("\nStarting Job ..."); final long startTime = System.currentTimeMillis(); try { if (!job.waitForCompletion(true)) { System.out.println("Job failed."); System.exit(1); } } catch (Exception e) { throw new RuntimeException(e); } finally { final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Duration is " + duration + " seconds."); } return 0; }