List of usage examples for the org.apache.hadoop.fs.Path constructor Path
public Path(URI aUri)
From source file:BuildPageRankRecords.java
License:Apache License
/** * Runs this tool./*from w w w . j a v a 2s . c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); LOG.info("Tool name: " + BuildPageRankRecords.class.getSimpleName()); LOG.info(" - inputDir: " + inputPath); LOG.info(" - outputDir: " + outputPath); LOG.info(" - numNodes: " + n); Configuration conf = getConf(); conf.setInt(NODE_CNT_FIELD, n); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); Job job = Job.getInstance(conf); job.setJobName(BuildPageRankRecords.class.getSimpleName() + ":" + inputPath); job.setJarByClass(BuildPageRankRecords.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); 
job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); job.setMapperClass(MyMapper.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
From source file:SleepJobWithArray.java
License:Apache License
/**
 * Builds the {@link JobConf} for a sleep job.
 *
 * <p>{@code SleepJobWithArray} itself is registered as mapper, reducer and partitioner; input comes
 * from {@code SleepInputFormat} and output goes to {@code NullOutputFormat}. Speculative execution
 * is switched off.
 *
 * @param numMapper        number of map tasks
 * @param numReducer       number of reduce tasks
 * @param mapSleepTime     value stored under {@code sleep.job.map.sleep.time}
 * @param mapSleepCount    value stored under {@code sleep.job.map.sleep.count}
 * @param reduceSleepTime  value stored under {@code sleep.job.reduce.sleep.time}
 * @param reduceSleepCount value stored under {@code sleep.job.reduce.sleep.count}
 * @return the fully populated job configuration
 */
public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) {
    final JobConf sleepConf = new JobConf(getConf(), SleepJobWithArray.class);

    // Task counts.
    sleepConf.setNumMapTasks(numMapper);
    sleepConf.setNumReduceTasks(numReducer);

    // This class plays every role in the job.
    sleepConf.setMapperClass(SleepJobWithArray.class);
    sleepConf.setReducerClass(SleepJobWithArray.class);
    sleepConf.setPartitionerClass(SleepJobWithArray.class);

    // Intermediate types and I/O formats.
    sleepConf.setMapOutputKeyClass(IntWritable.class);
    sleepConf.setMapOutputValueClass(NullWritable.class);
    sleepConf.setInputFormat(SleepInputFormat.class);
    sleepConf.setOutputFormat(NullOutputFormat.class);

    sleepConf.setSpeculativeExecution(false);
    FileInputFormat.addInputPath(sleepConf, new Path("ignored"));

    // Sleep parameters read back by the tasks.
    sleepConf.setLong("sleep.job.map.sleep.time", mapSleepTime);
    sleepConf.setInt("sleep.job.map.sleep.count", mapSleepCount);
    sleepConf.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
    sleepConf.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);

    return sleepConf;
}
From source file:Distinct.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: Distinct <in> <out>"); System.exit(2);//from w ww . j a v a 2s . com } Job job = Job.getInstance(conf, "distinct1"); job.setJarByClass(Distinct.class); job.setMapperClass(TokenizerMapper1.class); job.setReducerClass(Reducer1.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(tempDir)); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.waitForCompletion(true); Configuration conf2 = new Configuration(); Job job2 = Job.getInstance(conf2, "distinct2"); job2.setJarByClass(Distinct.class); job2.setMapperClass(TokenizerMapper2.class); job2.setReducerClass(Reducer2.class); FileInputFormat.addInputPath(job2, new Path(tempDir)); FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1])); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(IntWritable.class); System.exit(job2.waitForCompletion(true) ? 0 : 1); }
From source file:WriteFDFFixedLengthRecord.java
License:Open Source License
/**
 * Writes a test data file at {@code /indextest/testfile1} containing 200 two-field records.
 *
 * <p>Any existing file at that path is deleted first. The header declares two fields (indexes 0
 * and 1) and uses field 0 as the primary index; record {@code i} stores {@code (byte) i} in both
 * fields.
 *
 * @param args unused
 * @throws Exception if the file system or the data file reports an error
 */
public static void main(String[] args) throws Exception {
    FormatDataFile dataFile = new FormatDataFile(new Configuration());

    // Start from a clean slate: drop whatever a previous run left behind.
    String targetFile = "/indextest/testfile1";
    FileSystem.get(new Configuration()).delete(new Path(targetFile), true);

    // Describe the record layout.
    Head header = new Head();
    FieldMap layout = new FieldMap();
    layout.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
    layout.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Byte, (short) 1));
    header.setFieldMap(layout);
    header.setPrimaryIndex((short) 0);

    dataFile.create(targetFile, header);

    int index = 0;
    while (index < 200) {
        // Both fields carry the same value, narrowed to a byte.
        byte cell = (byte) index;
        Record row = new Record(2);
        row.addValue(new FieldValue(cell, (short) 0));
        row.addValue(new FieldValue(cell, (short) 1));
        dataFile.addRecord(row);
        index++;
    }

    dataFile.close();
}
From source file:DocToSeq.java
License:Apache License
public static void main(String args[]) throws Exception { if (args.length != 2) { System.err.println("Arguments: [input tsv file] [output sequence file]"); return;//from w w w. j a va 2 s. com } String inputFileName = args[0]; String outputDirName = args[1]; Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(configuration); Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"), Text.class, Text.class); int count = 0; BufferedReader reader = new BufferedReader(new FileReader(inputFileName)); Text key = new Text(); Text value = new Text(); while (true) { String line = reader.readLine(); if (line == null) { break; } String[] tokens = line.split("\t", 3); if (tokens.length != 3) { System.out.println("Skip line: " + line); continue; } String category = tokens[0]; String id = tokens[1]; String message = tokens[2]; key.set("/" + category + "/" + id); value.set(message); writer.append(key, value); count++; } reader.close(); writer.close(); System.out.println("Wrote " + count + " entries."); }
From source file:AnagramJob.java
License:Apache License
/** * @param args// ww w . ja v a2 s. c om */ public static void main(String[] args) throws Exception { JobConf conf = new JobConf(AnagramJob.class); conf.setJobName("anagramcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(AnagramMapper.class); // conf.setCombinerClass(AnagramReducer.class); conf.setReducerClass(AnagramReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); }
From source file:BigBWA.java
License:Open Source License
@Override public int run(String[] args) throws Exception { Configuration conf = this.getConf(); for (String argumento : args) { LOG.info("Arg: " + argumento); }/* w ww.j a va 2 s.c om*/ String inputPath = ""; String outputPath = ""; boolean useReducer = false; BwaOptions options = new BwaOptions(args); //We set the timeout and stablish the bwa library to call BWA methods conf.set("mapreduce.task.timeout", "0"); conf.set("mapreduce.map.env", "LD_LIBRARY_PATH=./bwa.zip/"); //==================Algorithm election================== //One of the algorithms is going to be in use, because tge default is always specified. if (options.isMemAlgorithm()) { //Case of the mem algorithm conf.set("mem", "true"); conf.set("aln", "false"); conf.set("bwasw", "false"); } else if (options.isAlnAlgorithm()) { // Case of aln algorithm conf.set("mem", "false"); conf.set("aln", "true"); conf.set("bwasw", "false"); } else if (options.isBwaswAlgorithm()) { // Case of bwasw algorithm conf.set("mem", "false"); conf.set("aln", "false"); conf.set("bwasw", "true"); } //==================Index election================== if (options.getIndexPath() != "") { conf.set("indexRoute", options.getIndexPath()); } else { System.err.println("No index has been found. 
Aborting."); System.exit(1); } //==================Type of reads election================== //There is always going to be a type of reads, because default is paired if (options.isPairedReads()) { conf.set("paired", "true"); conf.set("single", "false"); } else if (options.isSingleReads()) { conf.set("paired", "false"); conf.set("single", "true"); } //==================Use of reducer================== if (options.isUseReducer()) { useReducer = true; conf.set("useReducer", "true"); } else { conf.set("useReducer", "false"); } //==================Number of threads per map================== if (options.getNumThreads() != "0") { conf.set("bwathreads", options.getNumThreads()); } //==================RG Header=================== if (options.getReadgroupHeader() != "") { conf.set("rgheader", options.getReadgroupHeader()); } //==================Input and output paths================== inputPath = options.getInputPath(); outputPath = options.getOutputPath(); conf.set("outputGenomics", outputPath); //==================Partition number================== if (options.getPartitionNumber() != 0) { try { FileSystem fs = FileSystem.get(conf); Path inputFilePath = new Path(inputPath); ContentSummary cSummary = fs.getContentSummary(inputFilePath); long length = cSummary.getLength(); fs.close(); conf.set("mapreduce.input.fileinputformat.split.maxsize", String.valueOf((length) / options.getPartitionNumber())); conf.set("mapreduce.input.fileinputformat.split.minsize", String.valueOf((length) / options.getPartitionNumber())); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); LOG.error(e.toString()); System.exit(1); } } //Job job = new Job(conf,"BigBWA_"+outputPath); Job job = Job.getInstance(conf, "BigBWA_" + outputPath); job.setJarByClass(BigBWA.class); job.setMapperClass(BigBWAMap.class); //job.setCombinerClass(BigBWACombiner.class); if (useReducer) { job.setReducerClass(BigBWAReducer.class); job.setMapOutputKeyClass(IntWritable.class); 
job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(1); } else { job.setNumReduceTasks(0); } job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:CountHistogram.java
License:Open Source License
/**
 * Configures and runs the {@code CountHistogram} job.
 *
 * <p>Reads text from {@code args[0]} and writes text output to {@code args[1]}, using
 * {@code Map} and {@code Reduce} with {@code IntWritable} output keys and values. Any exception
 * has its stack trace printed before being rethrown to the caller.
 *
 * @param args input path followed by output path
 * @return 0 when the job ran without throwing
 * @throws Exception whatever job setup or execution throws
 */
@Override
public int run(String[] args) throws Exception {
    try {
        JobClient client = new JobClient();
        JobConf histogramConf = new JobConf(getConf(), CountHistogram.class);
        histogramConf.setJobName("CountHistogram");

        // Processing classes.
        histogramConf.setMapperClass(Map.class);
        histogramConf.setReducerClass(Reduce.class);

        // Output record types.
        histogramConf.setOutputKeyClass(IntWritable.class);
        histogramConf.setOutputValueClass(IntWritable.class);

        // Input side.
        histogramConf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(histogramConf, new Path(args[0]));

        // Output side.
        histogramConf.setOutputFormat(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(histogramConf, new Path(args[1]));

        client.setConf(histogramConf);
        JobClient.runJob(histogramConf);
        return 0;
    } catch (Exception failure) {
        failure.printStackTrace();
        throw failure;
    }
}
From source file:BigramRelativeFrequencyTuple.java
License:Apache License
/** * Runs this tool.//from www . jav a 2 s . c o m */ public int run(String[] args) throws Exception { if (args.length != 3) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int reduceTasks = Integer.parseInt(args[2]); LOG.info("Tool name: " + BigramRelativeFrequencyTuple.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BigramRelativeFrequencyTuple.class.getSimpleName()); job.setJarByClass(BigramRelativeFrequencyTuple.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(BinSedesTuple.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(BinSedesTuple.class); job.setOutputValueClass(FloatWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:WordCountA.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);// ww w . j a v a2 s . com } Job job = Job.getInstance(conf, "word count"); job.setJarByClass(WordCountA.class); job.setMapperClass(TokenizerMapper.class); // Disable the combiner // job.setCombinerClass(IntSumReducer.class); // Setup the Partitioner job.setPartitionerClass(Letterpartitioner.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }