List of usage examples for the org.apache.hadoop.fs.Path constructor Path(URI)
public Path(URI aUri)
From source file:WordCount_NoCombiner.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); GenericOptionsParser parser = new GenericOptionsParser(conf, args); String[] otherArgs = parser.getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);/*w ww.ja v a2s . c o m*/ } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount_NoCombiner.class); job.setMapperClass(TokenizerMapper.class); // delete this line to disable combining // job.setCombinerClass(IntSumReducer.class); job.setPartitionerClass(WordPartitioner.class); job.setNumReduceTasks(5); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:TestTextInputFormat.java
License:Open Source License
/**
 * Benchmark driver: reads text input splits with {@code TextInputFormat},
 * deserializes each record through a {@code LazySimpleSerDe}, and prints the
 * per-split and total read delay in seconds.
 *
 * <p>FIX: the original set {@code mapred.output.compress} to the misspelled
 * value {@code "flase"}; corrected to {@code "false"} so the intent (no output
 * compression) is expressed with a valid boolean string.
 *
 * @param argv exactly two arguments: &lt;input&gt; &lt;output&gt;
 * @throws IOException    declared for Hadoop I/O APIs used below
 * @throws SerDeException declared for SerDe initialization/deserialization
 */
public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestTextInputFormat <input> <output>");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestTextInputFormat.class);
        conf.setJobName("TestTextInputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(FormatStorageOutputFormat.class);
        // Was "flase" (typo); any non-"true" string is treated as false by
        // Configuration.getBoolean, but the corrected spelling states the
        // intent explicitly.
        conf.set("mapred.output.compress", "false");
        conf.set("mapred.input.dir", argv[0]);

        LazySimpleSerDe serDe = initSerDe(conf);
        LazySimpleStructObjectInspector oi =
                (LazySimpleStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new TextInputFormat();
        ((TextInputFormat) inputFormat).configure(conf);
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int totalDelay = 0;
        RecordReader<WritableComparable, Writable> currRecReader = null;
        for (int i = 0; i < inputSplits.length; i++) {
            currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
            WritableComparable key;
            Writable value;
            key = currRecReader.createKey();
            value = currRecReader.createValue();

            long begin = System.currentTimeMillis();
            int count = 0;
            while (currRecReader.next(key, value)) {
                // Deserialize and materialize every field to measure the full
                // read + decode cost, not just the raw record scan.
                Object row = serDe.deserialize((Text) value);
                oi.getStructFieldsDataAsList(row);
                count++;
            }
            long end = System.currentTimeMillis();
            long delay = (end - begin) / 1000;
            totalDelay += delay;
            System.out.println(count + " record read over, delay " + delay + " s");
        }
        System.out.println("total delay:" + totalDelay);
        return;
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}
From source file:Outlinks.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: inlinks <in> [<in>...] <out>"); System.exit(2);/*ww w. j a v a 2 s .c o m*/ } Job job = new Job(conf, "inlinks"); job.setJarByClass(Outlinks.class); job.setMapperClass(TokenizerMapper.class); //job.setCombinerClass(IdentityReducer.class); job.setReducerClass(IdentityReducer.class); job.setNumReduceTasks(10); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Edge.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);/*from w ww . j ava2s. c o m*/ } Path tempDir = new Path("/temp/edge"); Job job = new Job(conf, "word count"); job.setJarByClass(Edge.class); job.setMapperClass(SplitMapper.class); job.setCombinerClass(DuplicateCombiner.class); //job.setSortComparatorClass(DecentComparator.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, tempDir); if (job.waitForCompletion(true)) { Job job2 = new Job(conf, "edge"); job2.setJarByClass(Edge.class); job2.setMapperClass(SwitchMapper.class); job2.setSortComparatorClass(DecentComparator.class); job2.setReducerClass(SwitchReducer.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job2, tempDir); FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1])); System.exit(job2.waitForCompletion(true) ? 0 : 1); } System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:UtilIndexMR.java
License:Open Source License
public static void indexmrtest(String datadir, String indexdir, int filenum, int recnum, boolean var, boolean compress, boolean seq, boolean overwrite, boolean column, String idx, boolean removefile) throws Exception { if (column) { UtilIndexStorage.writeColumnFDF(datadir, filenum, recnum, (short) -1, var, compress, seq, overwrite); } else {// ww w . j a va2 s.c o m UtilIndexStorage.writeFDF(datadir, filenum, recnum, (short) -1, var, compress, seq, overwrite); } FileStatus[] ss = fs.listStatus(new Path(datadir)); StringBuffer sb = new StringBuffer(); for (FileStatus fileStatus : ss) { sb.append(fileStatus.getPath().toString()).append(","); } System.out.println(sb.toString()); IndexMR.running(conf, sb.substring(0, sb.length() - 1), column, idx, indexdir); IFormatDataFile ifdf = new IFormatDataFile(conf); ifdf.open(indexdir + "/part-00000"); ifdf.seek(filenum * recnum / 2); for (int i = 0; i < 10; i++) { ifdf.next().show(); } ifdf.close(); fs.delete(new Path(indexdir + "/_logs"), true); if (removefile) { fs.delete(new Path(datadir), true); fs.delete(new Path(indexdir), true); } }
From source file:MapReduceRunner.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf conf = new JobConf(MapReduceRunner.class); conf.setJobName("gapdeduce"); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Mapper.class); conf.setReducerClass(Reducer.class); // KeyValueTextInputFormat treats each line as an input record, // and splits the line by the tab character to separate it into key and value conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*from w w w . j a va 2 s. co m*/ }
From source file:LinkedGraph.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);//from w w w. j av a2 s . c o m } Job job = new Job(conf, "Graph"); job.setJarByClass(LinkedGraph.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:MarkovStateTransitionModel.java
License:Apache License
/**
 * Configures and runs the Markov state-transition-model job.
 *
 * <p>FIX: corrected the user-visible job-name typo
 * {@code "Markov tate transition model"} → {@code "Markov state transition model"}.
 *
 * @param args args[0] is the input path, args[1] the output path
 * @return 0 if the job succeeds, 1 otherwise
 * @throws Exception if job configuration or submission fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Markov state transition model";
    job.setJobName(jobName);

    job.setJarByClass(MarkovStateTransitionModel.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");

    job.setMapperClass(MarkovStateTransitionModel.StateTransitionMapper.class);
    job.setReducerClass(MarkovStateTransitionModel.StateTransitionReducer.class);
    job.setCombinerClass(MarkovStateTransitionModel.StateTransitionCombiner.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Reducer count is configurable; defaults to 1.
    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
From source file:BwaInterpreter.java
License:Open Source License
private void setTotalInputLength() { try {//from w ww .j a v a 2s .c o m FileSystem fs = FileSystem.get(this.conf); // To get the input files sizes ContentSummary cSummaryFile1 = fs.getContentSummary(new Path(options.getInputPath())); long lengthFile1 = cSummaryFile1.getLength(); long lengthFile2 = 0; if (!options.getInputPath2().isEmpty()) { ContentSummary cSummaryFile2 = fs.getContentSummary(new Path(options.getInputPath())); lengthFile2 = cSummaryFile2.getLength(); } // Total size. Depends on paired or single reads this.totalInputLength = lengthFile1 + lengthFile2; fs.close(); } catch (IOException e) { LOG.error(e.toString()); e.printStackTrace(); } }
From source file:BwaInterpreter.java
License:Open Source License
private void createOutputFolder() { try {//from w ww. java2 s . c o m FileSystem fs = FileSystem.get(this.conf); // Path variable Path outputDir = new Path(options.getOutputPath()); // Directory creation if (!fs.exists(outputDir)) { fs.mkdirs(outputDir); } else { fs.delete(outputDir, true); fs.mkdirs(outputDir); } fs.close(); } catch (IOException e) { LOG.error(e.toString()); e.printStackTrace(); } }