List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
From source file:WordCountCounters.java
License:Apache License
/**
 * Configures and runs the "wordcountcounters" MapReduce job.
 *
 * <p>The job reads its input through {@code ColumnFamilyInputFormat} from the
 * {@code WordCount.KEYSPACE} keyspace and the {@code COUNTER_COLUMN_FAMILY} column family,
 * maps it with {@code SumMapper}, and writes {@code Text}/{@code LongWritable} pairs under
 * {@code OUTPUT_PATH_PREFIX}.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 when the job completes successfully, 1 when it fails
 * @throws Exception if the job cannot be configured or submitted
 */
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));

    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE,
            WordCountCounters.COUNTER_COLUMN_FAMILY);

    // Slice range with empty start and finish bounds and a count of 100.
    SlicePredicate predicate = new SlicePredicate()
            .setSlice_range(new SliceRange()
                    .setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setCount(100));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    // Propagate the job outcome instead of unconditionally reporting success.
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:ImageDuplicatesRemover.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); //This is the line that makes the hadoop run locally //conf.set("mapred.job.tracker", "local"); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);// w w w . j av a2 s . c o m } Job job = new Job(conf, "image dups remover"); job.setJarByClass(ImageDuplicatesRemover.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapperClass(ImageMd5Mapper.class); job.setReducerClass(ImageDupsReducer.class); //job.setNumReduceTasks(2); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:FlinkBootstrap.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 2) { throw new IllegalArgumentException( "Provide `TaskManager` or `JobManager` parameter with config folder"); }//from w w w . j a va2s . c om //Load Hadoop S3 wrapper classes, due to ClassNotFound Exception without Class.forName("org.apache.flink.runtime.fs.hdfs.HadoopFileSystem"); Class.forName("org.apache.hadoop.fs.s3a.S3AFileSystem"); //Verify s3 is accessible Configuration conf = new Configuration(); conf.addResource(new Path("config/hadoop/core-site.xml")); conf.addResource(new Path("config/hadoop/hdfs-site.xml")); FileSystem fs = FileSystem.get(conf); fs.listStatus(new Path("s3://dir")); if (args[0].equals("TaskManager")) { TaskManager.main(new String[] { "--configDir", args[1], }); } else if (args[0].equals("JobManager")) { JobManager.main(new String[] { "--configDir", args[1], "--executionMode", "cluster", }); } else { throw new IllegalArgumentException("Unknown parameter `" + args[0] + "`"); } }
From source file:SBP.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 11) { for (int i = 0; i < args.length; i++) { System.out.println("Args: " + i + " " + args[i]); }/*from w ww.j a v a 2s. com*/ System.out.println(args.length); return printUsage(); } lambda = Double.parseDouble(args[10]); edge_path = new Path(args[0]); prior_path = new Path(args[1]); output_path = new Path(args[2]); Path prev_local_path = new Path("run_tmp/prev_local/"); Path new_local_path = new Path("run_tmp/new_local/"); Path tmp_output_path = new Path(output_path.toString()); number_msg = Long.parseLong(args[3]); nreducer = Integer.parseInt(args[4]); nreducer = 1; max_iter = Integer.parseInt(args[5]); nstate = Integer.parseInt(args[7]); edge_potential_str = read_edge_potential(args[8]); int cur_iter = 1; if (args[9].startsWith("new") == false) { cur_iter = Integer.parseInt(args[9].substring(4)); } System.out.println("edge_path=" + edge_path.toString() + ", prior_path=" + prior_path.toString() + ", output_path=" + output_path.toString() + ", |E|=" + number_msg + ", nreducer=" + nreducer + ", maxiter=" + max_iter + ", nstate=" + nstate + ", edge_potential_str=" + edge_potential_str + ", cur_iter=" + cur_iter + ", lambda=" + lambda); fs = FileSystem.get(getConf()); // Run Stage1 and Stage2. 
if (cur_iter == 1) { System.out.println("BP: Initializing messages..."); JobClient.runJob(configInitMessage()); } double converge_threshold = number_msg * EPS * nstate; for (int i = cur_iter; i <= max_iter; i++) { System.out.println(" *** ITERATION " + (i) + "/" + max_iter + " ***"); JobClient.runJob(configUpdateMessage()); JobClient.runJob(configSmoothMessage()); JobClient.runJob(configCheckErr()); JobClient.runJob(configSumErr()); String line = readLocaldirOneline(sum_error_path.toString()); fs.delete(check_error_path, true); fs.delete(sum_error_path, true); String[] parts = line.split("\t"); int n = Integer.parseInt(parts[0]); double sum = Double.parseDouble(parts[1]); System.out.println("Converged Msg: " + (number_msg - n)); System.out.println("Sum Error: " + sum); if (sum < converge_threshold) { break; } // rotate directory fs.delete(message_cur_path); fs.delete(message_next_path); fs.rename(message_smooth_path, message_cur_path); } System.out.println("BP: Computing beliefs..."); JobClient.runJob(configComputeBelief()); System.out.println("BP finished. The belief vector is in the HDFS " + args[2]); return 0; }
From source file:ReverseIndexer.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: ReverseIndexer <output> <input file(s)>"); System.exit(2);//from w ww . java2 s. com } Job job = new Job(conf, "reverse indexer"); job.setJarByClass(ReverseIndexer.class); job.setMapperClass(IndexerMapper.class); job.setReducerClass(IndexerReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LineRecWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); for (int i = 1; i < otherArgs.length; i++) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[0])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:ExportStressTest.java
License:Apache License
/**
 * Writes one input file named {@code input-<fileId>} into the {@code ExportStressTest}
 * directory, creating the directory if needed.
 *
 * <p>The file holds {@code RECORDS_PER_FILE} lines. Line {@code i} consists of the value
 * {@code i + RECORDS_PER_FILE * fileId}, a comma, and {@code ALPHABET} repeated four times.
 *
 * @param fileId index of the file to create; also offsets the record values
 * @throws IOException if the directory or file cannot be created or written
 */
public void createFile(int fileId) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("ExportStressTest");
    fs.mkdirs(dirPath);
    Path filePath = new Path(dirPath, "input-" + fileId);

    // try-with-resources closes the writer and then the stream even when a write fails,
    // so a failed write no longer leaks the open output stream.
    try (OutputStream os = fs.create(filePath);
            Writer w = new BufferedWriter(new OutputStreamWriter(os))) {
        for (int i = 0; i < RECORDS_PER_FILE; i++) {
            // Widen to long before multiplying so large file ids cannot overflow int.
            long v = (long) i + ((long) RECORDS_PER_FILE * (long) fileId);
            w.write("" + v + "," + ALPHABET + ALPHABET + ALPHABET + ALPHABET + "\n");
        }
    }
}
From source file:ExportStressTest.java
License:Apache License
/** Create a set of data files to export. */ public void createData() throws IOException { Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path dirPath = new Path("ExportStressTest"); if (fs.exists(dirPath)) { System.out.println("Export directory appears to already exist. Skipping data-gen."); return;// w w w.jav a2 s . c o m } for (int i = 0; i < NUM_FILES; i++) { createFile(i); } }
From source file:Authset.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);//from w ww .ja v a 2 s.c o m } Job job = new Job(conf, "word count"); job.setJarByClass(Authset.class); job.setMapperClass(TokenizerMapper.class); //job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(NullWritable.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setNumReduceTasks(10); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); //DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), //job.getConfiguration()); //DistributedCache.setLocalFiles(job.getConfiguration(), otherArgs[0]); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:gen_rank.java
License:LGPL
/**
 * Configures and runs the "Preparing_data" job with the old {@code JobConf} API.
 *
 * <p>Text input is read from {@code input} and text output is written to {@code output}.
 * {@code Reduce} is used as both combiner and reducer.
 *
 * @param input path of the job input
 * @param output path of the job output
 * @throws Exception if the job cannot be configured or fails while running
 */
public static void runjob(String input, String output) throws Exception {
    JobConf jobConf = new JobConf(gen_rank.class);
    jobConf.setJobName("Preparing_data");

    // Input and output formats.
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);

    // Processing classes.
    jobConf.setMapperClass(Map.class);
    jobConf.setCombinerClass(Reduce.class);
    jobConf.setReducerClass(Reduce.class);

    // Output record types.
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    Path inputPath = new Path(input);
    FileInputFormat.setInputPaths(jobConf, inputPath);
    Path outputPath = new Path(output);
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    JobClient.runJob(jobConf);
}
From source file:ConfTest.java
License:Open Source License
/**
 * Loads {@code d:\test\a.xml} as an additional configuration resource and prints the value
 * of the {@code aaa} property to standard output.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    Configuration configuration = new Configuration();
    Path resource = new Path("d:\\test\\a.xml");
    configuration.addResource(resource);

    String value = configuration.get("aaa");
    System.out.println(value);
}