List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
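Most of the examples below actually build a Path from a String; for contrast, here is a minimal sketch of the URI-based constructor itself. The class name, host, port, and file path are placeholder assumptions, not taken from any of the examples.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) throws Exception {
        // Construct a Path directly from a URI; scheme, authority, and path are placeholders.
        URI uri = new URI("hdfs://namenode:8020/user/data/input.txt");
        Path path = new Path(uri);
        System.out.println(path.toUri());
    }
}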
From source file: PT1.java
License: Open Source License

static void testreadcolumnrand(String filename, int num, int size, String mode) throws Exception {
    Path path = new Path(filename);
    ArrayList<Short> vector = new ArrayList<Short>();
    if (mode == null || mode.equals("-1")) {
        for (short i = 0; i < 7; i++) {
            vector.add(i);
        }
    } else if (mode.equals("half")) {
        short x = 0;
        vector.add(x);
        x = 1;
        vector.add(x);
        x = 2;
        vector.add(x);
        x = 3;
        vector.add(x);
    } else {
        vector.add(Short.valueOf(mode));
    }

    Configuration conf = new Configuration();
    ColumnStorageClient client = new ColumnStorageClient(path, vector, conf);

    Random r = new Random();
    for (int i = 0; i < num; i++) {
        client.getRecordByLine(r.nextInt(size));
        if (i % 1000000 == 0) {
        }
    }

    client.close();
}
From source file: BP.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 10) {
        for (int i = 0; i < args.length; i++) {
            System.out.println("Args: " + i + " " + args[i]);
        }
        System.out.println(args.length);
        return printUsage();
    }

    edge_path = new Path(args[0]);
    prior_path = new Path(args[1]);
    output_path = new Path(args[2]);
    number_msg = Long.parseLong(args[3]);
    nreducer = Integer.parseInt(args[4]);
    nreducer = 1; // the parsed reducer count is immediately overridden to 1
    max_iter = Integer.parseInt(args[5]);
    nstate = Integer.parseInt(args[7]);
    edge_potential_str = read_edge_potential(args[8]);

    int cur_iter = 1;
    if (!args[9].startsWith("new")) {
        cur_iter = Integer.parseInt(args[9].substring(4));
    }

    System.out.println("edge_path=" + edge_path.toString() + ", prior_path=" + prior_path.toString()
            + ", output_path=" + output_path.toString() + ", |E|=" + number_msg + ", nreducer=" + nreducer
            + ", maxiter=" + max_iter + ", nstate=" + nstate + ", edge_potential_str=" + edge_potential_str
            + ", cur_iter=" + cur_iter);

    fs = FileSystem.get(getConf());

    // Run Stage1 and Stage2.
    if (cur_iter == 1) {
        System.out.println("BP: Initializing messages...");
        JobClient.runJob(configInitMessage());
    }

    double converge_threshold = number_msg * EPS * nstate;

    int i;
    for (i = cur_iter; i <= max_iter; i++) {
        System.out.println(" *** ITERATION " + (i) + "/" + max_iter + " ***");

        JobClient.runJob(configUpdateMessage());
        JobClient.runJob(configCheckErr());
        JobClient.runJob(configSumErr());

        String line = readLocaldirOneline(sum_error_path.toString());
        fs.delete(check_error_path, true);
        fs.delete(sum_error_path, true);
        String[] parts = line.split("\t");
        int n = Integer.parseInt(parts[0]);
        double sum = Double.parseDouble(parts[1]);
        System.out.println("Converged Msg: " + (number_msg - n));
        System.out.println("Sum Error: " + sum);
        if (sum < converge_threshold) {
            break;
        }

        // rotate directory
        fs.delete(message_cur_path);
        fs.rename(message_next_path, message_cur_path);
    }

    System.out.println("CONVERGE_ITER " + i);
    System.out.println("BP: Computing beliefs...");
    JobClient.runJob(configComputeBelief());

    System.out.println("BP finished. The belief vector is in the HDFS " + args[2]);

    return 0;
}
From source file: StressTest.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // conf.addResource(new Path("../../env_vars"));

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TweetCategorizer <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "categorize tweets");
    job.setJarByClass(TweetCategorizer.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: DisplayFuzzyKMeans.java
License: Apache License

public static void main(String[] args) throws Exception {
    DistanceMeasure measure = new ManhattanDistanceMeasure();

    Path samples = new Path("samples");
    Path output = new Path("output");
    Configuration conf = new Configuration();
    HadoopUtil.delete(conf, output);
    HadoopUtil.delete(conf, samples);

    RandomUtils.useTestSeed();
    DisplayClustering.generateSamples();
    writeSampleData(samples);

    boolean runClusterer = true;
    int maxIterations = 10;
    float threshold = 0.001F;
    float m = 1.1F;
    if (runClusterer) {
        runSequentialFuzzyKClusterer(conf, samples, output, measure, maxIterations, m, threshold);
    } else {
        int numClusters = 3;
        runSequentialFuzzyKClassifier(conf, samples, output, measure, numClusters, maxIterations, m, threshold);
    }
    new DisplayFuzzyKMeans();
}
From source file: FriendsJob.java
License: Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FriendsJob.class);
    conf.setJobName("anagramcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(FriendsMapper.class);
    // conf.setCombinerClass(AnagramReducer.class);
    conf.setReducerClass(FriendsReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file: HoodieJavaStreamingApp.java
License: Apache License

/**
 * @throws Exception
 */
public void run() throws Exception {
    // Spark session setup
    SparkSession spark = SparkSession.builder().appName("Hoodie Spark Streaming APP")
            .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").master("local[1]")
            .getOrCreate();
    JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext());

    // folder path clean up and creation, preparing the environment
    FileSystem fs = FileSystem.get(jssc.hadoopConfiguration());
    fs.delete(new Path(streamingSourcePath), true);
    fs.delete(new Path(streamingCheckpointingPath), true);
    fs.delete(new Path(tablePath), true);
    fs.mkdirs(new Path(streamingSourcePath));

    // Generator of some records to be loaded in.
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();

    List<String> records1 = DataSourceTestUtils.convertToStringList(dataGen.generateInserts("001", 100));
    Dataset<Row> inputDF1 = spark.read().json(jssc.parallelize(records1, 2));

    List<String> records2 = DataSourceTestUtils.convertToStringList(dataGen.generateUpdates("002", 100));
    Dataset<Row> inputDF2 = spark.read().json(jssc.parallelize(records2, 2));

    // setup the input for streaming
    Dataset<Row> streamingInput = spark.readStream().schema(inputDF1.schema()).json(streamingSourcePath);

    // start streaming and showing
    ExecutorService executor = Executors.newFixedThreadPool(2);

    // thread for Spark structured streaming
    Future<Void> streamFuture = executor.submit(new Callable<Void>() {
        public Void call() throws Exception {
            logger.info("===== Streaming Starting =====");
            stream(streamingInput);
            logger.info("===== Streaming Ends =====");
            return null;
        }
    });

    // thread for adding data to the streaming source and showing results over time
    Future<Void> showFuture = executor.submit(new Callable<Void>() {
        public Void call() throws Exception {
            logger.info("===== Showing Starting =====");
            show(spark, fs, inputDF1, inputDF2);
            logger.info("===== Showing Ends =====");
            return null;
        }
    });

    // let the threads run
    streamFuture.get();
    showFuture.get();

    executor.shutdown();
}
From source file: PerTaskTally.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Aniket changes starts
    /* Here the partitioner is being called */
    job.setPartitionerClass(WordPartitioner.class);
    // Aniket changes ends
    // Part 4 Aniket changes starts
    /* Here I am just disabling the combiner */
    // job.setCombinerClass(IntSumReducer.class);
    // Part 4 Aniket changes ends
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: AnalyzeBigramCount.java
License: Apache License

public static void main(String[] args) {
    if (args.length != 1) {
        System.out.println("usage: [input-path]");
        System.exit(-1);
    }

    System.out.println("input path: " + args[0]);

    List<PairOfWritables<Text, IntWritable>> bigrams = SequenceFileUtils.readDirectory(new Path(args[0]));

    Collections.sort(bigrams, new Comparator<PairOfWritables<Text, IntWritable>>() {
        public int compare(PairOfWritables<Text, IntWritable> e1, PairOfWritables<Text, IntWritable> e2) {
            if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) {
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }
            return e2.getRightElement().compareTo(e1.getRightElement());
        }
    });

    int singletons = 0;
    int sum = 0;
    for (PairOfWritables<Text, IntWritable> bigram : bigrams) {
        sum += bigram.getRightElement().get();
        if (bigram.getRightElement().get() == 1) {
            singletons++;
        }
    }

    System.out.println("total number of unique bigrams: " + bigrams.size());
    System.out.println("total number of bigrams: " + sum);
    System.out.println("number of bigrams that appear only once: " + singletons);

    System.out.println("\nten most frequent bigrams: ");

    int cnt = 0;
    for (PairOfWritables<Text, IntWritable> bigram : bigrams) {
        System.out.println(bigram.getLeftElement() + "\t" + bigram.getRightElement());
        cnt++;
        if (cnt >= 10) { // stop once ten bigrams have been printed
            break;
        }
    }
}
From source file: WordCount_PerTaskTally.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerTaskTally.class);
    job.setMapperClass(TokenizerMapper.class);

    // disable combiner
    // job.setCombinerClass(IntSumReducer.class);

    job.setPartitionerClass(WordPartitioner.class);
    job.setNumReduceTasks(5);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: ComputeCooccurrenceMatrixStripes.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixStripes.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(String2IntOpenHashMapWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(String2IntOpenHashMapWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}