List of usage examples for org.apache.hadoop.fs.FileSystem.exists
public boolean exists(Path f) throws IOException
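Before the source-file examples below, a minimal sketch of the usual check-then-act pattern around exists(): probe a path, then delete or create it depending on the result. The path "/tmp/fs_exists_demo.txt", the class name, and the default Configuration are illustrative assumptions, not taken from any of the examples.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsDemo {
    public static void main(String[] args) throws IOException {
        // Picks up core-site.xml / hdfs-site.xml from the classpath if present.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/tmp/fs_exists_demo.txt"); // hypothetical path for illustration
        if (fs.exists(path)) {
            // Path already present: remove it (the recursive flag also covers directories).
            fs.delete(path, true);
        } else {
            // Path absent: create an empty file there.
            fs.create(path, true).close();
        }
    }
}

Note that exists() followed by create() or delete() is not atomic; another client can change the path between the two calls, which is one reason the examples below wrap these operations in try/catch blocks.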
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMap.java
@Override
public void setup(Context context) {
    number_of_clusters = context.getConfiguration().getInt("number_of_clusters", 2);
    feature_size = context.getConfiguration().getInt("feature_size", 1);
    num_of_members_in_a_cluster = new int[number_of_clusters];

    // Initialize the ArrayLists 'cetroid_of_clusters' and 'sum_of_members_in_a_cluster'
    // and the array 'num_of_members_in_a_cluster'.
    Float[] t = new Float[feature_size];
    for (int i = 0; i < feature_size; i++)
        t[i] = 0.0f;
    for (int i = 0; i < number_of_clusters; i++) {
        cetroid_of_clusters.add(t);
        sum_of_members_in_a_cluster.add(t);
        num_of_members_in_a_cluster[i] = 0;
    }

    // Read the current centroids of the clusters from the k_mean.txt file.
    // On the first iteration the centroids must be initialized, either as random
    // values (with regard to the min & max of each feature) or by the user.
    try {
        Float[] t_float;
        String uri = "/user/hduser/k_mean.txt";
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (fs.exists(new Path(uri))) {
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
            String[] temp;
            for (int i = 0; i < number_of_clusters; i++) {
                temp = br.readLine().split(",");
                t_float = new Float[feature_size];
                for (int j = 0; j < feature_size; j++)
                    t_float[j] = Float.parseFloat(temp[j]);
                cetroid_of_clusters.set(i, t_float);
            }
        } else {
            // Initialization of the clusters' centroids by the user for our specific data.
            // One good way is to choose these values randomly and put them in the
            // "k_mean.txt" file, one centroid per line with its features separated by ',',
            // for example:
            //   13.325872,16.854961
            //   13.5158205,8.382423
            //   16.05023,4.76127
            t_float = new Float[2];
            t_float[0] = 13.325872f;
            t_float[1] = 16.854961f;
            cetroid_of_clusters.set(0, t_float);
            t_float = new Float[2];
            t_float[0] = 13.5158205f;
            t_float[1] = 8.382423f;
            cetroid_of_clusters.set(1, t_float);
            t_float = new Float[2];
            t_float[0] = 16.05023f;
            t_float[1] = 4.76127f;
            cetroid_of_clusters.set(2, t_float);
        }
    } catch (Exception e) {
    }
}
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMapReduce.java
public static void main(String[] args) throws Exception {
    int iteration = 0, num_of_iteration = 30;
    int feature_size = 2;
    FileSystem fs;
    int number_of_clusters = 2;
    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);
        Job job = new Job(conf, "K_meansClusteringMapReduce");
        job.setJarByClass(K_meansClusteringMapReduce.class);
        conf = job.getConfiguration(); // This line is mandatory.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatArrayWritable.class);
        job.setMapperClass(K_meansClusteringMap.class);
        job.setReducerClass(K_meansClusteringReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setNumReduceTasks(1); // set number of reducers to one
        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);
        number_of_clusters = Integer.parseInt(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);
        conf.setInt("number_of_clusters", number_of_clusters);
        conf.setInt("feature_size", feature_size);
        conf.setInt("current_iteration_num", iteration);
        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);
}
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringReduce.java
@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/k_mean.txt";
    Path path = new Path(uri);

    // Write the latest centroids of the clusters to the 'k_mean.txt' file.
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        for (int i = 0; i < number_of_clusters; i++) {
            for (int j = 0; j < feature_size; j++)
                br.write(((Float) ((FloatWritable) cetroids_of_all_clusters.get(i).get()[j]).get()).toString() + ",");
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File k_mean.txt not found");
    }

    // Write the centroids of the clusters for the current iteration
    // under the directory '/user/hduser/K-means/'.
    uri = "/user/hduser/K-means/means-" + current_iteration_num + ".txt";
    path = new Path(uri);
    try {
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (current_iteration_num == 0)
            fs.delete(new Path("/user/hduser/K-means"), true);
        OutputStreamWriter osw = new OutputStreamWriter(fs.create(path, true));
        BufferedWriter br = new BufferedWriter(osw);
        for (int i = 0; i < number_of_clusters; i++) {
            for (int j = 0; j < feature_size; j++)
                br.write((Float) ((FloatWritable) cetroids_of_all_clusters.get(i).get()[j]).get() + ",");
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File not found");
    }
}
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMapReduce.java
public static void main(String[] args) throws Exception {
    String[] theta;
    int iteration = 0, num_of_iteration = 1;
    int feature_size = 0, input_data_size = 0;
    FileSystem fs;
    Float alpha = 0.1f;
    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);
        Job job = new Job(conf, "LinearRegressionMapReduce");
        job.setJarByClass(MultipleLinearRegressionMapReduce.class);
        // This line is needed for propagating "theta".
        conf = job.getConfiguration();
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatWritable.class);
        job.setMapperClass(MultipleLinearRegressionMap.class);
        job.setReducerClass(MultipleLinearRegressionReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setNumReduceTasks(1); // set mapred.reduce.tasks = 1 (only one reducer)
        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);
        alpha = Float.parseFloat(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);
        input_data_size = Integer.parseInt(args[5]);
        conf.setFloat("alpha", alpha);
        conf.setInt("feature_size", feature_size);
        conf.setInt("input_data_size", input_data_size);
        conf.setInt("iteration", iteration);
        theta = new String[feature_size];
        if (iteration == 0) { // first iteration: start from theta = 0
            for (int i = 0; i < theta.length; i++)
                theta[i] = "0.0";
            conf.setStrings("theta", theta);
        } else { // later iterations: read theta back from the previous reducer's output
            try {
                String uri = "/user/hduser/theta.txt";
                fs = FileSystem.get(conf);
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
                theta = br.readLine().split(",");
            } catch (Exception e) {
            }
            conf.setStrings("theta", theta);
        }
        for (int i = 0; i < theta.length; i++)
            System.out.println("In MapReduce main function: theta[" + i + "] = " + theta[i]);
        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);
}
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionReduce.java
@Override
protected void cleanup(Context context) throws IOException {
    // Write the latest theta to 'theta.txt' so the driver can read it back
    // on the next iteration.
    String uri = "/user/hduser/theta.txt";
    Path path = new Path(uri);
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        for (int i = 0; i < theta.length; i++)
            br.write(theta[i].toString() + ",");
        br.write("\n");
        br.close();
    } catch (Exception e) {
        System.out.println("File not found");
    }

    // Also record theta (and the prediction error) for the current iteration
    // under '/user/hduser/LinearReg/'.
    uri = "/user/hduser/LinearReg/theta-" + iteration + ".txt";
    path = new Path(uri);
    try {
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (iteration == 0)
            fs.delete(new Path("/user/hduser/LinearReg"), true);
        OutputStreamWriter osw = new OutputStreamWriter(fs.create(path, true));
        BufferedWriter br = new BufferedWriter(osw);
        br.write(prediction_error + ", ");
        for (int i = 0; i < theta.length; i++)
            br.write(theta[i].toString() + ", ");
        br.write("\n");
        br.close();
    } catch (Exception e) {
        System.out.println("File not found");
    }
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMapReduce_Continuous_Features.java
/**
 * @param args
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    int number_of_classes = 1;
    int number_of_features = 1;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Job job = new Job(conf, "NaiveBayesClassifierMapReduce_Continuous_Features");
    job.setJarByClass(NaiveBayesClassifierMapReduce_Continuous_Features.class);
    conf = job.getConfiguration(); // This line is mandatory.
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapArrayWritable.class);
    job.setMapperClass(NaiveBayesClassifierMap_Continuous_Features.class);
    job.setReducerClass(NaiveBayesClassifierReduce_Continuous_Features.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path out = new Path(args[1]);
    if (fs.exists(out))
        fs.delete(out, true);
    FileOutputFormat.setOutputPath(job, out);
    number_of_classes = Integer.parseInt(args[2]);
    number_of_features = Integer.parseInt(args[3]);
    conf.setInt("number_of_classes", number_of_classes);
    conf.setInt("number_of_features", number_of_features);
    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierReduce_Continuous_Features.java
@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/naive_bayes_continuous.txt";
    Path path = new Path(uri);
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        br.write("class_id, mu(mean), std\n"); // newline added so the header sits on its own line
        br.write("-------------------------------\n");
        for (int i = 0; i < number_of_classes; i++) {
            br.write("-------- Class-" + i + "-------\n");
            for (int j = 0; j < number_of_features; j++) {
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_mu"))) + ", ");
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_std"))) + "\n");
            }
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File /user/hduser/naive_bayes_continuous.txt cannot be found");
    }
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a map-only job. */
@Test
public void testMapJob() throws Exception {
    final Path outputFile = createOutputFile();

    // Create a test job.
    final Job job = setupJob("testMapJob", outputFile, TestMapper.class,
            null,  // reducer class
            null,  // start key
            null,  // limit key
            null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet(
            "usermail.example.com\tAaron Kimball",
            "gmail.com\tJohn Doe",
            "usermail.example.com\tChristophe Bisciglia",
            "usermail.example.com\tKiyan Ahmadizadeh",
            "gmail.com\tJane Doe",
            "usermail.example.com\tGarrett Wu");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a map-only job with start and limit keys. */
@Test
public void testMapJobWithStartAndLimitKeys() throws Exception {
    final Path outputFile = createOutputFile();

    // Use the same entity ID for both start and limit, so we should get just the start row.
    // Arrays.copyOf pads the copied row key with a trailing 0x00 byte, making the limit key
    // the immediate successor of the start key.
    final EntityId startEntityId = getFooTable().getEntityId("jane.doe@gmail.com");
    final byte[] endRowKey = startEntityId.getHBaseRowKey();
    final EntityId rawLimitEntityId =
            HBaseEntityId.fromHBaseRowKey(Arrays.copyOf(endRowKey, endRowKey.length + 1));

    // Create a test job.
    final Job job = setupJob("testMapJobWithStartAndLimitKeys", outputFile, TestMapper.class,
            null, // reducer class
            startEntityId,
            rawLimitEntityId,
            null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("gmail.com\tJane Doe");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a map-only job with a row filter. */
@Test
public void testMapJobWithFilter() throws Exception {
    final FijiRowFilter filter = new ColumnValueEqualsRowFilter("info", "email",
            new DecodedCell<String>(Schema.create(Schema.Type.STRING), "aaron@usermail.example.com"));
    final Path outputFile = createOutputFile();

    // Create a test job.
    final Job job = setupJob("testMapJobWithFilter", outputFile, TestMapper.class,
            null, // reducer class
            null, // start key
            null, // limit key
            filter);

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);
    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}