Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs FileSystem exists

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem exists.

Prototype

public boolean exists(Path f) throws IOException 

Document

Check if a path exists.
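
A minimal sketch of the call pattern, for reference; the configuration, class name, and path used here are hypothetical and not taken from the examples below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample { // hypothetical class name
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path("/tmp/example.txt"); // hypothetical path
        if (fs.exists(p)) {
            // the path is present, so it is safe to open or delete it
            System.out.println(p + " exists");
        } else {
            System.out.println(p + " does not exist");
        }
    }
}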

Usage

From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMap.java

@Override
public void setup(Context context) {
    number_of_clusters = context.getConfiguration().getInt("number_of_clusters", 2);
    feature_size = context.getConfiguration().getInt("feature_size", 1);
    num_of_members_in_a_cluster = new int[number_of_clusters];

    // Initialize the ArrayLists 'cetroid_of_clusters' and 'sum_of_members_in_a_cluster'
    // and the array 'num_of_members_in_a_cluster'. A fresh zero-filled array is allocated
    // per cluster so the list entries do not all alias the same Float[] instance.
    for (int i = 0; i < number_of_clusters; i++) {
        Float[] centroid = new Float[feature_size];
        Float[] sum = new Float[feature_size];
        for (int j = 0; j < feature_size; j++) {
            centroid[j] = 0.0f;
            sum[j] = 0.0f;
        }
        cetroid_of_clusters.add(centroid);
        sum_of_members_in_a_cluster.add(sum);
        num_of_members_in_a_cluster[i] = 0;
    }

    // Read the current cluster centroids from the k_mean.txt file.
    // If this is the first iteration, the centroids must be initialized, either as
    // random numbers (with regard to the min & max values of each feature) or by the user.

    try {
        Float[] t_float;
        String uri = "/user/hduser/k_mean.txt";
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (fs.exists(new Path(uri))) {
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
            String[] temp;
            for (int i = 0; i < number_of_clusters; i++) {
                temp = br.readLine().split(",");
                t_float = new Float[feature_size];
                for (int j = 0; j < feature_size; j++)
                    t_float[j] = Float.parseFloat(temp[j]);
                cetroid_of_clusters.set(i, t_float);
            }
        } else {
            // Initialization of the clusters' centroids by the user for our specific data.
            // One good way is to choose these values randomly and put them in the "k_mean.txt" file,
            // one centroid per line with its feature values separated by ',', for example:
            //        13.325872,16.854961
            //        13.5158205,8.382423
            //        16.05023,4.76127
            t_float = new Float[2];
            t_float[0] = 13.325872f;
            t_float[1] = 16.854961f;
            cetroid_of_clusters.set(0, t_float);

            t_float = new Float[2];
            t_float[0] = 13.5158205f;
            t_float[1] = 8.382423f;
            cetroid_of_clusters.set(1, t_float);

            t_float = new Float[2];
            t_float[0] = 16.05023f;
            t_float[1] = 4.76127f;
            cetroid_of_clusters.set(2, t_float);

        }
    } catch (Exception e) {
        e.printStackTrace(); // surface read/parse failures instead of silently swallowing them
    }
}

From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMapReduce.java

public static void main(String[] args) throws Exception {
    int iteration = 0, num_of_iteration = 30;
    int feature_size = 2;
    FileSystem fs;
    int number_of_clusters = 2;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "K_meansClusteringMapReduce");
        job.setJarByClass(K_meansClusteringMapReduce.class);

        conf = job.getConfiguration(); // This line is mandatory: the Job copies the Configuration, so parameters must be set on the job's own copy.

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatArrayWritable.class);

        job.setMapperClass(K_meansClusteringMap.class);
        job.setReducerClass(K_meansClusteringReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // set number of reducers to one.

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
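        // MapReduce fails if the output directory already exists, so remove any output from a previous run first.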
        if (fs.exists(out))
            fs.delete(out, true);

        FileOutputFormat.setOutputPath(job, out);
        number_of_clusters = Integer.parseInt(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);

        conf.setInt("number_of_clusters", number_of_clusters);
        conf.setInt("feature_size", feature_size);
        conf.setInt("current_iteration_num", iteration);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);

}

From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringReduce.java

@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/k_mean.txt";
    Path path = new Path(uri);

    // Write the latest values of the clusters' centroids to the 'k_mean.txt' file
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        for (int i = 0; i < number_of_clusters; i++) {
            for (int j = 0; j < feature_size; j++)
                br.write(((Float) ((FloatWritable) cetroids_of_all_clusters.get(i).get()[j]).get()).toString()
                        + ",");
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File k_mean.txt not found");
    }

    // Write the clusters' centroid values for the current iteration to the directory '/user/hduser/K-means/...'

    uri = "/user/hduser/K-means/means-" + current_iteration_num + ".txt";
    path = new Path(uri);

    try {
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (current_iteration_num == 0)
            fs.delete(new Path("/user/hduser/K-means"), true);
        OutputStreamWriter osw = new OutputStreamWriter(fs.create(path, true));
        BufferedWriter br = new BufferedWriter(osw);
        for (int i = 0; i < number_of_clusters; i++) {
            for (int j = 0; j < feature_size; j++)
                br.write((Float) ((FloatWritable) cetroids_of_all_clusters.get(i).get()[j]).get() + ",");
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File not found");
    }
}

From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMapReduce.java

public static void main(String[] args) throws Exception {
    String[] theta;
    int iteration = 0, num_of_iteration = 1;
    int feature_size = 0, input_data_size = 0;
    FileSystem fs;
    Float alpha = 0.1f;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "LinearRegressionMapReduce");
        job.setJarByClass(MultipleLinearRegressionMapReduce.class);

        // re-fetch the job's own Configuration so that "theta" (set below via conf.setStrings) is propagated to the tasks
        conf = job.getConfiguration();

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatWritable.class);

        job.setMapperClass(MultipleLinearRegressionMap.class);
        job.setReducerClass(MultipleLinearRegressionReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // set mapred.reduce.tasks = 1 (only one reducer)

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);

        FileOutputFormat.setOutputPath(job, out);
        alpha = Float.parseFloat(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);
        input_data_size = Integer.parseInt(args[5]);
        conf.setFloat("alpha", alpha);
        conf.setInt("feature_size", feature_size);
        conf.setInt("input_data_size", input_data_size);
        conf.setInt("iteration", iteration);

        theta = new String[feature_size];

        if (iteration == 0) { // first iteration
            for (int i = 0; i < theta.length; i++)
                theta[i] = "0.0";
            conf.setStrings("theta", theta);
        } else {
            try {
                String uri = "/user/hduser/theta.txt";
                fs = FileSystem.get(conf);
                //FSDataInputStream in = fs.open(new Path(uri));
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
                theta = br.readLine().split(",");
            } catch (Exception e) {
                e.printStackTrace(); // surface failures reading theta.txt instead of silently swallowing them
            }
            conf.setStrings("theta", theta);
        }

        for (int i = 0; i < theta.length; i++)
            System.out.println("In MapRedce main function: theta[ " + i + " ]" + theta[i]);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);

}

From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionReduce.java

@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/theta.txt";
    Path path = new Path(uri);

    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        for (int i = 0; i < theta.length; i++)
            br.write(theta[i].toString() + ",");
        br.write("\n");
        br.close();
    } catch (Exception e) {
        System.out.println("File not found");
    }

    uri = "/user/hduser/LinearReg/theta-" + iteration + ".txt";
    path = new Path(uri);

    try {
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (iteration == 0)
            fs.delete(new Path("/user/hduser/LinearReg"), true);
        OutputStreamWriter osw = new OutputStreamWriter(fs.create(path, true));
        BufferedWriter br = new BufferedWriter(osw);
        br.write(prediction_error + ", ");
        for (int i = 0; i < theta.length; i++)
            br.write(theta[i].toString() + ", ");
        br.write("\n");
        br.close();
    } catch (Exception e) {
        System.out.println("File not found");
    }
}

From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMapReduce_Continuous_Features.java

/**
 * @param args
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws InterruptedException 
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    int number_of_classes = 1;
    int number_of_features = 1;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Job job = new Job(conf, "NaiveBayesClassifierMapReduce_Continuous_Features");
    job.setJarByClass(NaiveBayesClassifierMapReduce_Continuous_Features.class);

    conf = job.getConfiguration(); // This line is mandatory: the Job copies the Configuration, so parameters must be set on the job's own copy.

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapArrayWritable.class);

    job.setMapperClass(NaiveBayesClassifierMap_Continuous_Features.class);
    job.setReducerClass(NaiveBayesClassifierReduce_Continuous_Features.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path out = new Path(args[1]);
    if (fs.exists(out))
        fs.delete(out, true);
    FileOutputFormat.setOutputPath(job, out);
    number_of_classes = Integer.parseInt(args[2]);
    number_of_features = Integer.parseInt(args[3]);
    conf.setInt("number_of_classes", number_of_classes);
    conf.setInt("number_of_features", number_of_features);

    try {
        job.waitForCompletion(true);

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierReduce_Continuous_Features.java

@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/naive_bayes_continuous.txt";
    Path path = new Path(uri);

    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        br.write("class_id,     mu(mean),     std");
        br.write("-------------------------------\n");
        for (int i = 0; i < number_of_classes; i++) {
            br.write("-------- Class-" + i + "-------\n");
            for (int j = 0; j < number_of_features; j++) {
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_mu")))
                        + ",  ");
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_std")))
                        + "\n");
            }
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File /user/hduser/naive_bayes_continuous.txt cannot be found");
    }

}

From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License:Apache License

/** Test FijiTableInputFormat in a map-only job. */
@Test
public void testMapJob() throws Exception {
    final Path outputFile = createOutputFile();
    // Create a test job.
    final Job job = setupJob("testMapJob", outputFile, TestMapper.class, null, // reducer class
            null, // start key
            null, // limit key
            null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball", "gmail.com\tJohn Doe",
            "usermail.example.com\tChristophe Bisciglia", "usermail.example.com\tKiyan Ahmadizadeh",
            "gmail.com\tJane Doe", "usermail.example.com\tGarrett Wu");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}
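
The NOTE above refers to the fact that FileSystem.get() returns a shared, cached instance, so closing it can break other threads that hold the same object (see HADOOP-7973). One possible workaround, sketched here as an assumption and not part of the original test, is to request an uncached instance via FileSystem.newInstance() and close that instead:

// Sketch: obtain a private, uncached FileSystem so that close() only affects this instance.
final FileSystem privateFs = FileSystem.newInstance(job.getConfiguration());
try {
    assertTrue(privateFs.exists(outputFile.getParent()));
} finally {
    privateFs.close(); // safe: this instance is not shared through the FileSystem cache
}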

From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License:Apache License

/** Test FijiTableInputFormat in a map-only job with start and limit keys. */
@Test
public void testMapJobWithStartAndLimitKeys() throws Exception {
    final Path outputFile = createOutputFile();
    // Set the same entity IDs for start and limit, and we should get just the start row
    final EntityId startEntityId = getFooTable().getEntityId("jane.doe@gmail.com");
    final byte[] endRowKey = startEntityId.getHBaseRowKey();
    final EntityId rawLimitEntityId = HBaseEntityId
            .fromHBaseRowKey(Arrays.copyOf(endRowKey, endRowKey.length + 1));

    // Create a test job.
    final Job job = setupJob("testMapJobWithStartAndLimitKeys", outputFile, TestMapper.class, null, // reducer class
            startEntityId, rawLimitEntityId, null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("gmail.com\tJane Doe");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}

From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License:Apache License

/** Test FijiTableInputFormat in a map-only job with a row filter. */
@Test
public void testMapJobWithFilter() throws Exception {
    final FijiRowFilter filter = new ColumnValueEqualsRowFilter("info", "email",
            new DecodedCell<String>(Schema.create(Schema.Type.STRING), "aaron@usermail.example.com"));
    final Path outputFile = createOutputFile();
    // Create a test job.
    final Job job = setupJob("testMapJobWithFilter", outputFile, TestMapper.class, null, // reducer class
            null, // start key
            null, // limit key
            filter);

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other threads' filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}