List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
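A point worth noting before the examples: the Job constructor makes its own copy of the Configuration passed to it, so changes made to the original Configuration object after the job is created are not visible to the job. This is why several examples below re-fetch the job's copy via job.getConfiguration() before setting further properties. A minimal sketch of the pitfall (the property name "my.example.key" is made up for illustration):

Configuration conf = new Configuration();
Job job = new Job(conf, "example");

// Wrong: Job copied conf at construction, so the job never sees this value.
conf.setInt("my.example.key", 42);

// Right: mutate the job's own copy of the Configuration.
Configuration jobConf = job.getConfiguration();
jobConf.setInt("my.example.key", 42);

In newer Hadoop versions the deprecated new Job(conf, name) constructor is replaced by the Job.getInstance(conf, name) factory method; the copy semantics are the same.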
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMapReduce.java
public static void main(String[] args) throws Exception {
    int iteration = 0, num_of_iteration = 30;
    int feature_size = 2;
    FileSystem fs;
    int number_of_clusters = 2;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "K_meansClusteringMapReduce");
        job.setJarByClass(K_meansClusteringMapReduce.class);
        // Mandatory: Job copies the Configuration at construction, so fetch
        // the job's own copy before setting any further properties on it.
        conf = job.getConfiguration();

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatArrayWritable.class);

        job.setMapperClass(K_meansClusteringMap.class);
        job.setReducerClass(K_meansClusteringReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // use a single reducer

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);

        number_of_clusters = Integer.parseInt(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);

        conf.setInt("number_of_clusters", number_of_clusters);
        conf.setInt("feature_size", feature_size);
        conf.setInt("current_iteration_num", iteration);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);
}
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMapReduce.java
public static void main(String[] args) throws Exception {
    String[] theta;
    int iteration = 0, num_of_iteration = 1;
    int feature_size = 0, input_data_size = 0;
    FileSystem fs;
    Float alpha = 0.1f;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "LinearRegressionMapReduce");
        job.setJarByClass(MultipleLinearRegressionMapReduce.class);
        // Needed for propagating "theta": work with the job's own copy of
        // the Configuration, since Job copies the one passed to it.
        conf = job.getConfiguration();

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatWritable.class);

        job.setMapperClass(MultipleLinearRegressionMap.class);
        job.setReducerClass(MultipleLinearRegressionReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // only one reducer

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);

        alpha = Float.parseFloat(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);
        input_data_size = Integer.parseInt(args[5]);

        conf.setFloat("alpha", alpha);
        conf.setInt("feature_size", feature_size);
        conf.setInt("input_data_size", input_data_size);
        conf.setInt("iteration", iteration);

        theta = new String[feature_size];
        if (iteration == 0) { // first iteration: initialize theta to zeros
            for (int i = 0; i < theta.length; i++)
                theta[i] = "0.0";
            conf.setStrings("theta", theta);
        } else { // later iterations: read theta written by the previous run
            try {
                String uri = "/user/hduser/theta.txt";
                fs = FileSystem.get(conf);
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
                theta = br.readLine().split(",");
            } catch (Exception e) {
                e.printStackTrace();
            }
            conf.setStrings("theta", theta);
        }

        for (int i = 0; i < theta.length; i++)
            System.out.println("In MapReduce main function: theta[" + i + "] = " + theta[i]);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMapReduce_Continuous_Features.java
/**
 * @param args command-line arguments: input path, output path, number of classes, number of features
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    int number_of_classes = 1;
    int number_of_features = 1;

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Job job = new Job(conf, "NaiveBayesClassifierMapReduce_Continuous_Features");
    job.setJarByClass(NaiveBayesClassifierMapReduce_Continuous_Features.class);
    // Mandatory: fetch the job's own copy of the Configuration.
    conf = job.getConfiguration();

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapArrayWritable.class);

    job.setMapperClass(NaiveBayesClassifierMap_Continuous_Features.class);
    job.setReducerClass(NaiveBayesClassifierReduce_Continuous_Features.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path out = new Path(args[1]);
    if (fs.exists(out))
        fs.delete(out, true);
    FileOutputFormat.setOutputPath(job, out);

    number_of_classes = Integer.parseInt(args[2]);
    number_of_features = Integer.parseInt(args[3]);
    conf.setInt("number_of_classes", number_of_classes);
    conf.setInt("number_of_features", number_of_features);

    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.mongodb.hadoop.input.MongoMultipleInputs.java
License:Apache License
/**
 * Add an input with a custom {@link InputFormat} and
 * {@link Mapper} to the list of inputs for the map-reduce job.
 *
 * @param job the {@link Job}
 * @param uri MongoDB collection URI to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this input
 * @param mapperClass {@link Mapper} class to use for this input
 * @param query query for this input, as a JSON string
 * @param fields fields (projection) for this input, as a JSON string
 * @param sort sort for this input, as a JSON string
 * @param limit limit for this input
 * @param skip skip for this input
 */
@SuppressWarnings("unchecked")
public static void addInputPath(Job job, String uri, Class<? extends InputFormat> inputFormatClass,
        Class<? extends Mapper> mapperClass, String query, String fields, String sort, int limit, int skip) {
    Configuration conf = job.getConfiguration();
    MongoConfigUtil.addMongoRequest(conf, uri, inputFormatClass, mapperClass, query, fields, sort, limit, skip);
    job.setMapperClass(DelegatingMapper.class);
    job.setInputFormatClass(DelegatingInputFormat.class);
}
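For context, a sketch of how this helper might be called to register two collections with different mappers. The URIs, mapper classes, and query string below are hypothetical, and passing null for unused query/fields/sort arguments is an assumption about how addMongoRequest handles them:

// Hypothetical usage: two Mongo collections as inputs, each with its own mapper.
Job job = new Job(new Configuration(), "multi-input example");
MongoMultipleInputs.addInputPath(job, "mongodb://localhost:27017/db.users",
        MongoInputFormat.class, UsersMapper.class,
        "{\"active\": true}", null, null, 0, 0);
MongoMultipleInputs.addInputPath(job, "mongodb://localhost:27017/db.orders",
        MongoInputFormat.class, OrdersMapper.class,
        null, null, null, 0, 0);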
From source file:com.mongodb.hadoop.pig.BSONStorage.java
License:Apache License
@Override
public void setStoreLocation(final String location, final Job job) throws IOException {
    final Configuration config = job.getConfiguration();
    config.set("mapred.output.file", location);
}
From source file:com.mongodb.hadoop.pig.MongoInsertStorage.java
License:Apache License
public void setStoreLocation(final String location, final Job job) throws IOException {
    final Configuration config = job.getConfiguration();
    LOG.info("Store Location Config: " + config + " For URI: " + location);
    if (!location.startsWith("mongodb://")) {
        throw new IllegalArgumentException(
                "Invalid URI Format. URIs must begin with a mongodb:// protocol string.");
    }
    MongoConfigUtil.setOutputURI(config, location);
}
From source file:com.mongodb.hadoop.pig.MongoStorage.java
License:Apache License
public void setStoreLocation(final String location, final Job job) throws IOException {
    final Configuration config = job.getConfiguration();
    LOG.info("Store Location Config: " + config + " For URI: " + location);
    MongoConfigUtil.setOutputURI(config, location);
    final Properties properties = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
            new String[] { udfContextSignature });
    config.set(PIG_OUTPUT_SCHEMA, properties.getProperty(PIG_OUTPUT_SCHEMA_UDF_CONTEXT));
}
From source file:com.mongodb.hadoop.pig.MongoUpdateStorage.java
License:Apache License
@Override
public void setStoreLocation(final String location, final Job job) throws IOException {
    final Configuration config = job.getConfiguration();
    LOG.info("Store Location Config: " + config + "; For URI: " + location);
    if (!location.startsWith("mongodb://")) {
        throw new IllegalArgumentException(
                "Invalid URI Format. URIs must begin with a mongodb:// protocol string.");
    }
    MongoConfigUtil.setOutputURI(config, location);
}
From source file:com.moz.fiji.mapreduce.bulkimport.FijiBulkImportJobBuilder.java
License:Apache License
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    final Configuration conf = job.getConfiguration();

    // Store the name of the importer to use in the job configuration
    // so the mapper can create instances of it.
    if (null == mBulkImporterClass) {
        throw new JobConfigurationException("Must specify a bulk importer.");
    }
    conf.setClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, mBulkImporterClass, FijiBulkImporter.class);

    mJobOutput.configure(job);

    // Configure the mapper and reducer. This part depends on whether we're going to write
    // to HFiles or directly to the table.
    configureJobForHFileOutput(job);

    job.setJobName("Fiji bulk import: " + mBulkImporterClass.getSimpleName());

    // Construct the bulk importer instance.
    mBulkImporter = ReflectionUtils.newInstance(mBulkImporterClass, conf);

    // Configure the MapReduce job (requires mBulkImporter to be set properly):
    super.configureJob(job);
}
From source file:com.moz.fiji.mapreduce.DistributedCacheJars.java
License:Apache License
/**
 * Adds the jars from a directory into the distributed cache of a job.
 *
 * @param job The job to configure.
 * @param jarDirectory A path to a directory of jar files.
 * @throws IOException on I/O error.
 */
public static void addJarsToDistributedCache(Job job, String jarDirectory) throws IOException {
    addJarsToDistributedCache(job, qualifiedPathFromString(jarDirectory, job.getConfiguration()));
}
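A possible call site for this helper, sketched for illustration; the job name and the directory path are made up:

// Hypothetical usage: ship every jar under /opt/myapp/lib with the job.
Job job = new Job(new Configuration(), "cache example");
DistributedCacheJars.addJarsToDistributedCache(job, "/opt/myapp/lib");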