List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
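A point worth noting before the examples: the Job constructor makes its own copy of the Configuration passed to it, so changes made to the original Configuration object after the job is created are not visible to the job. This is why several examples below re-fetch the job's copy via job.getConfiguration() before setting further properties. A minimal sketch of the pitfall (the property name "my.example.key" is made up for illustration):

Configuration conf = new Configuration();
Job job = new Job(conf, "example");

// Wrong: Job copied conf at construction, so the job never sees this value.
conf.setInt("my.example.key", 42);

// Right: mutate the job's own copy of the Configuration.
Configuration jobConf = job.getConfiguration();
jobConf.setInt("my.example.key", 42);

In newer Hadoop versions the deprecated new Job(conf, name) constructor is replaced by the Job.getInstance(conf, name) factory method; the copy semantics are the same.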
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMapReduce.java
public static void main(String[] args) throws Exception {
    int iteration = 0, num_of_iteration = 30;
    int feature_size = 2;
    FileSystem fs;
    int number_of_clusters = 2;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "K_meansClusteringMapReduce");
        job.setJarByClass(K_meansClusteringMapReduce.class);
        // Mandatory: Job copies the Configuration at construction, so fetch
        // the job's own copy before setting any further properties on it.
        conf = job.getConfiguration();

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatArrayWritable.class);

        job.setMapperClass(K_meansClusteringMap.class);
        job.setReducerClass(K_meansClusteringReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // use a single reducer

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);

        number_of_clusters = Integer.parseInt(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);

        conf.setInt("number_of_clusters", number_of_clusters);
        conf.setInt("feature_size", feature_size);
        conf.setInt("current_iteration_num", iteration);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);
}
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMapReduce.java
public static void main(String[] args) throws Exception {
    String[] theta;
    int iteration = 0, num_of_iteration = 1;
    int feature_size = 0, input_data_size = 0;
    FileSystem fs;
    Float alpha = 0.1f;

    do {
        Configuration conf = new Configuration();
        fs = FileSystem.get(conf);

        Job job = new Job(conf, "LinearRegressionMapReduce");
        job.setJarByClass(MultipleLinearRegressionMapReduce.class);
        // Needed for propagating "theta": work with the job's own copy of
        // the Configuration, since Job copies the one passed to it.
        conf = job.getConfiguration();

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(FloatWritable.class);

        job.setMapperClass(MultipleLinearRegressionMap.class);
        job.setReducerClass(MultipleLinearRegressionReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1); // only one reducer

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path out = new Path(args[1]);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);

        alpha = Float.parseFloat(args[2]);
        num_of_iteration = Integer.parseInt(args[3]);
        feature_size = Integer.parseInt(args[4]);
        input_data_size = Integer.parseInt(args[5]);

        conf.setFloat("alpha", alpha);
        conf.setInt("feature_size", feature_size);
        conf.setInt("input_data_size", input_data_size);
        conf.setInt("iteration", iteration);

        theta = new String[feature_size];
        if (iteration == 0) { // first iteration: initialize theta to zeros
            for (int i = 0; i < theta.length; i++)
                theta[i] = "0.0";
            conf.setStrings("theta", theta);
        } else { // later iterations: read theta written by the previous run
            try {
                String uri = "/user/hduser/theta.txt";
                fs = FileSystem.get(conf);
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
                theta = br.readLine().split(",");
            } catch (Exception e) {
                e.printStackTrace();
            }
            conf.setStrings("theta", theta);
        }

        for (int i = 0; i < theta.length; i++)
            System.out.println("In MapReduce main function: theta[" + i + "] = " + theta[i]);

        try {
            job.waitForCompletion(true);
            iteration++;
        } catch (IOException e) {
            e.printStackTrace();
        }
    } while (iteration < num_of_iteration);
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMapReduce_Continuous_Features.java
/**
 * @param args command-line arguments: input path, output path, number of classes, number of features
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    int number_of_classes = 1;
    int number_of_features = 1;

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Job job = new Job(conf, "NaiveBayesClassifierMapReduce_Continuous_Features");
    job.setJarByClass(NaiveBayesClassifierMapReduce_Continuous_Features.class);
    // Mandatory: fetch the job's own copy of the Configuration.
    conf = job.getConfiguration();

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapArrayWritable.class);

    job.setMapperClass(NaiveBayesClassifierMap_Continuous_Features.class);
    job.setReducerClass(NaiveBayesClassifierReduce_Continuous_Features.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path out = new Path(args[1]);
    if (fs.exists(out))
        fs.delete(out, true);
    FileOutputFormat.setOutputPath(job, out);

    number_of_classes = Integer.parseInt(args[2]);
    number_of_features = Integer.parseInt(args[3]);
    conf.setInt("number_of_classes", number_of_classes);
    conf.setInt("number_of_features", number_of_features);

    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.mongodb.hadoop.input.MongoMultipleInputs.java
License:Apache License
/**
 * Add an input with a custom {@link InputFormat} and
 * {@link Mapper} to the list of inputs for the map-reduce job.
 *
 * @param job the {@link Job}
 * @param uri MongoDB collection URI to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this input
 * @param mapperClass {@link Mapper} class to use for this input
 * @param query query for this input, as a JSON string
 * @param fields fields (projection) for this input, as a JSON string
 * @param sort sort for this input, as a JSON string
 * @param limit limit for this input
 * @param skip skip for this input
 */
@SuppressWarnings("unchecked")
public static void addInputPath(Job job, String uri, Class<? extends InputFormat> inputFormatClass,
        Class<? extends Mapper> mapperClass, String query, String fields, String sort, int limit, int skip) {
    Configuration conf = job.getConfiguration();
    MongoConfigUtil.addMongoRequest(conf, uri, inputFormatClass, mapperClass, query, fields, sort, limit, skip);
    job.setMapperClass(DelegatingMapper.class);
    job.setInputFormatClass(DelegatingInputFormat.class);
}
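For context, a sketch of how this helper might be called to register two collections with different mappers. The URIs, mapper classes, and query string below are hypothetical, and passing null for unused query/fields/sort arguments is an assumption about how addMongoRequest handles them:

// Hypothetical usage: two Mongo collections as inputs, each with its own mapper.
Job job = new Job(new Configuration(), "multi-input example");
MongoMultipleInputs.addInputPath(job, "mongodb://localhost:27017/db.users",
        MongoInputFormat.class, UsersMapper.class,
        "{\"active\": true}", null, null, 0, 0);
MongoMultipleInputs.addInputPath(job, "mongodb://localhost:27017/db.orders",
        MongoInputFormat.class, OrdersMapper.class,
        null, null, null, 0, 0);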
From source file:com.mongodb.hadoop.pig.BSONStorage.java
License:Apache License
@Override
public void setStoreLocation(final String location, final Job job) throws IOException {
    final Configuration config = job.getConfiguration();
    config.set("mapred.output.file", location);
}
From source file:com.mongodb.hadoop.pig.MongoInsertStorage.java
License:Apache License
public void setStoreLocation(final String location, final Job job) throws IOException {
    final Configuration config = job.getConfiguration();
    LOG.info("Store Location Config: " + config + " For URI: " + location);
    if (!location.startsWith("mongodb://")) {
        throw new IllegalArgumentException(
                "Invalid URI Format. URIs must begin with a mongodb:// protocol string.");
    }
    MongoConfigUtil.setOutputURI(config, location);
}
From source file:com.mongodb.hadoop.pig.MongoStorage.java
License:Apache License
public void setStoreLocation(final String location, final Job job) throws IOException {
    final Configuration config = job.getConfiguration();
    LOG.info("Store Location Config: " + config + " For URI: " + location);
    MongoConfigUtil.setOutputURI(config, location);
    final Properties properties = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
            new String[] { udfContextSignature });
    config.set(PIG_OUTPUT_SCHEMA, properties.getProperty(PIG_OUTPUT_SCHEMA_UDF_CONTEXT));
}
From source file:com.mongodb.hadoop.pig.MongoUpdateStorage.java
License:Apache License
@Override
public void setStoreLocation(final String location, final Job job) throws IOException {
    final Configuration config = job.getConfiguration();
    LOG.info("Store Location Config: " + config + "; For URI: " + location);
    if (!location.startsWith("mongodb://")) {
        throw new IllegalArgumentException(
                "Invalid URI Format. URIs must begin with a mongodb:// protocol string.");
    }
    MongoConfigUtil.setOutputURI(config, location);
}
From source file:com.moz.fiji.mapreduce.bulkimport.FijiBulkImportJobBuilder.java
License:Apache License
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    final Configuration conf = job.getConfiguration();

    // Store the name of the importer to use in the job configuration
    // so the mapper can create instances of it.
    if (null == mBulkImporterClass) {
        throw new JobConfigurationException("Must specify a bulk importer.");
    }
    conf.setClass(FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, mBulkImporterClass, FijiBulkImporter.class);

    mJobOutput.configure(job);

    // Configure the mapper and reducer. This part depends on whether we're going to write
    // to HFiles or directly to the table.
    configureJobForHFileOutput(job);

    job.setJobName("Fiji bulk import: " + mBulkImporterClass.getSimpleName());

    // Construct the bulk importer instance.
    mBulkImporter = ReflectionUtils.newInstance(mBulkImporterClass, conf);

    // Configure the MapReduce job (requires mBulkImporter to be set properly):
    super.configureJob(job);
}
From source file:com.moz.fiji.mapreduce.DistributedCacheJars.java
License:Apache License
/**
 * Adds the jars from a directory into the distributed cache of a job.
 *
 * @param job The job to configure.
 * @param jarDirectory A path to a directory of jar files.
 * @throws IOException on I/O error.
 */
public static void addJarsToDistributedCache(Job job, String jarDirectory) throws IOException {
    addJarsToDistributedCache(job, qualifiedPathFromString(jarDirectory, job.getConfiguration()));
}
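A possible call site for this helper, sketched for illustration; the job name and the directory path are made up:

// Hypothetical usage: ship every jar under /opt/myapp/lib with the job.
Job job = new Job(new Configuration(), "cache example");
DistributedCacheJars.addJarsToDistributedCache(job, "/opt/myapp/lib");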