List of usage examples for the org.apache.hadoop.mapred.JobConf constructor

public JobConf(Configuration conf)
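Every example on this page follows the same basic pattern: an existing Configuration (held in a field such as defaultConf in the tests below) is wrapped in a JobConf, which is then used to compute input splits. As a minimal, self-contained sketch of that pattern (the class name and the input path are illustrative, not taken from the projects below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // start from a plain Configuration, as the tests below do with "defaultConf"
        Configuration defaultConf = new Configuration();

        // wrap it in a JobConf; the JobConf copies the Configuration's properties
        // and adds the old-API (org.apache.hadoop.mapred) job-level accessors
        JobConf job = new JobConf(defaultConf);

        // typical follow-up: point the job at its input directory
        // ("/tmp/input" is a placeholder path)
        FileInputFormat.setInputPaths(job, new Path("/tmp/input"));

        System.out.println("input dirs: " + job.get("mapred.input.dir"));
    }
}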
From source file: com.cloudera.knittingboar.sgd.TestBaseSGD.java
License: Apache License

public void testTrainer() throws Exception {

    POLRWorkerDriver olr_run = new POLRWorkerDriver();

    // generate the debug conf ---- normally setup by YARN stuff
    olr_run.setConf(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    olr_run.setupInputSplit(custom_reader);
    olr_run.LoadConfigVarsLocally();
    olr_run.Setup();

    for (int x = 0; x < 25; x++) {
        olr_run.RunNextTrainingBatch();
        System.out.println("---------- cycle " + x + " done ------------- ");
    } // for

    //olr_run.PrintModelStats();
    //LogisticModelParameters lmp = model_builder.lmp; //TrainLogistic.getParameters();

    assertEquals(1.0e-4, olr_run.polr_modelparams.getLambda(), 1.0e-9);
    assertEquals(20, olr_run.polr_modelparams.getNumFeatures());
    assertTrue(olr_run.polr_modelparams.useBias());
    assertEquals("color", olr_run.polr_modelparams.getTargetVariable());

    //CsvRecordFactory csv = model_builder.lmp.getCsvRecordFactory();
    //assertEquals("[1, 2]", Sets.newTreeSet(olr_run.csvVectorFactory.getTargetCategories()).toString());
    //assertEquals("[Intercept Term, x, y]", Sets.newTreeSet(olr_run.csvVectorFactory.getPredictors()).toString());

    System.out.println("done!");
    assertNotNull(0);
}
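The generateDebugSplits(...) helper used above, and in most of the examples below, is internal to the test code. Based on the split-generation calls that appear explicitly in the TestRunPOLRMasterAndSingleWorker example further down, a plausible equivalent might look like the following sketch (the helper name, signature, and the numSplits value are assumptions, not the project's actual code):

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class DebugSplits {
    // hypothetical reconstruction of the project's internal helper
    static InputSplit[] generateDebugSplits(Path inputDir, JobConf job) throws IOException {
        // point the job at the test data directory
        FileInputFormat.setInputPaths(job, inputDir);

        // the old-API TextInputFormat must be configured before computing splits
        TextInputFormat format = new TextInputFormat();
        format.configure(job);

        // ask for one split; the framework may return more for multi-block input
        return format.getSplits(job, 1);
    }
}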
From source file: com.cloudera.knittingboar.sgd.TestPOLRModelParameters.java
License: Apache License

public void testSaveLoadForDonutRun() throws IOException, Exception {

    POLRWorkerDriver olr_run = new POLRWorkerDriver();

    // generate the debug conf ---- normally setup by YARN stuff
    olr_run.setConf(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    olr_run.setupInputSplit(custom_reader);
    olr_run.LoadConfigVarsLocally();
    olr_run.Setup();

    for (int x = 0; x < 25; x++) {
        olr_run.RunNextTrainingBatch();
        System.out.println("---------- cycle " + x + " done ------------- ");
    } // for

    //olr_run.PrintModelStats();
    //LogisticModelParameters lmp = model_builder.lmp; //TrainLogistic.getParameters();

    assertEquals(1.0e-4, olr_run.polr_modelparams.getLambda(), 1.0e-9);
    assertEquals(20, olr_run.polr_modelparams.getNumFeatures());
    assertTrue(olr_run.polr_modelparams.useBias());
    assertEquals("color", olr_run.polr_modelparams.getTargetVariable());

    //localFs.delete(workDir, true);

    olr_run.SaveModelLocally("/tmp/polr_run.model");

    POLRWorkerDriver polr_new = new POLRWorkerDriver();
}
From source file: com.cloudera.knittingboar.sgd.TestPOLRWorkerDriver.java
License: Apache License

/**
 * [ ******* Rebuilding this currently ******* ]
 *
 * Tests replacing the beta, presumably from the master, after we've run POLR a bit
 *
 * @throws Exception
 */
public void testReplaceBetaMechanics() throws Exception {

    System.out.println("\n------ testReplaceBetaMechanics --------- ");

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);
    System.out.println("split count: " + splits.length);

    POLRWorkerDriver worker_model_builder = new POLRWorkerDriver();

    // ------------------
    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setConf(this.generateDebugConfigurationObject());

    System.out.println("split: " + splits[0].toString());

    InputRecordsSplit custom_reader_0 = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    worker_model_builder.setupInputSplit(custom_reader_0);

    // now load the conf stuff into locally used vars
    try {
        worker_model_builder.LoadConfigVarsLocally();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        System.out.println("Conf load fail: shutting down.");
        assertEquals(0, 1);
    }

    // now construct any needed machine learning data structures based on config
    worker_model_builder.Setup();
    // ------------------

    worker_model_builder.RunNextTrainingBatch();

    // ------------------- now replace beta ------------

    double val1 = -1.0;

    GradientBuffer g0 = new GradientBuffer(2, worker_model_builder.FeatureVectorSize);
    for (int x = 0; x < worker_model_builder.FeatureVectorSize; x++) {
        g0.setCell(0, x, val1);
    }

    GlobalParameterVectorUpdateMessage pvec_msg = new GlobalParameterVectorUpdateMessage("127.0.0.1", 2,
            worker_model_builder.FeatureVectorSize);
    pvec_msg.parameter_vector = g0.gamma.clone();

    worker_model_builder.ProcessIncomingParameterVectorMessage(pvec_msg);
    System.out.println("Updated worker node's pvec via msg");

    worker_model_builder.polr.Debug_PrintGamma();

    for (int x = 0; x < worker_model_builder.FeatureVectorSize; x++) {
        assertEquals(worker_model_builder.polr.noReallyGetBeta().get(0, x), val1);
    }

    System.out.println("--------------------------------\n");
}
From source file: com.cloudera.knittingboar.sgd.TestPOLRWorkerDriver.java
License: Apache License

/**
 * [ ******* Rebuilding this currently ******* ]
 *
 * @throws Exception
 */
public void testPOLROnFullDatasetRun() throws Exception {

    POLRWorkerDriver worker_model_builder = new POLRWorkerDriver();

    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setConf(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    worker_model_builder.setupInputSplit(custom_reader);
    worker_model_builder.LoadConfigVarsLocally();
    worker_model_builder.Setup();

    for (int x = 0; x < 25; x++) {
        worker_model_builder.RunNextTrainingBatch();
        System.out.println("---------- cycle " + x + " done ------------- ");
    } // for

    // ------ move this loop into the POLR Worker Driver --------
    worker_model_builder.PrintModelStats();

    assertEquals(1.0e-4, worker_model_builder.polr_modelparams.getLambda(), 1.0e-9);
    assertEquals(10, worker_model_builder.polr_modelparams.getNumFeatures());
    assertTrue(worker_model_builder.polr_modelparams.useBias());
    assertEquals("color", worker_model_builder.polr_modelparams.getTargetVariable());

    System.out.println("done!");
    assertNotNull(0);
}
From source file: com.cloudera.knittingboar.sgd.TestPOLRWorkerNode.java
License: Apache License

/**
 * [ ******* Rebuilding this currently ******* ]
 *
 * Tests replacing the beta, presumably from the master, after we've run POLR a bit
 *
 * @throws Exception
 */
public void testReplaceBetaMechanics() throws Exception {

    System.out.println("\n------ testReplaceBetaMechanics --------- ");

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);
    System.out.println("split count: " + splits.length);

    POLRWorkerNode worker_model_builder = new POLRWorkerNode();

    // ------------------
    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setup(this.generateDebugConfigurationObject());

    System.out.println("split: " + splits[0].toString());

    TextRecordParser txt_reader = new TextRecordParser();
    long len = Integer.parseInt(splits[0].toString().split(":")[2].split("\\+")[1]);
    txt_reader.setFile(splits[0].toString().split(":")[1], 0, len);

    worker_model_builder.setRecordParser(txt_reader);

    // worker_model_builder.RunNextTrainingBatch();
    worker_model_builder.compute();

    // worker_model_builder.polr.Set

    // ------------------- now replace beta ------------

    double val1 = -1.0;

    // GradientBuffer g0 = new GradientBuffer( 2, worker_model_builder.FeatureVectorSize );
    Matrix m = new DenseMatrix(2, feature_vector_size);
    for (int x = 0; x < feature_vector_size; x++) {
        m.set(0, x, val1);
    }

    worker_model_builder.polr.SetBeta(m);

    for (int x = 0; x < feature_vector_size; x++) {
        assertEquals(worker_model_builder.polr.noReallyGetBeta().get(0, x), val1);
    }

    System.out.println("--------------------------------\n");
}
From source file: com.cloudera.knittingboar.sgd.TestPOLRWorkerNode.java
License: Apache License

/**
 * [ ******* Rebuilding this currently ******* ]
 *
 * @throws Exception
 */
public void testPOLROnFullDatasetRun() throws Exception {

    POLRWorkerNode worker_model_builder = new POLRWorkerNode();

    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setup(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    // InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);
    // TODO: set this up to run through the conf pathways
    // worker_model_builder.setupInputSplit(custom_reader);
    /*
    worker_model_builder.LoadConfigVarsLocally();
    worker_model_builder.Setup();
    */

    TextRecordParser txt_reader = new TextRecordParser();
    long len = Integer.parseInt(splits[0].toString().split(":")[2].split("\\+")[1]);
    txt_reader.setFile(splits[0].toString().split(":")[1], 0, len);

    worker_model_builder.setRecordParser(txt_reader);

    //for ( int x = 0; x < 5; x++) {
    worker_model_builder.compute();
    //System.out.println( "---------- cycle " + x + " done ------------- " );
    //} // for

    // ------ move this loop into the POLR Worker Driver --------
    // worker_model_builder.PrintModelStats();

    assertEquals(1.0e-4, worker_model_builder.polr_modelparams.getLambda(), 1.0e-9);
    assertEquals(10, worker_model_builder.polr_modelparams.getNumFeatures());
    assertTrue(worker_model_builder.polr_modelparams.useBias());
    assertEquals("color", worker_model_builder.polr_modelparams.getTargetVariable());

    System.out.println("done!");
    assertNotNull(0);
}
From source file: com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndNWorkers.java
License: Apache License

public void testRunMasterAndTwoWorkers() throws Exception {

    long ts_start = System.currentTimeMillis();
    System.out.println("start-ms:" + ts_start);

    POLRMasterDriver master = new POLRMasterDriver();

    // ------------------
    // generate the debug conf ---- normally setup by YARN stuff
    master.setConf(this.generateDebugConfigurationObject());

    // now load the conf stuff into locally used vars
    try {
        master.LoadConfigVarsLocally();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        System.out.println("Conf load fail: shutting down.");
        assertEquals(0, 1);
    }

    // now construct any needed machine learning data structures based on config
    master.Setup();
    // ------------------

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    // TODO: work on this, splits are generating for everything in dir
    InputSplit[] splits = generateDebugSplits(workDir, job);
    System.out.println("split count: " + splits.length);

    ArrayList<POLRWorkerDriver> workers = new ArrayList<POLRWorkerDriver>();

    for (int x = 0; x < splits.length; x++) {
    //for ( int x = 0; x < 1; x++ ) {

        POLRWorkerDriver worker_model_builder = new POLRWorkerDriver(); //workers.get(x);
        worker_model_builder.internalID = String.valueOf(x);
        // simulates the conf stuff
        worker_model_builder.setConf(this.generateDebugConfigurationObject());

        InputRecordsSplit custom_reader_0 = new InputRecordsSplit(job, splits[x]);

        // TODO: set this up to run through the conf pathways
        worker_model_builder.setupInputSplit(custom_reader_0);
        worker_model_builder.LoadConfigVarsLocally();
        worker_model_builder.Setup();

        workers.add(worker_model_builder);

        System.out.println("> Setup Worker " + x);
    }

    boolean bContinuePass = true;
    int x = 0;

    while (bContinuePass) {

        bContinuePass = false;

        for (int worker_id = 0; worker_id < workers.size(); worker_id++) {

            //arContinueTracker[worker_id]
            boolean result = workers.get(worker_id).RunNextTrainingBatch();
            if (result) {
                bContinuePass = true;
            }

            //GradientUpdateMessage msg0 = workers.get(worker_id).GenerateUpdateMessage();
            GradientUpdateMessage msg0 = workers.get(worker_id).GenerateParamVectorUpdateMessage();

            master.AddIncomingGradientMessageToQueue(msg0);
            master.RecvGradientMessage(); // process msg
        }

        master.AveragePVec_GenerateGlobalUpdateVector(workers.size());

        if (bContinuePass) {

            /*
            //master.GenerateGlobalUpdateVector();
            master.AveragePVec_GenerateGlobalUpdateVector(workers.size());
            */

            GlobalParameterVectorUpdateMessage returned_msg = master.GetNextGlobalUpdateMsgFromQueue();

            // process global updates
            for (int worker_id = 0; worker_id < workers.size(); worker_id++) {
                workers.get(worker_id).ProcessIncomingParameterVectorMessage(returned_msg);
            }

            System.out.println("---------- cycle " + x + " done in pass "
                    + workers.get(0).GetCurrentLocalPassCount() + " ------------- ");

        } else {

            System.out.println("---------- cycle " + x + " done in pass "
                    + workers.get(0).GetCurrentLocalPassCount() + " ------------- ");

            System.out.println("> Saving Model...");
            master.SaveModelLocally("/tmp/TestRunPOLRMasterAndNWorkers.20news.model");
        } // if

        x++;
    } // for

    Utils.PrintVectorSection(master.global_parameter_vector.gamma.viewRow(0), 3);

    long ts_total = System.currentTimeMillis() - ts_start;
    System.out.println("total time in ms:" + ts_total);
}
From source file: com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndSingleWorker.java
License: Apache License

@Test
public void testRunSingleWorkerSingleMaster() throws Exception {
    // TODO a test without assertions is not a test

    POLRMasterDriver master = new POLRMasterDriver();

    // ------------------
    // generate the debug conf ---- normally setup by YARN stuff
    master.setConf(configuration);

    // now load the conf stuff into locally used vars
    master.LoadConfigVarsLocally();

    // now construct any needed machine learning data structures based on config
    master.Setup();
    // ------------------

    POLRWorkerDriver worker_model_builder_0 = new POLRWorkerDriver();

    // simulates the conf stuff
    worker_model_builder_0.setConf(configuration);

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    long block_size = localFs.getDefaultBlockSize(workDir);
    LOG.info("default block size: " + (block_size / 1024 / 1024) + "MB");

    // ---- set where we'll read the input files from -------------
    FileInputFormat.setInputPaths(job, workDir);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    InputSplit[] splits = format.getSplits(job, 1);

    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    worker_model_builder_0.setupInputSplit(custom_reader);
    worker_model_builder_0.LoadConfigVarsLocally();
    worker_model_builder_0.Setup();

    LOG.info("> Feature Size: " + worker_model_builder_0.FeatureVectorSize);
    LOG.info("> Category Size: " + worker_model_builder_0.num_categories);

    for (int x = 0; x < 25; x++) {

        worker_model_builder_0.RunNextTrainingBatch();

        GradientUpdateMessage msg = worker_model_builder_0.GenerateUpdateMessage();

        master.AddIncomingGradientMessageToQueue(msg);
        master.RecvGradientMessage(); // process msg

        master.GenerateGlobalUpdateVector();

        GlobalParameterVectorUpdateMessage returned_msg = master.GetNextGlobalUpdateMsgFromQueue();
        worker_model_builder_0.ProcessIncomingParameterVectorMessage(returned_msg);

        LOG.info("---------- cycle " + x + " done ------------- ");
    } // for

    worker_model_builder_0.Debug();
}
From source file: com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndTwoWorkers.java
License: Apache License

@Test
public void testRunMasterAndTwoWorkers() throws Exception {
    // TODO a test without assertions is not a test

    POLRMasterDriver master = new POLRMasterDriver();

    // ------------------
    // generate the debug conf ---- normally setup by YARN stuff
    master.setConf(configuration);

    // now load the conf stuff into locally used vars
    master.LoadConfigVarsLocally();

    // now construct any needed machine learning data structures based on config
    master.Setup();
    // ------------------

    POLRWorkerDriver worker_model_builder_0 = new POLRWorkerDriver();
    worker_model_builder_0.internalID = "0";
    // simulates the conf stuff
    worker_model_builder_0.setConf(configuration);

    POLRWorkerDriver worker_model_builder_1 = new POLRWorkerDriver();
    worker_model_builder_1.internalID = "1";
    // simulates the conf stuff
    worker_model_builder_1.setConf(configuration);

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    long block_size = localFs.getDefaultBlockSize(workDir);
    LOG.info("default block size: " + (block_size / 1024 / 1024) + "MB");

    // ---- set where we'll read the input files from -------------
    FileInputFormat.setInputPaths(job, workDir);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    InputSplit[] splits = format.getSplits(job, 2);

    InputRecordsSplit custom_reader_0 = new InputRecordsSplit(job, splits[0]);
    InputRecordsSplit custom_reader_1 = new InputRecordsSplit(job, splits[1]);

    // TODO: set this up to run through the conf pathways
    worker_model_builder_0.setupInputSplit(custom_reader_0);
    worker_model_builder_0.LoadConfigVarsLocally();
    worker_model_builder_0.Setup();

    worker_model_builder_1.setupInputSplit(custom_reader_1);
    worker_model_builder_1.LoadConfigVarsLocally();
    worker_model_builder_1.Setup();

    LOG.info("> Feature Size: " + worker_model_builder_0.FeatureVectorSize);
    LOG.info("> Category Size: " + worker_model_builder_0.num_categories);

    for (int x = 0; x < 30; x++) {

        // run batch 0
        worker_model_builder_0.RunNextTrainingBatch();
        GradientUpdateMessage msg0 = worker_model_builder_0.GenerateUpdateMessage();

        worker_model_builder_1.RunNextTrainingBatch();
        GradientUpdateMessage msg1 = worker_model_builder_1.GenerateUpdateMessage();

        master.AddIncomingGradientMessageToQueue(msg0);
        master.AddIncomingGradientMessageToQueue(msg1);
        master.RecvGradientMessage(); // process msg
        master.RecvGradientMessage(); // process msg

        master.GenerateGlobalUpdateVector();

        GlobalParameterVectorUpdateMessage returned_msg = master.GetNextGlobalUpdateMsgFromQueue();
        worker_model_builder_0.ProcessIncomingParameterVectorMessage(returned_msg);
        worker_model_builder_1.ProcessIncomingParameterVectorMessage(returned_msg);

        LOG.info("---------- cycle " + x + " done ------------- ");
    } // for
}
From source file: com.cloudera.knittingboar.sgd.TestRunPOLRWorkerSingleBatch.java
License: Apache License

public void testRunSingleBatch() throws Exception {

    POLRWorkerDriver worker_model_builder_0 = new POLRWorkerDriver();

    // simulates the conf stuff
    worker_model_builder_0.setConf(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits("kboar-shard-0.txt", job);

    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    worker_model_builder_0.setupInputSplit(custom_reader);
    worker_model_builder_0.LoadConfigVarsLocally();
    worker_model_builder_0.Setup();

    System.out.println("> Feature Size: " + worker_model_builder_0.FeatureVectorSize);
    System.out.println("> Category Size: " + worker_model_builder_0.num_categories);

    worker_model_builder_0.RunNextTrainingBatch();

    GradientUpdateMessage msg = worker_model_builder_0.GenerateUpdateMessage();
    msg.gradient.Debug();
}