Example usage for org.apache.hadoop.mapred JobConf JobConf

List of usage examples for org.apache.hadoop.mapred JobConf JobConf

Introduction

This page collects example usages of the org.apache.hadoop.mapred JobConf JobConf constructor, drawn from the source files listed below.

Prototype

public JobConf(boolean loadDefaults) 

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
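
For illustration, here is a minimal sketch (written for this page, not taken from the source files below) of what the loadDefaults flag controls: passing false skips the default resources (core-default.xml, core-site.xml, and so on), so the configuration starts out empty apart from what the caller sets, while passing true behaves like the no-argument constructor.

import org.apache.hadoop.mapred.JobConf;

public class JobConfLoadDefaultsSketch {

    public static void main(String[] args) {

        // loadDefaults = false: the default resources are not read, so only
        // properties set explicitly here are present.
        JobConf bare = new JobConf(false);
        bare.set("mapred.job.name", "no-defaults-example");
        System.out.println("job name: " + bare.get("mapred.job.name"));

        // loadDefaults = true: equivalent to new JobConf(); default resources
        // such as core-default.xml are read as usual.
        JobConf withDefaults = new JobConf(true);
        System.out.println("io.file.buffer.size = " + withDefaults.get("io.file.buffer.size"));

    }

}

Note that the usage examples below mostly construct the JobConf from a shared Configuration object (new JobConf(defaultConf)), which calls the closely related JobConf(Configuration) overload rather than JobConf(boolean).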

Usage

From source file:com.cloudera.knittingboar.sgd.TestBaseSGD.java

License:Apache License

public void testTrainer() throws Exception {

    POLRWorkerDriver olr_run = new POLRWorkerDriver();

    // generate the debug conf ---- normally setup by YARN stuff
    olr_run.setConf(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    olr_run.setupInputSplit(custom_reader);

    olr_run.LoadConfigVarsLocally();

    olr_run.Setup();

    for (int x = 0; x < 25; x++) {

        olr_run.RunNextTrainingBatch();

        System.out.println("---------- cycle " + x + " done ------------- ");

    } // for    

    //olr_run.PrintModelStats();

    //LogisticModelParameters lmp = model_builder.lmp;//TrainLogistic.getParameters();
    assertEquals(1.0e-4, olr_run.polr_modelparams.getLambda(), 1.0e-9);
    assertEquals(20, olr_run.polr_modelparams.getNumFeatures());
    assertTrue(olr_run.polr_modelparams.useBias());
    assertEquals("color", olr_run.polr_modelparams.getTargetVariable());
    //CsvRecordFactory csv = model_builder.lmp.getCsvRecordFactory();
    //   assertEquals("[1, 2]", Sets.newTreeSet(olr_run.csvVectorFactory.getTargetCategories()).toString());
    // assertEquals("[Intercept Term, x, y]", Sets.newTreeSet(olr_run.csvVectorFactory.getPredictors()).toString());

    System.out.println("done!");

    assertNotNull(0); // placeholder assertion: always passes

}

From source file:com.cloudera.knittingboar.sgd.TestPOLRModelParameters.java

License:Apache License

public void testSaveLoadForDonutRun() throws IOException, Exception {

    POLRWorkerDriver olr_run = new POLRWorkerDriver();

    // generate the debug conf ---- normally setup by YARN stuff
    olr_run.setConf(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    olr_run.setupInputSplit(custom_reader);

    olr_run.LoadConfigVarsLocally();

    olr_run.Setup();

    for (int x = 0; x < 25; x++) {

        olr_run.RunNextTrainingBatch();

        System.out.println("---------- cycle " + x + " done ------------- ");

    } // for    

    //    olr_run.PrintModelStats();

    //LogisticModelParameters lmp = model_builder.lmp;//TrainLogistic.getParameters();
    assertEquals(1.0e-4, olr_run.polr_modelparams.getLambda(), 1.0e-9);
    assertEquals(20, olr_run.polr_modelparams.getNumFeatures());
    assertTrue(olr_run.polr_modelparams.useBias());
    assertEquals("color", olr_run.polr_modelparams.getTargetVariable());

    //localFs.delete(workDir, true);

    olr_run.SaveModelLocally("/tmp/polr_run.model");

    POLRWorkerDriver polr_new = new POLRWorkerDriver();

}

From source file:com.cloudera.knittingboar.sgd.TestPOLRWorkerDriver.java

License:Apache License

/**
 * [ ******* Rebuilding this currently ******* ]
 *
 * Tests replacing the beta, presumably from the master, after we've run POLR a bit 
 * @throws Exception 
 */
public void testReplaceBetaMechanics() throws Exception {

    System.out.println("\n------ testReplaceBetaMechanics --------- ");

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    System.out.println("split count: " + splits.length);

    POLRWorkerDriver worker_model_builder = new POLRWorkerDriver();

    // ------------------    
    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setConf(this.generateDebugConfigurationObject());

    System.out.println("split: " + splits[0].toString());
    InputRecordsSplit custom_reader_0 = new InputRecordsSplit(job, splits[0]);
    // TODO: set this up to run through the conf pathways
    worker_model_builder.setupInputSplit(custom_reader_0);

    // now load the conf stuff into locally used vars
    try {
        worker_model_builder.LoadConfigVarsLocally();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        System.out.println("Conf load fail: shutting down.");
        assertEquals(0, 1); // deliberately fail the test
    }
    // now construct any needed machine learning data structures based on config
    worker_model_builder.Setup();
    // ------------------

    worker_model_builder.RunNextTrainingBatch();

    // ------------------- now replace beta ------------

    double val1 = -1.0;

    GradientBuffer g0 = new GradientBuffer(2, worker_model_builder.FeatureVectorSize);

    for (int x = 0; x < worker_model_builder.FeatureVectorSize; x++) {

        g0.setCell(0, x, val1);

    }

    GlobalParameterVectorUpdateMessage pvec_msg = new GlobalParameterVectorUpdateMessage("127.0.0.1", 2,
            worker_model_builder.FeatureVectorSize);
    pvec_msg.parameter_vector = g0.gamma.clone();

    worker_model_builder.ProcessIncomingParameterVectorMessage(pvec_msg);

    System.out.println("Updated worker node's pvec via msg");

    worker_model_builder.polr.Debug_PrintGamma();

    for (int x = 0; x < worker_model_builder.FeatureVectorSize; x++) {

        assertEquals(worker_model_builder.polr.noReallyGetBeta().get(0, x), val1);

    }

    System.out.println("--------------------------------\n");

}

From source file:com.cloudera.knittingboar.sgd.TestPOLRWorkerDriver.java

License:Apache License

/**
 * [ ******* Rebuilding this currently ******* ]
 * @throws Exception
 */
public void testPOLROnFullDatasetRun() throws Exception {

    POLRWorkerDriver worker_model_builder = new POLRWorkerDriver();

    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setConf(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    worker_model_builder.setupInputSplit(custom_reader);

    worker_model_builder.LoadConfigVarsLocally();

    worker_model_builder.Setup();

    for (int x = 0; x < 25; x++) {

        worker_model_builder.RunNextTrainingBatch();

        System.out.println("---------- cycle " + x + " done ------------- ");

    } // for    

    // ------ move this loop into the POLR Worker Driver --------

    worker_model_builder.PrintModelStats();

    assertEquals(1.0e-4, worker_model_builder.polr_modelparams.getLambda(), 1.0e-9);
    assertEquals(10, worker_model_builder.polr_modelparams.getNumFeatures());
    assertTrue(worker_model_builder.polr_modelparams.useBias());
    assertEquals("color", worker_model_builder.polr_modelparams.getTargetVariable());

    System.out.println("done!");

    assertNotNull(0); // placeholder assertion: always passes

}

From source file:com.cloudera.knittingboar.sgd.TestPOLRWorkerNode.java

License:Apache License

/**
 * [ ******* Rebuilding this currently ******* ]
 *
 * Tests replacing the beta, presumably from the master, after we've run POLR a bit 
 * @throws Exception 
 */
public void testReplaceBetaMechanics() throws Exception {

    System.out.println("\n------ testReplaceBetaMechanics --------- ");

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    System.out.println("split count: " + splits.length);

    POLRWorkerNode worker_model_builder = new POLRWorkerNode();

    // ------------------    
    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setup(this.generateDebugConfigurationObject());

    System.out.println("split: " + splits[0].toString());

    TextRecordParser txt_reader = new TextRecordParser();

    // The split's string form for a local FileSplit looks roughly like
    // "file:/path/to/shard:start+length" (format assumption): field [1] of the
    // ":"-split is the path, and the digits after '+' are the byte length.
    long len = Integer.parseInt(splits[0].toString().split(":")[2].split("\\+")[1]);

    txt_reader.setFile(splits[0].toString().split(":")[1], 0, len);

    worker_model_builder.setRecordParser(txt_reader);

    //      worker_model_builder.RunNextTrainingBatch();
    worker_model_builder.compute();

    //    worker_model_builder.polr.Set

    // ------------------- now replace beta ------------

    double val1 = -1.0;

    // GradientBuffer g0 = new GradientBuffer( 2, worker_model_builder.FeatureVectorSize );
    Matrix m = new DenseMatrix(2, feature_vector_size);

    for (int x = 0; x < feature_vector_size; x++) {

        m.set(0, x, val1);

    }

    worker_model_builder.polr.SetBeta(m);

    for (int x = 0; x < feature_vector_size; x++) {

        assertEquals(worker_model_builder.polr.noReallyGetBeta().get(0, x), val1);

    }

    System.out.println("--------------------------------\n");

}

From source file:com.cloudera.knittingboar.sgd.TestPOLRWorkerNode.java

License:Apache License

/**
 * [ ******* Rebuilding this currently ******* ]
 * @throws Exception
 */
public void testPOLROnFullDatasetRun() throws Exception {

    POLRWorkerNode worker_model_builder = new POLRWorkerNode();

    // generate the debug conf ---- normally setup by YARN stuff
    worker_model_builder.setup(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits(workDir, job);

    //    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    //    worker_model_builder.setupInputSplit(custom_reader);
    /*    
        worker_model_builder.LoadConfigVarsLocally();
            
        worker_model_builder.Setup();    
      */

    TextRecordParser txt_reader = new TextRecordParser();

    // As above, recover the byte length (and, below, the path) from the split's
    // "file:/path/to/shard:start+length" string form (format assumption).
    long len = Integer.parseInt(splits[0].toString().split(":")[2].split("\\+")[1]);

    txt_reader.setFile(splits[0].toString().split(":")[1], 0, len);

    worker_model_builder.setRecordParser(txt_reader);

    //for ( int x = 0; x < 5; x++) {

    worker_model_builder.compute();

    //System.out.println( "---------- cycle " + x + " done ------------- " );

    //} // for    

    // ------ move this loop into the POLR Worker Driver --------

    // worker_model_builder.PrintModelStats();

    assertEquals(1.0e-4, worker_model_builder.polr_modelparams.getLambda(), 1.0e-9);
    assertEquals(10, worker_model_builder.polr_modelparams.getNumFeatures());
    assertTrue(worker_model_builder.polr_modelparams.useBias());
    assertEquals("color", worker_model_builder.polr_modelparams.getTargetVariable());

    System.out.println("done!");

    assertNotNull(0); // placeholder assertion: always passes

}

From source file:com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndNWorkers.java

License:Apache License

public void testRunMasterAndTwoWorkers() throws Exception {

    long ts_start = System.currentTimeMillis();

    System.out.println("start-ms:" + ts_start);

    POLRMasterDriver master = new POLRMasterDriver();
    // ------------------    
    // generate the debug conf ---- normally setup by YARN stuff
    master.setConf(this.generateDebugConfigurationObject());
    // now load the conf stuff into locally used vars
    try {
        master.LoadConfigVarsLocally();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        System.out.println("Conf load fail: shutting down.");
        assertEquals(0, 1); // deliberately fail the test
    }
    // now construct any needed machine learning data structures based on config
    master.Setup();
    // ------------------    

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    // TODO: work on this, splits are generating for everything in dir
    InputSplit[] splits = generateDebugSplits(workDir, job);

    System.out.println("split count: " + splits.length);

    ArrayList<POLRWorkerDriver> workers = new ArrayList<POLRWorkerDriver>();

    for (int x = 0; x < splits.length; x++) {
        //for ( int x = 0; x < 1; x++ ) {

        POLRWorkerDriver worker_model_builder = new POLRWorkerDriver(); //workers.get(x);
        worker_model_builder.internalID = String.valueOf(x);
        // simulates the conf stuff
        worker_model_builder.setConf(this.generateDebugConfigurationObject());

        InputRecordsSplit custom_reader_0 = new InputRecordsSplit(job, splits[x]);
        // TODO: set this up to run through the conf pathways
        worker_model_builder.setupInputSplit(custom_reader_0);

        worker_model_builder.LoadConfigVarsLocally();
        worker_model_builder.Setup();

        workers.add(worker_model_builder);

        System.out.println("> Setup Worker " + x);

    }

    boolean bContinuePass = true;
    int x = 0;

    while (bContinuePass) {

        bContinuePass = false;

        for (int worker_id = 0; worker_id < workers.size(); worker_id++) {

            //arContinueTracker[worker_id] 
            boolean result = workers.get(worker_id).RunNextTrainingBatch();
            if (result) {
                bContinuePass = true;
            }

            //GradientUpdateMessage msg0 = workers.get(worker_id).GenerateUpdateMessage();
            GradientUpdateMessage msg0 = workers.get(worker_id).GenerateParamVectorUpdateMessage();

            master.AddIncomingGradientMessageToQueue(msg0);
            master.RecvGradientMessage(); // process msg

        }

        master.AveragePVec_GenerateGlobalUpdateVector(workers.size());

        if (bContinuePass) {

            /*        
                    //master.GenerateGlobalUpdateVector();
                    master.AveragePVec_GenerateGlobalUpdateVector(workers.size());
            */
            GlobalParameterVectorUpdateMessage returned_msg = master.GetNextGlobalUpdateMsgFromQueue();

            // process global updates
            for (int worker_id = 0; worker_id < workers.size(); worker_id++) {

                workers.get(worker_id).ProcessIncomingParameterVectorMessage(returned_msg);

            }

            System.out.println("---------- cycle " + x + " done in pass "
                    + workers.get(0).GetCurrentLocalPassCount() + " ------------- ");

        } else {

            System.out.println("---------- cycle " + x + " done in pass "
                    + workers.get(0).GetCurrentLocalPassCount() + " ------------- ");

            System.out.println("> Saving Model...");

            master.SaveModelLocally("/tmp/TestRunPOLRMasterAndNWorkers.20news.model");

        } // if     

        x++;

    } // for

    Utils.PrintVectorSection(master.global_parameter_vector.gamma.viewRow(0), 3);

    long ts_total = System.currentTimeMillis() - ts_start;

    System.out.println("total time in ms:" + ts_total);

}

From source file:com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndSingleWorker.java

License:Apache License

@Test
public void testRunSingleWorkerSingleMaster() throws Exception {
    // TODO a test with assertions is not a test
    POLRMasterDriver master = new POLRMasterDriver();
    // ------------------
    // generate the debug conf ---- normally setup by YARN stuff
    master.setConf(configuration);
    // now load the conf stuff into locally used vars
    master.LoadConfigVarsLocally();
    // now construct any needed machine learning data structures based on config
    master.Setup();
    // ------------------

    POLRWorkerDriver worker_model_builder_0 = new POLRWorkerDriver();

    // simulates the conf stuff
    worker_model_builder_0.setConf(configuration);

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    long block_size = localFs.getDefaultBlockSize(workDir);
    LOG.info("default block size: " + (block_size / 1024 / 1024) + "MB");
    // ---- set where we'll read the input files from -------------
    FileInputFormat.setInputPaths(job, workDir);
    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    InputSplit[] splits = format.getSplits(job, 1);

    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    worker_model_builder_0.setupInputSplit(custom_reader);

    worker_model_builder_0.LoadConfigVarsLocally();

    worker_model_builder_0.Setup();

    LOG.info("> Feature Size: " + worker_model_builder_0.FeatureVectorSize);
    LOG.info("> Category Size: " + worker_model_builder_0.num_categories);

    for (int x = 0; x < 25; x++) {

        worker_model_builder_0.RunNextTrainingBatch();

        GradientUpdateMessage msg = worker_model_builder_0.GenerateUpdateMessage();
        master.AddIncomingGradientMessageToQueue(msg);
        master.RecvGradientMessage(); // process msg
        master.GenerateGlobalUpdateVector();
        GlobalParameterVectorUpdateMessage returned_msg = master.GetNextGlobalUpdateMsgFromQueue();
        worker_model_builder_0.ProcessIncomingParameterVectorMessage(returned_msg);
        LOG.info("---------- cycle " + x + " done ------------- ");
    } // for

    worker_model_builder_0.Debug();
}

From source file:com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndTwoWorkers.java

License:Apache License

@Test
public void testRunMasterAndTwoWorkers() throws Exception {
    // TODO a test with assertions is not a test
    POLRMasterDriver master = new POLRMasterDriver();
    // ------------------    
    // generate the debug conf ---- normally setup by YARN stuff
    master.setConf(configuration);
    // now load the conf stuff into locally used vars
    master.LoadConfigVarsLocally();
    // now construct any needed machine learning data structures based on config
    master.Setup();
    // ------------------    

    POLRWorkerDriver worker_model_builder_0 = new POLRWorkerDriver();
    worker_model_builder_0.internalID = "0";
    // simulates the conf stuff
    worker_model_builder_0.setConf(configuration);

    POLRWorkerDriver worker_model_builder_1 = new POLRWorkerDriver();
    worker_model_builder_1.internalID = "1";
    // simulates the conf stuff
    worker_model_builder_1.setConf(configuration);

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    long block_size = localFs.getDefaultBlockSize(workDir);
    LOG.info("default block size: " + (block_size / 1024 / 1024) + "MB");
    // ---- set where we'll read the input files from -------------
    FileInputFormat.setInputPaths(job, workDir);
    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    InputSplit[] splits = format.getSplits(job, 2);

    InputRecordsSplit custom_reader_0 = new InputRecordsSplit(job, splits[0]);
    InputRecordsSplit custom_reader_1 = new InputRecordsSplit(job, splits[1]);

    // TODO: set this up to run through the conf pathways
    worker_model_builder_0.setupInputSplit(custom_reader_0);
    worker_model_builder_0.LoadConfigVarsLocally();
    worker_model_builder_0.Setup();

    worker_model_builder_1.setupInputSplit(custom_reader_1);
    worker_model_builder_1.LoadConfigVarsLocally();
    worker_model_builder_1.Setup();

    LOG.info("> Feature Size: " + worker_model_builder_0.FeatureVectorSize);
    LOG.info("> Category Size: " + worker_model_builder_0.num_categories);

    for (int x = 0; x < 30; x++) {

        // run batch 0
        worker_model_builder_0.RunNextTrainingBatch();
        GradientUpdateMessage msg0 = worker_model_builder_0.GenerateUpdateMessage();

        worker_model_builder_1.RunNextTrainingBatch();
        GradientUpdateMessage msg1 = worker_model_builder_1.GenerateUpdateMessage();

        master.AddIncomingGradientMessageToQueue(msg0);
        master.AddIncomingGradientMessageToQueue(msg1);
        master.RecvGradientMessage(); // process msg
        master.RecvGradientMessage(); // process msg

        master.GenerateGlobalUpdateVector();

        GlobalParameterVectorUpdateMessage returned_msg = master.GetNextGlobalUpdateMsgFromQueue();
        worker_model_builder_0.ProcessIncomingParameterVectorMessage(returned_msg);

        worker_model_builder_1.ProcessIncomingParameterVectorMessage(returned_msg);

        LOG.info("---------- cycle " + x + " done ------------- ");

    } // for

}

From source file:com.cloudera.knittingboar.sgd.TestRunPOLRWorkerSingleBatch.java

License:Apache License

public void testRunSingleBatch() throws Exception {

    POLRWorkerDriver worker_model_builder_0 = new POLRWorkerDriver();

    // simulates the conf stuff
    worker_model_builder_0.setConf(this.generateDebugConfigurationObject());

    // ---- this all needs to be done in 
    JobConf job = new JobConf(defaultConf);

    InputSplit[] splits = generateDebugSplits("kboar-shard-0.txt", job);
    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    // TODO: set this up to run through the conf pathways
    worker_model_builder_0.setupInputSplit(custom_reader);
    worker_model_builder_0.LoadConfigVarsLocally();
    worker_model_builder_0.Setup();

    System.out.println("> Feature Size: " + worker_model_builder_0.FeatureVectorSize);
    System.out.println("> Category Size: " + worker_model_builder_0.num_categories);

    worker_model_builder_0.RunNextTrainingBatch();

    GradientUpdateMessage msg = worker_model_builder_0.GenerateUpdateMessage();

    msg.gradient.Debug();

}