/Projects/Cogito/src/GameObjects/CogitoAgents/QLearningAgent.m
//
//  QLearningAgent.m
//  Author: Thomas Taylor
//
//  Handles the machine learning using Q-learning
//
//  15/01/2012: Created class
//

#import "QLearningAgent.h"

@interface QLearningAgent()

-(void)updateQValues:(QState*)_newState;

@end

@implementation QLearningAgent

#pragma mark -
#pragma mark Memory Allocation

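/**
 * Releases the list of game states and cleans up
 */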
-(void)dealloc
{
    [gameStates release];
    [super dealloc];
}

#pragma mark -
#pragma mark Initialisation

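/**
 * Initialises the agent with an empty list of game states
 * @return self
 */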
-(id)init
{
    self = [super init];
    
    if (self != nil) 
    {
        gameStates = [[CCArray alloc] init];
        currentAction = -1;
    }
    return self;
}

#pragma mark -
#pragma mark Q-Value Calculations

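/**
 * Applies the standard Q-learning update to the previous state/action pair:
 * Q(s,a) <- (1 - α)·Q(s,a) + α·(r + γ·max Q(s',a'))
 * where α is kQLearningRate and γ is kQDiscountFactor
 * @param _newState the state the agent has just entered
 */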
-(void)updateQValues:(QState*)_newState
{    
    if(currentState == nil) return;
            
    float oldQValue = [currentState getQValueForAction:currentAction];
    float maximumQValue = [_newState calculateMaxQValue];
    float reward = [_newState getReward];
    
    // apply a negative reward if using a tool
    if(reward == kQDefaultReward) 
    {
        switch (currentAction)
        {
            case kActionDownUmbrella:
            case kActionEquipUmbrella:
            case kActionLeftHelmet:
            case kActionRightHelmet:
                reward = kQToolReward;
                break;
                
            default:
                break;
        } 
    }
        
    float updatedQValue = oldQValue * (1 - kQLearningRate) + kQLearningRate * (reward + kQDiscountFactor * maximumQValue);
    //CCLOG(@"Q: %f => newQ: %f maxQ: %f R: %i [%@ - %@]", oldQValue, updatedQValue, maximumQValue, (int)reward, [Utils getObjectAsString:currentState.getGameObject.gameObjectType], [Utils getActionAsString:currentAction]);
    [currentState setQValue:updatedQValue forAction:currentAction];
}

#pragma mark -
#pragma mark Overrides

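/**
 * Selects the next action: a random one while learning (or with probability
 * kLearningRandomProbability), otherwise the optimum action for the state
 * @param _state the agent's current state
 * @return the action to perform
 */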
-(Action)selectAction:(QState*)_state
{        
    Action action = -1;

    // get a list of the available actions
    CCArray* options = [_state getActions];
    if([options count] < 1) options = [self calculateAvailableActions:_state];

    // calculates the Q-value for the previous state
    [self updateQValues:_state];
        
    if(self.state != kStateDead && [_state getGameObject].gameObjectType != kObjectExit) 
    {
        // choose a random action with probability kLearningRandomProbability
        BOOL chooseRandom = NO;
        if(kLearningRandomProbability > 0.0f)
        {
            int randomNumber = [Utils generateRandomNumberFrom:0 to:(int)(1/kLearningRandomProbability)];
            chooseRandom = (randomNumber == 0);
        }
        
        // if still learning, randomly choose an action
        if(learningMode || chooseRandom) action = [self chooseRandomAction:options];
        // not learning, choose the optimum action
        else
        {
            action = [_state getOptimumAction];
            
            // no data for the current state, choose a random action
            if(action == -1) action = [self chooseRandomAction:options];
        }
    }
    
    // update the current state/action variables (nil if we've reached a goal state)
    currentState = (action != -1) ? _state : nil;
    currentAction = action;
    
    return action;
}

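/**
 * Looks up the QState for the given game object, creating (and storing)
 * a new one with the appropriate reward if none exists
 * @param _object the game object the state relates to
 * @return the matching QState
 */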
-(QState*)getStateForGameObject:(GameObject*)_object
{        
    // if we're using the shared knowledge base...
    if([LemmingManager sharedLemmingManager].sharedKnowledge)
        return [[KnowledgeBase sharedKnowledgeBase] getStateForGameObject:_object];
        
    for (int i = 0; i < [gameStates count]; i++) 
    {
        QState* tempState = [gameStates objectAtIndex:i];
        if([tempState getGameObject] == _object) return tempState;
    }
        
    // state not found, make a new one
    float reward = kQDefaultReward;
    if(_object.gameObjectType == kObjectExit) reward = kQWinReward;
    else if(self.state == kStateDead) reward = kQDeathReward;
    
    // autoreleased: the gameStates array retains the new state
    QState* returnState = [[[QState alloc] initStateForObject:_object withReward:reward] autorelease];
    [gameStates addObject:returnState];
    return returnState;
}

@end