Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #import "QLearningAgent.h"
00011
/**
 * Private class extension: methods used only inside this implementation file.
 */
@interface QLearningAgent ()

/**
 * Updates the stored Q-value of the previously chosen state/action pair,
 * using the state the agent has just arrived at.
 * @param _newState the state the agent has just entered
 */
- (void)updateQValues:(QState *)_newState;

@end
00017
@implementation QLearningAgent

#pragma mark -
#pragma mark Memory Allocation

/**
 * Releases the agent's private list of Q-states.
 * This file uses manual retain/release, so [super dealloc] must remain the last call.
 */
-(void)dealloc
{
    [gameStates release];
    [super dealloc];
}

#pragma mark -
#pragma mark Initialisation

/**
 * Initialises the agent with an empty private state list and no pending action.
 * @return the initialised agent, or nil if the superclass initialiser failed
 */
-(id)init
{
    self = [super init];

    if (self != nil)
    {
        gameStates = [[CCArray alloc] init];
        currentAction = -1; // -1 is used throughout this class as "no action"
    }
    return self;
}

#pragma mark -
#pragma mark Q-Value Calculations

/**
 * Applies the Q-learning update to the previously visited state/action pair:
 *
 *   Q(s,a) = (1 - alpha) * Q(s,a) + alpha * (r + gamma * max Q(s',a'))
 *
 * where s/a are currentState/currentAction, s' is _newState, alpha is
 * kQLearningRate and gamma is kQDiscountFactor.
 * Does nothing when there is no previous state (first step, or the last
 * selection produced no action).
 * @param _newState the state the agent has just arrived at
 */
-(void)updateQValues:(QState*)_newState
{
    // nothing to learn from until an action has actually been taken from some state
    if(currentState == nil) return;

    float oldQValue = [currentState getQValueForAction:currentAction];
    float maximumQValue = [_newState calculateMaxQValue];
    float reward = [_newState getReward];

    // when the new state only carries the default reward, tool-related actions
    // get the dedicated tool reward instead
    // NOTE(review): whether kQToolReward is a bonus or a cost is defined elsewhere - confirm
    if(reward == kQDefaultReward)
    {
        switch (currentAction)
        {
            case kActionDownUmbrella:
            case kActionEquipUmbrella:
            case kActionLeftHelmet:
            case kActionRightHelmet:
                reward = kQToolReward;
                break;

            default:
                break;
        }
    }

    float updatedQValue = oldQValue * (1 - kQLearningRate) + kQLearningRate * (reward + kQDiscountFactor * maximumQValue);

    [currentState setQValue:updatedQValue forAction:currentAction];
}

#pragma mark -
#pragma mark Overrides

/**
 * Chooses the next action for the given state and records the state/action
 * pair so the following call can update its Q-value.
 *
 * The Q-value of the previous pair is updated first. No action is chosen
 * (-1 is returned) when the agent is dead or the state's object is the exit.
 * Otherwise a random action is taken when learningMode is set or the random
 * draw hits; if not, the state's optimum action is used, falling back to a
 * random one when the state reports no optimum.
 * @param _state the state the agent is currently in
 * @return the chosen action, or -1 when no action was chosen
 */
-(Action)selectAction:(QState*)_state
{
    Action action = -1;

    // use the actions stored on the state, or work them out if it has none yet
    CCArray* options = [_state getActions];
    if([options count] < 1) options = [self calculateAvailableActions:_state];

    // learn from the move that brought us here before choosing the next one
    [self updateQValues:_state];

    if(self.state != kStateDead && [_state getGameObject].gameObjectType != kObjectExit)
    {
        // Only draw when the probability is positive: dividing by a zero
        // probability and converting the result to int is undefined behaviour.
        BOOL chooseRandom = NO;
        if(kLearningRandomProbability > 0.0f)
        {
            int randomNumber = [Utils generateRandomNumberFrom:0 to:(int)(1/kLearningRandomProbability)];
            chooseRandom = (randomNumber == 0);
        }

        if(learningMode || chooseRandom)
        {
            action = [self chooseRandomAction:options];
        }
        else
        {
            action = [_state getOptimumAction];

            // the state has no optimum yet, so fall back to a random choice
            if(action == -1) action = [self chooseRandomAction:options];
        }
    }

    // remember the pair for the next Q-value update; no action means nothing to update
    // (currentState is a non-retained reference to the state passed in)
    currentState = (action != -1) ? _state : nil;
    currentAction = action;

    return action;
}

/**
 * Returns the Q-state associated with a game object, creating it on first use.
 *
 * When the lemming manager's sharedKnowledge flag is set the lookup is
 * delegated to the shared KnowledgeBase and the private list is not used.
 * Otherwise the private list is searched by object identity; if no state
 * exists, one is created with a reward based on the object type and the
 * agent's current state, stored in the list and returned.
 * @param _object the game object to find or create a state for
 * @return the state for the object; agent-created states are owned by gameStates
 */
-(QState*)getStateForGameObject:(GameObject*)_object
{
    if([LemmingManager sharedLemmingManager].sharedKnowledge)
        return [[KnowledgeBase sharedKnowledgeBase] getStateForGameObject:_object];

    NSUInteger stateCount = [gameStates count];
    for (NSUInteger i = 0; i < stateCount; i++)
    {
        QState* tempState = [gameStates objectAtIndex:i];
        if([tempState getGameObject] == _object) return tempState;
    }

    // unknown object: the reward is fixed when the state is created
    float reward = kQDefaultReward;
    if(_object.gameObjectType == kObjectExit) reward = kQWinReward;
    else if(self.state == kStateDead) reward = kQDeathReward;

    QState* returnState = [[QState alloc] initStateForObject:_object withReward:reward];
    [gameStates addObject:returnState];

    // The array now retains the state; balance the alloc so it is not leaked.
    // The pointer stays valid for the caller for as long as the state remains in gameStates.
    [returnState release];

    return returnState;
}

@end