/***
*
* environment-dbi.cc
*
* $Revision: 1.3 $
*
* Description:
* This module implements the functions for the dynamics of a
* double-integrator environment.
*
* Author:
* Juan Carlos Santamaria
* E-mail: carlos@cc.gatech.edu
* URL: http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
* $Id: environment-dbi.cc,v 1.3 1996/09/19 22:20:31 carlos Exp $
*
* Revision History:
* $Log: environment-dbi.cc,v $
* Revision 1.3 1996/09/19 22:20:31 carlos
* Change the order of arguments in Environment::step:
* old: A,r,S' new: A,S',r
*
* Revision 1.2 1996/08/29 15:16:52 carlos
* - Now using the constant TERMINAL_STATE instead of 0.
* - Environment::transition is now Environment::step.
*
* Revision 1.1 1996/08/14 20:53:31 carlos
* Initial revision
*
****/
#pragma implementation
// -- Include files
#include < math.h >
#include "environment-dbi.h"
#include "rand.h"
// -- Member function definitions
//============================================================================
// E_DBI::init()
void E_DBI::init( int argc, char* argv[] )
{
    // Nothing to set up for this environment: both arguments are accepted
    // and deliberately ignored.
    (void)argc;
    (void)argv;
}
//============================================================================
// E_DBI::start_trial()
Sensation* E_DBI::start_trial( void )
{
    // Every trial restarts the simulated clock.
    time = 0.0;

    // Pick the initial state according to the configured mode.  For any
    // mode other than the three handled here, `state` keeps whatever value
    // it already had.
    if ( mode == START ) {
        // Begin at the nominal start position/velocity.
        state = State(POS_0,VEL_0);
    }
    else if ( mode == GOAL ) {
        // Begin directly at the target position/velocity.
        state = State(POS_F,VEL_F);
    }
    else if ( mode == RANDOM ) {
        // Begin at a position/velocity obtained from uniform() over the
        // allowed ranges.  The third argument (91 / 92) is forwarded as-is;
        // its meaning is defined by rand.h (presumably a stream/seed id --
        // TODO confirm).
        state = State(uniform(POS_MIN,POS_MAX,91),
                      uniform(VEL_MIN,VEL_MAX,92));
    }

    // Hand back a freshly heap-allocated copy of the initial state.
    return new State(state);
}
//============================================================================
// E_DBI::step()
void E_DBI::step( const Action *pa, Sensation*& pnext_s, double& reward )
{
Force *pf = (Force *)pa;
double r, delta_p, delta_v;
r = 0.0;
for( int i=0 ; i < NUM_ITS_PER_STEP ; i++ ) {
// accumunalate reward: assume diagonal Q matrix
delta_p = POS_F-state.pos;
delta_v = VEL_F-state.vel;
r += -( delta_p*Q_POS*delta_p + delta_v*Q_VEL*delta_v +
pf->acc*R*pf->acc );
// compute steps following the dynamics of a double integrator
state.vel += pf->acc * DELTA_T;
state.pos += state.vel * DELTA_T;
time += DELTA_T;
}
if ( (state.pos < POS_MIN) || (state.pos > POS_MAX) ||
(state.vel < VEL_MIN) || (state.vel > VEL_MAX) ) {
reward = MIN_REWARD;
pnext_s = TERMINAL_STATE;
}
else {
reward = r*DELTA_T;
pnext_s = new State(state);
}
}
/****************************** end of file *********************************/