/*** * * environment-dbi.cc * * $Revision: 1.3 $ * * Description: * This module implements the functions for the dynamics of a * double-integrator environment. * * Author: * Juan Carlos Santamaria * E-mail: carlos@cc.gatech.edu * URL: http://www.cc.gatech.edu/ai/students/jcs * * File name: * $Id: environment-dbi.cc,v 1.3 1996/09/19 22:20:31 carlos Exp $ * * Revision History: * $Log: environment-dbi.cc,v $ * Revision 1.3 1996/09/19 22:20:31 carlos * Change the order of arguments in Enviornment::step: * old: A,r,S' new: A,S',r * * Revision 1.2 1996/08/29 15:16:52 carlos * - Now using the constant TERMINAL_STATE instead of 0. * - Environment::transition is now Environment::step. * * Revision 1.1 1996/08/14 20:53:31 carlos * Initial revision * ****/ #pragma implementation // -- Include files #include < math.h > #include "environment-dbi.h" #include "rand.h" // -- Member function definitions //============================================================================ // E_DBI::init() void E_DBI::init( int argc, char* argv[] ) { } //============================================================================ // E_DBI::start_trial() Sensation* E_DBI::start_trial( void ) { time = 0.0; switch( mode ) { case START: state = State(POS_0,VEL_0); break; case GOAL: state = State(POS_F,VEL_F); break; case RANDOM: state = State(uniform(POS_MIN,POS_MAX,91), uniform(VEL_MIN,VEL_MAX,92)); break; } return new State(state); } //============================================================================ // E_DBI::step() void E_DBI::step( const Action *pa, Sensation*& pnext_s, double& reward ) { Force *pf = (Force *)pa; double r, delta_p, delta_v; r = 0.0; for( int i=0 ; i < NUM_ITS_PER_STEP ; i++ ) { // accumunalate reward: assume diagonal Q matrix delta_p = POS_F-state.pos; delta_v = VEL_F-state.vel; r += -( delta_p*Q_POS*delta_p + delta_v*Q_VEL*delta_v + pf->acc*R*pf->acc ); // compute steps following the dynamics of a double integrator state.vel += pf->acc * DELTA_T; state.pos += state.vel * DELTA_T; time += DELTA_T; } if ( (state.pos < POS_MIN) || (state.pos > POS_MAX) || (state.vel < VEL_MIN) || (state.vel > VEL_MAX) ) { reward = MIN_REWARD; pnext_s = TERMINAL_STATE; } else { reward = r*DELTA_T; pnext_s = new State(state); } } /****************************** end of file *********************************/