/***
*
* environment-dbi.h
*
* $Revision: 1.3 $
*
* Description:
* Interface for a double integrator object. The state consists of two
* continuous variables: position (p) and velocity (v). The action
* consists of one continuous variable: acceleration (a). The dynamics
* equation is a double integration (linear):
* dp/dt = v
* dv/dt = a
* The starting position is given by (POS_0,VEL_0) and the reward function
* is quadratic of the form x'Qx + u'Ru. That is, the goal is to reach the
* origin of the state space and remain there. The use of energy (a^2) is
* penalized too. Additionally, if the state falls out of bounds the
* simulation finishes and the reward is MIN_REWARD.
*
* Author:
* Juan Carlos Santamaria
* E-mail: carlos@cc.gatech.edu
* URL: http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
* $Id: environment-dbi.h,v 1.3 1996/09/19 22:19:54 carlos Exp $
*
* Revision History:
* $Log: environment-dbi.h,v $
* Revision 1.3 1996/09/19 22:19:54 carlos
* - Change the order of arguments in Environment::step:
* old: A,r,S' new: A,S',r
* - Add the constant GAMMA to the class. The value is initialized at
* creation time using a constructor parameter.
*
* Revision 1.2 1996/08/28 20:19:16 carlos
* Environment::transition is now Environment::step.
*
* Revision 1.1 1996/08/14 20:53:13 carlos
* Initial revision
*
*
****/
#pragma interface
#ifndef _INCL_ENVIRONMENT_DBI
#define _INCL_ENVIRONMENT_DBI
// -- Include files
#ifndef _INCL_RLI
#include "rli.h"
#endif
// -- Constants
// Tunable parameters of the double-integrator environment. They are
// namespace-scope `const` objects defined directly in this header.
// common stuff
// NOTE(review): presumably the integration time step and the number of
// integration iterations performed per environment step; their use is not
// visible in this header -- confirm against the implementation file.
const double DELTA_T = 0.05;
const int NUM_ITS_PER_STEP = 4;
// state indexes
// NOTE(review): FOR presumably indexes the force/acceleration component;
// nothing in this header uses these indexes -- confirm against callers.
const int POS = 0;
const int VEL = 1;
const int FOR = 2;
// bounds
// Per the file description, a state that falls out of bounds ends the
// simulation with reward MIN_REWARD. Where the FOR_* limits are applied is
// not visible here (presumably they limit the action) -- TODO confirm.
const double POS_MAX = 1.0;
const double POS_MIN = -1.0;
const double VEL_MAX = 1.0;
const double VEL_MIN = -1.0;
const double FOR_MAX = 1.0;
const double FOR_MIN = -1.0;
// initial conditions
// Starting point (POS_0,VEL_0) named in the file description.
const double POS_0 = 1.0;
const double VEL_0 = 0.0;
// goal state
// The file description states the goal is to reach the origin of the state
// space and remain there.
const double POS_F = 0.0;
const double VEL_F = 0.0;
// other constants
// The file description gives the reward as the quadratic form x'Qx + u'Ru.
// NOTE(review): Q_POS, Q_VEL and R are presumably the weights on position,
// velocity and action in that form -- confirm against the implementation.
const double Q_POS = 1.0;
const double Q_VEL = 0.0;
const double R = 1.0;
// Reward assigned when the state falls out of bounds (see file description).
const double MIN_REWARD = -50.0;
// -- Class and type declarations
// State of the double integrator: a position/velocity pair exposed as a
// Sensation.
class State : public Sensation {
public:
  double pos;
  double vel;

  // Builds a state from position `p` and velocity `v`; both default to 0.0.
  State( double p = 0.0, double v = 0.0 )
    : pos( p ), vel( v )
  {}

  // Copies pos and vel from `other`. The Sensation base subobject is not
  // named in the initializer list, so it is default-constructed, not copied.
  State( const State& other )
    : pos( other.pos ), vel( other.vel )
  {}

  // Assigns pos and vel from `rhs` and returns *this. The Sensation base
  // part is left untouched.
  State& operator=( const State& rhs )
  {
    pos = rhs.pos;
    vel = rhs.vel;
    return *this;
  }
};
// Action of the double integrator: a single acceleration value exposed as
// an Action.
class Force : public Action {
public:
  double acc;

  // Builds an action with acceleration `a` (default 0.0).
  Force( double a = 0.0 )
    : acc( a )
  {}

  // Copies acc from `other`. The Action base subobject is not named in the
  // initializer list, so it is default-constructed, not copied.
  Force( const Force& other )
    : acc( other.acc )
  {}

  // Assigns acc from `rhs` and returns *this. The Action base part is left
  // untouched.
  Force& operator=( const Force& rhs )
  {
    acc = rhs.acc;
    return *this;
  }
};
// Double-integrator environment. Only the interface is declared here;
// init(), start_trial() and step() are defined elsewhere.
class E_DBI : public Environment {
public:
  // Initialization mode stored in `mode`.
  // NOTE(review): presumably selects how start_trial() chooses the initial
  // state (start point, goal, or random) -- confirm in the implementation.
  enum Init {
    START,
    GOAL,
    RANDOM
  };

  // Fixed at construction from the `gamma` argument. Being a const member,
  // it makes objects of this class non-assignable.
  // NOTE(review): presumably the discount factor -- confirm.
  const double GAMMA;
  Init mode;     // set from the constructor's `m` argument
  double time;   // set to 0.0 by the constructor
  State state;   // default-constructed by the constructor

  // Stores `gamma` in GAMMA and `m` in mode (START when omitted), and
  // resets time to 0.0.
  E_DBI( double gamma, Init m = START )
    : Environment(), GAMMA( gamma ), mode( m ), time( 0.0 )
  {}

  ~E_DBI() {}

  // NOTE(review): takes a main()-style argument vector; how it is
  // interpreted is not visible in this header.
  void init( int argc, char *argv[] );

  // Returns a pointer to a Sensation.
  // NOTE(review): presumably the first sensation of a new trial; ownership
  // of the returned pointer is not visible here -- confirm.
  Sensation* start_trial();

  // Argument order is action, next sensation, reward (A,S',r). `pnext_s`
  // and `reward` are non-const references, presumably outputs -- confirm.
  void step( const Action* pa, Sensation*& pnext_s, double& reward );
};
#endif
/****************************** end of file *********************************/