// environment-dbi.cc (20-Sep-1996)

/***
*
* environment-dbi.cc
* 
* $Revision: 1.3 $
* 
* Description:
*    This module implements the functions for the dynamics of a
*    double-integrator environment. 
*
* Author:
*   Juan Carlos Santamaria
*     E-mail: carlos@cc.gatech.edu
*     URL:    http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
*   $Id: environment-dbi.cc,v 1.3 1996/09/19 22:20:31 carlos Exp $
*
* Revision History:
*   $Log: environment-dbi.cc,v $
*   Revision 1.3  1996/09/19  22:20:31  carlos
*   Change the order of arguments in Environment::step:
*     old: A,r,S'      new: A,S',r
*
*   Revision 1.2  1996/08/29  15:16:52  carlos
*   - Now using the constant TERMINAL_STATE instead of 0.
*   - Environment::transition is now Environment::step.
*
*   Revision 1.1  1996/08/14  20:53:31  carlos
*   Initial revision
*
****/

#pragma implementation


// -- Include files

#include < math.h >

#include "environment-dbi.h"
#include "rand.h"


// -- Member function definitions

//============================================================================
// E_DBI::init()

// Initialisation hook for the double-integrator environment.
//
// This implementation performs no work: both arguments are accepted and
// ignored, and no member of the object is touched.
//
//   argc, argv - argument count / argument vector pair (unused here).

void E_DBI::init( int argc, char* argv[] )
{
    // Intentionally empty; the casts only mark the parameters as unused.
    (void)argc;
    (void)argv;
}


//============================================================================
// E_DBI::start_trial()

// Begin a new trial.
//
// Resets the elapsed time to zero, selects the initial state according to
// `mode`, and returns a freshly allocated copy of that state:
//
//   START  - the state is (POS_0, VEL_0)
//   GOAL   - the state is (POS_F, VEL_F)
//   RANDOM - position is drawn with uniform() from [POS_MIN, POS_MAX] and
//            velocity from [VEL_MIN, VEL_MAX]
//
// If `mode` matches none of the above, the state keeps whatever value it
// already had.
//
// Returns: pointer to a `new`-allocated State.
// NOTE(review): the caller presumably owns and deletes the returned object;
//               confirm against the Environment interface.

Sensation* E_DBI::start_trial( void )
{
    time = 0.0;

    if( mode == START ) {
        state = State(POS_0,VEL_0);
    }
    else if( mode == GOAL ) {
        state = State(POS_F,VEL_F);
    }
    else if( mode == RANDOM ) {
        // Both draws are kept as arguments of one constructor call.
        // NOTE(review): the meaning of the third argument (91 / 92) is
        //               defined by uniform() in rand.h, not visible here.
        state = State(uniform(POS_MIN,POS_MAX,91),
                      uniform(VEL_MIN,VEL_MAX,92));
    }

    // Hand back a copy so the internal state is not exposed directly.
    Sensation* first_sensation = new State(state);
    return first_sensation;
}


//============================================================================
// E_DBI::step()

void E_DBI::step( const Action *pa, Sensation*& pnext_s, double& reward )
{
    Force *pf = (Force *)pa;
    double r, delta_p, delta_v;

    r = 0.0;

    for( int i=0 ; i < NUM_ITS_PER_STEP ; i++ ) {

        // accumunalate reward: assume diagonal Q matrix
        
        delta_p = POS_F-state.pos;
        delta_v = VEL_F-state.vel;
        r += -( delta_p*Q_POS*delta_p + delta_v*Q_VEL*delta_v +
                pf->acc*R*pf->acc );

        // compute steps following the dynamics of a double integrator

        state.vel += pf->acc   * DELTA_T;
        state.pos += state.vel * DELTA_T;
        
        time += DELTA_T;
    }

    if ( (state.pos < POS_MIN) || (state.pos > POS_MAX) ||
         (state.vel < VEL_MIN) || (state.vel > VEL_MAX) ) {
        
        reward  = MIN_REWARD;
        pnext_s = TERMINAL_STATE;
    }
    else {
        reward  =  r*DELTA_T;
        pnext_s =  new State(state);
    }
}


/****************************** end of file *********************************/