/***
*
* rli.h
*
* $Revision: 1.3 $
*
* Description:
*   Interface for the Simulation, Agent, and Environment abstract classes.
*   These are abstract classes that define the three main objects
*   to perform reinforcement learning simulation. Refer to the
*   documentation in URL:
*   http://www-anw.cs.umass.edu/People/sutton/RLinterface/RLinterface.html
*   for further details.
*
* Author:
*   Juan Carlos Santamaria
*     E-mail: carlos@cc.gatech.edu
*     URL:    http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
*   $Id: rli.h,v 1.3 1996/09/19 21:56:36 carlos Exp $
*
* Revision History:
*   $Log: rli.h,v $
*   Revision 1.3  1996/09/19  21:56:36  carlos
*   - Eliminate the arguments ps and pa from Agent::step.
 *   - Change the order of arguments in Environment::step:
*       old: A,r,S'      new: A,S',r
*   - Change the order of arguments in Simulation::collect_data:
*       old: S,A,r,S'    new: S,A,S',r
*     Now all the order of arguments in all functions are consistent.
*
*   Revision 1.2  1996/08/29  15:05:34  carlos
*   - Definition of constant TERMINAL_STATE.
*   - Change in the order of arguments in Agent::step:
*       old: S,A,r,S'    new: S,A,S',r
*   - Environment::transition is now Environment::step.
*   - All member functions of the Simulation class are now virtual.
*   - Simulation::run_steps and Simulation::run_trials are now
*     Simulation::steps and Simulation::trials.
*
*   Revision 1.1  1996/08/14  20:57:56  carlos
*   Initial revision
*
****/

// GNU C++ only: paired with "#pragma implementation" in one source file,
// this keeps out-of-line copies of inline/virtual members from being
// emitted into every translation unit that includes this header.
#pragma interface

// NOTE(review): guard renamed from _INCL_RLI -- identifiers beginning
// with an underscore followed by an uppercase letter are reserved for
// the implementation in C and C++ and must not be defined by user code.
#ifndef INCL_RLI
#define INCL_RLI


// -- Constants

// Sentinel introduced in rev. 1.2; presumably compared against the
// Sensation* produced by Environment::step to detect the end of a
// trial -- TODO confirm against the implementation file.
#define TERMINAL_STATE    0


// -- Class and type declarations

// Forward declarations: Agent and Environment each hold a back-pointer
// to the Simulation that drives them, and Simulation points at both.
class Agent;
class Environment;
class Simulation;


// Root class for the actions an Agent emits.  It carries no data of its
// own; concrete tasks derive from it and add their representation.  The
// virtual destructor allows deletion through a base-class pointer
// (Simulation deletes its stored action that way).
class Action {
public:
    Action() {}
    virtual ~Action() {}
};


// Root class for the sensations an Environment produces.  Like Action,
// it is an empty polymorphic base: concrete tasks subclass it with the
// actual state representation.  The virtual destructor permits deletion
// through a Sensation* (as Simulation's destructor does).
class Sensation {
public:
    Sensation() {}
    virtual ~Sensation() {}
};


// Abstract learning agent: maps sensations to actions and learns from
// the rewards that follow.  Concrete agents implement start_trial and
// step; the driving Simulation installs the psim back-pointer.
class Agent {
    // Simulation writes/clears psim in its constructor/destructor.
    friend class Simulation;
    
public:
    Agent( void ) {}
    virtual ~Agent( void ) {}   // virtual: agents are deleted via Agent*
    
    // Parse agent-specific command-line arguments.  Non-pure, so a
    // default implementation exists elsewhere (not visible in this file).
    virtual void     init( int argc, char *argv[] );
    
    // Begin a new trial: observe the initial sensation S and return the
    // first action A.  NOTE(review): Simulation stores and eventually
    // deletes the returned action, so ownership presumably transfers to
    // the caller -- confirm in the implementation file.
    virtual Action  *start_trial( const Sensation *ps ) = 0;
    
    // One decision step: receive the sensation S' and reward r produced
    // by the previous action and return the next action.  Per the
    // rev. 1.3 log, the previous S and A arguments were dropped; the
    // agent is expected to remember them itself.
    virtual Action  *step( const Sensation *pnext_s,
                           double           reward ) = 0;

protected:
    Simulation *psim;   // back-pointer to the driving Simulation (may be 0)
};


// Abstract environment: produces sensations and rewards in response to
// the agent's actions.  Concrete environments implement start_trial and
// step; the driving Simulation installs the psim back-pointer.
class Environment {
    // Simulation writes/clears psim in its constructor/destructor.
    friend class Simulation;
    
public:    
    Environment( void ) {}
    virtual ~Environment( void ) {}   // virtual: deleted via Environment*
    
    // Parse environment-specific command-line arguments.  Non-pure, so a
    // default implementation exists elsewhere (not visible in this file).
    virtual void       init( int argc, char *argv[] );
    
    // Reset the environment for a new trial and return the initial
    // sensation S.  NOTE(review): Simulation stores and later deletes
    // the returned sensation, so ownership presumably transfers to the
    // caller -- confirm in the implementation file.
    virtual Sensation *start_trial( void ) = 0;
    
    // Execute action A; on return, pnext_s holds the resulting
    // sensation S' and reward holds r (argument order A, S', r per the
    // rev. 1.3 log).  NOTE(review): pnext_s is presumably set to
    // TERMINAL_STATE (0) when the trial ends -- confirm against the
    // implementation.
    virtual void       step( const Action *pa,
                             Sensation    *&pnext_s,
                             double        &reward ) = 0;

protected:
    Simulation *psim;   // back-pointer to the driving Simulation (may be 0)
};


// Drives the agent/environment interaction loop.  Holds (but does not
// own) the agent and environment, and owns the current sensation/action
// pair produced during a run.
class Simulation {
public:
    Agent       *pagt;   // the learning agent (not owned)
    Environment *penv;   // the task environment (not owned)

    // Wire both participants back to this simulation and start with no
    // current sensation/action.
    Simulation( Agent *pa, Environment *pe ) : pagt(pa), penv(pe)
        { pagt->psim = this; penv->psim = this; pcurr_s = 0; pcurr_a = 0; }
    // Detach the participants and release the owned sensation/action.
    virtual ~Simulation( void )
        { pagt->psim = 0; penv->psim = 0; delete pcurr_s; delete pcurr_a; }

    // Parse simulation-level command-line arguments (defined elsewhere).
    virtual void init( int argc, char *argv[] );

    // Run the interaction loop for a fixed number of steps.
    virtual void steps( long num_steps );

    // Run a fixed number of trials, each capped at max_steps_per_trial.
    virtual void trials( long num_trials, long max_steps_per_trial );

    // Begin a new trial (reset environment, ask agent for first action).
    virtual void start_trial( void );

    // Hook called once per step with the full transition S, A, S', r
    // (argument order per the rev. 1.3 log); override to record data.
    virtual void collect_data( const Sensation *ps,
                               const Action    *pa,
                               const Sensation *pnext_s,
                               double           reward );
private:
    // Rule of three: this class owns pcurr_s/pcurr_a, so the
    // compiler-generated copy constructor and assignment operator would
    // double-delete them and leave the agent's/environment's psim
    // back-pointers dangling.  Declared private and left undefined so
    // any attempt to copy fails at compile/link time (pre-C++11 idiom,
    // consistent with this file's vintage).
    Simulation( const Simulation & );
    Simulation &operator=( const Simulation & );

    Sensation *pcurr_s;   // current sensation S (owned; deleted in dtor)
    Action    *pcurr_a;   // current action A    (owned; deleted in dtor)
};


/****************************** end of file *********************************/

#endif