// agent-optimal-dbi.h (20-Sep-1996)

/***
*
* agent-optimal-dbi.h
*
* $Revision: 1.3 $
*
* Description:
*   This module is the interface for the optimal agent's functions
*   controlling the double integrator. This is a linear dynamical system
*   satisfying:  dx/dt = A x + B u  with the quadratic cost x'Qx + u'Ru.
*   i.e., this is an LQ (linear-quadratic) regulator problem.
*
*   For this system, the optimal value function is quadratic. More
*   specifically, it is specified by a symmetric 2x2 matrix P as
*   V(x) = x'Px, where P satisfies the steady-state Riccati differential
*   equation:
*
*     0 = A'P + P A + Q - P (B R^-1 B') P
*
*   The optimal policy is given by u = - R^-1 B'P x
*
*   For this task, the matrices are as follows:
*
*       vel            0  0  vel     1           0  0
*   x =        dx/dt =            +    u     Q =        R = 1
*       pos            1  0  pos     0           0  1
*
*                                       sqrt(2)    1
*   Solving the Riccati equation:   P =
*                                          1     sqrt(2)
*
*   which defines the optimal policy as u = - ( sqrt(2) vel + pos )
*   and the value function as V(vel,pos) = sqrt(2)*(vel*vel + pos*pos) +
*   2*vel*pos
*
* Author:
*   Juan Carlos Santamaria
*     E-mail: carlos@cc.gatech.edu
*     URL:    http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
*   $Id: agent-optimal-dbi.h,v 1.3 1996/09/19 21:58:38 carlos Exp $
*
* Revision History:
*   $Log: agent-optimal-dbi.h,v $
*   Revision 1.3  1996/09/19  21:58:38  carlos
*   Eliminate the arguments ps and pa from Agent::step.
*
*   Revision 1.2  1996/08/28  20:17:15  carlos
*   Change in the order of arguments in Agent::step:
*     old: S,A,r,S'    new: S,A,S',r
*
*   Revision 1.1  1996/08/14  20:53:46  carlos
*   Initial revision
*
****/

#pragma interface

#ifndef _INCL_AGENT_OPTIMAL_DBI
#define _INCL_AGENT_OPTIMAL_DBI


// -- Include files

#ifndef _INCL_RLI
#include "rli.h"
#endif

#ifndef _INCL_ENVIRONMENT_DBI
#include "environment-dbi.h"
#endif


// -- Class and type declarations

// Optimal agent for the double integrator task (the LQ derivation of the
// policy is given in the file header).  Only the interface is declared
// here: init, start_trial and step are defined in another translation
// unit, so their exact behavior is not visible from this header.
class A_Optimal_DBI : public Agent
{
public:
    // Construction simply delegates to the Agent default constructor;
    // neither the constructor nor the destructor does work of its own.
    A_Optimal_DBI() : Agent() {}
    ~A_Optimal_DBI() {}

    // Receives an argument count and argument vector.
    // NOTE(review): how the arguments are interpreted is defined by the
    // implementation -- confirm there.
    void init( int argc, char *argv[] );

    // Takes a read-only sensation and returns a pointer to an action.
    // NOTE(review): presumably ps is the first sensation of a new trial;
    // ownership of the returned Action is not stated here -- confirm
    // against the Agent base class.
    Action *start_trial( const Sensation *ps );

    // Takes the next sensation together with a reward value and returns
    // a pointer to an action.  Argument order follows the S',r
    // convention recorded in the revision history.
    // NOTE(review): ownership of the returned Action is not stated here.
    Action *step( const Sensation *pnext_s, double reward );
};


/****************************** end of file *********************************/

#endif