/***
 *
 * agent-optimal-dbi.h
 *
 * $Revision: 1.3 $
 *
 * Description:
 *   This module is the interface for the optimal agent's functions
 *   controlling the double integrator.  This is a linear dynamical system
 *   satisfying  dx/dt = A x + B u  with the quadratic cost  x'Qx + u'Ru,
 *   i.e. an LQ regulator.
 *
 *   For this system, the optimal value function is quadratic.  More
 *   specifically, it is specified by a symmetric 2x2 matrix P as
 *   V(x) = x'Px, where P satisfies the steady-state Riccati differential
 *   equation:
 *
 *              0 = A'P + P A + Q - P (B R^-1 B') P
 *
 *   The optimal policy is given by  u = - R^-1 B'P x
 *
 *   For this task, the matrices are as follows:
 *
 *       | vel |           | 0 0 | | vel |   | 1 |          | 0 0 |
 *   x = |     |   dx/dt = |     | |     | + |   | u    Q = |     |    R = 1
 *       | pos |           | 1 0 | | pos |   | 0 |          | 0 1 |
 *
 *                                        | sqrt(2)     1    |
 *   Solving the Riccati equation:   P =  |                  |
 *                                        |    1     sqrt(2) |
 *
 *   which defines the optimal policy as  u = - ( sqrt(2) vel + pos )
 *   and the value function as
 *   V(vel,pos) = sqrt(2)*(vel*vel + pos*pos) + 2*vel*pos
 *
 * Author:
 *   Juan Carlos Santamaria
 *   E-mail: carlos@cc.gatech.edu
 *   URL:    http://www.cc.gatech.edu/ai/students/jcs
 *
 * File name:
 *   $Id: agent-optimal-dbi.h,v 1.3 1996/09/19 21:58:38 carlos Exp $
 *
 * Revision History:
 *   $Log: agent-optimal-dbi.h,v $
 *   Revision 1.3  1996/09/19 21:58:38  carlos
 *   Eliminate the arguments ps and pa from Agent::step.
* * Revision 1.2 1996/08/28 20:17:15 carlos * Change in the order of arguments in Agent::step: * old: S,A,r,S' new: S,A,S',r * * Revision 1.1 1996/08/14 20:53:46 carlos * Initial revision * ****/ #pragma interface #ifndef _INCL_AGENT_OPTIMAL_DBI #define _INCL_AGENT_OPTIMAL_DBI // -- Include files #ifndef _INCL_RLI #include "rli.h" #endif #ifndef _INCL_ENVIRONMENT_DBI #include "environment-dbi.h" #endif // -- Class and type declarations class A_Optimal_DBI : public Agent { public: A_Optimal_DBI( void ) : Agent () {} ~A_Optimal_DBI( void ) {} void init( int argc, char *argv[] ); Action *start_trial( const Sensation *ps ); Action *step( const Sensation *pnext_s, double reward ); }; /****************************** end of file *********************************/ #endif