;-*- Mode: Lisp; Package: (RLI :use (common-lisp)) -*-
; Source Code for Reinforcement Learning Software in Common Lisp
; See http://www-anw.cs.umass.edu/People/sutton/RLinterfaces/RLI-lisp.html.

;;; All the basic interfaces, classes, and default methods for simulations,
;;; agents and environments.

;;; The public symbols are exported through DEFPACKAGE instead of a separate
;;; top-level EXPORT call: DEFPACKAGE takes effect when the file is compiled
;;; as well as when it is loaded, whereas a bare top-level EXPORT is only
;;; guaranteed to run at load time.
(defpackage :RLI
  (:use :common-lisp)
  (:export #:agent #:environment #:simulation
           #:agent-init #:agent-start-episode #:agent-step
           #:agent-sim #:agent-env
           #:env-init #:env-start-episode #:env-step
           #:env-sim #:env-agent
           #:sim-init #:sim-start-episode #:sim-steps #:sim-episodes
           #:sim-collect-data #:sim-agent #:sim-env
           #:last-sensation #:next-action))

(in-package :RLI)

;;; AGENT:

(defclass agent ()
  ((agent-sim :accessor agent-sim :initarg :simulation
              :documentation "The simulation this agent belongs to (set by SIM-INIT).")
   (agent-env :accessor agent-env :initarg :environment
              :documentation "The environment this agent acts in (set by SIM-INIT)."))
  (:documentation "Base class for agents.  Subclasses supply methods for
AGENT-START-EPISODE and AGENT-STEP, and optionally AGENT-INIT."))

(defgeneric agent-init (agent)
  (:documentation "Called once by SIM-INIT, after the environment has been
initialized.  The default method does nothing."))

(defmethod agent-init ((agent agent))
  ;; Default: nothing to initialize.
  )

(defgeneric agent-start-episode (agent sensation)
  (:documentation "Called by SIM-START-EPISODE with the sensation returned by
ENV-START-EPISODE.  The returned value is stored as the simulation's next
action.  No default method is provided."))

(defgeneric agent-step (agent sensation reward)
  (:documentation "Called after every environment step with the sensation and
reward returned by ENV-STEP (the sensation may be :TERMINAL-STATE).  The
returned value is stored as the simulation's next action.  No default method
is provided."))

;;; ENVIRONMENT:

(defclass environment ()
  ((env-sim :accessor env-sim
            :documentation "The simulation this environment belongs to (set by SIM-INIT).")
   (env-agent :accessor env-agent
              :documentation "The agent acting in this environment (set by SIM-INIT)."))
  (:documentation "Base class for environments.  Subclasses supply methods for
ENV-START-EPISODE and ENV-STEP, and optionally ENV-INIT."))

(defgeneric env-init (environment)
  (:documentation "Called once by SIM-INIT, before the agent is initialized.
The default method does nothing."))

(defmethod env-init ((env environment))
  ;; Default: nothing to initialize.
  )

(defgeneric env-start-episode (environment)
  (:documentation "Called by SIM-START-EPISODE.  The returned value is stored
as the simulation's last sensation and passed to AGENT-START-EPISODE.  No
default method is provided."))

(defgeneric env-step (environment action)
  (:documentation "Called with the simulation's next action.  Callers read two
return values: the next sensation and the reward.  A next sensation of
:TERMINAL-STATE marks the end of the episode.  No default method is
provided."))

;;; SIMULATION:

(defclass simulation ()
  ((environment :accessor sim-env :initarg :environment
                :documentation "The environment being simulated.")
   (agent :accessor sim-agent :initarg :agent
          :documentation "The agent being simulated.")
   ;; The two slots below have no initform; they are first assigned by
   ;; SIM-START-EPISODE (which SIM-INIT calls).
   last-sensation
   next-action)
  (:documentation "Ties one agent to one environment and drives their
interaction.  LAST-SENSATION holds the most recent sensation produced by the
environment; NEXT-ACTION holds the action the agent chose in response."))

(defgeneric sim-init (simulation agent environment)
  (:documentation "Cross-links SIMULATION, AGENT and ENVIRONMENT, then calls
ENV-INIT, AGENT-INIT and SIM-START-EPISODE, in that order."))

(defmethod sim-init ((sim simulation) (agent agent) (env environment))
  ;; Make all three objects point at each other.
  (setf (sim-env sim) env)
  (setf (sim-agent sim) agent)
  (setf (env-sim env) sim)
  (setf (env-agent env) agent)
  (setf (agent-sim agent) sim)
  (setf (agent-env agent) env)
  ;; The environment is initialized before the agent.
  (env-init env)
  (agent-init agent)
  (sim-start-episode sim))

(defgeneric sim-start-episode (simulation)
  (:documentation "Starts a new episode: stores the result of
ENV-START-EPISODE as the last sensation, then stores the result of
AGENT-START-EPISODE on that sensation as the next action."))

(defmethod sim-start-episode ((sim simulation))
  (with-slots (last-sensation next-action environment agent) sim
    (setq last-sensation (env-start-episode environment))
    (setq next-action (agent-start-episode agent last-sensation))))

(defun %sim-step (sim)
  ;; Internal helper shared by SIM-STEPS and SIM-EPISODES: performs exactly
  ;; one environment/agent transition.  The pending action is applied with
  ;; ENV-STEP, the transition is reported to SIM-COLLECT-DATA, the agent is
  ;; asked for its next action, and only then is LAST-SENSATION advanced.
  (with-slots (last-sensation next-action environment agent) sim
    (multiple-value-bind (next-sensation reward)
        (env-step environment next-action)
      (sim-collect-data sim last-sensation next-action reward next-sensation)
      (setq next-action (agent-step agent next-sensation reward))
      (setq last-sensation next-sensation))))

(defgeneric sim-steps (simulation num-steps)
  (:documentation "Runs the simulation for NUM-STEPS steps, starting a new
episode whenever the last sensation is :TERMINAL-STATE."))

(defmethod sim-steps ((sim simulation) num-steps)
  "Runs the simulation for num-steps steps"
  (with-slots (last-sensation) sim
    ;; A previous run may have stopped exactly on a terminal sensation;
    ;; start a fresh episode before stepping.
    (when (eq last-sensation :terminal-state)
      (sim-start-episode sim))
    (loop repeat num-steps
          do (%sim-step sim)
             ;; An episode that ends mid-run is restarted immediately.
             (when (eq last-sensation :terminal-state)
               (sim-start-episode sim)))))

(defgeneric sim-episodes (simulation num-episodes max-steps-per-episode)
  (:documentation "Runs NUM-EPISODES episodes.  Each one begins with
SIM-START-EPISODE and ends when the last sensation is :TERMINAL-STATE or
after MAX-STEPS-PER-EPISODE steps, whichever comes first."))

(defmethod sim-episodes ((sim simulation) num-episodes max-steps-per-episode)
  "Runs the simulation for num-episodes episodes, each of maximum length max-steps-per-episode"
  (with-slots (last-sensation) sim
    (loop repeat num-episodes
          do (sim-start-episode sim)
             (loop repeat max-steps-per-episode
                   do (%sim-step sim)
                   ;; The terminal test runs after each step.
                   until (eq last-sensation :terminal-state)))))

(defgeneric sim-collect-data (simulation sensation action reward next-sensation)
  (:documentation "Called once per step, after ENV-STEP and before AGENT-STEP,
with the previous sensation, the action taken, the reward received and the
resulting sensation.  The default method ignores its arguments."))

(defmethod sim-collect-data ((sim simulation) sensation action reward next-sensation)
  "Collects data on the simulation (user should specialize this)"
  ;(print (list sensation action reward next-sensation)))
  (declare (ignore sensation action reward next-sensation)))