;-*- Mode: Lisp; Package: (RLI :use (common-lisp)) -*-
; Source Code for Reinforcement Learning Software in Common Lisp
; See http://www-anw.cs.umass.edu/People/sutton/RLinterfaces/RLI-lisp.html.
;;; All the basic interfaces, classes, and default methods for simulations,
;;; agents and environments.
;;; Package definition.  The public symbols are listed in the :EXPORT clause
;;; of DEFPACKAGE rather than in a separate top-level call to EXPORT:
;;; DEFPACKAGE takes effect at compile time as well as at load time, whereas
;;; a plain (export ...) call is only executed when the file is loaded.
(defpackage :RLI
  (:use :common-lisp)
  (:export
   ;; Classes
   #:agent #:environment #:simulation
   ;; Agent protocol
   #:agent-init #:agent-start-episode #:agent-step #:agent-sim #:agent-env
   ;; Environment protocol
   #:env-init #:env-start-episode #:env-step #:env-sim #:env-agent
   ;; Simulation protocol
   #:sim-init #:sim-start-episode #:sim-steps #:sim-episodes #:sim-collect-data
   #:sim-agent #:sim-env
   ;; Slot names of SIMULATION that have no accessor functions
   #:last-sensation #:next-action))
(in-package :RLI)
;;; AGENT:
(defclass agent ()
  ((agent-sim
    :initarg :simulation
    :accessor agent-sim
    :documentation "The simulation this agent is attached to (set by SIM-INIT).")
   (agent-env
    :initarg :environment
    :accessor agent-env
    :documentation "The environment this agent interacts with (set by SIM-INIT)."))
  (:documentation
   "Base class for agents.  Both slots start out unbound unless the
corresponding initarg is supplied; SIM-INIT stores the simulation and the
environment in them."))
;; Declared explicitly, like AGENT-START-EPISODE and AGENT-STEP, so that the
;; generic function exists (and is documented) independently of its methods.
(defgeneric agent-init (agent)
  (:documentation
   "Initialize AGENT.  SIM-INIT calls this once, after the agent has been
linked to its simulation and environment and after ENV-INIT has run."))

(defmethod agent-init ((agent agent))
  "Default method: does nothing and returns NIL."
  nil)
(defgeneric agent-start-episode (agent sensation)
  (:documentation
   "Begin a new episode for AGENT given the first SENSATION of the episode.
SIM-START-EPISODE stores the returned value as the simulation's next action.
No default method is provided here."))
;[Doc]
(defgeneric agent-step (agent sensation reward)
  (:documentation
   "Advance AGENT by one step given the new SENSATION and the REWARD just
received.  The simulation loops store the returned value as the next action.
SENSATION may be :TERMINAL-STATE, which the simulation uses to mark the end of
an episode.  No default method is provided here."))
;;; ENVIRONMENT:
(defclass environment ()
  ;; The initargs mirror those of the AGENT and SIMULATION classes, so an
  ;; environment can also be linked at MAKE-INSTANCE time.  They are optional:
  ;; when omitted the slots stay unbound until SIM-INIT fills them in.
  ((env-sim
    :accessor env-sim
    :initarg :simulation
    :documentation "The simulation this environment is attached to (set by SIM-INIT).")
   (env-agent
    :accessor env-agent
    :initarg :agent
    :documentation "The agent acting in this environment (set by SIM-INIT)."))
  (:documentation
   "Base class for environments.  SIM-INIT stores the simulation and the agent
in the two slots."))
;; Declared explicitly, like ENV-START-EPISODE and ENV-STEP, so that the
;; generic function exists (and is documented) independently of its methods.
(defgeneric env-init (env)
  (:documentation
   "Initialize the environment ENV.  SIM-INIT calls this once, after ENV has
been linked to its simulation and agent and before AGENT-INIT is called."))

(defmethod env-init ((env environment))
  "Default method: does nothing and returns NIL."
  nil)
(defgeneric env-start-episode (environment)
  (:documentation
   "Begin a new episode in ENVIRONMENT.  SIM-START-EPISODE stores the returned
value as the simulation's last sensation and passes it on to
AGENT-START-EPISODE.  No default method is provided here."))
(defgeneric env-step (environment action)
  (:documentation
   "Apply ACTION to ENVIRONMENT for one step.  The simulation loops read two
return values, in this order: the next sensation and the reward.  A next
sensation of :TERMINAL-STATE is how the loops detect the end of an episode.
No default method is provided here."))
;;; SIMULATION:
(defclass simulation ()
  ((environment
    :initarg :environment
    :accessor sim-env
    :documentation "The environment being simulated.")
   (agent
    :initarg :agent
    :accessor sim-agent
    :documentation "The agent acting in the environment.")
   ;; The next two slots have no accessors or initargs; the methods in this
   ;; file reach them through WITH-SLOTS.  They are unbound until
   ;; SIM-START-EPISODE first runs.
   (last-sensation
    :documentation "Most recent sensation obtained from the environment.")
   (next-action
    :documentation "Action most recently chosen by the agent, to be applied next."))
  (:documentation
   "Couples an agent with an environment and holds the state carried from one
step to the next."))
;[Doc]
(defmethod sim-init ((sim simulation) (agent agent) (env environment))
  "Link SIM, AGENT and ENV to one another, initialize the environment and then
the agent, and start the first episode.  Returns whatever SIM-START-EPISODE
returns."
  ;; Cross-link the three objects so each can reach the other two.
  (setf (sim-env sim) env
        (sim-agent sim) agent
        (env-sim env) sim
        (env-agent env) agent
        (agent-sim agent) sim
        (agent-env agent) env)
  ;; The environment is initialized before the agent.
  (env-init env)
  (agent-init agent)
  (sim-start-episode sim))
(defmethod sim-start-episode ((sim simulation))
  "Start a new episode: ask the environment for its first sensation, record it
as the last sensation, then ask the agent for its first action and record that
as the next action.  Returns the action."
  (let ((first-sensation (env-start-episode (slot-value sim 'environment))))
    (setf (slot-value sim 'last-sensation) first-sensation)
    (setf (slot-value sim 'next-action)
          (agent-start-episode (slot-value sim 'agent) first-sensation))))
(defmethod sim-steps ((sim simulation) num-steps)
  "Run the simulation for NUM-STEPS steps, starting a new episode whenever the
last sensation is :TERMINAL-STATE.  Each step applies the pending action to the
environment, reports the transition to SIM-COLLECT-DATA, and asks the agent for
its next action.  Returns NIL."
  (with-slots ((sensation last-sensation)
               (action next-action)
               (env environment)
               (learner agent))
      sim
    (flet ((restart-if-terminal ()
             (when (eq sensation :terminal-state)
               (sim-start-episode sim))))
      ;; A previous run may have stopped exactly at the end of an episode.
      (restart-if-terminal)
      (loop repeat num-steps
            do (multiple-value-bind (new-sensation reward)
                   (env-step env action)
                 (sim-collect-data sim sensation action reward new-sensation)
                 ;; The agent is consulted before the sensation is advanced.
                 (setf action (agent-step learner new-sensation reward)
                       sensation new-sensation))
               (restart-if-terminal)))))
;[Doc]
(defmethod sim-episodes ((sim simulation) num-episodes max-steps-per-episode)
  "Run NUM-EPISODES episodes.  Every episode begins with SIM-START-EPISODE and
is then stepped at most MAX-STEPS-PER-EPISODE times, ending early as soon as
the last sensation is :TERMINAL-STATE.  Returns NIL."
  (with-slots ((sensation last-sensation)
               (action next-action)
               (env environment)
               (learner agent))
      sim
    (loop repeat num-episodes
          do (sim-start-episode sim)
             (loop repeat max-steps-per-episode
                   do (multiple-value-bind (new-sensation reward)
                          (env-step env action)
                        (sim-collect-data sim sensation action reward new-sensation)
                        (setf action (agent-step learner new-sensation reward)
                              sensation new-sensation))
                      ;; Leave the inner loop once the episode has ended.
                      (when (eq sensation :terminal-state)
                        (return))))))
;[Doc]
(defmethod sim-collect-data ((sim simulation) sensation action reward next-sensation)
  "Hook called by SIM-STEPS and SIM-EPISODES once per step with the transition
just observed.  This default method ignores its arguments and returns NIL;
specialize it on a SIMULATION subclass to record data."
  (declare (ignore sensation action reward next-sensation))
  ;; Example of what a specialization might do:
  ;;   (print (list sensation action reward next-sensation))
  nil)
;