% Conference paper (ICONS 2016) published in IFAC-PapersOnLine, volume 49, issue 5.
% The volume field holds only the volume number; the issue lives in the number field.
% Names use the unambiguous "Last, First" form; the citation key is unchanged.
@inproceedings{37bdd34d4ecd44aeaa4818d16ab4b4c5,
title = "Policy derivation methods for critic-only reinforcement learning in continuous action spaces",
abstract = "State-of-the-art critic-only reinforcement learning methods can deal with a small discrete action space. The most common approach to real-world problems with continuous actions is to discretize the action space. In this paper a method is proposed to derive a continuous-action policy based on a value function that has been computed for discrete actions by using any known algorithm such as value iteration. Several variants of the policy-derivation algorithm are introduced and compared on two continuous state-action benchmarks: double pendulum swing-up and 3D mountain car.",
keywords = "continuous actions, multi-variable systems, optimal control, policy derivation, reinforcement learning",
author = "Alibekov, Eduard and Kubalik, Jiri and Babuska, Robert",
year = "2016",
doi = "10.1016/j.ifacol.2016.07.127",
language = "English",
volume = "49",
series = "IFAC-PapersOnLine",
publisher = "Elsevier",
number = "5",
pages = "285--290",
editor = "Guelton, K and Grabot, B and Lendek, Z",
booktitle = "IFAC-PapersOnLine",
note = "4th IFAC Conference on Intelligent Control and Automation Sciences, ICONS 2016 ; Conference date: 01-06-2016 Through 03-06-2016",
}