by Wojciech Jaśkowski, Marcin Szubert, Paweł Liskowski, Krzysztof Krawiec
Abstract:
SZ-Tetris, a restricted version of Tetris, is a difficult reinforcement learning task. Previous research showed that, similarly to the original Tetris, value function-based methods such as temporal difference learning, do not work well for SZ-Tetris. The best performance in this game was achieved by employing direct policy search techniques, in particular the cross-entropy method in combination with handcrafted features. Nonetheless, a simple heuristic hand-coded player scores even higher. Here we show that it is possible to equal its performance with CMA-ES (Covariance Matrix Adaptation Evolution Strategy). We demonstrate that further improvement is possible by employing systematic n-tuple network, a knowledge-free function approximator, and VD-CMA-ES, a linear variant of CMA-ES for high dimension optimization. Last but not least, we show that a large systematic n-tuple network (involving more than 4 million parameters) allows the classical temporal difference learning algorithm to obtain similar average performance to VD-CMA-ES, but at 20 times lower computational expense, leading to the best policy for SZ-Tetris known to date. These results enrich the current understanding of difficulty of SZ-Tetris, and shed new light on the capabilities of particular search paradigms when applied to representations of various characteristics and dimensionality.
Reference:
High-Dimensional Function Approximation for Knowledge-Free Reinforcement Learning: a Case Study in SZ-Tetris (Wojciech Jaśkowski, Marcin Szubert, Paweł Liskowski, Krzysztof Krawiec), In GECCO’15: Proceedings of the 17th annual conference on Genetic and Evolutionary Computation, ACM Press, 2015.
Bibtex Entry:
@inproceedings{Jaskowski2015sztetris,
  author       = {Ja{\'s}kowski, Wojciech and Szubert, Marcin and Liskowski, Pawe{\l} and Krawiec, Krzysztof},
  title        = {High-Dimensional Function Approximation for Knowledge-Free Reinforcement Learning: a Case Study in {SZ-Tetris}},
  booktitle    = {GECCO'15: Proceedings of the 17th annual conference on Genetic and Evolutionary Computation},
  year         = {2015},
  address      = {Madrid, Spain},
  month        = jul,
  organization = {ACM},
  pages        = {567--574},
  publisher    = {ACM Press},
  abstract     = {SZ-Tetris, a restricted version of Tetris, is a difficult reinforcement learning task. Previous research showed that, similarly to the original Tetris, value function-based methods such as temporal difference learning, do not work well for SZ-Tetris. The best performance in this game was achieved by employing direct policy search techniques, in particular the cross-entropy method in combination with handcrafted features. Nonetheless, a simple heuristic hand-coded player scores even higher. Here we show that it is possible to equal its performance with CMA-ES (Covariance Matrix Adaptation Evolution Strategy). We demonstrate that further improvement is possible by employing systematic n-tuple network, a knowledge-free function approximator, and VD-CMA-ES, a linear variant of CMA-ES for high dimension optimization. Last but not least, we show that a large systematic n-tuple network (involving more than 4 million parameters) allows the classical temporal difference learning algorithm to obtain similar average performance to VD-CMA-ES, but at 20 times lower computational expense, leading to the best policy for SZ-Tetris known to date. These results enrich the current understanding of difficulty of SZ-Tetris, and shed new light on the capabilities of particular search paradigms when applied to representations of various characteristics and dimensionality.},
  doi          = {10.1145/2739480.2754783},
  keywords     = {Reinforcement learning, covariance matrix adaptation, CMA-ES, VD-CMA, function approximation, knowledge-free representations, video games, n-tuple system},
  owner        = {Wojciech},
  timestamp    = {2015.05.06},
  url          = {http://www.cs.put.poznan.pl/wjaskowski/pub/papers/Jaskowski2015HighDimensional.pdf},
}