-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrefs.bib
71 lines (63 loc) · 2.23 KB
/
refs.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
% OpenAI Gym whitepaper, available as an arXiv preprint only.
% The bare identifier goes in eprint and the archive name in archiveprefix,
% so eprint-aware styles can format and link it themselves.
@misc{openai-gym,
  author        = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
  title         = {{OpenAI} {Gym}},
  year          = {2016},
  eprint        = {1606.01540},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
}
% Mnih et al., asynchronous methods paper, published in conference proceedings.
% booktitle is capitalised in full because styles do not recase it.
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  year      = {2016},
}
% Mnih et al., deep reinforcement learning on Atari games (arXiv preprint).
% Typed as misc because a preprint has no journal; the arXiv identifier
% lives in the eprint fields instead of being written into a journal name.
@misc{mnih2013playing,
  author        = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
  title         = {Playing {Atari} with deep reinforcement learning},
  year          = {2013},
  eprint        = {1312.5602},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
}
% van Hasselt, Guez and Silver, double Q-learning paper in the AAAI proceedings.
% The lowercase particle makes BibTeX parse "van" as the von part of the name.
% Braces keep Q-learning and AAAI capitalised under sentence-casing styles.
@inproceedings{van2016deep,
  author    = {van Hasselt, Hado and Guez, Arthur and Silver, David},
  title     = {Deep reinforcement learning with double {Q-learning}},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume    = {30},
  number    = {1},
  year      = {2016},
}
% Sutton and Barto, reinforcement learning textbook.
% The publisher acronym is braced so no style can lowercase it.
@book{sutton2018reinforcement,
  author    = {Sutton, Richard S. and Barto, Andrew G.},
  title     = {Reinforcement learning: An introduction},
  publisher = {{MIT} Press},
  year      = {2018},
}
% Lin, journal article on self-improving reactive agents.
% The journal name is stored with its proper capitalisation; the double
% issue is written as a range with a double hyphen, like the page range.
@article{lin1992self,
  author    = {Lin, Long-Ji},
  title     = {Self-improving reactive agents based on reinforcement learning, planning and teaching},
  journal   = {Machine Learning},
  volume    = {8},
  number    = {3--4},
  pages     = {293--321},
  year      = {1992},
  publisher = {Springer},
}
% Blog post on actor-critic methods, A2C and A3C.
% The publication day is stored as an ISO date so the style formats it.
% NOTE(review): the access date in note is ambiguous between day-month and
% month-day order, so it is kept verbatim; move it to urldate once confirmed.
@online{actorCritics,
  author = {Karagiannakos, Sergios},
  title  = {The idea behind {Actor-Critics} and how {A2C} and {A3C} improve them},
  date   = {2018-11-17},
  url    = {https://theaisummer.com/Actor_critics/},
  note   = {Last accessed: 01-04-2021},
}
% Blog post on implementing A2C in Tensorflow.
% The publication day is stored as an ISO date so the style formats it.
% NOTE(review): the access date in note is ambiguous between day-month and
% month-day order, so it is kept verbatim; move it to urldate once confirmed.
@online{actorCritics2,
  author = {Anker, Steven},
  title  = {More {A2C} in {Tensorflow}},
  date   = {2018-10-10},
  url    = {https://steven-anker.nl/blog/?p=184},
  note   = {Last accessed: 01-09-2021},
}
% Schaul et al., prioritized experience replay (arXiv preprint).
% Typed as misc because a preprint has no journal; the arXiv identifier
% lives in the eprint fields instead of being written into a journal name.
@misc{schaul2015prioritized,
  author        = {Schaul, Tom and Quan, John and Antonoglou, Ioannis and Silver, David},
  title         = {Prioritized experience replay},
  year          = {2015},
  eprint        = {1511.05952},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
}