Skip to content

Commit 66540f3

Browse files
plotting and script update
1 parent d01537f commit 66540f3

File tree

4 files changed

+400
-105
lines changed

4 files changed

+400
-105
lines changed

README.md

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# Reward Learning by Simulating the Past
2-
3-
This is the code accompanying the paper "Preferences Implicit in the State of the World".
4-
5-
Tests can be run with `python setup.py test`.
6-
7-
Instructions for running experiments can be found in `experiments.sh`.
1+
# Reward Learning by Simulating the Past
2+
3+
This is the code accompanying the paper "Preferences Implicit in the State of the World".
4+
5+
Tests can be run with `python setup.py test`.
6+
7+
Instructions for running the experiments can be found in `experiments.sh`. The script `experiments-for-plots.sh` generates the plots from the paper.

experiments-for-plots.sh

+141
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# Script to generate the plots in the paper. This script will create a "results" folder, and write the experiment
2+
# outputs into it. Afterwards, the plots are generated in the "results" folder by the "src/plotting.py" script.
3+
# Running this script takes several (3-6) hours.
4+
5+
###############
6+
# Section 5.4 #
7+
###############
8+
9+
# Robustness to the choice of Alice's planning horizon T.
10+
mkdir -p results/horizon
11+
12+
# room env
13+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 1 -o results/horizon -x 20 -d true_reward,final_reward
14+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 2 -o results/horizon -x 20 -d true_reward,final_reward
15+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 3 -o results/horizon -x 20 -d true_reward,final_reward
16+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 5 -o results/horizon -x 20 -d true_reward,final_reward
17+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 10 -o results/horizon -x 20 -d true_reward,final_reward
18+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 20 -o results/horizon -x 20 -d true_reward,final_reward
19+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 30 -o results/horizon -x 20 -d true_reward,final_reward
20+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 50 -o results/horizon -x 20 -d true_reward,final_reward
21+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 100 -o results/horizon -x 20 -d true_reward,final_reward
22+
23+
# train env
24+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 1 -o results/horizon -x 20 -d true_reward,final_reward
25+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 2 -o results/horizon -x 20 -d true_reward,final_reward
26+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 3 -o results/horizon -x 20 -d true_reward,final_reward
27+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 5 -o results/horizon -x 20 -d true_reward,final_reward
28+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 10 -o results/horizon -x 20 -d true_reward,final_reward
29+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 20 -o results/horizon -x 20 -d true_reward,final_reward
30+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 30 -o results/horizon -x 20 -d true_reward,final_reward
31+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 50 -o results/horizon -x 20 -d true_reward,final_reward
32+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 100 -o results/horizon -x 20 -d true_reward,final_reward
33+
34+
# apples env
35+
python src/run.py -e apples -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 1 -o results/horizon -x 20 -d true_reward,final_reward
36+
python src/run.py -e apples -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 2 -o results/horizon -x 20 -d true_reward,final_reward
37+
python src/run.py -e apples -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 3 -o results/horizon -x 20 -d true_reward,final_reward
38+
python src/run.py -e apples -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 5 -o results/horizon -x 20 -d true_reward,final_reward
39+
python src/run.py -e apples -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 10 -o results/horizon -x 20 -d true_reward,final_reward
40+
python src/run.py -e apples -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 20 -o results/horizon -x 20 -d true_reward,final_reward
41+
python src/run.py -e apples -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 30 -o results/horizon -x 20 -d true_reward,final_reward
42+
python src/run.py -e apples -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 50 -o results/horizon -x 20 -d true_reward,final_reward
43+
python src/run.py -e apples -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 100 -o results/horizon -x 20 -d true_reward,final_reward
44+
45+
# batteries env
46+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 1 -o results/horizon -x 20 -d true_reward,final_reward
47+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 2 -o results/horizon -x 20 -d true_reward,final_reward
48+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 3 -o results/horizon -x 20 -d true_reward,final_reward
49+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 5 -o results/horizon -x 20 -d true_reward,final_reward
50+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 10 -o results/horizon -x 20 -d true_reward,final_reward
51+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 20 -o results/horizon -x 20 -d true_reward,final_reward
52+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 30 -o results/horizon -x 20 -d true_reward,final_reward
53+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 50 -o results/horizon -x 20 -d true_reward,final_reward
54+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -u True -m 1000 -T 100 -o results/horizon -x 20 -d true_reward,final_reward
55+
56+
57+
##############
58+
# Appendix D #
59+
##############
60+
61+
# Option -c additive stands for the Additive method, and -c bayesian for the Bayesian method
62+
# The -k parameter controls the standard deviation (set to 0.5 by default)
63+
mkdir -p results/additive-vs-bayesian
64+
65+
# room env additive
66+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.05 -o results/additive-vs-bayesian -d true_reward,final_reward
67+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.1 -o results/additive-vs-bayesian -d true_reward,final_reward
68+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.2 -o results/additive-vs-bayesian -d true_reward,final_reward
69+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.3 -o results/additive-vs-bayesian -d true_reward,final_reward
70+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.5 -o results/additive-vs-bayesian -d true_reward,final_reward
71+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 1 -o results/additive-vs-bayesian -d true_reward,final_reward
72+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 2 -o results/additive-vs-bayesian -d true_reward,final_reward
73+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 3 -o results/additive-vs-bayesian -d true_reward,final_reward
74+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 5 -o results/additive-vs-bayesian -d true_reward,final_reward
75+
python src/run.py -e room -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 10 -o results/additive-vs-bayesian -d true_reward,final_reward
76+
77+
# train env additive
78+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.05 -o results/additive-vs-bayesian -d true_reward,final_reward
79+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.1 -o results/additive-vs-bayesian -d true_reward,final_reward
80+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.2 -o results/additive-vs-bayesian -d true_reward,final_reward
81+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.3 -o results/additive-vs-bayesian -d true_reward,final_reward
82+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.5 -o results/additive-vs-bayesian -d true_reward,final_reward
83+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 1 -o results/additive-vs-bayesian -d true_reward,final_reward
84+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 2 -o results/additive-vs-bayesian -d true_reward,final_reward
85+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 3 -o results/additive-vs-bayesian -d true_reward,final_reward
86+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 5 -o results/additive-vs-bayesian -d true_reward,final_reward
87+
python src/run.py -e train -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 10 -o results/additive-vs-bayesian -d true_reward,final_reward
88+
89+
# batteries env additive
90+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.05 -o results/additive-vs-bayesian -d true_reward,final_reward
91+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.1 -o results/additive-vs-bayesian -d true_reward,final_reward
92+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.2 -o results/additive-vs-bayesian -d true_reward,final_reward
93+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.3 -o results/additive-vs-bayesian -d true_reward,final_reward
94+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.5 -o results/additive-vs-bayesian -d true_reward,final_reward
95+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 1 -o results/additive-vs-bayesian -d true_reward,final_reward
96+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 2 -o results/additive-vs-bayesian -d true_reward,final_reward
97+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 3 -o results/additive-vs-bayesian -d true_reward,final_reward
98+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 5 -o results/additive-vs-bayesian -d true_reward,final_reward
99+
python src/run.py -e batteries -p default -c additive -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 10 -o results/additive-vs-bayesian -d true_reward,final_reward
100+
101+
# room env bayesian
102+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.05 -o results/additive-vs-bayesian -d true_reward,final_reward
103+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.1 -o results/additive-vs-bayesian -d true_reward,final_reward
104+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.2 -o results/additive-vs-bayesian -d true_reward,final_reward
105+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.3 -o results/additive-vs-bayesian -d true_reward,final_reward
106+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 0.5 -o results/additive-vs-bayesian -d true_reward,final_reward
107+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 1 -o results/additive-vs-bayesian -d true_reward,final_reward
108+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 2 -o results/additive-vs-bayesian -d true_reward,final_reward
109+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 3 -o results/additive-vs-bayesian -d true_reward,final_reward
110+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 5 -o results/additive-vs-bayesian -d true_reward,final_reward
111+
python src/run.py -e room -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 10 -k 10 -o results/additive-vs-bayesian -d true_reward,final_reward
112+
113+
# train env bayesian
114+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.05 -o results/additive-vs-bayesian -d true_reward,final_reward
115+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.1 -o results/additive-vs-bayesian -d true_reward,final_reward
116+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.2 -o results/additive-vs-bayesian -d true_reward,final_reward
117+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.3 -o results/additive-vs-bayesian -d true_reward,final_reward
118+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 0.5 -o results/additive-vs-bayesian -d true_reward,final_reward
119+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 1 -o results/additive-vs-bayesian -d true_reward,final_reward
120+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 2 -o results/additive-vs-bayesian -d true_reward,final_reward
121+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 3 -o results/additive-vs-bayesian -d true_reward,final_reward
122+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 5 -o results/additive-vs-bayesian -d true_reward,final_reward
123+
python src/run.py -e train -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 8 -k 10 -o results/additive-vs-bayesian -d true_reward,final_reward
124+
125+
# batteries env bayesian
126+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.05 -o results/additive-vs-bayesian -d true_reward,final_reward
127+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.1 -o results/additive-vs-bayesian -d true_reward,final_reward
128+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.2 -o results/additive-vs-bayesian -d true_reward,final_reward
129+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.3 -o results/additive-vs-bayesian -d true_reward,final_reward
130+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 0.5 -o results/additive-vs-bayesian -d true_reward,final_reward
131+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 1 -o results/additive-vs-bayesian -d true_reward,final_reward
132+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 2 -o results/additive-vs-bayesian -d true_reward,final_reward
133+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 3 -o results/additive-vs-bayesian -d true_reward,final_reward
134+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 5 -o results/additive-vs-bayesian -d true_reward,final_reward
135+
python src/run.py -e batteries -p default -c bayesian -i rlsp -f True -s 0 -l 0.001 -m 1000 -T 11 -k 10 -o results/additive-vs-bayesian -d true_reward,final_reward
136+
137+
138+
######################
139+
# Generate the plots #
140+
######################
141+
python src/plotting.py

0 commit comments

Comments
 (0)