
Commit ba292f0

fixed screen dims bug in environment wrapper; python3 compatibility
1 parent: c7b1f10

2 files changed: +7 -6 lines

dqn/agent.py (+4 -3)
@@ -1,4 +1,5 @@
 from __future__ import print_function
+from functools import reduce
 import os
 import time
 import random
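
Note: the added functools import is itself a Python 3 fix; reduce was a builtin in Python 2 but lives in functools in Python 3. A minimal sketch of the usual pattern (the shape values here are hypothetical, purely for illustration):

from functools import reduce  # builtin in Python 2, moved to functools in Python 3

# Hypothetical use: flattening a conv layer's output shape into one dimension.
shape = [7, 7, 64]                             # assumed [height, width, channels]
flat_size = reduce(lambda x, y: x * y, shape)  # 7 * 7 * 64 = 3136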
@@ -224,7 +225,7 @@ def build_dqn(self):
 
       q_summary = []
       avg_q = tf.reduce_mean(self.q, 0)
-      for idx in xrange(self.env.action_size):
+      for idx in range(self.env.action_size):
         q_summary.append(tf.summary.histogram('q/%s' % idx, avg_q[idx]))
       self.q_summary = tf.summary.merge(q_summary, 'q_summary')
 
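Note: every xrange-to-range change in this commit is the same Python 3 fix; xrange was removed in Python 3, where range itself returns a lazy range object instead of a list. A minimal sketch:

# Python 2: range() builds a full list, xrange() iterates lazily.
# Python 3: xrange is gone and range() is already lazy.
for idx in range(4):        # runs unchanged on both versions
    print(idx)

# A common shim when Python 2 laziness still mattered:
try:
    xrange                  # defined on Python 2
except NameError:
    xrange = range          # Python 3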

@@ -325,7 +326,7 @@ def build_dqn(self):
 
     tf.initialize_all_variables().run()
 
-    self._saver = tf.train.Saver(self.w.values() + [self.step_op], max_to_keep=30)
+    self._saver = tf.train.Saver(list(self.w.values()) + [self.step_op], max_to_keep=30)
 
     self.load_model()
     self.update_target_q_network()
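
Note: the Saver change is also Python 3 related; dict.values() returns a list in Python 2 but a dict_values view in Python 3, and a view cannot be concatenated to a list with +. Wrapping it in list() works on both versions. A standalone sketch with stand-in values (not the repo's actual objects):

w = {'l1_w': 'w1', 'l2_w': 'w2'}             # stands in for the dict of network weights
step_op = 'step'                             # stands in for self.step_op

# Python 2: w.values() is a list, so `w.values() + [step_op]` works.
# Python 3: w.values() is a dict_values view, so `+` raises TypeError.
vars_to_save = list(w.values()) + [step_op]  # portable on both versions
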
@@ -373,7 +374,7 @@ def play(self, n_step=10000, n_episode=100, test_ep=None, render=False):
       self.env.env.monitor.start(gym_dir)
 
     best_reward, best_idx = 0, 0
-    for idx in xrange(n_episode):
+    for idx in range(n_episode):
       screen, reward, action, terminal = self.env.new_random_game()
       current_reward = 0
 

dqn/environment.py (+3 -3)
@@ -11,7 +11,7 @@ def __init__(self, config):
         config.screen_width, config.screen_height, config.action_repeat, config.random_start
 
     self.display = config.display
-    self.dims = (screen_width, screen_height)
+    self.dims = (screen_height, screen_width)
 
     self._screen = None
     self.reward = 0
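
Note: the dims swap is the screen-dims bug from the commit message; ALE/gym return frames as NumPy arrays indexed (height, width), so a (width, height) tuple only happens to work while the target screen is square. A sketch with a hypothetical non-square config to expose the bug:

import numpy as np

frame = np.zeros((210, 160))                 # raw ALE frame: height first

# Hypothetical non-square target size, for illustration only:
screen_width, screen_height = 80, 105

wrong_dims = (screen_width, screen_height)   # (80, 105) -- pre-fix ordering
right_dims = (screen_height, screen_width)   # (105, 80) -- matches array layout

# Buffers allocated with wrong_dims disagree with resized frames' shapes;
# with the usual 84x84 config the tuple is symmetric, so the bug stays hidden.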
@@ -26,7 +26,7 @@ def new_game(self, from_random_game=False):
 
   def new_random_game(self):
     self.new_game(True)
-    for _ in xrange(random.randint(0, self.random_start - 1)):
+    for _ in range(random.randint(0, self.random_start - 1)):
       self._step(0)
     self.render()
     return self.screen, 0, 0, self.terminal
@@ -70,7 +70,7 @@ def act(self, action, is_training=True):
     cumulated_reward = 0
     start_lives = self.lives
 
-    for _ in xrange(self.action_repeat):
+    for _ in range(self.action_repeat):
       self._step(action)
       cumulated_reward = cumulated_reward + self.reward
 