Commit d2e3ce1

Added new learning method, still needs to be tested
1 parent b3b3e75 commit d2e3ce1

10 files changed: +253 −36 lines changed

normative_supervisor/supervisor/normsys/DDPLTranslator.java

+24 −4

@@ -43,13 +43,24 @@ public void init(Environment env, ArrayList<String> actions) {
     public void update(Environment env, ArrayList<String> possible, Game game) {
         labelsToFacts(env);
         generateActionNorms(possible);
-        generateRegulativeRules();
         generateConstitutiveRules(env);
+        generateRegulativeRules();
         generateDefeaters();
         generateHierarchies();
 
     }
 
+
+    public void synth_update(Environment env) {
+        labelsToFacts(env);
+        generateConstitutiveRules(env);
+        generateRegulativeRules();
+        generateDefeaters();
+        generateHierarchies();
+    }
+
+
+
     public void generateActionNorms(ArrayList<String> actions) {
         normBase.generateNonConcurrence(actions);
         //normBase.generateRequired(actions);
@@ -104,7 +115,7 @@ public void labelsToFacts(Environment env) {
         for(String lab : env.getNegLabels()) {
             Literal lit = new Literal(lab);
             Literal nlit = lit.getComplementClone();
-            Rule fact = new Rule(lab, RuleType.DEFEASIBLE);
+            Rule fact = new Rule(lab, RuleType.FACT);
             try {
                 fact.addHeadLiteral(nlit);
             } catch (RuleException e) {}
@@ -210,8 +221,17 @@ public void generateRegulativeRules() {
             }
             Literal lit = termToLit(n.getPrescription(), true);
             rule.addHeadLiteral(lit);
-            rule.setMode(obl);
-
+            if(n.getName().contains("default")) {
+                for(Rule r : rules) {
+                    if(r.isConflictRule(rule)) {
+                        Superiority sup = new Superiority(r.getLabel(),rule.getLabel());
+                        hierarchy.add(sup);
+                    }
+                }
+            }
+            else {
+                rule.setMode(obl);
+            }
             if(n.getName().contains("concur") || n.getName().contains("req")) {
                 rule.setRuleType(RuleType.DEFEASIBLE);
                 actionRules.add(rule.clone());

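In short, the translator changes do three things: negative environment labels are now encoded as strict FACT rules rather than defeasible ones; constitutive rules are generated before regulative rules in update(); and the new synth_update() rebuilds the theory from an environment description alone, without calling generateActionNorms(). In addition, regulative rules whose norm name contains "default" are no longer given the obligation mode; instead a Superiority relation makes every conflicting rule superior to them, so a default rule only prevails when no conflicting rule fires.
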
ns_lab.jar

964 Bytes
Binary file not shown.

ns_server.jar

964 Bytes
Binary file not shown.

pacman/filter.py

+11 −2

@@ -9,7 +9,6 @@
 class NormativeFilter():
 
     def __init__(self, norms, reason, port=PORT):
-        self.process = None
         self.violations = []
         self.norm_base = norms
         self.reasoner = reason
@@ -55,8 +54,16 @@ def process_message(self, message):
             if self.compliant:
                 return 0
             else:
-                #score = message['score']
                 return -1
+        elif message['response'] == 'DUAL-EVALUATION':
+            subideal = message['sub-ideal']
+            if self.compliant:
+                return 0,0
+            else:
+                if subideal:
+                    return -1,0
+                else:
+                    return -1,-1
 
 
     def build_query(self, state, actions, rt):
@@ -68,6 +75,8 @@ def build_query(self, state, actions, rt):
             to['possible'] = actions
         elif rt == 'EVALUATION':
             to['action'] = actions[0]
+        elif rt == 'DUAL-EVALUATION':
+            to['action'] = actions[0]
         return to
 
 
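For context, the new DUAL-EVALUATION reply yields a pair of reward signals instead of a single scalar: (0, 0) for a compliant action, (-1, 0) for a violation that still counts as sub-ideal (a lesser, tolerated violation), and (-1, -1) otherwise. A minimal, self-contained restatement of that mapping (the helper name is illustrative, not part of the commit):

def dual_evaluation_rewards(compliant, subideal):
    # Mirrors the DUAL-EVALUATION branch of process_message above: the first
    # component punishes any violation, the second punishes only violations
    # that are not even sub-ideal.
    if compliant:
        return 0, 0
    return (-1, 0) if subideal else (-1, -1)

assert dual_evaluation_rewards(True, False) == (0, 0)
assert dual_evaluation_rewards(False, True) == (-1, 0)
assert dual_evaluation_rewards(False, False) == (-1, -1)
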
pacman/game.py

+4 −3

@@ -522,7 +522,7 @@ class Game:
     """
 
     def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False, catchExceptions=False, filter=None,
-                  train=False, supervise=False , learn=False):
+                  train=False, supervise=False , learn1=False, learn2=False):
         self.agentCrashed = False
         self.agents = agents
         self.display = display
@@ -540,7 +540,8 @@ def __init__( self, agents, display, rules, startingIndex=0, muteAgents=False, c
         self.filter = filter
         self.train = train
         self.supervise = supervise
-        self.learn = learn
+        self.learn1 = learn1
+        self.learn2 = learn2
 
     def getProgress(self):
         if self.gameOver:
@@ -645,7 +646,7 @@ def run( self ):
                         self.unmute()
                         return
                 else:
-                    observation = agent.observationFunction(self.state.deepCopy(), self.filter, self.learn)
+                    observation = agent.observationFunction(self.state.deepCopy(), self.filter, self.learn1, self.learn2)
                     self.unmute()
             else:
                 observation = self.state.deepCopy()

pacman/layouts/mediumOpen.lay

+7

@@ -0,0 +1,7 @@
+%%%%%%%%%
+%G.....o%
+%.......%
+%.......%
+%.......%
+%G.....P%
+%%%%%%%%%

pacman/learningAgents.py

+9 −7

@@ -121,7 +121,7 @@ def getLegalActions(self,state, filter=None, train=False, supervise=False):
         """
         return self.actionFn(state, filter, train, supervise)
 
-    def observeTransition(self, state,action,nextState,deltaReward, filter=None, learn=False):
+    def observeTransition(self, state,action,nextState,deltaReward, filter=None, learn1=False, learn2=False):
         """
             Called by environment to inform agent that a transition has
             been observed. This will result in a call to self.update
@@ -131,11 +131,13 @@ def observeTransition(self, state,action,nextState,deltaReward, filter=None, lea
         """
         self.episodeRewards += deltaReward
         self.update(state,action,nextState,deltaReward)
-        if learn and filter is not None:
+        if learn1 and filter is not None:
             result = filter.process_message(filter.send_request(filter.build_query(state.data, [action], 'EVALUATION')))
-            #if result != 0:
-            # print(action+": "+str(result))
-            self.update2(state, action, nextState,result)
+            self.update2(state, action, nextState, result)
+        if learn2 and filter is not None:
+            result1, result2 = filter.process_message(filter.send_request(filter.build_query(state.data, [action], 'DUAL-EVALUATION')))
+            self.update2(state, action, nextState, result1)
+            self.update3(state, action, nextState, result2)
 
     def startEpisode(self):
         """
@@ -209,14 +211,14 @@ def doAction(self,state,action):
     ###################
     # Pacman Specific #
     ###################
-    def observationFunction(self, state, filter=None, learn=False):
+    def observationFunction(self, state, filter=None, learn1=False, learn2=False):
         """
             This is where we ended up after our last action.
            The simulation should somehow ensure this is called
         """
         if not self.lastState is None:
             reward = state.getScore() - self.lastState.getScore()
-            self.observeTransition(self.lastState, self.lastAction, state, reward, filter, learn)
+            self.observeTransition(self.lastState, self.lastAction, state, reward, filter, learn1, learn2)
         return state
 
     def registerInitialState(self, state):

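Note that the two flags feed the auxiliary critics separately: with learn1, the scalar EVALUATION result (0 or -1) goes only to update2, while with learn2 the DUAL-EVALUATION pair is split so that result1 drives update2 and result2 drives update3, matching the QValues2 and QValues3 tables of the SubIdealAgent introduced below.
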
pacman/pacman.py

+10 −8

@@ -294,12 +294,12 @@ def __init__(self, timeout=30):
         self.timeout = timeout
 
     def newGame( self, layout, pacmanAgent, ghostAgents, display, quiet = False, catchExceptions=False, filter=None,
-                 train=False, supervise=False, learn=False):
+                 train=False, supervise=False, learn1=False, learn2=False):
         agents = [pacmanAgent] + ghostAgents[:layout.getNumGhosts()]
         initState = GameState()
         initState.initialize( layout, len(ghostAgents) )
         game = Game(agents, display, self, catchExceptions=catchExceptions, filter=filter, train=train,
-                    supervise=supervise, learn=learn)
+                    supervise=supervise, learn1=learn1, learn2=learn2)
         game.state = initState
         self.initialState = initState.deepCopy()
         self.quiet = quiet
@@ -572,9 +572,10 @@ def readCommand( argv ):
     parser.add_option('--rec', dest='rec', help=default('Would you like to save a record of tests run? Input file name.'),
                       default=None)
     parser.add_option('--supervise', action='store_true', dest='supervise', help='Use normative supervisor?', default=False)
-    parser.add_option('--learn', action='store_true', dest='learn', help='Learn with norms - only choose with MORL agent', default=False)
-    parser.add_option('--partial', action='store_true', dest='partial',
-                      help='Learn with a partial MDP', default=False)
+    parser.add_option('--learn', action='store_true', dest='learn1', help='Learn with norms - only choose with MORL agent', default=False)
+    parser.add_option('--sublearn', action='store_true', dest='learn2',
+                      help='Learn with sub ideal reward function; only select for SubIdealAgent', default=False)
+    parser.add_option('--partial', action='store_true', dest='partial', help='Learn with a partial MDP', default=False)
     #parser.add_option('--punish', type='int', dest='punish', help=default('Punishment for violation of norm base.'), default=0)
     parser.add_option('--port', type='int', dest='port', help=default('Port number.'), default=6666)
     #parser.add_option('--track', action='store_true', dest='track', default=False)
@@ -629,7 +630,8 @@ def readCommand( argv ):
     args['reason'] = options.reason
     args['rec'] = options.rec
     args['supervise'] = options.supervise
-    args['learn'] = options.learn
+    args['learn1'] = options.learn1
+    args['learn2'] = options.learn2
     args['partial'] = options.partial
     args['port'] = options.port
     #if options.track:
@@ -690,7 +692,7 @@ def replayGame( layout, actions, display ):
     display.finish()
 
 def runGames( layout, pacman, ghosts, display, numGames, record, numTraining = 0, catchExceptions=False, timeout=30,
-              norm=None, reason=None, rec=None, supervise=False, learn=False, partial=False, port=6666):
+              norm=None, reason=None, rec=None, supervise=False, learn1=False, learn2=False, partial=False, port=6666):
     import __main__
     __main__.__dict__['_display'] = display
 
@@ -724,7 +726,7 @@ def runGames( layout, pacman, ghosts, display, numGames, record, numTraining = 0
         rules.quiet = False
         train = False
         sup = supervise
-        game = rules.newGame( layout, pacman, ghosts, gameDisplay, beQuiet, catchExceptions, filt, train, sup, learn)
+        game = rules.newGame( layout, pacman, ghosts, gameDisplay, beQuiet, catchExceptions, filt, train, sup, learn1,learn2)
         game.run()
         if not beQuiet: games.append(game)
 

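Usage note: assuming the normative supervisor (ns_server.jar) is listening on the configured --port, the new reward channel would be exercised with an invocation along the lines of python pacman.py -p PacmanSubIdealAgent -l mediumOpen --supervise --sublearn. Here -p and -l are the standard Berkeley Pac-Man agent/layout flags; the agent and layout names are the ones added in this commit, and, as the commit message says, this path is still untested.
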
pacman/subIdealAgents.py

+170

@@ -0,0 +1,170 @@
#EMERY: Thresholded lexicographic multi objective RL agents

from game import *
from learningAgents import ReinforcementAgent
from featureExtractors import *

import random,util,math

class SubIdealAgent(ReinforcementAgent):
    def __init__(self, **args):
        ReinforcementAgent.__init__(self, **args)
        self.QValues1 = util.Counter()
        self.QValues2 = util.Counter()
        self.QValues3 = util.Counter()
        self.legalActions = []

    def getQValue1(self, state, action):
        return self.QValues1[(state, action)]

    def getQValue2(self, state, action):
        return min(0, self.QValues2[(state, action)])

    def getQValue3(self, state, action):
        return min(0, self.QValues3[(state, action)])

    def getAction(self, state, filter=None, train=False, supervise=False):
        # Pick Action
        self.legalActions = self.getLegalActions(state, filter, train, supervise)
        action = None
        if not self.legalActions:
            return action
        randomize = util.flipCoin(self.epsilon)
        if randomize:
            action = random.choice(self.legalActions)
        else:
            action = self.getPolicy(state)
        return action

    def update(self, state, action, nextState, reward):
        curQ = self.QValues1[(state, action)]
        self.QValues1[(state, action)] = (1 - self.alpha) * curQ + self.alpha * (
                reward + self.discount * self.getValue1(nextState))

    def update2(self, state, action, nextState, reward):
        curQ = self.QValues2[(state, action)]
        self.QValues2[(state, action)] = (1 - self.alpha) * curQ + self.alpha * (
                reward + self.discount * self.getValue2(nextState))

    def update3(self, state, action, nextState, reward):
        curQ = self.QValues3[(state, action)]
        self.QValues3[(state, action)] = (1 - self.alpha) * curQ + self.alpha * (
                reward + self.discount * self.getValue3(nextState))

    def getPolicy(self, state):
        actions1 = []
        actions2 = []
        if not self.legalActions:
            return None
        val2 = self.getValue2(state)
        for action in self.legalActions:
            if val2 == self.getQValue2(state, action):
                actions1.append(action)
        qvals1 = [self.getQValue3(state, act) for act in actions1]
        val3 = max(qvals1)
        for action in actions1:
            if val3 == self.getQValue3(state, action):
                actions2.append(action)
        qvals2 = [self.getQValue1(state, act) for act in actions2]
        val1 = max(qvals2)
        actions3 = []
        for a in actions2:
            if val1 == self.getQValue1(state, a):
                actions3.append(a)
        return random.choice(actions3)

    def getValue1(self, state, filter=None, train=False):
        qvals = [self.getQValue1(state, action) for action in self.legalActions]
        if not qvals:
            return 0.0
        return max(qvals)

    def getValue2(self, state, filter=None, train=False):
        qvals = [self.getQValue2(state, action) for action in self.legalActions]
        if not qvals:
            return 0.0
        return max(qvals)

    def getValue3(self, state, filter=None, train=False):
        qvals = [self.getQValue3(state, action) for action in self.legalActions]
        if not qvals:
            return 0.0
        return max(qvals)

class PacmanSubIdealAgent(SubIdealAgent):
    def __init__(self, epsilon=0.05,gamma=0.8,alpha=0.2, numTraining=0, **args):
        args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.index = 0  # This is always Pacman
        SubIdealAgent.__init__(self, **args)

    def getAction(self, state, filter=None, train=False, supervise=False):
        action = SubIdealAgent.getAction(self,state, filter, train, supervise)
        self.doAction(state,action)
        return action

class ApproximateSubIdealAgent(PacmanSubIdealAgent):
    def __init__(self, extractor='IdentityExtractor', **args):
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanSubIdealAgent.__init__(self, **args)
        self.weights1 = util.Counter()
        self.weights2 = util.Counter()
        self.weights3 = util.Counter()

    def getWeights(self):
        return self.weights1, self.weights2, self.weights3

    def getQValue1(self, state, action):
        qval1 = 0.0
        features = self.featExtractor.getFeatures(state, action)
        for feature in features:
            qval1 += features[feature] * self.weights1[feature]
        return qval1

    def getQValue2(self, state, action):
        qval2 = 0.0
        features = self.featExtractor.getFeatures(state, action)
        for feature in features:
            qval2 += features[feature] * self.weights2[feature]
        return min(-0.1, qval2)

    def getQValue3(self, state, action):
        qval3 = 0.0
        features = self.featExtractor.getFeatures(state, action)
        for feature in features:
            qval3 += features[feature] * self.weights3[feature]
        return min(-0.1, qval3)

    def update(self, state, action, nextState, reward):
        features = self.featExtractor.getFeatures(state, action)
        difference = reward + self.discount * self.getValue1(nextState) - self.getQValue1(state, action)
        for feature in features:
            self.weights1[feature] += self.alpha * difference * features[feature]

    def update2(self, state, action, nextState, reward):
        features = self.featExtractor.getFeatures(state, action)
        difference = reward + self.discount * self.getValue2(nextState) - self.getQValue2(state, action)
        for feature in features:
            self.weights2[feature] += self.alpha * difference * features[feature]

    def update3(self, state, action, nextState, reward):
        features = self.featExtractor.getFeatures(state, action)
        difference = reward + self.discount * self.getValue3(nextState) - self.getQValue3(state, action)
        for feature in features:
            self.weights3[feature] += self.alpha * difference * features[feature]

    def final(self, state):
        # call the super-class final method
        PacmanSubIdealAgent.final(self, state)

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            #print('self.weights',self.weights)
            pass

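The agent keeps three critics: QValues2 for the norm-violation signal, QValues3 for the sub-ideal signal, and QValues1 for the ordinary game score. getPolicy selects actions lexicographically: first the actions whose violation value (clamped at zero in the tabular agent) is maximal, then, among those, the ones maximizing the sub-ideal value, and only then the game-score value as a tie-breaker, so norm compliance acts as a threshold before score is optimised. A small self-contained sketch of that selection rule (the function and dictionaries are illustrative, not part of the commit):

import random

def lexicographic_choice(actions, q_violation, q_subideal, q_score):
    # Keep only the actions that are best on the violation objective,
    # then best on the sub-ideal objective, then best on game score.
    best = max(q_violation[a] for a in actions)
    pool = [a for a in actions if q_violation[a] == best]
    best = max(q_subideal[a] for a in pool)
    pool = [a for a in pool if q_subideal[a] == best]
    best = max(q_score[a] for a in pool)
    pool = [a for a in pool if q_score[a] == best]
    return random.choice(pool)

# Example: 'North' carries a norm-violation penalty, so 'East' is chosen
# even though its game-score Q-value is lower.
print(lexicographic_choice(
    ['North', 'East'],
    q_violation={'North': -1.0, 'East': 0.0},
    q_subideal={'North': 0.0, 'East': 0.0},
    q_score={'North': 5.0, 'East': 1.0}))   # -> East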