@@ -2,14 +2,34 @@ module Fairness
2
2
3
3
using DataFrames
4
4
using OMOPCDMCohortCreator
5
+ import Base:
6
+ Fix2
5
7
6
- import Combinatorics:
7
- powerset
8
- import Chain:
9
- @chain
8
"""
    _counter_reducer(sub, count_name, funcs)

Pass `sub` through every callable in `funcs` in order (each one receives the
result of the previous one), drop the `:person_id` column from the final table,
and return one row per distinct combination of the remaining columns together
with the number of rows in that combination, stored in a column named
`count_name`.
"""
function _counter_reducer(sub, count_name, funcs)
    # A left fold is the sequential `sub = fun(sub)` chain; with no `funcs`
    # the input is handed on unchanged.
    annotated = foldl((acc, fun) -> fun(acc), funcs; init = sub)

    # Counting is done over everything except the person identifier.
    features = annotated[:, Not(:person_id)]
    grouped = groupby(features, names(features))

    return combine(grouped, nrow => count_name)
end
16
+
17
"""
    _subset_subjects(vec, process_size)

Split `vec` into consecutive, non-overlapping chunks of at most `process_size`
elements and return the chunks, in order, as a vector. Concatenating the chunks
reproduces `vec` exactly; an empty `vec` yields no chunks.

# Arguments
- `vec`: 1-based indexable collection to split.
- `process_size`: maximum number of elements per chunk; must be positive.

# Throws
- `ArgumentError` if `process_size` is not positive.
"""
function _subset_subjects(vec, process_size)
    process_size > 0 ||
        throw(ArgumentError("process_size must be positive, got $process_size"))

    n = length(vec)

    # A chunk starting at `start` ends at `start + process_size - 1`. Ending at
    # `start + process_size` would repeat that element as the first entry of
    # the next chunk, so it would be counted twice by callers.
    # The final chunk is clipped to the end of `vec`.
    return [
        vec[start:min(start + process_size - 1, n)] for start in 1:process_size:n
    ]
end
10
29
11
30
function _overlapped_subjects (cohorts, conn)
12
31
32
+ # Requirement is based on majority of phenotypes provided
13
33
required_overlapping_phenotypes = ceil (length (cohorts) / 2 )
14
34
15
35
subjects = GetCohortSubjects (cohorts, conn)
@@ -27,109 +47,223 @@ module Fairness
27
47
28
48
end
29
49
30
- function demographic_parity (cohorts, classes, conn)
50
"""
    demographic_parity(cohorts, funcs, conn; labels = false, silver = false,
                       reference_subjects = "", process_size = 10000)

Public entry point that forwards to one of the two `_demographic_parity`
methods and returns whatever that method returns.

# Keywords
- `labels`: when equal to `true`, the six-argument method is called and also
  receives `silver`; otherwise the five-argument method is called and `silver`
  is not forwarded.
- `silver`: only passed on when `labels == true`.
- `reference_subjects`: forwarded unchanged; both methods replace an empty
  value with `GetDatabasePersonIDs(conn)`.
- `process_size`: forwarded unchanged; used by the methods as the chunk size
  given to `_subset_subjects`.
"""
function demographic_parity(
    cohorts,
    funcs,
    conn;
    labels = false,
    silver = false,
    reference_subjects = "",
    process_size = 10000
)
    # Guard clause for the variant that does not take `silver`.
    if labels != true
        return _demographic_parity(
            cohorts, funcs, conn, reference_subjects, process_size
        )
    end

    return _demographic_parity(
        cohorts, funcs, conn, reference_subjects, process_size, silver
    )
end
31
69
32
- study, PP, PN = _overlapped_subjects (cohorts, conn)
33
-
34
- dps = DataFrame ()
35
- for class in classes
36
- for cohort in cohorts
37
- cohort_subjects = GetCohortSubjects (cohort, conn). subject_id
70
"""
    _demographic_parity(cohorts, funcs, conn, reference_subjects, process_size, silver)

Demographic parity broken down by `cohort_definition_id`.

Steps, in order:
1. Each function in `funcs` is bound to `conn` as its second argument.
2. An empty `reference_subjects` is replaced by `GetDatabasePersonIDs(conn)`.
3. The reference subjects are processed in chunks of `process_size` through
   `_counter_reducer`, and the per-chunk counts are summed into `:count_denom`.
4. For every distinct `cohort_definition_id` in `GetCohortSubjects(cohorts, conn)`
   the cohort's `subject_id`s are counted the same way into `:count_num`.
5. When `silver == true`, the second value returned by
   `_overlapped_subjects(cohorts, conn)` is counted as an extra group whose
   `cohort_definition_id` is the symbol `:silver`.
6. Numerator and denominator are outer-joined, missing values become `0`, and
   `demographic_parity = count_num ./ count_denom` is added.

Returns the joined table.
"""
function _demographic_parity(cohorts, funcs, conn, reference_subjects, process_size, silver)
    bound_funcs = [Fix2(fun, conn) for fun in funcs]

    if isempty(reference_subjects)
        reference_subjects = GetDatabasePersonIDs(conn)
    end

    cohorts_df = GetCohortSubjects(cohorts, conn)

    # Denominator: counts over the reference population, one chunk at a time.
    denom_parts = foldl(
        (acc, chunk) -> vcat(acc, _counter_reducer(chunk, :count_denom, bound_funcs)),
        _subset_subjects(reference_subjects, process_size);
        init = DataFrame(),
    )
    denom = combine(
        groupby(denom_parts, Not(:count_denom)),
        :count_denom => sum => :count_denom,
    )

    # Numerator: counts per cohort, each chunk tagged with its cohort id.
    num = DataFrame()
    for cohort_id in unique(cohorts_df.cohort_definition_id)
        members = filter(:cohort_definition_id => ==(cohort_id), cohorts_df).subject_id
        for chunk in _subset_subjects(members, process_size)
            counts = _counter_reducer(chunk, :count_num, bound_funcs)
            counts.cohort_definition_id .= cohort_id
            num = vcat(num, counts)
        end
    end

    if silver == true
        _, true_subjects, _ = _overlapped_subjects(cohorts, conn)

        silver_counts = foldl(
            (acc, chunk) -> vcat(acc, _counter_reducer(chunk, :count_num, bound_funcs)),
            _subset_subjects(true_subjects, process_size);
            init = DataFrame(),
        )
        silver_counts.cohort_definition_id .= :silver
        silver_counts = combine(
            groupby(silver_counts, Not(:count_num)),
            :count_num => sum => :count_num,
        )

        num = vcat(num, silver_counts)
    end

    num = combine(groupby(num, Not(:count_num)), :count_num => sum => :count_num)

    # Join on every column of `num` except its last two.
    # NOTE(review): presumably those two are `cohort_definition_id` and
    # `count_num` after the regrouping above; confirm the column order.
    join_on = [Symbol(col) => Symbol(col) for col in names(num)[1:(end - 2)]]
    dps = coalesce.(outerjoin(num, denom; on = join_on), 0)

    dps.demographic_parity = dps.count_num ./ dps.count_denom

    return dps
end
66
130
67
- function equality_of_opportunity (cohorts, classes , conn)
131
"""
    _demographic_parity(cohorts, funcs, conn, reference_subjects, process_size)

Demographic parity without a per-cohort breakdown.

Each function in `funcs` is bound to `conn` as its second argument. An empty
`reference_subjects` is replaced by `GetDatabasePersonIDs(conn)`. The reference
subjects (denominator) and the `subject_id` column of
`GetCohortSubjects(cohorts, conn)` (numerator) are each counted in chunks of
`process_size` via `_counter_reducer` and summed per group. The two tables are
outer-joined, missing values become `0`, and
`demographic_parity = count_num ./ count_denom` is added.

Returns the joined table.
"""
function _demographic_parity(cohorts, funcs, conn, reference_subjects, process_size)
    bound_funcs = [Fix2(fun, conn) for fun in funcs]

    if isempty(reference_subjects)
        reference_subjects = GetDatabasePersonIDs(conn)
    end

    # NOTE(review): every `subject_id` row is used as returned; if a subject can
    # appear under several cohorts it is counted once per row. Confirm intended.
    cohort_subject_ids = GetCohortSubjects(cohorts, conn).subject_id

    # Count `ids` chunk by chunk, then sum the per-chunk counts over all
    # columns but the last one (the count column itself).
    function tally(ids, count_name)
        stacked = foldl(
            (acc, chunk) -> vcat(acc, _counter_reducer(chunk, count_name, bound_funcs)),
            _subset_subjects(ids, process_size);
            init = DataFrame(),
        )
        return combine(
            groupby(stacked, names(stacked)[1:(end - 1)]),
            count_name => sum => count_name,
        )
    end

    denom = tally(reference_subjects, :count_denom)
    num = tally(cohort_subject_ids, :count_num)

    join_on = [Symbol(col) => Symbol(col) for col in names(num)[1:(end - 1)]]
    dps = coalesce.(outerjoin(num, denom; on = join_on), 0)

    dps.demographic_parity = dps.count_num ./ dps.count_denom

    return dps
end
100
169
101
- function predictive_rate_parity (cohorts, classes , conn)
170
"""
    equality_of_opportunity(cohorts, funcs, conn; reference_subjects = "",
                            process_size = 10000)

For each entry of `cohorts`, compute `count_num ./ count_denom` per group.

- The denominator counts the second value returned by
  `_overlapped_subjects(cohorts, conn)` (called `true_subjects` here).
- The numerator counts the subjects of that cohort, from
  `GetCohortSubjects(cohort_idx, conn)`, whose `subject_id` is in
  `true_subjects`.

Counting is done by `_counter_reducer` over chunks of `process_size`, with each
function in `funcs` bound to `conn` as its second argument. Per-cohort results
are outer-joined with the denominator, missing values become `0`, and a
`cohort_definition_id` column is added. The per-cohort tables are stacked and
returned.

`reference_subjects` is accepted but not read by this function.
"""
function equality_of_opportunity(
    cohorts, funcs, conn; reference_subjects = "", process_size = 10000
)
    bound_funcs = [Fix2(fun, conn) for fun in funcs]

    _, true_subjects, _ = _overlapped_subjects(cohorts, conn)

    # Stack the per-chunk counts for `ids` under the column `count_name`.
    function count_chunks(ids, count_name)
        return foldl(
            (acc, chunk) -> vcat(acc, _counter_reducer(chunk, count_name, bound_funcs)),
            _subset_subjects(ids, process_size);
            init = DataFrame(),
        )
    end

    denom_parts = count_chunks(true_subjects, :count_denom)
    denom = combine(
        groupby(denom_parts, names(denom_parts)[1:(end - 1)]),
        :count_denom => sum => :count_denom,
    )

    eoo = DataFrame()
    for cohort_idx in cohorts
        members = GetCohortSubjects(cohort_idx, conn)
        hits = filter(:subject_id => in(true_subjects), members)

        # NOTE(review): when `hits` is empty `num_parts` has no columns;
        # verify how the regrouping below behaves in that case.
        num_parts = count_chunks(hits.subject_id, :count_num)
        num = combine(
            groupby(num_parts, names(num_parts)[1:(end - 1)]),
            :count_num => sum => :count_num,
        )

        join_on = [Symbol(col) => Symbol(col) for col in names(num)[1:(end - 1)]]
        result = coalesce.(outerjoin(num, denom; on = join_on), 0)

        result.equality_of_opportunity = result.count_num ./ result.count_denom
        result.cohort_definition_id = ones(Int, nrow(result)) .* cohort_idx

        eoo = vcat(eoo, result)
    end

    return eoo
end
214
+
215
"""
    predictive_rate_parity(cohorts, funcs, conn; reference_subjects = "",
                           process_size = 10000)

For each entry of `cohorts`, compute `count_num ./ count_denom` per group.

`_overlapped_subjects(cohorts, conn)` supplies two subject collections, its
second and third return values (called `true_subjects` and `false_subjects`
here). For every cohort:

- the numerator counts cohort subjects found in `true_subjects`;
- the denominator counts cohort subjects found in `true_subjects` plus those
  found in `false_subjects`.

Counting is done by `_counter_reducer` over chunks of `process_size`, with each
function in `funcs` bound to `conn` as its second argument. Per-cohort results
are outer-joined, missing values become `0`, and a `cohort_definition_id`
column is added. The per-cohort tables are stacked and returned.

`reference_subjects` is accepted but not read by this function.
"""
function predictive_rate_parity(
    cohorts, funcs, conn; reference_subjects = "", process_size = 10000
)
    bound_funcs = [Fix2(fun, conn) for fun in funcs]

    _, true_subjects, false_subjects = _overlapped_subjects(cohorts, conn)

    # Stack the `:count_num` tables produced for each chunk.
    function stack_counts(chunks)
        return foldl(
            (acc, chunk) -> vcat(acc, _counter_reducer(chunk, :count_num, bound_funcs)),
            chunks;
            init = DataFrame(),
        )
    end

    # Sum `:count_num` over all columns but the last, naming the result `out`.
    function collapse(counts, out)
        return combine(
            groupby(counts, names(counts)[1:(end - 1)]),
            :count_num => sum => out,
        )
    end

    prp = DataFrame()
    for cohort_idx in cohorts
        members = GetCohortSubjects(cohort_idx, conn)
        true_cohort = filter(:subject_id => in(true_subjects), members)
        false_cohort = filter(:subject_id => in(false_subjects), members)

        # NOTE(review): when `true_cohort` is empty `true_counts` has no
        # columns; verify how `collapse` behaves in that case.
        true_counts = stack_counts(_subset_subjects(true_cohort.subject_id, process_size))

        false_chunks = _subset_subjects(false_cohort.subject_id, process_size)

        # With no false subjects the denominator is the numerator counts alone.
        pooled = if isempty(false_chunks)
            true_counts
        else
            vcat(true_counts, stack_counts(false_chunks))
        end

        denom = collapse(pooled, :count_denom)
        num = collapse(true_counts, :count_num)

        join_on = [Symbol(col) => Symbol(col) for col in names(num)[1:(end - 1)]]
        result = coalesce.(outerjoin(num, denom; on = join_on), 0)

        result.predictive_rate_parity = result.count_num ./ result.count_denom
        result.cohort_definition_id = ones(Int, nrow(result)) .* cohort_idx

        prp = vcat(prp, result)
    end

    return prp
end
266
+
267
+ export demographic_parity, equality_of_opportunity,predictive_rate_parity
268
+
135
269
end
0 commit comments