-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathvulnerability.sql
173 lines (148 loc) · 5.24 KB
/
vulnerability.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/* codes
household id: hhid
cluster id:
livestock: HV246a + hv246a + hv246d + hv246e + hv246g
sick: hv248
land: hv245
wealth: hv271, or should it have been hv270 (already on scale of 5?)
orphans: hv251
watertime: hv204
electricity: hv206
cookingfuel: hv226
gender: hv219
cellphone: hv243a
radio: hv207
urban: urbanrural field in cluster geometry with 'U' for urban
*/
update mwita set geom =
st_makevalid(geom)
where not st_isvalid(geom)
alter table dhsclusters add column ta_id integer;
update dhsclusters set ta_id = mwita.id from mwita where st_intersects(mwita.geom, dhsclusters.geom);
select count(name_2), count(distinct name_2) from mwita;
--oh no, the TA names are not unique! Use the ID instead...
alter table dhshh add column ta_id integer;
update dhshh set ta_id = dhsclusters.ta_id from dhsclusters where dhsclusters.dhsclust = dhshh.hv001;
alter table dhshh add column urbanrural varchar(1);
update dhshh set urbanrural = dhsclusters.urban_rura from dhsclusters where dhsclusters.dhsclust = dhshh.hv001;
--originally having 24825 records
DELETE FROM dhshh WHERE
hv246a=98 or
hv246a=99 or
hv246d=98 or
hv246d=99 or
hv246e=98 or
hv246e=99 or
hv246g=98 or
hv246g=99 or
hv219=9 or
hv243a=9 or
hv245=99 or
hv206=9 or
hv204=999 or
hv204=998 or
hv226=99 or --no cooking data
hv226=95 or --no cooking
hv226=96 --cooking with 'other' fuel
HV207=9;
--deleted 156 household records with missing data for our purposes
--count livestock
ALTER TABLE dhshh ADD COLUMN hhlivestock INTEGER;
UPDATE dhshh SET hhlivestock = hv246a + hv246d + hv246e + hv246g;
update dhshh
--illustrate challenge with percent_rank() and ntile()
select hhlivestock,
(percent_rank() OVER(ORDER BY hhlivestock asc) * 4) + 1 as pctRtimes4plus1,
percent_rank() OVER(ORDER BY hhlivestock asc) as pctRank,
ntile(5) over(order by hhlivestock asc) as ntile5
from
dhshh
--YOU CANNOT USE WINDOW functions in UPDATE query directly, so it must be used as a SUBQUERY
/* Standardizing to scale of 1 (low capacity) to 5 (high capacity) */
/* ORDER BY DESC will make high values to recieve a low score
ORDER BY ASC will make low values to receive a low score */
ALTER TABLE dhshh ADD COLUMN livestock REAL;
UPDATE dhshh set livestock = pctr FROM
(SELECT hhid as shhid, percent_rank() OVER(ORDER BY hhlivestock asc) * 4 + 1 as pctr FROM dhshh ) as subq
where hhid=shhid;
ALTER TABLE dhshh ADD COLUMN sick REAL;
UPDATE dhshh set sick=pctr from
(SELECT hhid as shhid, percent_rank() OVER(ORDER BY hv248 desc) * 4 + 1 as pctr FROM dhshh ) as subq
where hhid=shhid;
--ORDER THIS DESC because if MANY people were sick they need a LOW capacity score
ALTER TABLE dhshh ADD COLUMN land REAL;
UPDATE dhshh set land=pctr from
(SELECT hhid as shhid, percent_rank() OVER(ORDER BY hv245 asc) * 4 + 1 as pctr FROM dhshh ) as subq
where hhid=shhid;
ALTER TABLE dhshh ADD COLUMN wealth REAL;
UPDATE dhshh set wealth=pctr from
(SELECT hhid as shhid, percent_rank() OVER(ORDER BY hv271 asc) * 4 + 1 as pctr FROM dhshh ) as subq
where hhid=shhid;
ALTER TABLE dhshh ADD COLUMN orphans REAL;
UPDATE dhshh set orphans=pctr from
(SELECT hhid as shhid, percent_rank() OVER(ORDER BY hv251 desc) * 4 + 1 as pctr FROM dhshh ) as subq
where hhid=shhid;
--switch time to water source 'on premises' from 996 to 0
UPDATE dhshh
SET hv204 = 0 WHERE hv204 = 996;
ALTER TABLE dhshh ADD COLUMN water REAL;
UPDATE dhshh set water=pctr from
(SELECT hhid as shhid, percent_rank() OVER(ORDER BY hv204 desc) * 4 + 1 as pctr FROM dhshh ) as subq
where hhid=shhid;
ALTER TABLE dhshh ADD COLUMN electricity REAL;
UPDATE dhshh set electricity=pctr from
(SELECT hhid as shhid, percent_rank() OVER(ORDER BY hv206 asc) * 4 + 1 as pctr FROM dhshh ) as subq
where hhid=shhid;
ALTER TABLE dhshh ADD COLUMN cooking REAL;
UPDATE dhshh set cooking=pctr from
(SELECT hhid as shhid, percent_rank() OVER(ORDER BY hv226 desc) * 4 + 1 as pctr FROM dhshh ) as subq
where hhid=shhid;
ALTER TABLE dhshh ADD COLUMN sexcat REAL;
UPDATE dhshh set sexcat=
CASE
WHEN hv219 = 1 THEN 5
ELSE 1
END;
ALTER TABLE dhshh ADD COLUMN cellphone REAL;
UPDATE dhshh set cellphone=
CASE
WHEN hv243a = 1 THEN 5
ELSE 1
END;
ALTER TABLE dhshh ADD COLUMN radio REAL;
UPDATE dhshh set radio=
CASE
WHEN hv207 = 1 THEN 5
ELSE 1
END;
ALTER TABLE dhshh ADD COLUMN urbanruralscore REAL;
UPDATE dhshh set urbanruralscore =
CASE
WHEN urbanrural = 'U' THEN 4
ELSE 3
END;
/* Create a composite household capacity score, on scale from 0.4 to 2 */
ALTER TABLE dhshh ADD COLUMN capacity REAL;
UPDATE dhshh SET capacity =
(livestock*0.04+
sick*0.03+
land*0.06+
wealth*0.04+
orphans*0.03+
water*0.04+
electricity*0.03+
cooking*0.02+
sexcat*0.02+
cellphone*0.04+
radio*0.03+
urbanruralscore*0.02);
/* Aggregate household scores by Traditional Authority */
CREATE VIEW tacapacity AS
SELECT ta_id, count(capacity) as hhcount, avg(capacity) as capacityavg, min(capacity) as capacitymin, max(capacity) as capacitymax, stddev(capacity) as capacitystddev
FROM dhshh
GROUP BY ta_id;
/* Join average household scores to Traditional Authority table */
ALTER TABLE mwita ADD COLUMN capacity REAL;
UPDATE mwita SET capacity = capacityavg
FROM tacapacity
WHERE tacapacity.ta_id = mwita.id;