-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathoptimization-exercise-sample.sql
169 lines (138 loc) · 5.37 KB
/
optimization-exercise-sample.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
/* delete existing tables to make this script stateless.
`set rebuild false` below to avoid rebuilding */
create or replace function rebuild() returns void as $$
declare
num_employees int;
num_bonuses int;
min_salary int;
max_salary int;
begin
num_employees := 500000;
num_bonuses := 2000000;
min_salary := 70000;
max_salary := 80000;
raise notice 'Rebuilding test data';
set client_min_messages=warning;
drop table if exists department cascade;
drop table if exists employee cascade;
drop table if exists bonus;
/* (re)create tables */
create table if not exists department (
dep_id serial primary key,
name varchar(100)
);
create table if not exists employee (
emp_id serial primary key,
dep_id integer references department ( dep_id ),
manager_id integer,
name varchar(100),
salary integer
);
create table if not exists bonus (
bonus_id serial primary key,
emp_id integer references employee ( emp_id ),
amount integer,
time timestamp
);
/* create an index on employee salary */
create index employee_manager_salary on employee (manager_id);
--create index bonus_emp_id_time on bonus (emp_id, time);
create index bonus_emp_id on bonus (emp_id);
create index emp_dep_id on employee(dep_id);
-- create index employee_salary on employee (salary);
-- create index employee_salary_dep on employee (salary, dep_id);
-- create index employee_manager_salary on employee (manager_id, salary);
/* we don't need many departments to keep this interesting */
insert into department ( name ) values
( 'sales' ),
( 'marketing' ),
( 'engineering' );
/* create a bunch of employees, mostly in sales */
insert into employee ( dep_id, manager_id, name, salary ) (
select
('{1,1,1,2,3}'::int[])[floor(random()*5) + 1], /* skew towards sales */
1 + n % 10, /* first 10 employees manage all the others */
md5(random()::text), /* name is just a made up string */
min_salary + random() * (max_salary - min_salary) /* uniform dist of salaries in range */
from generate_series(1, num_employees) as n
);
/* Add some bonus payments */
insert into bonus ( emp_id, amount, time ) (
select
1 + floor(random() * num_employees),
10000 * random(), /* bonus amount is uniformly in [0, 10000) */
now() - random() * (now()+'720 days' - now()) /* all paid in last 30 days */
from generate_series(1, num_bonuses)
);
/* ensure that the catalog is up to date */
analyze;
end
$$ language plpgsql;
/* rebuild test data: comment this out to use existing data set */
\t on
--select rebuild();
\t off
/* average employee salary... note what happens when we have 3-4x the number of employees */
-- explain select count(*), avg(salary) from employee;
-- 1. What is the total bonus amount for each employee?
/*
explain analyze select employee.emp_id, sum(amount)
from employee left join bonus
on employee.emp_id = bonus.emp_id
group by employee.emp_id;
*/
-- 2. Which employees received the highest total compensation last year, and how much was it?
/* explain analyze select e.emp_id, e.name, e.salary + sum(b.amount) as tc
from employee e
join bonus b on e.emp_id = b.emp_id
where b.time between '01-01-2020'::timestamp and '31-12-2020'::timestamp
group by e.emp_id -- turns out, starting with pg 9.1, having pkey in "group by" is enough
order by tc desc
limit 10; */
--3. What is the total bonus amount awarded by department?
/*explain analyze verbose select d.name, sum(amount) as total_bonus
from department d
left join employee e
on e.dep_id = d.dep_id
left join bonus b
on b.emp_id = e.emp_id
group by d.dep_id;*/
--4. Which employees earned more than their managers?
/*
explain analyze select e.name, e.salary, m.salary from employee e
join employee m on e.manager_id = m.emp_id
where e.salary > m.salary
*/
--5. In which departments did employees earn more
-- total compensations than their managers last year?
select e.emp_id, e.name, e.salary + sum(eb.amount) as e_tc,
m.emp_id as manager_id, m.m_tc as manager_tc
from employee e
join bonus eb on e.emp_id = eb.emp_id
join (select e.emp_id, salary + sum(mb.amount) as m_tc
from employee e
join bonus mb on e.emp_id = mb.emp_id
group by e.emp_id) as m
on e.manager_id = m.emp_id
where e.e_tc > m.m_tc
group by e.emp_id, m.emp_id, m.m_tc; -- turns out, starting with pg 9.1, having pkey in "group by" is enough
/* top salaries in sales... note what indexes would help here? */
--explain select * from employee where employee.dep_id = 1 order by employee.salary limit 10;
/* how many employees earn more than their managers? what indexes would help here? */
--explain select e.name, m.name
--from employee as e, employee as m
--where e.manager_id = m.emp_id
--and e.salary > m.salary;
/* top employee comps including bonus... note what happens when we have 3-4x the employees */
-- explain select employee.emp_id, employee.salary + sum(bonus.amount) as total_comp
-- from employee, bonus
-- where bonus.emp_id = employee.emp_id
-- group by employee.emp_id, employee.salary
-- order by total_comp
-- limit 10;
/* see some stats */
/*
select histogram_bounds
from pg_stats
where tablename='employee' and attname='salary';
*/