-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsql_window_functions.py
179 lines (132 loc) · 8.25 KB
/
sql_window_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#sql_window_functions
#load necessary modules
import pandas_gbq
import streamlit as st
from google.cloud import bigquery
from google.oauth2 import service_account
#read credentails for BigQuery access
credentials = service_account.Credentials.from_service_account_info(
st.secrets["gcp_service_account"]
)
client = bigquery.Client(credentials=credentials)
project_id = 'web-app-341703'
#define the function
def app():
st.subheader('SQL - Window Functions')
#------Query1------
with st.container():
col1, col2 = st.columns([5,1])
with col1:
with st.form(key='query1', clear_on_submit = True):
st.write("Returns the calculated payment amount for earch row, then returns a running total each provider's calculated payment anount. [SUM, OVER, PARTITION BY]")
st.code("SELECT provider_name, provider_state, calc_payment_amt,\nSUM(calc_payment_amt) OVER(\n PARTITION BY provider_name\n ORDER BY calc_payment_amt) AS Salary_Sum\nFROM hosp_info.ep_provider_paid_ehr_subset ")
submit_code = st.form_submit_button("Execute")
if submit_code:
with open('queries/window_functions/SUM.sql') as f:
contents = f.read()
df = pandas_gbq.read_gbq(contents, project_id, credentials=credentials)
st.table(df)
with col2:
# this line is a shortcut to clicking the hamburger menu to refresh
st.button(key='Refresh1', label='Refresh screen')
#------Query2------
with st.container():
col1, col2 = st.columns([5,1])
with col1:
with st.form(key='query2', clear_on_submit = True):
st.write("Returns each provider's calculated payment amounts, ranks the number of times the amounts appear, and then displays them in descending order. [ROW_NUMBER and RANK]")
st.code("SELECT provider_name, provider_state, calc_payment_amt, \n ROW_NUMBER() OVER(PARTITION BY provider_name \n ORDER BY calc_payment_amt) AS Row,\n RANK() OVER(PARTITION BY provider_name \n ORDER BY calc_payment_amt) AS Rank\nFROM hosp_info.ep_provider_paid_ehr_subset")
submit_code = st.form_submit_button("Execute")
if submit_code:
with open('queries/window_functions/ROW_RANK.sql') as f:
contents = f.read()
df = pandas_gbq.read_gbq(contents, project_id, credentials=credentials)
st.table(df)
with col2:
st.button(key='Refresh2', label='Refresh screen')
#------Query3------
#create containers and columns for the form contents
with st.container():
col1, col2 = st.columns([5,1])
with col1:
with st.form(key='query3', clear_on_submit = True):
st.write("Returns the quartile for the providers n the state of Maryland based on the calculated payment amount. [NTILE]")
st.write("The window function was used in this subquery to isolate the results to a smaller subset of the state of Maryland, which doesn't allow the use of WHERE in it.")
st.code("SELECT t.*\nFROM (SELECT provider_name, provider_state, calc_payment_amt,\nNTILE (14) OVER(\n ORDER BY provider_state, calc_payment_amt\n) AS Quartile\nFROM hosp_info.ep_provider_paid_ehr_subset) AS t\nWHERE provider_state = 'Maryland'")
submit_code = st.form_submit_button("Execute")
if submit_code:
with open('queries/window_functions/NTILE.sql') as f:
contents = f.read()
df = pandas_gbq.read_gbq(contents, project_id, credentials=credentials)
st.table(df)
with col2:
# this line is a shortcut to clicking the hamburger menu to refresh
st.button(key='Refresh3', label='Refresh screen')
#------Query4------
#create containers and columns for the form contents
with st.container():
col1, col2 = st.columns([5,1])
with col1:
with st.form(key='query4', clear_on_submit = True):
st.write("Returns providers that have the second highest calculated payment amount [NTH VALUE]")
st.code("SELECT provider_name, provider_state, calc_payment_amt,\n NTH_VALUE(provider_name, 3) OVER(\n PARTITION BY provider_state \n ORDER BY calc_payment_amt DESC) AS Third_Highest_Payment\n FROM hosp_info.ep_provider_paid_ehr_subset")
submit_code = st.form_submit_button("Execute")
if submit_code:
with open('queries/window_functions/NTH_VALUE.sql') as f:
contents = f.read()
df = pandas_gbq.read_gbq(contents, project_id, credentials=credentials)
st.table(df)
with col2:
# this line is a shortcut to clicking the hamburger menu to refresh
st.button(key='Refresh4', label='Refresh screen')
#------Query5------
#create containers and columns for the form contents
with st.container():
col1, col2 = st.columns([5,1])
with col1:
with st.form(key='query5', clear_on_submit = True):
st.write("Returns the distinct rank for each ranked row for each provider's calculated payment amount. [DENSE RANK]")
st.code("SELECT provider_name, provider_state, calc_payment_amt, \n ROW_NUMBER() OVER(PARTITION BY provider_name \n ORDER BY calc_payment_amt) AS Row,\n RANK() OVER(PARTITION BY provider_name \n ORDER BY calc_payment_amt) AS Rank,\n DENSE_RANK() OVER(PARTITION BY provider_name \n ORDER BY calc_payment_amt) AS Dense_Rank\nFROM hosp_info.ep_provider_paid_ehr_subset")
submit_code = st.form_submit_button("Execute")
if submit_code:
with open('queries/window_functions/DENSE_RANK.sql') as f:
contents = f.read()
df = pandas_gbq.read_gbq(contents, project_id, credentials=credentials)
st.table(df)
with col2:
# this line is a shortcut to clicking the hamburger menu to refresh
st.button(key='Refresh5', label='Refresh screen')
#------Query6------
#create containers and columns for the form contents
with st.container():
col1, col2 = st.columns([5,1])
with col1:
with st.form(key='query6', clear_on_submit = True):
st.write("Returns the difference of the previous calc payment amounts for each provider, where applicable. [LAG]")
st.code("SELECT provider_name, calc_payment_amt,\nLAG(calc_payment_amt, 1) OVER(\n PARTITION BY provider_name ORDER BY calc_payment_amt) as previous_calc_pmt_amt,\ncalc_payment_amt - LAG(calc_payment_amt, 1) OVER(\n PARTITION BY provider_name ORDER BY calc_payment_amt) AS diff_calc_paymnet_amt\nFROM hosp_info.ep_provider_paid_ehr_subset")
submit_code = st.form_submit_button("Execute")
if submit_code:
with open('queries/window_functions/LAG.sql') as f:
contents = f.read()
df = pandas_gbq.read_gbq(contents, project_id, credentials=credentials)
st.table(df)
with col2:
# this line is a shortcut to clicking the hamburger menu to refresh
st.button(key='Refresh6', label='Refresh screen')
#------Query7------
#create containers and columns for the form contents
with st.container():
col1, col2 = st.columns([5,1])
with col1:
with st.form(key='query7', clear_on_submit = True):
st.write("Returns succeeding calculated payment amounts for each provider, where applicable. [LEAD]")
st.code("SELECT provider_name, calc_payment_amt, \nLEAD(calc_payment_amt, 1) OVER(\n PARTITION BY provider_name ORDER BY calc_payment_amt) as next_calc_pmt_amt\nFROM hosp_info.ep_provider_paid_ehr_subset")
submit_code = st.form_submit_button("Execute")
if submit_code:
with open('queries/window_functions/LEAD.sql') as f:
contents = f.read()
df = pandas_gbq.read_gbq(contents, project_id, credentials=credentials)
st.table(df)
with col2:
# this line is a shortcut to clicking the hamburger menu to refresh
st.button(key='Refresh7', label='Refresh screen')