Skip to content

Commit

Permalink
Add measurement_aux_schema as a field to be set in config. Fix #4. Fix
Browse files Browse the repository at this point in the history
  • Loading branch information
cxji committed Sep 25, 2023
1 parent 70d1f9f commit e69d2f8
Show file tree
Hide file tree
Showing 21 changed files with 93 additions and 87 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.DS_Store
1 change: 1 addition & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# database parameters
db_name = 'placeholder'
measurement_aux_schema = 'placeholder'
nonstationarity_schema_name = 'placeholder'

# directory parameters
Expand Down
16 changes: 8 additions & 8 deletions data_extraction/create_lab_reference_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ def create_lab_reference_table(direction,
'sql',
reference_table_sql_filename), 'r') as f:
create_table_sql = f.read()
create_table_sql = create_table_sql.format(direction = direction,
reference_order = reference_order,
min_ref_count = min_ref_count,
num_characters = num_characters,
avg_diff_threshold = avg_diff_threshold,
schema_name = config.nonstationarity_schema_name)
create_table_sql = create_table_sql.format(direction = direction,
reference_order = reference_order,
min_ref_count = min_ref_count,
num_characters = num_characters,
avg_diff_threshold = avg_diff_threshold,
measurement_aux_schema = config.measurement_aux_schema)

engine = sqlalchemy.create_engine('postgresql://' + config.db_name,
echo=False,
Expand All @@ -75,8 +75,8 @@ def standardize_lab_reference_table(direction):
'sql',
reference_table_sql_filename), 'r') as f:
create_table_sql = f.read()
create_table_sql = create_table_sql.format(direction = direction,
schema_name = config.nonstationarity_schema_name)
create_table_sql = create_table_sql.format(direction = direction,
measurement_aux_schema = config.measurement_aux_schema)

engine = sqlalchemy.create_engine('postgresql://' + config.db_name,
echo=False,
Expand Down
4 changes: 2 additions & 2 deletions data_extraction/create_standardized_measurement_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@

def create_standardized_measurement_table():
'''
Create a standardized measurement table {nonstationarity_schema_name}.measurement_with_nulls_replacing_zero_drop_nonstandard
Create a standardized measurement table {measurement_aux_schema}.measurement_with_nulls_replacing_zero_drop_nonstandard
@return: None
'''
measurement_table_sql_filename = 'create_standardized_measurement_table.sql'
with open(join(dirname(abspath(__file__)),
'sql',
measurement_table_sql_filename), 'r') as f:
measurement_table_sql = f.read()
measurement_table_sql = measurement_table_sql.format(schema_name = config.nonstationarity_schema_name)
measurement_table_sql = measurement_table_sql.format(measurement_aux_schema = config.measurement_aux_schema)

engine = sqlalchemy.create_engine('postgresql://' + config.db_name,
echo=False,
Expand Down
9 changes: 5 additions & 4 deletions data_extraction/extract_omop_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,10 +340,11 @@ def extract_features(cohort,
debug_suffix = ''
if debug_size is not None:
debug_suffix = '_debug' + str(debug_size)
feature_sql_params = {'window_name' : eligibility_time.replace(' ', '_'),
'eol_suffix' : eol_suffix,
'schema_name' : config.nonstationarity_schema_name,
'debug_suffix': debug_suffix}
feature_sql_params = {'window_name' : eligibility_time.replace(' ', '_'),
'eol_suffix' : eol_suffix,
'schema_name' : config.nonstationarity_schema_name,
'measurement_aux_schema': config.measurement_aux_schema,
'debug_suffix' : debug_suffix}
features = [Feature(n, p, params=feature_sql_params)
for n, p in zip(feature_names, feature_paths)]

Expand Down
5 changes: 3 additions & 2 deletions data_extraction/extract_top_lab_outcomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ def select_top_lab_outcomes(number_outcomes,
'sql',
'select_top_abnormal_lab_outcomes.sql'), 'r') as f:
top_feat_sql = f.read()
top_feat_sql = top_feat_sql.format(number_outcomes = number_outcomes,
schema_name = config.nonstationarity_schema_name)
top_feat_sql = top_feat_sql.format(number_outcomes = number_outcomes,
schema_name = config.nonstationarity_schema_name,
measurement_aux_schema = config.measurement_aux_schema)
engine = sqlalchemy.create_engine('postgresql://' + config.db_name,
echo=False,
connect_args = {"host": '/var/run/postgresql/'})
Expand Down
4 changes: 2 additions & 2 deletions data_extraction/load_lab_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def load_reference_tables():
'sql',
create_tables_sql_filename), 'r') as f:
create_tables_sql = f.read()
create_tables_sql = create_tables_sql.format(schema_name = config.nonstationarity_schema_name)
create_tables_sql = create_tables_sql.format(measurement_aux_schema = config.measurement_aux_schema)

engine = sqlalchemy.create_engine('postgresql://' + config.db_name,
echo=False,
Expand Down Expand Up @@ -48,7 +48,7 @@ def load_reference_tables():
'sql',
index_tables_sql_filename), 'r') as f:
index_tables_sql = f.read()
index_tables_sql = index_tables_sql.format(schema_name = config.nonstationarity_schema_name)
index_tables_sql = index_tables_sql.format(measurement_aux_schema = config.measurement_aux_schema)
with session_scope(engine) as session:
session.execute(sqlalchemy.text(index_tables_sql))
session.commit()
Expand Down
24 changes: 13 additions & 11 deletions data_extraction/replace_zeros_with_null_in_measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@
from os.path import dirname, abspath, join

sys.path.append(dirname(dirname(abspath(__file__))))
sys.path.append(join(dirname(dirname(abspath(__file__))), 'utils'))
import config

from utils import session_scope
from db_utils import session_scope

def create_reference_table(direction,
min_ref_count=5,
num_characters=15,
avg_diff_threshold=5):
'''
Create table cdm_measurement_aux.measurement_{direction}_references
Create table {measurement_aux_schema}.measurement_{direction}_references
with the following columns:
1. concept_id
2. concept_name,
Expand All @@ -43,11 +43,12 @@ def create_reference_table(direction,
reference_order = 'ASC' # take lowest range low if same frequency
else:
reference_order = 'DESC' # take highest range high if same frequency
create_table_sql = create_table_sql.format(direction = direction,
min_ref_count = min_ref_count,
num_characters = num_characters,
avg_diff_threshold = avg_diff_threshold,
reference_order = reference_order)
create_table_sql = create_table_sql.format(measurement_aux_schema = config.measurement_aux_schema,
direction = direction,
min_ref_count = min_ref_count,
num_characters = num_characters,
avg_diff_threshold = avg_diff_threshold,
reference_order = reference_order)

engine = sqlalchemy.create_engine('postgresql://' + config.db_name,
echo=False,
Expand All @@ -58,20 +59,21 @@ def create_reference_table(direction,

def create_table_with_nulls_replacing_zero(threshold=5):
'''
Create table cdm_measurement_aux.measurement_with_nulls_replacing_zero
Create table {config.measurement_aux_schema}.measurement_with_nulls_replacing_zero
with same columns as measurement table
Zeros are replaced with null if it seems like an invalid value for that concept, i.e.
reference range low or average non-zero value (if reference range low not available) is at least threshold
and concept does not have negative values
Expects cdm.measurement and cdm_measurement_aux.measurement_low_references
Expects cdm.measurement and {config.measurement_aux_schema}.measurement_low_references
@param threshold: int, threshold for comparing reference range low or average non-zero value
@return: None
'''
assert threshold > 0

with open('sql/create_measurement_table_with_nulls_replacing_zero.sql', 'r') as f:
create_table_sql = f.read()
create_table_sql = create_table_sql.format(threshold = threshold)
create_table_sql = create_table_sql.format(measurement_aux_schema = config.measurement_aux_schema,
threshold = threshold)

engine = sqlalchemy.create_engine('postgresql://' + config.db_name,
echo=False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ If category still does not have a reference after looking among similar concepts
- Reference from general population and unspecified unit in cdm_measurement_aux
********************************************************************************/

CREATE TABLE {schema_name}.measurement_age_gender_specific_{direction}_references AS
CREATE TABLE {measurement_aux_schema}.measurement_age_gender_specific_{direction}_references AS
-- age ranges to try: < 30, 30 - 50, 50 - 70, > 70
WITH measurements_with_age_gender AS (
SELECT m.measurement_concept_id AS concept_id,
Expand Down Expand Up @@ -539,19 +539,19 @@ SELECT r.concept_id,
0 AS from_different_age_range,
1 AS from_general_reference
FROM references_to_get_from_general r
JOIN cdm_measurement_aux.measurement_{direction}_references g
JOIN {measurement_aux_schema}.measurement_{direction}_references g
ON r.concept_id = g.concept_id;

CREATE INDEX idx_{schema_name}_age_gender_{direction}_references
ON {schema_name}.measurement_age_gender_specific_{direction}_references (
CREATE INDEX idx_{measurement_aux_schema}_age_gender_{direction}_references
ON {measurement_aux_schema}.measurement_age_gender_specific_{direction}_references (
concept_id ASC,
unit_source_value ASC,
age_range ASC,
gender_source_value ASC
);

CREATE INDEX idx_{schema_name}_age_gender_{direction}_references_concept_unit
ON {schema_name}.measurement_age_gender_specific_{direction}_references (
CREATE INDEX idx_{measurement_aux_schema}_age_gender_{direction}_references_concept_unit
ON {measurement_aux_schema}.measurement_age_gender_specific_{direction}_references (
concept_id ASC,
unit_source_value ASC
);
2 changes: 1 addition & 1 deletion data_extraction/sql/create_lab_reference_tables.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SET SEARCH_PATH TO {schema_name};
SET SEARCH_PATH TO {measurement_aux_schema};

-- create table for general references
DROP TABLE IF EXISTS measurement_references_from_sources;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ that shares same first num_characters characters and has average non-zero values
- If two similar concepts have the same most frequent reference and frequency, smaller concept ID is listed.
********************************************************************************/

CREATE TABLE cdm_measurement_aux.measurement_{direction}_references AS
CREATE TABLE {measurement_aux_schema}.measurement_{direction}_references AS
WITH measurement_concepts AS (
SELECT DISTINCT c.concept_id,
c.concept_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ AND (reference range low is at least threshold
AND average non-zero value is at least threshold))
********************************************************************************/

CREATE TABLE cdm_measurement_aux.measurement_with_nulls_replacing_zero AS
CREATE TABLE {measurement_aux_schema}.measurement_with_nulls_replacing_zero AS
WITH measurement_concepts AS (
SELECT DISTINCT measurement_concept_id AS concept_id
FROM cdm.measurement
Expand All @@ -19,15 +19,15 @@ measurements_with_negatives AS ( -- zero is null if not exists in here
),
measurements_with_high_references AS ( -- and (exists in here
SELECT concept_id
FROM cdm_measurement_aux.measurement_low_references
FROM {measurement_aux_schema}.measurement_low_references
WHERE range_low >= {threshold}
),
measurements_with_no_references AS (
SELECT concept_id
FROM measurement_concepts c
WHERE NOT EXISTS (
SELECT
FROM cdm_measurement_aux.measurement_low_references r
FROM {measurement_aux_schema}.measurement_low_references r
WHERE c.concept_id = r.concept_id
)
),
Expand Down
20 changes: 10 additions & 10 deletions data_extraction/sql/create_standardized_lab_reference_tables.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
-- Override references with manually standardized references where available
DROP TABLE IF EXISTS {schema_name}.measurement_age_gender_specific_standardized_{direction}_references;
CREATE TABLE {schema_name}.measurement_age_gender_specific_standardized_{direction}_references AS
DROP TABLE IF EXISTS {measurement_aux_schema}.measurement_age_gender_specific_standardized_{direction}_references;
CREATE TABLE {measurement_aux_schema}.measurement_age_gender_specific_standardized_{direction}_references AS
SELECT r.concept_id,
r.unit_source_value,
r.concept_name,
Expand All @@ -12,11 +12,11 @@ SELECT r.concept_id,
r.from_opposite_gender,
r.from_different_age_range,
r.from_general_reference
FROM {schema_name}.measurement_age_gender_specific_{direction}_references r
LEFT JOIN {schema_name}.measurement_unit_specific_references_from_sources us
FROM {measurement_aux_schema}.measurement_age_gender_specific_{direction}_references r
LEFT JOIN {measurement_aux_schema}.measurement_unit_specific_references_from_sources us
ON r.concept_id = us.concept_id
AND r.unit_source_value = us.unit_source_value
LEFT JOIN {schema_name}.measurement_references_from_sources s
LEFT JOIN {measurement_aux_schema}.measurement_references_from_sources s
ON r.concept_id = s.concept_id
WHERE r.gender_source_value = 'M'
UNION
Expand All @@ -31,16 +31,16 @@ SELECT r.concept_id,
r.from_opposite_gender,
r.from_different_age_range,
r.from_general_reference
FROM {schema_name}.measurement_age_gender_specific_{direction}_references r
LEFT JOIN {schema_name}.measurement_unit_specific_references_from_sources us
FROM {measurement_aux_schema}.measurement_age_gender_specific_{direction}_references r
LEFT JOIN {measurement_aux_schema}.measurement_unit_specific_references_from_sources us
ON r.concept_id = us.concept_id
AND r.unit_source_value = us.unit_source_value
LEFT JOIN {schema_name}.measurement_references_from_sources s
LEFT JOIN {measurement_aux_schema}.measurement_references_from_sources s
ON r.concept_id = s.concept_id
WHERE r.gender_source_value = 'F';

CREATE INDEX idx_{schema_name}_age_gender_standardized_{direction}_references
ON {schema_name}.measurement_age_gender_specific_standardized_{direction}_references (
CREATE INDEX idx_{measurement_aux_schema}_age_gender_standardized_{direction}_references
ON {measurement_aux_schema}.measurement_age_gender_specific_standardized_{direction}_references (
concept_id ASC,
unit_source_value ASC,
age_range ASC,
Expand Down
12 changes: 6 additions & 6 deletions data_extraction/sql/create_standardized_measurement_table.sql
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
-- Drop units that do not make sense and measurements that are out of range
DROP TABLE IF EXISTS {schema_name}.measurement_with_nulls_replacing_zero_drop_nonstandard;
CREATE TABLE {schema_name}.measurement_with_nulls_replacing_zero_drop_nonstandard AS
DROP TABLE IF EXISTS {measurement_aux_schema}.measurement_with_nulls_replacing_zero_drop_nonstandard;
CREATE TABLE {measurement_aux_schema}.measurement_with_nulls_replacing_zero_drop_nonstandard AS
SELECT m.*
FROM cdm_measurement_aux.measurement_with_nulls_replacing_zero m
FROM {measurement_aux_schema}.measurement_with_nulls_replacing_zero m
WHERE NOT EXISTS (
SELECT
FROM {schema_name}.measurement_units_to_drop d
FROM {measurement_aux_schema}.measurement_units_to_drop d
WHERE m.measurement_concept_id = d.concept_id
AND m.unit_source_value = d.unit_source_value
)
AND NOT EXISTS (
SELECT
FROM {schema_name}.measurements_out_of_range r
FROM {measurement_aux_schema}.measurements_out_of_range r
WHERE m.measurement_concept_id = r.concept_id
AND m.unit_source_value = r.unit_source_value
AND m.value_as_number IS NOT NULL
Expand All @@ -22,7 +22,7 @@ AND NOT EXISTS (
);

-- Add same indices as measurement table
SET SEARCH_PATH TO {schema_name};
SET SEARCH_PATH TO {measurement_aux_schema};

/************************
Primary key constraints
Expand Down
2 changes: 1 addition & 1 deletion data_extraction/sql/index_lab_reference_tables.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SET SEARCH_PATH TO {schema_name};
SET SEARCH_PATH TO {measurement_aux_schema};

CREATE INDEX idx_measurement_references_from_sources_concept_id
ON measurement_references_from_sources (concept_id ASC);
Expand Down
10 changes: 5 additions & 5 deletions data_extraction/sql/labs.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
WITH cohort_ids AS (
SELECT DISTINCT person_id
FROM {schema_name}.omop_cohort_{window_name}{debug_suffix}
FROM {schema_name}.omop_cohort_{window_name}{eol_suffix}{debug_suffix}
),
cohort_birth_year_genders AS (
SELECT c.person_id,
Expand All @@ -18,8 +18,8 @@ cohort_measurements AS (
m.unit_source_value,
DATE(m.measurement_datetime) AS feature_start_date,
p.end_date AS person_end_date
FROM {schema_name}.omop_cohort_{window_name}{debug_suffix} p
JOIN {schema_name}.measurement_with_nulls_replacing_zero_drop_nonstandard m
FROM {schema_name}.omop_cohort_{window_name}{eol_suffix}{debug_suffix} p
JOIN {measurement_aux_schema}.measurement_with_nulls_replacing_zero_drop_nonstandard m
ON p.person_id = m.person_id
AND m.measurement_datetime <= p.end_date
),
Expand Down Expand Up @@ -86,12 +86,12 @@ cohort_measurements_with_values_and_references AS (
lr.range_low,
hr.range_high
FROM cohort_measurements_with_values_age_ranges_gender m
LEFT JOIN {schema_name}.measurement_age_gender_specific_standardized_low_references lr
LEFT JOIN {measurement_aux_schema}.measurement_age_gender_specific_standardized_low_references lr
ON m.measurement_concept_id = lr.concept_id
AND m.unit_source_value = lr.unit_source_value
AND m.gender_source_value = lr.gender_source_value
AND m.age_range = lr.age_range
LEFT JOIN {schema_name}.measurement_age_gender_specific_standardized_high_references hr
LEFT JOIN {measurement_aux_schema}.measurement_age_gender_specific_standardized_high_references hr
ON m.measurement_concept_id = hr.concept_id
AND m.unit_source_value = hr.unit_source_value
AND m.gender_source_value = hr.gender_source_value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Script to create indexes and constraints on cdm_measurement_aux tables
********************************************************************************/

SET SEARCH_PATH TO cdm_measurement_aux;
SET SEARCH_PATH TO {measurement_aux_schema};

/************************
Primary key constraints
Expand Down
4 changes: 2 additions & 2 deletions data_extraction/sql/select_abnormal_lab_group_outcome.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ cohort_outcome_measurements AS (
m.unit_source_value,
m.measurement_date
FROM cohort_birth_year_genders c
JOIN {schema_name}.measurement_with_nulls_replacing_zero_drop_nonstandard m
JOIN {measurement_aux_schema}.measurement_with_nulls_replacing_zero_drop_nonstandard m
ON c.person_id = m.person_id
WHERE m.measurement_concept_id IN ({outcome_id})
AND m.value_as_number IS NOT NULL
Expand Down Expand Up @@ -49,7 +49,7 @@ outcome_references AS (
concept_id,
unit_source_value,
range_{direction}
FROM {schema_name}.measurement_age_gender_specific_standardized_{direction}_references
FROM {measurement_aux_schema}.measurement_age_gender_specific_standardized_{direction}_references
WHERE concept_id IN ({outcome_id})
),
cohort_abnormal_measurements AS (
Expand Down
Loading

0 comments on commit e69d2f8

Please sign in to comment.