-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtopics.py
34 lines (32 loc) · 1.37 KB
/
topics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Streamlit page: query FOIArchive documents by modeled topic.
import streamlit as st
import configs as c
# Called before the remaining project imports; keep this ordering.
# NOTE(review): presumably performs page-level setup (title/layout) that has
# to run before any other Streamlit call — confirm in configs.page.
c.page("Topic Models")
import db
import sqlgen as sg
# Bare string expression, not a module docstring (it is not the first
# statement). With Streamlit's "magic" feature enabled, an expression like
# this is rendered on the page as Markdown, so its text is user-facing.
"""
Query the FOIArchive via topics derived by topic modeling. You can find
more information about topic modeling
[here](https://lab.history.columbia.edu/content/documentation).
"""
# Reference tables used to populate the two selectors.
# NOTE(review): presumably each name identifies a stored query that returns a
# DataFrame — confirm in db.load_execute.
cdf = db.load_execute("corpora")
tdf = db.load_execute("topics")

# Narrow column for the corpus selector, wider one for the topic selector.
col1, col2 = st.columns([1, 2])

# Only corpora whose topic count is non-null are offered.
corpora_with_topics = cdf[cdf["topic_cnt"].notnull()]
corpus = col1.selectbox("Corpus", corpora_with_topics)

# Topics belonging to the selected corpus, listed by their display label.
corpus_topic_rows = tdf[tdf["corpus"] == corpus]
corpora_topics = corpus_topic_rows["display"]
topic = col2.selectbox("Topic", corpora_topics)

# Resolve the chosen label to its id *within the selected corpus*: matching on
# the label alone could pick a same-named topic from another corpus.
matching_topic_ids = corpus_topic_rows.loc[
    corpus_topic_rows["display"] == topic, "topic_id"]
if matching_topic_ids.empty:
    # Nothing selectable (no corpus or no topic): end this run with a message
    # instead of failing with IndexError on the lookup below.
    st.info("No topics are available for the selected corpus.")
    st.stop()
topic_id = matching_topic_ids.values[0]

# Build the WHERE clause restricting results to the selected corpus and topic.
predicates = []
sg.add_predicate(predicates, sg.compare_predicate('td.corpus', '=', corpus))
sg.add_predicate(
    predicates,
    sg.compare_predicate('topic_id', '=', topic_id, is_value_str=False))
where_clause = sg.where_clause(predicates)
# Echo the generated clause to the server console for debugging.
print(where_clause)

# Run the topic query and add a per-document viewer link column.
topics_data_table_sql = sg.query(
    'topics_data_table', c.config["table_name"], where_clause)
topics_data_table_df = db.execute(topics_data_table_sql)
topics_data_table_df['docviewer_url'] = topics_data_table_df['doc_id'].apply(
    lambda x: f"{c.config['docviewer_url']}?doc_id={x}")

# Show the results with the score column first, followed by the shared
# column order and column configuration defined in configs.
st.dataframe(topics_data_table_df,
             use_container_width=True,
             hide_index=True,
             column_order=['score'] + c.COLUMN_ORDER,
             column_config=c.COLUMN_CONFIGS)