Skip to content

Commit 7ae8278

Browse files
committed
add connector
1 parent 0f48241 commit 7ae8278

22 files changed

+2675
-0
lines changed

connector/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This is a copy of <https://github.com/sfu-db/dataprep/tree/develop/dataprep/connector>. To run the code, please refer to the original repo.

connector/__init__.py

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""Connector"""
2+
3+
from typing import Any, Dict, Optional
4+
5+
from .connector import Connector
6+
from .generator import ConfigGenerator, ConfigGeneratorUI
7+
from .info import info, websites
8+
from .sql import read_sql
9+
10+
# Names exported by ``from <package> import *``. ``config_generator_ui`` is
# defined in this module alongside ``connect`` and is exported with it.
__all__ = [
    "Connector",
    "ConfigGenerator",
    "ConfigGeneratorUI",
    "connect",
    "config_generator_ui",
    "info",
    "websites",
    "read_sql",
]
19+
20+
21+
def connect(
    config_path: str,
    *,
    update: bool = False,
    _auth: Optional[Dict[str, Any]] = None,
    _concurrency: int = 1,
    **kwargs: Any,
) -> Connector:
    """Connect to a website.

    Thin convenience wrapper that forwards every argument to ``Connector``.

    Parameters
    ----------
    config_path
        The path to the config. It can be hosted, e.g. "yelp", or from
        local filesystem, e.g. "./yelp"
    update: bool = False
        Force update the config file even if the local version exists.
    _auth: Optional[Dict[str, Any]] = None
        The parameters for authentication, e.g. OAuth2
    _concurrency: int = 1
        The concurrency setting. By default it is 1 reqs/sec.
    **kwargs
        Parameters that are shared by different queries.

    Returns
    -------
    Connector
        a Connector object.

    Example
    -------
    >>> from dataprep.connector import connect
    >>> dc = connect("yelp", _auth={"access_token": access_token}, _concurrency=3)
    """
    return Connector(config_path, update=update, _auth=_auth, _concurrency=_concurrency, **kwargs)
56+
57+
58+
def config_generator_ui(existing: Optional[Dict[str, Any]] = None) -> None:
    """Create a Config Generator UI.

    Builds a ``ConfigGeneratorUI`` from ``existing`` and calls its
    ``display()`` method; nothing is returned.

    Parameters
    ----------
    existing: Optional[Dict[str, Any]] = None
        Optionally pass in an existing configuration.
    """
    ConfigGeneratorUI(existing).display()

connector/assets/info.html

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
<html>
2+
<head>
3+
<title> DataPrep.Connector Info </title>
4+
<script>
5+
// Show the content pane that belongs to the clicked tab radio button `e`
// and hide every other pane.
function switchTab637(e) {
  // The radio sits inside the tabs div, which sits inside the container div;
  // the container's class is "container-<suffix>".
  const selectedAreaSuffix = e.parentElement.parentElement.className.split('-')[1];
  // Radio ids look like "tab-<suffix>-<n>"; <n> is the 1-based tab position.
  const selectedTabId = e.id.split('-')[2];
  const selectedTabContent = document.querySelector(
    `.contents-${selectedAreaSuffix}>div:nth-of-type(${selectedTabId})`
  );
  // No matching pane: leave the current view untouched instead of hiding
  // everything and then failing on a null reference.
  if (!selectedTabContent) {
    return;
  }
  for (const pane of document.getElementsByClassName('info-637')) {
    pane.style.display = 'none';
  }
  selectedTabContent.style.display = 'block';
}
15+
</script>
16+
<style>
17+
/* The radio inputs only hold the selected-tab state; the labels are the
   visible tabs. */
.container-637 input[type=radio] {
  display: none;
}

/* Tab strip: a grid that fits as many 110px-minimum columns as the width
   allows. */
.tabs-637 {
  font-family: -apple-system, 'Helvetica Neue', 'Helvetica', 'Arial', 'Lucida Grande', sans-serif;
  -webkit-font-smoothing: antialiased;
  display: grid;
  gap: 5px;
  grid-template-columns: repeat(auto-fill, minmax(110px, 1fr));
}

.tabs-637 label {
  word-wrap: break-word;
  text-align: center;
  font-size: 12px;
  border-bottom: 2px solid #9edae5;
  transition: border 0.3s ease;
}

.tabs-637 label:hover {
  background: rgba(0, 0, 0, 0.1);
}

/* Highlight the label that directly follows the checked radio. */
.tabs-637 input:checked+label {
  border-bottom: 2px solid #1f77b4;
}

/* Only the first content pane is visible initially. */
.contents-637>div:nth-of-type(n+2) {
  display: none;
}

h4 {
  font-size: 14px;
  font-family: Arial;
}

p {
  font-size: 13px;
}
54+
</style>
55+
</head>
56+
<body>
  {# One radio "tab" and one content pane per entry of `tbs`, in the same order. #}
  <div class="container-637">
    <div class="tabs-637">
      {% for tb in tbs %}
      {% set tab_id = 'tab-637-' + loop.index|string %}
      {# The first tab starts out selected. #}
      <input type="radio" name="select" id="{{ tab_id }}" {% if loop.first %}checked {% endif %}onclick="switchTab637(this)">
      <label for="{{ tab_id }}">{{ tb }}</label>
      {% endfor %}
    </div>
    <div class="contents-637">
      {% for tb in tbs %}
      <div class="info-637">
        <div>
          <h4><u>Parameters</u></h4>
          {% if tbs[tb].required_params %}
          {% for required_param in tbs[tb].required_params %}
          <p><code>{{ required_param }} (required)</code></p>
          {% endfor %}
          {% endif %}
          {% if tbs[tb].optional_params %}
          {% for optional_param in tbs[tb].optional_params %}
          <p><code>{{ optional_param }}</code></p>
          {% endfor %}
          {% endif %}
          {% if not (tbs[tb].required_params or tbs[tb].optional_params) %}
          <p>None</p>
          {% endif %}
        </div>
        <div>
          <h4><u>Example</u></h4>
          {# Assemble the two example lines piece by piece. #}
          {% set connect_example = "dc = connect('" + dbname + "'" %}
          {% if tbs[tb].joined_auth_params %}
          {% set connect_example = connect_example + ", _auth={'" + tbs[tb].joined_auth_params[0] + "}, _concurrency=3)" %}
          {% else %}
          {% set connect_example = connect_example + ")" %}
          {% endif %}
          {% set query_example = "df = await dc.query('" + tb + "'" %}
          {% if tbs[tb].joined_query_fields %}
          {% set query_example = query_example + ", " + tbs[tb].joined_query_fields[0] %}
          {% endif %}
          {% if tbs[tb].count == True %}
          {% set query_example = query_example + ", _count=20" %}
          {% endif %}
          {% set query_example = query_example + ")" %}
          <p><code>{{ connect_example }}<br>{{ query_example }}</code></p>
        </div>
        <div>
          <h4><u>Schema</u></h4>
          {{ tbs[tb].schemas }}
        </div>
      </div>
      {% endfor %}
    </div>
  </div>
</body>
</html>

connector/config_manager.py

+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
"""
2+
Functions for config downloading and maintaining
3+
"""
4+
import json
5+
from json import dump as jdump
6+
from pathlib import Path
7+
from shutil import rmtree
8+
from tempfile import gettempdir
9+
from typing import cast, Tuple
10+
11+
from .utils import Request
12+
13+
# NOTE: update these URLs after the config repo is renamed.
14+
META_URL = "https://raw.githubusercontent.com/sfu-db/APIConnectors/{}/api-connectors/{}/_meta.json"
15+
TABLE_URL = "https://raw.githubusercontent.com/sfu-db/APIConnectors/{}/api-connectors/{}/{}.json"
16+
GIT_REF_URL = "https://api.github.com/repos/sfu-db/APIConnectors/git/refs/heads"
17+
18+
19+
def separate_branch(config_path: str) -> Tuple[str, str]:
    """Separate the config path into db name and branch.

    Parameters
    ----------
    config_path
        Either ``"<name>"`` or ``"<name>@<branch>"``.

    Returns
    -------
    Tuple[str, str]
        ``(name, branch)``; the branch is ``"master"`` when the path contains
        no ``@``.

    Raises
    ------
    ValueError
        If the path contains more than one ``@``.
    """
    segments = config_path.split("@")
    if len(segments) > 2:
        raise ValueError(f"Multiple branches in the config path {config_path}")
    if len(segments) == 1:
        return segments[0], "master"
    return segments[0], segments[1]
28+
29+
30+
def initialize_path(config_path: str, update: bool) -> Path:
    """Determine whether ``config_path`` is local or hosted on GitHub and
    return the full path of the config directory.

    Parameters
    ----------
    config_path
        A path starting with ``.``, ``/`` or ``~`` is treated as a local
        filesystem path. Anything else is treated as ``<name>[@<branch>]``
        of a hosted config.
    update
        Passed to ``ensure_config`` for hosted configs; unused for local paths.

    Returns
    -------
    Path
        The resolved local path, or the directory under ``config_directory()``
        where the hosted config is stored.
    """
    if config_path.startswith((".", "/", "~")):
        path = Path(config_path)
        if config_path.startswith("~"):
            # "~" is accepted as a local prefix, so expand it to the home
            # directory; otherwise resolve() yields "<cwd>/~/...".
            try:
                path = path.expanduser()
            except RuntimeError:
                # Home directory cannot be determined: keep the literal path.
                pass
        return path.resolve()

    # From GitHub!
    impdb, branch = separate_branch(config_path)
    ensure_config(impdb, branch, update)
    return config_directory() / branch / impdb
41+
42+
43+
def config_directory() -> Path:
    """
    Returns the config directory path: ``<system temp dir>/dataprep/connector``.
    """
    return Path(gettempdir(), "dataprep", "connector")
49+
50+
51+
def ensure_config(impdb: str, branch: str, update: bool) -> bool:
    """Ensure the config for `impdb` is downloaded.

    Parameters
    ----------
    impdb
        Name of the config.
    branch
        Branch of the config repository.
    update
        When False, an existing local copy is accepted without checking
        whether it is obsolete.

    Returns
    -------
    bool
        True if the existing local copy was kept, False if the config was
        downloaded by this call.
    """
    meta_file = config_directory() / branch / impdb / "_meta.json"

    # A local copy exists and no refresh was requested.
    if meta_file.exists() and not update:
        return True

    obsolete = is_obsolete(impdb, branch)

    if meta_file.exists() and not obsolete:
        return True

    download_config(impdb, branch)
    return False
65+
66+
67+
def is_obsolete(impdb: str, branch: str) -> bool:
    """Test if the implicit db config files are obsolete and need to be re-downloaded.

    Returns
    -------
    bool
        True when ``_meta.json`` or ``_hash`` is missing from the local config
        directory, or when the content of ``_hash`` differs from the value
        returned by ``get_git_branch_hash(branch)``.
    """
    config_dir = config_directory() / branch / impdb
    hash_file = config_dir / "_hash"

    # Missing files mean there is nothing usable locally.
    if not (config_dir / "_meta.json").exists() or not hash_file.exists():
        return True

    with open(hash_file, "r") as f:
        githash = f.read()

    return githash != get_git_branch_hash(branch)
82+
83+
84+
def get_git_branch_hash(branch: str) -> str:
    """Get current config files repo's hash.

    Requests ``GIT_REF_URL``, parses the response as a JSON list of refs and
    returns the ``object.sha`` of the entry whose ``ref`` is
    ``refs/heads/<branch>``.

    Raises
    ------
    ValueError
        If the response does not contain exactly one matching ref.
    """
    requests = Request(GIT_REF_URL)
    response = requests.get()
    refs = json.loads(response.read())

    target = f"refs/heads/{branch}"
    matches = [ref["object"]["sha"] for ref in refs if ref["ref"] == target]
    if len(matches) != 1:
        # Same exception type as a failed tuple unpack, but with a message
        # that names the ref.
        raise ValueError(f"Expected exactly one ref {target!r}, found {len(matches)}")
    return cast(str, matches[0])
92+
93+
94+
def download_config(impdb: str, branch: str) -> None:
    """Download the config from Github into the temp directory.

    Fetches ``_meta.json`` and one JSON file per table listed under its
    ``"tables"`` key, then replaces ``config_directory() / branch / impdb``
    with the downloaded files plus a ``_hash`` file holding the branch hash.

    Parameters
    ----------
    impdb
        Name of the config to download.
    branch
        Branch of the config repository to download from.
    """
    sha = get_git_branch_hash(branch)
    # In case we push a new config version to github when the user is downloading:
    # retry until the branch hash is the same before and after the download.
    # The meta file is fetched inside the loop so a retry also refreshes the
    # table list.
    while True:
        requests = Request(META_URL.format(branch, impdb))
        response = requests.get()
        meta = json.loads(response.read())

        configs = {"_meta": meta}
        for table in meta["tables"]:
            requests = Request(TABLE_URL.format(branch, impdb, table))
            response = requests.get()
            configs[table] = json.loads(response.read())

        sha_check = get_git_branch_hash(branch)
        if sha_check == sha:
            break
        sha = sha_check

    target = config_directory() / branch / impdb

    # Start from an empty directory so stale files do not survive.
    if target.exists():
        rmtree(target)
    target.mkdir(parents=True)

    for fname, val in configs.items():
        with (target / f"{fname}.json").open("w") as f:
            jdump(val, f)

    with (target / "_hash").open("w") as f:
        f.write(sha)

0 commit comments

Comments
 (0)