Skip to content

Commit af81618

Browse files
committed
sqlite: add basic implementation
1 parent 0337ea3 commit af81618

20 files changed

+1062
-259
lines changed

.github/workflows/tests.yml

+2-4
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,13 @@ concurrency:
1515

1616
jobs:
1717
tests:
18-
timeout-minutes: 10
18+
timeout-minutes: 20
1919
runs-on: ${{ matrix.os }}
2020
strategy:
2121
fail-fast: false
2222
matrix:
23-
os: [ubuntu-20.04, windows-latest, macos-latest]
23+
os: [ubuntu-22.04, windows-latest, macos-latest]
2424
pyv: ['3.8', '3.9', '3.10', '3.11']
25-
include:
26-
- {os: ubuntu-latest, pyv: 'pypy3.8'}
2725

2826
steps:
2927
- name: Check out the repository

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,6 @@ dmypy.json
136136

137137
# Cython debug symbols
138138
cython_debug/
139+
140+
# vim
141+
*.swp

.pre-commit-config.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,9 @@ repos:
5353
- id: bandit
5454
args: [-c, pyproject.toml]
5555
additional_dependencies: ["toml"]
56+
# NOTE: temporarily skipped
57+
# - repo: https://github.com/sqlfluff/sqlfluff
58+
# rev: 1.4.2
59+
# hooks:
60+
# - id: sqlfluff-fix
61+
# args: [--FIX-EVEN-UNPARSABLE, --force]

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
global-include *.sql

README.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
SQLTrie
2-
=======
2+
========
33

44
|PyPI| |Status| |Python Version| |License|
55

pyproject.toml

+33
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,24 @@ show_error_codes = true
5656
show_error_context = true
5757
show_traceback = true
5858
pretty = true
59+
ignore_missing_imports = true
5960
check_untyped_defs = false
6061
# Warnings
6162
warn_no_return = true
6263
warn_redundant_casts = true
6364
warn_unreachable = true
6465
files = ["src", "tests"]
6566

67+
[tool.pylint.master]
68+
load-plugins = ["pylint_pytest"]
69+
6670
[tool.pylint.message_control]
6771
enable = ["c-extension-no-member", "no-else-return"]
72+
disable = [
73+
"fixme",
74+
"missing-function-docstring", "missing-module-docstring",
75+
"missing-class-docstring",
76+
]
6877

6978
[tool.pylint.variables]
7079
dummy-variables-rgx = "_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_"
@@ -76,3 +85,27 @@ ignore-words-list = " "
7685
[tool.bandit]
7786
exclude_dirs = ["tests"]
7887
skips = ["B101"]
88+
89+
[tool.sqlfluff.core]
90+
dialect = "sqlite"
91+
exclude_rules = "L031"
92+
93+
[tool.sqlfluff.rules]
94+
tab_space_size = 4
95+
max_line_length = 80
96+
indent_unit = "space"
97+
allow_scalar = true
98+
single_table_references = "consistent"
99+
unquoted_identifiers_policy = "all"
100+
101+
[tool.sqlfluff.rules.L010]
102+
capitalisation_policy = "upper"
103+
104+
[tool.sqlfluff.rules.L029]
105+
# these are not reserved in sqlite,
106+
# see https://www.sqlite.org/lang_keywords.html
107+
ignore_words = ["name", "value", "depth"]
108+
109+
[tool.sqlfluff.rules.L063]
110+
# Data Types
111+
extended_capitalisation_policy = "upper"

setup.cfg

+14-3
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,18 @@ long_description = file: README.rst
55
long_description_content_type = text/x-rst
66
license = Apache-2.0
77
license_file = LICENSE
8-
url = https://github.com/efiop/sqltrie
8+
url = https://github.com/iterative/sqltrie
99
platforms=any
10-
authors = Ruslan Kuprieiev
11-
maintainer_email = [email protected]
10+
authors = DVC team
11+
maintainer_email = [email protected]
12+
keywords =
13+
sqlite
14+
sqlite3
15+
sql
16+
trie
17+
prefix tree
18+
data-science
19+
diskcache
1220
classifiers =
1321
Programming Language :: Python :: 3
1422
Programming Language :: Python :: 3.8
@@ -23,16 +31,19 @@ zip_safe = False
2331
package_dir=
2432
=src
2533
packages = find:
34+
include_package_data = True
2635
install_requires=
2736

2837
[options.extras_require]
2938
tests =
3039
pytest==7.2.0
40+
pytest-benchmark
3141
pytest-sugar==0.9.5
3242
pytest-cov==3.0.0
3343
pytest-mock==3.8.2
3444
pylint==2.15.0
3545
mypy==0.971
46+
pygtrie
3647
dev =
3748
%(tests)s
3849

src/sqltrie/.trie.py.swp

-12 KB
Binary file not shown.

src/sqltrie/__init__.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
1-
"""SQLTrie."""
2-
3-
from .trie import AbstractTrie, ShortKeyError
4-
from .sqlite import SQLiteTrie
5-
1+
from .serialized import ( # noqa: F401, pylint: disable=unused-import
2+
JSONTrie,
3+
SerializedTrie,
4+
)
5+
from .sqlite import SQLiteTrie # noqa: F401, pylint: disable=unused-import
6+
from .trie import ( # noqa: F401, pylint: disable=unused-import
7+
ADD,
8+
DELETE,
9+
MODIFY,
10+
RENAME,
11+
UNCHANGED,
12+
AbstractTrie,
13+
Change,
14+
ShortKeyError,
15+
TrieKey,
16+
TrieNode,
17+
)

src/sqltrie/serialized.py

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import json
2+
from abc import abstractmethod
3+
from typing import Any, Optional
4+
5+
from .trie import AbstractTrie, Iterator, TrieKey
6+
7+
8+
class SerializedTrie(AbstractTrie):
    """Trie wrapper that serializes values on write and deserializes on read.

    Every storage operation is delegated to the trie exposed by ``_trie``.
    Subclasses provide ``_trie`` together with the ``_dump``/``_load`` pair
    that converts between user-facing values and what the underlying trie
    stores.
    """

    @property
    @abstractmethod
    def _trie(self):
        """Underlying trie that every operation is delegated to."""

    def close(self):
        """Close the underlying trie."""
        self._trie.close()

    def commit(self):
        """Commit on the underlying trie."""
        self._trie.commit()

    def rollback(self):
        """Roll back on the underlying trie."""
        self._trie.rollback()

    @abstractmethod
    def _load(self, key: TrieKey, value: Optional[bytes]) -> Optional[Any]:
        """Convert the stored ``value`` for ``key`` to its user-facing form."""

    @abstractmethod
    def _dump(self, key: TrieKey, value: Optional[Any]) -> Optional[bytes]:
        """Convert a user-facing ``value`` for ``key`` to its stored form."""

    def __setitem__(self, key, value):
        self._trie[key] = self._dump(key, value)

    def __getitem__(self, key):
        raw = self._trie[key]
        return self._load(key, raw)

    def __delitem__(self, key):
        del self._trie[key]

    def __len__(self):
        return len(self._trie)

    def view(self, key: Optional[TrieKey] = None) -> "SerializedTrie":
        """Return a trie of the same type backed by ``self._trie.view(key)``.

        A falsy ``key`` (``None`` or empty) returns ``self`` unchanged.
        """
        if not key:
            return self

        raw_trie = self._trie.view(key)
        # A fresh instance of the concrete subclass is created with no
        # arguments and then pointed at the raw view.
        trie = type(self)()
        # pylint: disable-next=protected-access
        trie._trie = raw_trie  # type: ignore
        return trie

    def items(self, *args, **kwargs):
        """Yield ``(key, loaded_value)`` for each item of the underlying trie.

        All arguments are forwarded to the underlying trie's ``items``.
        """
        for key, raw in self._trie.items(*args, **kwargs):
            yield key, self._load(key, raw)

    def ls(self, key, with_values=False):
        """Yield the underlying trie's ``ls`` entries for ``key``.

        With ``with_values`` set, entries are ``(key, value)`` pairs and each
        value is passed through ``_load``; otherwise entries are yielded as
        returned by the underlying trie.
        """
        entries = self._trie.ls(key, with_values=with_values)
        if with_values:
            for ekey, evalue in entries:
                yield ekey, self._load(ekey, evalue)
        else:
            yield from entries

    def traverse(self, node_factory, prefix=None):
        """Traverse the underlying trie, loading values before each callback.

        ``node_factory`` is called with the same arguments the underlying
        trie provides, except that the value has been passed through
        ``_load``.
        """

        def _node_factory_wrapper(path_conv, path, children, value):
            return node_factory(
                path_conv, path, children, self._load(path, value)
            )

        return self._trie.traverse(_node_factory_wrapper, prefix=prefix)

    def diff(self, *args, **kwargs):
        """Yield the underlying trie's diff entries unchanged.

        Entries are forwarded as-is; they are not passed through ``_load``.
        """
        yield from self._trie.diff(*args, **kwargs)

    def has_node(self, key):
        return self._trie.has_node(key)

    def shortest_prefix(self, key):
        """Return ``(prefix_key, loaded_value)`` for the shortest prefix.

        Returns ``None`` when the underlying trie reports no prefix.
        """
        sprefix = self._trie.shortest_prefix(key)
        if sprefix is None:
            return None

        skey, raw = sprefix
        # Return the prefix that was found, not the key that was queried,
        # so the result is consistent with ``longest_prefix``.
        return skey, self._load(skey, raw)

    def prefixes(self, key):
        """Yield ``(prefix_key, loaded_value)`` for each prefix of ``key``."""
        for prefix, raw in self._trie.prefixes(key):
            yield (prefix, self._load(prefix, raw))

    def longest_prefix(self, key):
        """Return ``(prefix_key, loaded_value)`` for the longest prefix.

        Returns ``None`` when the underlying trie reports no prefix.
        """
        lprefix = self._trie.longest_prefix(key)
        if lprefix is None:
            return None

        lkey, raw = lprefix
        return lkey, self._load(lkey, raw)

    def __iter__(self) -> Iterator[TrieKey]:
        yield from self._trie
105+
106+
107+
class JSONTrie(SerializedTrie):  # pylint: disable=abstract-method
    """SerializedTrie whose values are stored as UTF-8 encoded JSON bytes."""

    def _load(self, key: TrieKey, value: Optional[bytes]) -> Optional[Any]:
        """Decode stored JSON bytes; ``None`` is passed through unchanged."""
        if value is not None:
            text = value.decode("utf-8")
            return json.loads(text)
        return None

    def _dump(self, key: TrieKey, value: Optional[Any]) -> Optional[bytes]:
        """Encode a value as JSON bytes; ``None`` is passed through unchanged."""
        if value is not None:
            text = json.dumps(value)
            return text.encode("utf-8")
        return None

0 commit comments

Comments
 (0)