Skip to content

Commit bcec54a

Browse files
authored
memfs2: make it non-cacheable, add tests, minor refactoring and optimizations (#73)
memfs2: make it non-cacheable, add tests, minor refactoring
1 parent 5ca2d76 commit bcec54a

File tree

3 files changed

+258
-47
lines changed

3 files changed

+258
-47
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ repos:
2626
- id: codespell
2727
args:
2828
- --ignore-words-list
29-
- fo
29+
- fo,cachable
3030
- repo: https://github.com/asottile/pyupgrade
3131
rev: v2.31.0
3232
hooks:

src/dvc_objects/fs/implementations/_memory.py

+46-46
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
import errno
2+
import os
23

34
from fsspec import AbstractFileSystem
45
from fsspec.implementations.memory import MemoryFile
56
from fsspec.implementations.memory import MemoryFileSystem as MemFS
67

78

89
class MemFS2(AbstractFileSystem): # pylint: disable=abstract-method
9-
"""In-Memory Object Storage FileSystem."""
10+
"""In-Memory Object Storage FileSystem based on Trie data-structure."""
1011

12+
cachable = False
1113
protocol = "memory"
1214
root_marker = "/"
1315
_strip_protocol = MemFS._strip_protocol # pylint: disable=protected-access
@@ -20,19 +22,14 @@ def __init__(self, *args, **storage_options):
2022

2123
def ls(self, path, detail=False, **kwargs):
2224
path = self._strip_protocol(path)
23-
if path not in ("", "/"):
24-
info = self.info(path)
25-
if info["type"] != "directory":
26-
return [info] if detail else [path]
25+
out = []
2726

28-
ret = []
29-
30-
def node_factory(path_conv, paths, children, *args):
31-
node_path = path_conv(paths)
32-
if path == node_path:
33-
list(filter(None, children))
27+
def node_factory(path_conv, parts, children, _filelike=None):
28+
node_path = path_conv(parts)
29+
if path == node_path and children:
30+
list(children)
3431
else:
35-
ret.append(node_path)
32+
out.append(node_path)
3633

3734
try:
3835
self.trie.traverse(node_factory, prefix=path)
@@ -44,9 +41,28 @@ def node_factory(path_conv, paths, children, *args):
4441
) from exc
4542

4643
if not detail:
47-
return ret
44+
return out
45+
return [self.info(p) for p in out]
4846

49-
return [self.info(_path) for _path in ret]
47+
def info(self, path, **kwargs):
    """Return a metadata dictionary for *path*.

    The root (``""`` or ``"/"``) and any path for which the trie reports a
    subtrie are described as zero-sized directories. Any other path must
    have an object stored in the trie and is described as a file.

    Args:
        path: Path to inspect; it is normalised with ``_strip_protocol``.
        **kwargs: Accepted for interface compatibility and ignored.

    Returns:
        A dict with ``name``, ``size`` and ``type`` keys; file entries also
        carry a ``created`` key (``None`` when the stored object has no
        such attribute).

    Raises:
        FileNotFoundError: If *path* is neither a directory nor a stored
            file.
    """
    path = self._strip_protocol(path)
    if path in ("", "/") or self.trie.has_subtrie(path):
        return {
            "name": path,
            "size": 0,
            "type": "directory",
        }

    filelike = self.trie.get(path)
    # Compare against None explicitly: a stored object that happens to be
    # falsy (e.g. one defining ``__len__`` that returns 0) is still a file.
    if filelike is None:
        raise FileNotFoundError(errno.ENOENT, "No such file", path)

    # Prefer an explicit ``size`` attribute, otherwise measure the buffer.
    if hasattr(filelike, "size"):
        size = filelike.size
    else:
        size = filelike.getbuffer().nbytes

    return {
        "name": path,
        "size": size,
        "type": "file",
        "created": getattr(filelike, "created", None),
    }
5066

5167
def _rm(self, path):
5268
path = self._strip_protocol(path)
@@ -58,6 +74,11 @@ def _rm(self, path):
5874
except KeyError as e:
5975
raise FileNotFoundError(errno.ENOENT, "No such file", path) from e
6076

77+
def rm(self, path, recursive=False, maxdepth=None):
    """Remove every entry that *path* expands to.

    Args:
        path: Path (or paths) forwarded to ``expand_path``.
        recursive: Forwarded to ``expand_path``.
        maxdepth: Forwarded to ``expand_path``.
    """
    targets = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
    for target in targets:
        # A default is supplied so that expanded paths without a stored
        # value do not raise.
        # NOTE(review): ``self.store`` is defined outside this view;
        # presumably it is the same mapping as ``self.trie`` - confirm.
        self.store.pop(target, None)
81+
6182
def _open( # pylint: disable=arguments-differ
6283
self, path, mode="rb", **kwargs
6384
):
@@ -70,47 +91,26 @@ def _open( # pylint: disable=arguments-differ
7091

7192
if mode in ["rb", "ab", "rb+"]:
7293
if filelike := self.trie.get(path):
73-
if mode == "ab":
74-
# position at the end of file
75-
filelike.seek(0, 2)
76-
else:
77-
# position at the beginning of file
78-
filelike.seek(0)
94+
filelike.seek(0, os.SEEK_END if mode == "ab" else os.SEEK_SET)
7995
return filelike
8096
raise FileNotFoundError(errno.ENOENT, "No such file", path)
81-
if mode == "wb":
82-
filelike = MemoryFile(self, path)
83-
if not self._intrans:
84-
filelike.commit()
85-
return filelike
8697

87-
def info(self, path, **kwargs):
88-
path = self._strip_protocol(path)
89-
if path in ("", "/") or self.trie.has_subtrie(path):
90-
return {
91-
"name": path,
92-
"size": 0,
93-
"type": "directory",
94-
}
95-
96-
if filelike := self.trie.get(path):
97-
return {
98-
"name": path,
99-
"size": filelike.size
100-
if hasattr(filelike, "size")
101-
else filelike.getbuffer().nbytes,
102-
"type": "file",
103-
"created": getattr(filelike, "created", None),
104-
}
105-
raise FileNotFoundError(errno.ENOENT, "No such file", path)
98+
assert mode == "wb"
99+
filelike = MemoryFile(self, path)
100+
if not self._intrans:
101+
filelike.commit()
102+
return filelike
106103

107104
def cp_file(self, path1, path2, **kwargs):
    """Copy the contents of the file at *path1* into *path2*.

    If opening *path1* raises ``IsADirectoryError`` the call returns
    without copying anything.

    Args:
        path1: Source path; normalised with ``_strip_protocol``.
        path2: Destination path; normalised with ``_strip_protocol``.
        **kwargs: Accepted for interface compatibility and ignored.
    """
    source_path = self._strip_protocol(path1)
    target_path = self._strip_protocol(path2)

    try:
        reader = self.open(source_path, "rb")
    except IsADirectoryError:
        # Directories are silently skipped.
        return None

    with reader, self.open(target_path, "wb") as writer:
        writer.write(reader.getbuffer())
115115

116116
def created(self, path):

tests/fs/test_memfs.py

+211
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
import os
2+
from unittest.mock import ANY
3+
4+
import pytest
5+
6+
from dvc_objects.fs.implementations._memory import MemFS2
7+
8+
9+
@pytest.fixture
def m():
    """Provide a newly constructed ``MemFS2`` instance to the requesting test."""
    filesystem = MemFS2()
    return filesystem
12+
13+
14+
def test_memfs_should_not_be_cached():
    """Two separate constructions must produce two distinct objects."""
    first = MemFS2()
    second = MemFS2()
    assert first is not second
16+
17+
18+
def test_1(m):
    """Touched files show up in ``find`` and in the mapper's keys."""
    m.touch("/somefile")  # NB: is found with or without initial /
    m.touch("afiles/and/another")

    found = m.find("")
    assert found == ["/afiles/and/another", "/somefile"]

    mapper_keys = sorted(m.get_mapper())
    assert mapper_keys == ["afiles/and/another", "somefile"]
26+
27+
28+
def test_strip(m):
    """Check ``_strip_protocol`` against a table of raw/expected pairs."""
    expectations = (
        ("", ""),
        ("memory://", ""),
        ("afile", "/afile"),
        ("/b/c", "/b/c"),
        ("/b/c/", "/b/c"),
    )
    for raw, stripped in expectations:
        assert m._strip_protocol(raw) == stripped
34+
35+
36+
def test_put_single(m, tmpdir):
    """Non-recursive ``put`` of a directory copies nothing; recursive does.

    A local directory containing a single file ``abc`` is created under
    *tmpdir* and uploaded twice: first without ``recursive`` (expected to
    leave the target empty), then recursively with a trailing slash.
    """
    fn = os.path.join(str(tmpdir), "dir")
    os.mkdir(fn)
    # Close the handle deterministically so the data is flushed to disk
    # before ``put`` reads the file, instead of relying on garbage collection.
    with open(os.path.join(fn, "abc"), "w") as fobj:
        fobj.write("text")
    m.put(fn, "/test")  # no-op, no files
    assert not m.exists("/test/abc")
    assert not m.exists("/test/dir")
    m.put(fn + "/", "/test", recursive=True)
    assert m.cat("/test/abc") == b"text"
45+
46+
47+
def test_ls(m):
    """Exercise ``ls`` on the root, on directories, on files and on a missing path."""
    for created in ("/dir/afile", "/dir/dir1/bfile", "/dir/dir1/cfile"):
        m.touch(created)

    assert m.ls("/", False) == ["/dir"]
    assert m.ls("/dir", False) == ["/dir/afile", "/dir/dir1"]
    assert m.ls("/dir", True)[0]["type"] == "file"
    assert m.ls("/dir", True)[1]["type"] == "directory"

    assert len(m.ls("/dir/dir1")) == 2
    # Listing a file yields a single-element list containing that file.
    for leaf in ("/dir/afile", "/dir/dir1/bfile", "/dir/dir1/cfile"):
        assert m.ls(leaf) == [leaf]

    with pytest.raises(FileNotFoundError):
        m.ls("/dir/not-existing-file")
64+
65+
66+
def test_mv_recursive(m):
    """A recursive move relocates the contained file and removes the source."""
    source, destination = "src", "dest"
    m.mkdir(source)
    m.touch("src/file.txt")
    m.mv(source, destination, recursive=True)
    assert m.exists("dest/file.txt")
    assert not m.exists(source)
72+
73+
74+
def test_rm(m):
    """Recursive removal deletes the whole tree; repeating it raises."""
    m.touch("/dir1/dir2/file")
    m.rm("/dir1", recursive=True)
    for removed in ("/dir1/dir2/file", "/dir1/dir2", "/dir1"):
        assert not m.exists(removed)

    with pytest.raises(FileNotFoundError):
        m.rm("/dir1", recursive=True)
83+
84+
85+
def test_rm_multiple_files(m):
    """Passing a list of paths to ``rm`` removes each of them."""
    victims = ["/dir/file1", "/dir/file2"]
    for victim in victims:
        m.touch(victim)

    m.rm(victims)
    assert not m.ls("/")
91+
92+
93+
def test_rm_file(m):
    """``rm_file`` rejects directories and missing paths, and removes files."""
    m.touch("/dir/file")

    failing_cases = (
        (IsADirectoryError, "/dir"),
        (FileNotFoundError, "/dir/foo"),
    )
    for expected_error, target in failing_cases:
        with pytest.raises(expected_error):
            m.rm_file(target)

    m.rm_file("/dir/file")
    assert not m.exists("/dir/file")
103+
104+
105+
def test_rewind(m):
    """Reopening a written file with the default mode starts at offset 0."""
    # https://github.com/fsspec/filesystem_spec/issues/349
    target = "src/file.txt"
    with m.open(target, "w") as writer:
        writer.write("content")
    with m.open(target) as reader:
        assert reader.tell() == 0
111+
112+
113+
def test_no_rewind_append_mode(m):
    """Reopening a written file in append mode starts at its end (offset 7)."""
    # https://github.com/fsspec/filesystem_spec/issues/349
    target = "src/file.txt"
    with m.open(target, "w") as writer:
        writer.write("content")
    with m.open(target, "a") as appender:
        assert appender.tell() == 7
119+
120+
121+
def test_seekable(m):
    """A file opened in text mode supports ``seek``, ``read`` and ``tell``."""
    fn0 = "foo.txt"
    with m.open(fn0, "wb") as sink:
        sink.write(b"data")

    handle = m.open(fn0, "rt")
    assert handle.seekable(), "file is not seekable"
    handle.seek(1)
    second_char = handle.read(1)
    assert second_char == "a"
    assert handle.tell() == 2
131+
132+
133+
def test_try_open_directory(m):
    """Opening a path that holds other entries raises ``IsADirectoryError``."""
    m.touch("/dir/file")
    directory = "dir"
    with pytest.raises(IsADirectoryError):
        m.open(directory)
137+
138+
139+
def test_try_open_not_existing_file(m):
    """Opening a path that was never created raises ``FileNotFoundError``."""
    missing = "not-existing-file"
    with pytest.raises(FileNotFoundError):
        m.open(missing)
142+
143+
144+
def test_try_open_file_on_super_prefix(m):
    """Opening a path nested below an existing file raises ``NotADirectoryError``."""
    m.touch("/afile")
    nested = "/afile/file"
    with pytest.raises(NotADirectoryError):
        m.open(nested)
148+
149+
150+
def test_empty_raises(m):
    """Both ``ls`` and ``info`` raise ``FileNotFoundError`` for an unknown path."""
    for lookup in (m.ls, m.info):
        with pytest.raises(FileNotFoundError):
            lookup("nonexistent")
156+
157+
158+
def test_moves(m):
    """Moving single files works both with and without ``recursive``."""
    m.touch("source.txt")
    m.mv("source.txt", "target.txt")

    m.touch("source2.txt")
    m.mv("source2.txt", "target2.txt", recursive=True)

    expected = ["/target.txt", "/target2.txt"]
    assert m.find("") == expected
165+
166+
167+
def test_remove_all(m: MemFS2):
    """Recursively removing the root leaves an empty listing."""
    m.touch("afile")
    m.rm("/", recursive=True)
    remaining = m.ls("/")
    assert not remaining
171+
172+
173+
def test_created(m):
    """``created`` matches the stored object's attribute and is ``None`` for a directory."""
    file_path = "/dir/afile"
    m.touch(file_path)

    reported = m.created(file_path)
    assert reported == m.trie[file_path].created
    assert m.created("/dir") is None
177+
178+
179+
def test_info(m):
    """``info`` describes the root, a directory and a file, and raises when missing."""
    m.touch("/dir/file")

    expected_root = {"name": "", "size": 0, "type": "directory"}
    expected_dir = {"name": "/dir", "size": 0, "type": "directory"}
    expected_file = {
        "name": "/dir/file",
        "size": 0,
        "type": "file",
        # The creation timestamp is not pinned; any value is accepted.
        "created": ANY,
    }

    assert m.info("/") == expected_root
    assert m.info("/dir") == expected_dir
    assert m.info("/dir/file") == expected_file

    with pytest.raises(FileNotFoundError):
        m.info("/not-existing-file")
193+
194+
195+
def test_cp_file(m):
    """After ``cp_file`` both source and destination hold the same bytes."""
    source, destination = "/afile", "/bfile"
    m.pipe_file(source, b"content")
    m.cp_file(source, destination)
    assert m.cat_file(destination) == m.cat_file(source) == b"content"
199+
200+
201+
def test_transaction(m):
    """Files touched inside a transaction only become visible once it ends.

    Covers both the explicit ``start_transaction``/``end_transaction`` pair
    and the ``transaction`` context manager.
    """
    first, second = "/dir/afile", "/dir/bfile"

    m.start_transaction()
    m.touch(first)
    assert m.find("/") == []
    m.end_transaction()
    assert m.find("/") == [first]

    with m.transaction:
        m.touch(second)
        # Still only the previously committed file is listed.
        assert m.find("/") == [first]
    assert m.find("/") == [first, second]

0 commit comments

Comments
 (0)