|
| 1 | +import math |
1 | 2 | from io import BytesIO
|
2 | 3 |
|
3 | 4 | import pytest
|
4 | 5 |
|
5 | 6 | from dvc_objects.db import ObjectDB
|
| 7 | +from dvc_objects.errors import ObjectDBPermissionError |
| 8 | +from dvc_objects.fs.base import FileSystem |
| 9 | + |
| 10 | + |
def test_odb(memfs):
    """Check the basic attributes, equality and hashing of an ObjectDB."""
    db = ObjectDB(memfs, "/odb")

    # Constructor arguments are exposed unchanged; read_only defaults to off.
    assert db.fs is memfs
    assert db.path == "/odb"
    assert db.read_only is False

    # Equal to itself and to a second instance built from the same arguments.
    assert db == db
    assert db == ObjectDB(memfs, "/odb")
    assert hash(db) == hash(db)
6 | 18 |
|
7 | 19 |
|
@pytest.mark.parametrize(
    "data, expected",
    [
        (b"contents", b"contents"),
        (BytesIO(b"contents"), b"contents"),
    ],
)
def test_add_bytes(memfs, data, expected):
    """add_bytes() accepts raw bytes as well as a file-like object."""
    db = ObjectDB(memfs, memfs.root_marker)
    db.add_bytes("1234", data)

    # With the odb rooted at the fs root, oid "1234" is stored at "/12/34".
    stored = memfs.cat_file("/12/34")
    assert stored == expected
|
| 28 | + |
| 29 | + |
def test_odb_readonly():
    """Write operations on a read-only ObjectDB raise a permission error."""
    readonly_db = ObjectDB(FileSystem(), "/odb", read_only=True)

    # Adding an object from a path must be rejected...
    with pytest.raises(ObjectDBPermissionError):
        readonly_db.add("/odb/foo", readonly_db.fs, "1234")

    # ...and so must adding an object from in-memory bytes.
    with pytest.raises(ObjectDBPermissionError):
        readonly_db.add_bytes("1234", b"contents")
| 37 | + |
| 38 | + |
def test_odb_add(memfs):
    """add() stores a file under its oid and never overwrites an object."""
    memfs.pipe({"foo": b"foo", "bar": b"bar"})
    db = ObjectDB(memfs, "/odb")
    oid = "1234"

    db.add("/foo", memfs, oid)
    assert db.exists(oid)

    # Adding different content under the same oid must leave the
    # already stored object untouched.
    db.add("/bar", memfs, oid)
    assert memfs.cat_file("/odb/12/34") == b"foo"
| 49 | + |
| 50 | + |
def test_exists(memfs):
    """exists() reports an oid that was just written with add_bytes()."""
    db = ObjectDB(memfs, "/odb")
    oid = "1234"
    db.add_bytes(oid, b"content")
    assert db.exists(oid)
| 55 | + |
| 56 | + |
def test_move(memfs):
    """move() relocates a stored object, leaving nothing at the old path."""
    db = ObjectDB(memfs, "/")
    db.add_bytes("1234", b"content")

    db.move("/12/34", "/45/67")

    # Only the destination path may remain in the filesystem.
    remaining = list(memfs.find(""))
    assert remaining == ["/45/67"]
| 62 | + |
| 63 | + |
def test_makedirs(memfs):
    """makedirs() creates the requested directory on the underlying fs."""
    db = ObjectDB(memfs, "/")
    dirname = "12"
    db.makedirs(dirname)
    assert memfs.isdir(dirname)
| 68 | + |
| 69 | + |
def test_get(memfs):
    """Check fs, path, oid and length of the object returned by get().

    Note that nothing is added to the odb before calling get().
    """
    db = ObjectDB(memfs, "/odb")
    fetched = db.get("1234")

    assert fetched.fs == memfs
    assert fetched.path == "/odb/12/34"
    assert fetched.oid == "1234"
    assert len(fetched) == 1
| 77 | + |
| 78 | + |
def test_path_to_oid():
    """path_to_oid() joins the last two path parts or raises ValueError."""
    db = ObjectDB(FileSystem(), "/odb")

    # path -> expected oid; only the final two components contribute.
    expected_oids = {
        "/12/34": "1234",
        "/odb/12/34": "1234",
        "/odb/12/34/56": "3456",
        "/odb/12/34/abcde12": "34abcde12",
    }
    for path, oid in expected_oids.items():
        assert db.path_to_oid(path) == oid

    # Paths that do not end in a valid "<2 chars>/<rest>" pair are rejected.
    for invalid_path in ("bar", "/b/ar"):
        with pytest.raises(ValueError):
            db.path_to_oid(invalid_path)
| 92 | + |
| 93 | + |
def test_oid_to_path():
    """oid_to_path() maps oid "1234" to "<odb path>/12/34"."""
    db = ObjectDB(FileSystem(), "/odb")
    resolved = db.oid_to_path("1234")
    assert resolved == "/odb/12/34"
| 97 | + |
| 98 | + |
@pytest.mark.parametrize("traverse", [True, False])
def test_listing_oids(memfs, mocker, traverse):
    """Listing APIs agree before and after an object is added.

    Runs once with the filesystem's CAN_TRAVERSE patched on and once off.
    """
    mocker.patch.object(memfs, "CAN_TRAVERSE", traverse)
    db = ObjectDB(memfs, "/odb")
    candidates = ["123456", "345678", "567890"]

    # Empty odb: every listing/existence query comes back empty.
    assert not list(db.all())
    assert not list(db.list_oids_exists(candidates))
    assert not db.oids_exist(candidates)

    # After storing one of the candidates, it is the only oid reported.
    db.add_bytes("123456", b"content")
    assert list(db.all()) == ["123456"]
    assert list(db.list_oids_exists(candidates))
    assert set(db.oids_exist(candidates)) == {"123456"}
| 113 | + |
| 114 | + |
def test_oids_exist_non_traverse_fs(mocker):
    """Without CAN_TRAVERSE, oids_exist() only uses list_oids_exists()."""
    db = ObjectDB(FileSystem(), "/odb")

    exists_mock = mocker.patch.object(db, "list_oids_exists")
    traverse_mock = mocker.patch.object(db, "_list_oids_traverse")
    mocker.patch.object(db.fs, "CAN_TRAVERSE", False)

    queried = set(range(100))
    db.oids_exist(queried)

    # All oids are checked individually; traversal is never attempted.
    exists_mock.assert_called_with(queried, None)
    traverse_mock.assert_not_called()
| 126 | + |
| 127 | + |
def test_oids_exist_less_oids_larger_fs(mocker):
    """Large remote, small local: remaining oids are checked one by one."""
    db = ObjectDB(FileSystem(), "/odb")

    exists_mock = mocker.patch.object(db, "list_oids_exists")
    traverse_mock = mocker.patch.object(db, "_list_oids_traverse")
    mocker.patch.object(db.fs, "CAN_TRAVERSE", True)
    mocker.patch.object(db, "_list_oids", return_value=list(range(2048)))

    queried = list(range(1000))
    db.oids_exist(queried)

    # The size estimation is expected to stop early, i.e. before consuming
    # all 2048 mocked remote oids; max_oids is the number it should have
    # looked at by then.
    prefix_space = 16 ** db.fs.TRAVERSE_PREFIX_LEN
    max_oids = math.ceil(db._max_estimation_size(queried) / prefix_space)
    assert max_oids < 2048

    # Only the oids not seen during estimation are queried individually.
    not_yet_seen = frozenset(range(max_oids, 1000))
    exists_mock.assert_called_with(not_yet_seen, None)
    traverse_mock.assert_not_called()
| 147 | + |
| 148 | + |
def test_oids_exist_large_oids_larger_fs(mocker):
    """Large remote, large local: oids_exist() falls back to a traverse.

    The traverse threshold is patched to 1000 and 2000 oids are queried,
    so the per-oid existence check must not be used at all.
    """
    odb = ObjectDB(FileSystem(), "/odb")

    object_exists = mocker.patch.object(odb, "list_oids_exists")
    traverse = mocker.patch.object(odb, "_list_oids_traverse")
    mocker.patch.object(odb.fs, "CAN_TRAVERSE", True)
    mocker.patch.object(odb.fs, "TRAVERSE_THRESHOLD_SIZE", 1000)
    mocker.patch.object(odb, "_list_oids", return_value=list(range(256)))

    oids = list(range(2000))
    odb.oids_exist(oids)

    # The traverse receives the estimated remote size (256 oids seen for
    # one prefix, scaled by the size of the prefix space) together with
    # the oids already collected during estimation.
    traverse.assert_called_with(
        256 * pow(16, odb.fs.TRAVERSE_PREFIX_LEN),
        set(range(256)),
        jobs=None,
    )
    # A single check suffices: nothing touches the mock between
    # oids_exist() returning and this assertion.
    object_exists.assert_not_called()
| 168 | + |
| 169 | + |
def test_list_paths(mocker):
    """_list_paths() forwards the expected path and prefix flag to find()."""
    db = ObjectDB(FileSystem(), "/odb")
    find_mock = mocker.patch.object(db.fs, "find", return_value=[])

    # Drain the iterable so the underlying find() call actually happens.
    list(db._list_paths())
    find_mock.assert_called_with("/odb", prefix=False)

    # A prefix is split after its first two characters into a sub-path.
    list(db._list_paths(prefix="000"))
    find_mock.assert_called_with("/odb/00/0", prefix=True)
| 181 | + |
| 182 | + |
def test_list_oids(mocker):
    """_list_oids() yields oids for valid paths and skips the rest."""
    db = ObjectDB(FileSystem(), "/odb")

    # "12/34" converts to an oid; "bar" does not and must be dropped.
    mocker.patch.object(db, "_list_paths", return_value=["12/34", "bar"])

    listed = list(db._list_oids())
    assert listed == ["1234"]
| 188 | + |
| 189 | + |
@pytest.mark.parametrize("prefix_len", [2, 3])
def test_list_oids_traverse(mocker, prefix_len):
    """_list_oids_traverse() lists per prefix above a size cut-off.

    Below that size it issues a single un-prefixed listing instead.
    """
    db = ObjectDB(FileSystem(), "/odb")

    list_oids_mock = mocker.patch.object(db, "_list_oids", return_value=[])
    mocker.patch.object(
        db, "path_to_oid", side_effect=lambda x: x
    )  # pragma: no cover
    mocker.patch.object(db.fs, "TRAVERSE_PREFIX_LEN", prefix_len)

    # parallel traverse: the remote size sits exactly at the cut-off
    size = 256 / db.fs._JOBS * db.fs.LIST_OBJECT_PAGE_SIZE
    list(db._list_oids_traverse(size, {0}))

    width = db.fs.TRAVERSE_PREFIX_LEN
    for value in range(1, 16):
        list_oids_mock.assert_any_call(f"{value:0{width}x}")
    for value in range(1, 256):
        list_oids_mock.assert_any_call(f"{value:02x}")

    # default traverse (small remote): the size is lowered in the same two
    # steps as before, so the value passed in is unchanged.
    size -= 1
    list_oids_mock.reset_mock()
    smaller_size = size - 1
    list(db._list_oids_traverse(smaller_size, {0}))
    list_oids_mock.assert_called_with(None)
0 commit comments