Skip to content

Commit 6c1468f

Browse files
jsmassa and whilo authored
Durable persistent set (replikativ#503)
* Make persistent set durable * set->pset correction * Fix io handlers * avoid flushing for mem db * Move index config to index namespaces * Undo reformatting * Fix config test * Bump persistent-sorted-set and add cache with freeing. * Fix temporal-upsert speed by using efficient index to lookup old value. * Add threadsafe persistent-sorted-set, reactivate benchmarks. * Use polymorphism instead of manual dispatch with satisfies?, add type hints. * Handle storage configuration through cache-size parameter. Must be > 0 now. * Add type hints * Add switch for flush * Improve upsert comparators * Separate concepts of search cache and store cache * Use eavt index also for upsert to retrieve old-datom once instead of doing indexwise lookups. * Use upstream pss implementation from storage branch. Cleanups. * Make store and hashing configurable. Improve performance. * Fix cache configuration loading including setting proper defaults. * Default config to no crypto-hashing to provide higher performance. * Use upstream pss. * Set default-index to persistent-set and add configuration guard. * Update default index in API test. * Don't fail config index mismatch, but use stored setting and warn. Reduce log level for storage to trace, same for transact data. * Fix reinitialization of read handlers. * Bump konserve version. Co-authored-by: Christian Weilbach <[email protected]>
1 parent b1ea55b commit 6c1468f

37 files changed

+902
-667
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
/classes
44
/checkouts
55
pom.xml.asc
6-
*.jar
6+
replikativ-datahike.jar
77
*.class
88
/.lein-*
99
/.nrepl-port

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
- Improve code samples using transact with arg-map @podgorniy
2121
- Insert into persistent sorted set does not replace existing datom with identical EAV
2222
- Single datom retraction fixed for persistent set index
23+
- Refactor index namespaces
24+
- Make persistent set durable
2325

2426
## 0.4.0
2527

benchmark/src/benchmark/cli.clj

+11-4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
[benchmark.store :refer [save]]
77
[clojure.string :refer [join]]
88
[datahike.store :as ds]
9+
[datahike.config :as dc]
910
[datahike.index :as di]))
1011

1112
(def output-formats (set (keys (methods save))))
@@ -74,12 +75,18 @@
7475
:parse-fn read-string
7576
:validate [(conj backend-names :all) #(str "A backend named " % " has not been implemented. "
7677
"Available backends are: " backend-names)]]
77-
["-k" "--cache SIZES"
78-
(str "Cache sizes for which measurements should be done")
79-
:default [0]
78+
["-k" "--search-caches SIZES"
79+
(str "Search cache sizes for which measurements should be done")
80+
:default [dc/default-search-cache-size]
8081
:parse-fn read-string
8182
:validate [vector? "Must be a vector of non-negative integers."
8283
#(every? nat-int? %) "Vector must consist of non-negative integers."]]
84+
["-m" "--store-caches SIZES"
85+
(str "Store cache sizes for which measurements should be done")
86+
:default [dc/default-store-cache-size]
87+
:parse-fn read-string
88+
:validate [vector? "Must be a vector of positive integers."
89+
#(every? pos-int? %) "Vector must consist of positive integers."]]
8390
["-j" "--schema VALUE"
8491
(str "Schema flexibility configuration. Available are: " #{:read :write})
8592
:default :write
@@ -185,4 +192,4 @@
185192
(-main "run" "-x" "[0 10000 5000]" "-t" "test-bench" "-o" "edn" "bench.edn")
186193
)
187194

188-
;TIMBRE_LEVEL=":info" clj -M:benchmark run --backend :file --index :datahike.index/persistent-set -t pss -o edn pss.edn --schema :write --history false
195+
;TIMBRE_LEVEL=":info" clj -M:benchmark run --backend :file --index :datahike.index/persistent-set -t pss -o edn pss.edn --schema :write --history false

benchmark/src/benchmark/config.clj

+8-4
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,26 @@
2626
{"mem-set" {:store {:backend :mem :id "performance-set"}
2727
:index :datahike.index/persistent-set
2828
:keep-history? false
29-
:cache-size 0
29+
:search-cache-size 0
30+
:store-cache-size 1
3031
:schema-flexibility :write}
3132
"mem-hht" {:store {:backend :mem :id "performance-hht"}
3233
:index :datahike.index/hitchhiker-tree
3334
:keep-history? false
34-
:cache-size 0
35+
:search-cache-size 0
36+
:store-cache-size 1
3537
:schema-flexibility :write}
3638
"file-set" {:store {:backend :file :path "/tmp/performance-set"}
3739
:index :datahike.index/persistent-set
3840
:keep-history? false
39-
:cache-size 0
41+
:search-cache-size 0
42+
:store-cache-size 1
4043
:schema-flexibility :write}
4144
"file-hht" {:store {:backend :file :path "/tmp/performance-hht"}
4245
:index :datahike.index/hitchhiker-tree
4346
:keep-history? false
44-
:cache-size 0
47+
:search-cache-size 0
48+
:store-cache-size 1
4549
:schema-flexibility :write}})
4650

4751
(def schema

benchmark/src/benchmark/measure.clj

+6-4
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@
141141
:count n
142142
:observations (vec times)}))
143143

144-
(defn requested-configs [{:keys [config-name history backend cache schema index] :as _options}]
144+
(defn requested-configs [{:keys [config-name history backend search-caches store-caches schema index] :as _options}]
145145
(if config-name
146146
[(get c/named-db-configs config-name)]
147147
(vec (for [index-type (if (= :all index)
@@ -153,14 +153,16 @@
153153
keep-history (if (= :all history)
154154
[true false]
155155
[history])
156-
cache-size cache
157156
schema-flexibility (if (= :all schema)
158157
[:read :write]
159-
[schema])]
158+
[schema])
159+
search-cache search-caches
160+
store-cache store-caches]
160161
{:index index-type
161162
:store {:backend backend-type}
162163
:keep-history? keep-history
163-
:cache-size cache-size
164+
:search-cache-size search-cache
165+
:store-cache-size store-cache
164166
:schema-flexibility schema-flexibility}))))
165167

166168
(defn get-measurements

benchmark/test/benchmark/measure_test.clj

+20-10
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,17 @@
6363
(is (= '({:db-datoms 4
6464
:db-entities 1
6565
:dh-config {:backend :mem
66-
:cache-size 0
66+
:search-cache-size 0
67+
:store-cache-size 1
6768
:index :datahike.index/persistent-set
6869
:keep-history? false
6970
:schema-flexibility :write}
7071
:function :connection}
7172
{:db-datoms 40
7273
:db-entities 10
7374
:dh-config {:backend :mem
74-
:cache-size 0
75+
:search-cache-size 0
76+
:store-cache-size 1
7577
:index :datahike.index/persistent-set
7678
:keep-history? false
7779
:schema-flexibility :write}
@@ -81,63 +83,71 @@
8183
(is (= '({:db-datoms 4
8284
:db-entities 1
8385
:dh-config {:backend :mem
84-
:cache-size 0
86+
:search-cache-size 0
87+
:store-cache-size 1
8588
:index :datahike.index/persistent-set
8689
:keep-history? false
8790
:schema-flexibility :write}
8891
:function :connection}
8992
{:db-datoms 40
9093
:db-entities 10
9194
:dh-config {:backend :mem
92-
:cache-size 0
95+
:search-cache-size 0
96+
:store-cache-size 1
9397
:index :datahike.index/persistent-set
9498
:keep-history? false
9599
:schema-flexibility :write}
96100
:function :connection}
97101
{:db-datoms 4
98102
:db-entities 1
99103
:dh-config {:backend :mem
100-
:cache-size 0
104+
:search-cache-size 0
105+
:store-cache-size 1
101106
:index :datahike.index/hitchhiker-tree
102107
:keep-history? false
103108
:schema-flexibility :write}
104109
:function :connection}
105110
{:db-datoms 40
106111
:db-entities 10
107112
:dh-config {:backend :mem
108-
:cache-size 0
113+
:search-cache-size 0
114+
:store-cache-size 1
109115
:index :datahike.index/hitchhiker-tree
110116
:keep-history? false
111117
:schema-flexibility :write}
112118
:function :connection}
113119
{:db-datoms 4
114120
:db-entities 1
115121
:dh-config {:backend :file
116-
:cache-size 0
122+
:search-cache-size 0
123+
:store-cache-size 1
117124
:index :datahike.index/persistent-set
118125
:keep-history? false
119126
:schema-flexibility :write}
120127
:function :connection}
121128
{:db-datoms 40
122129
:db-entities 10
123130
:dh-config {:backend :file
124-
:cache-size 0
131+
:search-cache-size 0
132+
:store-cache-size 1
125133
:index :datahike.index/persistent-set
126134
:keep-history? false
127135
:schema-flexibility :write}
128136
:function :connection}
129137
{:db-datoms 4
130138
:db-entities 1
131139
:dh-config {:backend :file
132-
:cache-size 0
140+
:search-cache-size 0
141+
:store-cache-size 1
133142
:index :datahike.index/hitchhiker-tree
134143
:keep-history? false
135144
:schema-flexibility :write}
136145
:function :connection}
137146
{:db-datoms 40
138147
:db-entities 10
139148
:dh-config {:backend :file
140-
:cache-size 0
149+
:search-cache-size 0
150+
:store-cache-size 1
141151
:index :datahike.index/hitchhiker-tree
142152
:keep-history? false
143153
:schema-flexibility :write}

bin/run-all-tests

100755 → 100644
+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
set -o errexit
44
set -o pipefail
5+
56
echo "Recompiling Java"
67
clj -T:build clean
78
clj -T:build compile

bin/run-fast-unittests

-6
This file was deleted.

bin/run-hht-tests

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/env bash
2+
3+
set -o errexit
4+
set -o pipefail
5+
6+
TIMBRE_LEVEL=':fatal' ./bin/kaocha --focus :clj-hht "$@"

bin/run-pss-tests

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/env bash
2+
3+
set -o errexit
4+
set -o pipefail
5+
6+
TIMBRE_LEVEL=':fatal' ./bin/kaocha --focus :clj-pss "$@"

deps.edn

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
io.replikativ/hasch {:mvn/version "0.3.7"}
44
io.replikativ/hitchhiker-tree {:mvn/version "0.2.222"}
55
io.replikativ/incognito {:mvn/version "0.3.66"}
6-
io.replikativ/konserve {:mvn/version "0.7.275"}
7-
persistent-sorted-set/persistent-sorted-set {:mvn/version "0.1.4"}
6+
io.replikativ/konserve {:mvn/version "0.7.285"}
7+
persistent-sorted-set/persistent-sorted-set {:mvn/version "0.2.1"}
88
environ/environ {:mvn/version "1.2.0"}
99
com.taoensso/timbre {:mvn/version "5.2.1"}
1010
io.replikativ/superv.async {:mvn/version "0.3.43"}
@@ -47,7 +47,7 @@
4747
:extra-deps {clj-http/clj-http {:mvn/version "3.12.3"}
4848
org.clojure/tools.cli {:mvn/version "1.0.206"}}}
4949

50-
:benchmark {:main-opts ["-m" "benchmark.core"]
50+
:benchmark {:main-opts ["-m" "benchmark.cli"]
5151
:extra-paths ["benchmark/src"]
5252
:extra-deps {clj-http/clj-http {:mvn/version "3.12.3"}
5353
org.clojure/tools.cli {:mvn/version "1.0.206"}

doc/benchmarking.md

+8-2
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ TIMBRE_LEVEL=':warn' clj -M:benchmark run -f :query -q :simple-query -i 10
6262
Options for `-c`:
6363
- `mem-set` for in-memory database with persistent-set index
6464
- `mem-hht` for in-memory database with hitchhiker-tree index
65-
- `file` for database with file store backend and hitchhiker-tree index
65+
- `file-set` for database with file store backend and persistent-set index
66+
- `file-hht` for database with file store backend and hitchhiker-tree index
6667

6768
Implementations:
6869

@@ -78,7 +79,12 @@ Implementations:
7879
:schema-flexibility :write
7980
:keep-history? false
8081
:index :datahike.index/hitchhiker-tree}}
81-
{:config-name "file"
82+
{:config-name "file-set"
83+
:config {:store {:backend :file :path "/tmp/performance-set"}
84+
:schema-flexibility :write
85+
:keep-history? false
86+
:index :datahike.index/persistent-set}}
87+
{:config-name "file-hht"
8288
:config {:store {:backend :file :path "/tmp/performance-hht"}
8389
:schema-flexibility :write
8490
:keep-history? false

src/datahike/config.cljc

+32-17
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,20 @@
55
[environ.core :refer [env]]
66
[datahike.tools :as tools]
77
[datahike.store :as ds]
8-
[datahike.constants :as c])
8+
[datahike.index :as di])
99
(:import [java.net URI]))
1010

11+
(def ^:dynamic default-index :datahike.index/persistent-set)
12+
(def ^:dynamic default-search-cache-size 10000)
13+
(def ^:dynamic default-store-cache-size 1000)
14+
1115
(s/def ::index #{:datahike.index/hitchhiker-tree :datahike.index/persistent-set})
1216
(s/def ::keep-history? boolean?)
1317
(s/def ::schema-flexibility #{:read :write})
1418
(s/def ::attribute-refs? boolean?)
19+
(s/def ::search-cache-size nat-int?)
20+
(s/def ::store-cache-size pos-int?)
21+
(s/def ::crypto-hash? boolean?)
1522
(s/def ::entity (s/or :map associative? :vec vector?))
1623
(s/def ::initial-tx (s/nilable (s/or :data (s/coll-of ::entity) :path string?)))
1724
(s/def ::name string?)
@@ -32,6 +39,9 @@
3239
::keep-history?
3340
::schema-flexibility
3441
::attribute-refs?
42+
::search-cache-size
43+
::store-cache-size
44+
::crypto-hash?
3545
::initial-tx
3646
::name
3747
::middleware]))
@@ -42,9 +52,9 @@
4252
:opt-un [:deprecated/temporal-index :deprecated/schema-on-read]))
4353

4454
(defn from-deprecated
45-
[{:keys [backend username password path host port] :as backend-cfg}
55+
[{:keys [backend username password path host port] :as _backend-cfg}
4656
& {:keys [schema-on-read temporal-index index initial-tx]
47-
:as index-cfg
57+
:as _index-cfg
4858
:or {schema-on-read false
4959
index :datahike.index/hitchhiker-tree
5060
temporal-index true}}]
@@ -60,14 +70,14 @@
6070
:level {:path path}
6171
:file {:path path}))
6272
:index index
63-
:index-config {:index-b-factor c/default-index-b-factor
64-
:index-log-size c/default-index-log-size
65-
:index-data-node-size c/default-index-data-node-size}
73+
:index-config (di/default-index-config index)
6674
:keep-history? temporal-index
6775
:attribute-refs? false
6876
:initial-tx initial-tx
6977
:schema-flexibility (if (true? schema-on-read) :read :write)
70-
:cache-size 100000})
78+
:crypto-hash? false
79+
:search-cache-size default-search-cache-size
80+
:store-cache-size default-store-cache-size})
7181

7282
(defn int-from-env
7383
[key default]
@@ -104,11 +114,11 @@
104114
:schema-flexibility :read
105115
:name (z/rand-german-mammal)
106116
:attribute-refs? false
107-
:index :datahike.index/hitchhiker-tree
108-
:cache-size 100000
109-
:index-config {:index-b-factor c/default-index-b-factor
110-
:index-log-size c/default-index-log-size
111-
:index-data-node-size c/default-index-data-node-size}})
117+
:index default-index
118+
:search-cache-size default-search-cache-size
119+
:store-cache-size default-store-cache-size
120+
:crypto-hash? false
121+
:index-config (di/default-index-config default-index)})
112122

113123
(defn remove-nils
114124
"Thanks to https://stackoverflow.com/a/34221816"
@@ -133,17 +143,22 @@
133143
store-config (ds/default-config (merge
134144
{:backend (keyword (:datahike-store-backend env :mem))}
135145
(:store config-as-arg)))
146+
index (if (:datahike-index env)
147+
(keyword "datahike.index" (:datahike-index env))
148+
default-index)
136149
config {:store store-config
137150
:initial-tx (:datahike-intial-tx env)
138151
:keep-history? (bool-from-env :datahike-keep-history true)
139152
:attribute-refs? (bool-from-env :datahike-attribute-refs false)
140153
:name (:datahike-name env (z/rand-german-mammal))
141154
:schema-flexibility (keyword (:datahike-schema-flexibility env :write))
142-
:index (keyword "datahike.index" (:datahike-index env "hitchhiker-tree"))
143-
:cache-size (:cache-size env 100000)
144-
:index-config {:index-b-factor (int-from-env :datahike-b-factor c/default-index-b-factor)
145-
:index-log-size (int-from-env :datahike-log-size c/default-index-log-size)
146-
:index-data-node-size (int-from-env :datahike-data-node-size c/default-index-data-node-size)}}
155+
:index index
156+
:crypto-hash? false
157+
:search-cache-size (int-from-env :datahike-search-cache-size default-search-cache-size)
158+
:store-cache-size (int-from-env :datahike-store-cache-size default-store-cache-size)
159+
:index-config (if-let [index-config (map-from-env :datahike-index-config nil)]
160+
index-config
161+
(di/default-index-config index))}
147162
merged-config ((comp remove-nils tools/deep-merge) config config-as-arg)
148163
{:keys [schema-flexibility initial-tx store attribute-refs?]} merged-config
149164
config-spec (ds/config-spec store)]

0 commit comments

Comments
 (0)