Skip to content

Commit 03b129b

Browse files
committed
Hash String and ByteArray in chunks
Instead of hashing these values byte by byte, we hash them in chunks of 8, 4, and 2 bytes, and only hash the remaining bytes one at a time. Depending on the size of the input, this can improve performance by a factor of up to 4-5.

Changelog: performance
1 parent 69b9ad7 commit 03b129b

File tree

4 files changed

+78
-11
lines changed

4 files changed

+78
-11
lines changed

std/src/std/byte_array.inko

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import std.hash (Hash, Hasher)
88
import std.iter (Stream)
99
import std.option (Option)
1010
import std.ptr
11-
import std.string (Bytes, IntoString, ToString)
11+
import std.string (Bytes, IntoString, ToString, hash_bytes)
1212

1313
fn extern inko_byte_array_new(state: Pointer[UInt8]) -> ByteArray
1414

@@ -728,7 +728,7 @@ impl Clone for ByteArray {
728728

729729
impl Hash for ByteArray {
730730
fn pub hash[H: mut + Hasher](hasher: mut H) {
731-
iter.each(fn (v) { hasher.write(v) })
731+
hash_bytes(self, hasher)
732732
}
733733
}
734734

std/src/std/string.inko

+38-6
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,43 @@ fn padding(string: String, chars: Int, pad_to: Int) -> String {
105105
}
106106
}
107107

108+
# Writes the contents of `bytes` to `hasher`, reading the data in chunks.
#
# The size of the input is hashed first, followed by the data itself. The data
# is consumed in chunks of 8, 4 and 2 bytes for as long as enough bytes remain,
# and only the bytes left over after that are hashed one at a time.
#
# NOTE(review): the wide reads go through a pointer cast without any alignment
# check, so this presumably relies on unaligned reads being acceptable on the
# supported targets; confirm.
fn hash_bytes[T: Bytes, H: mut + Hasher](bytes: ref T, hasher: mut H) {
  let mut len = bytes.size
  let mut cur = bytes.to_pointer

  len.hash(hasher)

  # The comparisons below use `>=` rather than `>`, so that an input (or
  # remainder) of exactly 8, 4 or 2 bytes is consumed by a single wide read
  # instead of falling through to the smaller chunk sizes. Each read only
  # happens when at least that many bytes remain, so it stays within the input.
  while len >= 8 {
    let val = (cur as Pointer[UInt64]).0 as Int

    val.hash(hasher)
    cur = ptr.add(cur, 8)
    len -= 8
  }

  # Fewer than 8 bytes remain here, so this runs at most once.
  while len >= 4 {
    let val = (cur as Pointer[UInt32]).0 as Int

    val.hash(hasher)
    cur = ptr.add(cur, 4)
    len -= 4
  }

  # Fewer than 4 bytes remain here, so this runs at most once.
  while len >= 2 {
    let val = (cur as Pointer[UInt16]).0 as Int

    val.hash(hasher)
    cur = ptr.add(cur, 2)
    len -= 2
  }

  # Whatever is left (at most a single byte) is hashed on its own.
  while len > 0 {
    (cur.0 as Int).hash(hasher)
    cur = ptr.add(cur, 1)
    len -= 1
  }
}
144+
108145
# A type that can be moved into a `String`.
109146
trait pub IntoString {
110147
# Moves `self` into a `String`.
@@ -855,12 +892,7 @@ impl Equal for String {
855892

856893
impl Hash for String {
857894
fn pub hash[H: mut + Hasher](hasher: mut H) {
858-
let mut index = 0
859-
860-
while index < size {
861-
hasher.write(byte_unchecked(index))
862-
index += 1
863-
}
895+
hash_bytes(self, hasher)
864896
}
865897
}
866898

std/test/std/test_byte_array.inko

+18-2
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,24 @@ fn pub tests(t: mut Tests) {
312312
})
313313

314314
t.test('ByteArray.hash', fn (t) {
315-
t.equal(hash(ByteArray.from_array([10])), hash(ByteArray.from_array([10])))
316-
t.not_equal(hash(ByteArray.from_array([10, 20])), hash(ByteArray.new))
315+
[
316+
'a'.to_byte_array,
317+
'aa'.to_byte_array,
318+
'aaa'.to_byte_array,
319+
'aaaa'.to_byte_array,
320+
'aaaa'.to_byte_array,
321+
'aaaaa'.to_byte_array,
322+
'aaaaaa'.to_byte_array,
323+
'aaaaaaa'.to_byte_array,
324+
'aaaaaaaa'.to_byte_array,
325+
'aaaaaaaaa'.to_byte_array,
326+
'aaaaaaaaaa'.to_byte_array,
327+
'aaaaaaaaaaa'.to_byte_array,
328+
]
329+
.into_iter
330+
.each(fn (v) { t.equal(hash(v), hash(v)) })
331+
332+
t.not_equal(hash('hello'.to_byte_array), hash('world'.to_byte_array))
317333
})
318334

319335
t.test('ByteArray.contains?', fn (t) {

std/test/std/test_string.inko

+20-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,26 @@ fn pub tests(t: mut Tests) {
269269
t.not_equal('aaaaaaaaa', 'aaaaaaaab')
270270
})
271271

272-
t.test('String.hash', fn (t) { t.equal(hash('foo'), hash('foo')) })
272+
t.test('String.hash', fn (t) {
273+
[
274+
'a',
275+
'aa',
276+
'aaa',
277+
'aaaa',
278+
'aaaa',
279+
'aaaaa',
280+
'aaaaaa',
281+
'aaaaaaa',
282+
'aaaaaaaa',
283+
'aaaaaaaaa',
284+
'aaaaaaaaaa',
285+
'aaaaaaaaaaa',
286+
]
287+
.into_iter
288+
.each(fn (v) { t.equal(hash(v), hash(v)) })
289+
290+
t.not_equal(hash('hello'), hash('world'))
291+
})
273292

274293
t.test('String.+', fn (t) {
275294
t.equal('foo' + 'bar', 'foobar')

0 commit comments

Comments
 (0)